From ae1ff3d623905947158fd3394854c23026337810 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 13 Jul 2015 14:31:28 +0300 Subject: iommu: iova: Move iova cache management to the iova library This is necessary to separate intel-iommu from the iova library. Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse --- include/linux/iova.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 3920a19d8194..92f7177db2ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -68,8 +68,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -int iommu_iova_cache_init(void); -void iommu_iova_cache_destroy(void); +int iova_cache_get(void); +void iova_cache_put(void); struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); -- cgit v1.2.3 From 87db73aebf55554fefaa3eade0a28f282a1511b8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 7 Aug 2015 09:36:54 +0100 Subject: efi: Add support for EFI_MEMORY_RO attribute introduced by UEFIv2.5 The UEFI spec v2.5 introduces a new memory attribute EFI_MEMORY_RO, which is now the preferred attribute to convey that the nature of the contents of such a region allows it to be mapped read-only (i.e., it contains .text and .rodata only). The specification of the existing EFI_MEMORY_WP attribute has been updated to align more closely with its common use as a cacheability attribute rather than a permission attribute. Add the #define and add the attribute to the memory map dumping routine. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Reviewed-by: Laszlo Ersek Cc: H. Peter Anvin Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438936621-5215-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 8 +++++--- include/linux/efi.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index d6144e3b97c5..d7a9160008d3 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -588,16 +588,18 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, attr = md->attribute; if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | - EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_WP | - EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_RUNTIME)) + EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | + EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | + EFI_MEMORY_RUNTIME)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else - snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + snprintf(pos, size, "|%3s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_RP ? "RP" : "", attr & EFI_MEMORY_WP ? "WP" : "", + attr & EFI_MEMORY_RO ? "RO" : "", attr & EFI_MEMORY_UCE ? "UCE" : "", attr & EFI_MEMORY_WB ? "WB" : "", attr & EFI_MEMORY_WT ? 
"WT" : "", diff --git a/include/linux/efi.h b/include/linux/efi.h index 85ef051ac6fb..26ca9e2fd30e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -99,6 +99,7 @@ typedef struct { #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ +#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_DESCRIPTOR_VERSION 1 -- cgit v1.2.3 From a0175b9c76f59c1f5706f986d690e27ba06363dd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 12 Aug 2015 10:22:41 +0200 Subject: iio: st_sensors: add debugfs register read hook This adds a debugfs hook to read/write registers in the ST sensors using debugfs. Proved to be awesome help when trying to debug why IRQs do not arrive. Signed-off-by: Linus Walleij Acked-by: Denis Ciocca Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 1 + drivers/iio/common/st_sensors/st_sensors_core.c | 22 ++++++++++++++++++++++ drivers/iio/gyro/st_gyro_core.c | 1 + drivers/iio/magnetometer/st_magn_core.c | 1 + drivers/iio/pressure/st_pressure_core.c | 1 + include/linux/iio/common/st_sensors.h | 4 ++++ 6 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index ff30f8806880..dab8b76c1427 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -618,6 +618,7 @@ static const struct iio_info accel_info = { .attrs = &st_accel_attribute_group, .read_raw = &st_accel_read_raw, .write_raw = &st_accel_write_raw, + .debugfs_reg_access = &st_sensors_debugfs_reg_access, }; #ifdef CONFIG_IIO_TRIGGER diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 2e7fdb502645..25258e2c1a82 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -44,6 +44,28 @@ st_sensors_write_data_with_mask_error: return err; } +int st_sensors_debugfs_reg_access(struct iio_dev *indio_dev, + unsigned reg, unsigned writeval, + unsigned *readval) +{ + struct st_sensor_data *sdata = iio_priv(indio_dev); + u8 readdata; + int err; + + if (!readval) + return sdata->tf->write_byte(&sdata->tb, sdata->dev, + (u8)reg, (u8)writeval); + + err = sdata->tf->read_byte(&sdata->tb, sdata->dev, (u8)reg, &readdata); + if (err < 0) + return err; + + *readval = (unsigned)readdata; + + return 0; +} +EXPORT_SYMBOL(st_sensors_debugfs_reg_access); + static int st_sensors_match_odr(struct st_sensor_settings *sensor_settings, unsigned int odr, struct st_sensor_odr_avl *odr_out) { diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index 4b993a5bc9a1..02eddcebeea3 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -383,6 +383,7 @@ static const struct iio_info gyro_info = { .attrs = &st_gyro_attribute_group, .read_raw = &st_gyro_read_raw, .write_raw = &st_gyro_write_raw, + .debugfs_reg_access = &st_sensors_debugfs_reg_access, }; #ifdef CONFIG_IIO_TRIGGER diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index f8dc4b85d70c..b27f0146647b 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -560,6 +560,7 @@ static const struct iio_info magn_info = { .attrs = &st_magn_attribute_group, .read_raw = 
&st_magn_read_raw, .write_raw = &st_magn_write_raw, + .debugfs_reg_access = &st_sensors_debugfs_reg_access, }; #ifdef CONFIG_IIO_TRIGGER diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index eb41d2b92c24..b39a2fb0671c 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -400,6 +400,7 @@ static const struct iio_info press_info = { .attrs = &st_press_attribute_group, .read_raw = &st_press_read_raw, .write_raw = &st_press_write_raw, + .debugfs_reg_access = &st_sensors_debugfs_reg_access, }; #ifdef CONFIG_IIO_TRIGGER diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 3c17cd7fdf06..2fe939c73cd2 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -271,6 +271,10 @@ void st_sensors_power_enable(struct iio_dev *indio_dev); void st_sensors_power_disable(struct iio_dev *indio_dev); +int st_sensors_debugfs_reg_access(struct iio_dev *indio_dev, + unsigned reg, unsigned writeval, + unsigned *readval); + int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr); int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable); -- cgit v1.2.3 From 735ad074ffa72ccc4fdba8e54eb024df95545e7d Mon Sep 17 00:00:00 2001 From: Vladimir Barinov Date: Thu, 20 Aug 2015 22:37:39 +0300 Subject: iio: Support triggered events Support triggered events. This is useful for chips that don't have their own interrupt sources. It allows to use generic/standalone iio triggers for those drivers. Signed-off-by: Vladimir Barinov Signed-off-by: Jonathan Cameron --- drivers/iio/Kconfig | 6 +++ drivers/iio/Makefile | 2 + drivers/iio/industrialio-core.c | 4 +- drivers/iio/industrialio-trigger.c | 12 +++++- drivers/iio/industrialio-triggered-event.c | 68 ++++++++++++++++++++++++++++++ include/linux/iio/iio.h | 3 ++ include/linux/iio/triggered_event.h | 11 +++++ 7 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 drivers/iio/industrialio-triggered-event.c create mode 100644 include/linux/iio/triggered_event.h (limited to 'include/linux') diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig index 3c6c6e28a60a..6fe0d6524c9c 100644 --- a/drivers/iio/Kconfig +++ b/drivers/iio/Kconfig @@ -38,6 +38,12 @@ config IIO_CONSUMERS_PER_TRIGGER This value controls the maximum number of consumers that a given trigger may handle. Default is 2. +config IIO_TRIGGERED_EVENT + tristate + select IIO_TRIGGER + help + Provides helper functions for setting up triggered events. 
+ source "drivers/iio/accel/Kconfig" source "drivers/iio/adc/Kconfig" source "drivers/iio/amplifiers/Kconfig" diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile index 7ddb988338ec..40995f366843 100644 --- a/drivers/iio/Makefile +++ b/drivers/iio/Makefile @@ -7,6 +7,8 @@ industrialio-y := industrialio-core.o industrialio-event.o inkern.o industrialio-$(CONFIG_IIO_BUFFER) += industrialio-buffer.o industrialio-$(CONFIG_IIO_TRIGGER) += industrialio-trigger.o +obj-$(CONFIG_IIO_TRIGGERED_EVENT) += industrialio-triggered-event.o + obj-y += accel/ obj-y += adc/ obj-y += amplifiers/ diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index b347524d1b6d..bef690ed0480 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -962,7 +962,7 @@ static void iio_device_unregister_sysfs(struct iio_dev *indio_dev) static void iio_dev_release(struct device *device) { struct iio_dev *indio_dev = dev_to_iio_dev(device); - if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) + if (indio_dev->modes & (INDIO_BUFFER_TRIGGERED | INDIO_EVENT_TRIGGERED)) iio_device_unregister_trigger_consumer(indio_dev); iio_device_unregister_eventset(indio_dev); iio_device_unregister_sysfs(indio_dev); @@ -1243,7 +1243,7 @@ int iio_device_register(struct iio_dev *indio_dev) "Failed to register event set\n"); goto error_free_sysfs; } - if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) + if (indio_dev->modes & (INDIO_BUFFER_TRIGGERED | INDIO_EVENT_TRIGGERED)) iio_device_register_trigger_consumer(indio_dev); if ((indio_dev->modes & INDIO_ALL_BUFFER_MODES) && diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 570606c2adbd..ae2806aafb72 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -366,10 +366,18 @@ static ssize_t iio_trigger_write_current(struct device *dev, indio_dev->trig = trig; - if (oldtrig) + if (oldtrig) { + if (indio_dev->modes & INDIO_EVENT_TRIGGERED) + iio_trigger_detach_poll_func(oldtrig, + indio_dev->pollfunc_event); iio_trigger_put(oldtrig); - if (indio_dev->trig) + } + if (indio_dev->trig) { iio_trigger_get(indio_dev->trig); + if (indio_dev->modes & INDIO_EVENT_TRIGGERED) + iio_trigger_attach_poll_func(indio_dev->trig, + indio_dev->pollfunc_event); + } return len; } diff --git a/drivers/iio/industrialio-triggered-event.c b/drivers/iio/industrialio-triggered-event.c new file mode 100644 index 000000000000..8cc254fac16a --- /dev/null +++ b/drivers/iio/industrialio-triggered-event.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2015 Cogent Embedded, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +/** + * iio_triggered_event_setup() - Setup pollfunc_event for triggered event + * @indio_dev: IIO device structure + * @h: Function which will be used as pollfunc_event top half + * @thread: Function which will be used as pollfunc_event bottom half + * + * This function combines some common tasks which will normally be performed + * when setting up a triggered event. It will allocate the pollfunc_event and + * set mode to use it for triggered event. + * + * Before calling this function the indio_dev structure should already be + * completely initialized, but not yet registered. 
In practice this means that + * this function should be called right before iio_device_register(). + * + * To free the resources allocated by this function call + * iio_triggered_event_cleanup(). + */ +int iio_triggered_event_setup(struct iio_dev *indio_dev, + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p)) +{ + indio_dev->pollfunc_event = iio_alloc_pollfunc(h, + thread, + IRQF_ONESHOT, + indio_dev, + "%s_consumer%d", + indio_dev->name, + indio_dev->id); + if (indio_dev->pollfunc_event == NULL) + return -ENOMEM; + + /* Flag that events polling is possible */ + indio_dev->modes |= INDIO_EVENT_TRIGGERED; + + return 0; +} +EXPORT_SYMBOL(iio_triggered_event_setup); + +/** + * iio_triggered_event_cleanup() - Free resources allocated by iio_triggered_event_setup() + * @indio_dev: IIO device structure + */ +void iio_triggered_event_cleanup(struct iio_dev *indio_dev) +{ + indio_dev->modes &= ~INDIO_EVENT_TRIGGERED; + iio_dealloc_pollfunc(indio_dev->pollfunc_event); +} +EXPORT_SYMBOL(iio_triggered_event_cleanup); + +MODULE_AUTHOR("Vladimir Barinov"); +MODULE_DESCRIPTION("IIO helper functions for setting up triggered events"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 7bb7f673cb3f..19c94c9acc81 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -294,6 +294,7 @@ static inline s64 iio_get_time_ns(void) #define INDIO_BUFFER_TRIGGERED 0x02 #define INDIO_BUFFER_SOFTWARE 0x04 #define INDIO_BUFFER_HARDWARE 0x08 +#define INDIO_EVENT_TRIGGERED 0x10 #define INDIO_ALL_BUFFER_MODES \ (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | INDIO_BUFFER_SOFTWARE) @@ -457,6 +458,7 @@ struct iio_buffer_setup_ops { * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) * @pollfunc: [DRIVER] function run on trigger being received + * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table * @num_channels: [DRIVER] number of channels specified in @channels. * @channel_attr_list: [INTERN] keep track of automatically created channel @@ -495,6 +497,7 @@ struct iio_dev { unsigned scan_index_timestamp; struct iio_trigger *trig; struct iio_poll_func *pollfunc; + struct iio_poll_func *pollfunc_event; struct iio_chan_spec const *channels; int num_channels; diff --git a/include/linux/iio/triggered_event.h b/include/linux/iio/triggered_event.h new file mode 100644 index 000000000000..8fe8537085bb --- /dev/null +++ b/include/linux/iio/triggered_event.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_IIO_TRIGGERED_EVENT_H_ +#define _LINUX_IIO_TRIGGERED_EVENT_H_ + +#include + +int iio_triggered_event_setup(struct iio_dev *indio_dev, + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p)); +void iio_triggered_event_cleanup(struct iio_dev *indio_dev); + +#endif -- cgit v1.2.3 From a39f1d5e4303f535840a44eee87326fd9743de7c Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 13 Aug 2015 15:46:04 -0700 Subject: mtd: spi-nor: add forward declaration for mtd_info This header can't actually stand alone, as it relies on the declaration (but not definition) of struct mtd_info. Let's fix that. 
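As a minimal sketch of the idiom (hypothetical translation unit, not part of the patch): the forward declaration suffices because the header only refers to struct mtd_info through a pointer, so the compiler never needs the complete type:

    /* hypothetical consumer that includes only spi-nor.h */
    struct mtd_info;                    /* declaration, no definition */

    struct spi_nor_example {
            struct mtd_info *mtd;       /* pointer member: incomplete type is fine */
    };
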
Signed-off-by: Brian Norris Tested-by: Joachim Eastwood --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index e5409524bb0a..85a24baea093 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -127,6 +127,8 @@ enum spi_nor_option_flags { SNOR_F_USE_FSR = BIT(0), }; +struct mtd_info; + /** * struct spi_nor - Structure for defining a the SPI NOR layer * @mtd: point to a mtd_info structure -- cgit v1.2.3 From 1976367173a47f801c67b5f456922d79c60d0d42 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 13 Aug 2015 15:46:05 -0700 Subject: mtd: spi-nor: embed struct mtd_info within struct spi_nor This reflects the proper layering, so let's do it. Signed-off-by: Brian Norris Tested-by: Joachim Eastwood --- drivers/mtd/devices/m25p80.c | 10 ++++------ drivers/mtd/spi-nor/fsl-quadspi.c | 10 ++++------ drivers/mtd/spi-nor/nxp-spifi.c | 6 ++---- drivers/mtd/spi-nor/spi-nor.c | 8 ++++---- include/linux/mtd/spi-nor.h | 2 +- 5 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 24965ae9f7aa..b6bc9a2a5f87 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -31,7 +31,6 @@ struct m25p { struct spi_device *spi; struct spi_nor spi_nor; - struct mtd_info mtd; u8 command[MAX_CMD_SIZE]; }; @@ -159,7 +158,7 @@ static int m25p80_erase(struct spi_nor *nor, loff_t offset) struct m25p *flash = nor->priv; dev_dbg(nor->dev, "%dKiB at 0x%08x\n", - flash->mtd.erasesize / 1024, (u32)offset); + flash->spi_nor.mtd.erasesize / 1024, (u32)offset); /* Set up command buffer. */ flash->command[0] = nor->erase_opcode; @@ -201,7 +200,6 @@ static int m25p_probe(struct spi_device *spi) nor->read_reg = m25p80_read_reg; nor->dev = &spi->dev; - nor->mtd = &flash->mtd; nor->priv = flash; spi_set_drvdata(spi, flash); @@ -213,7 +211,7 @@ static int m25p_probe(struct spi_device *spi) mode = SPI_NOR_DUAL; if (data && data->name) - flash->mtd.name = data->name; + nor->mtd.name = data->name; /* For some (historical?) reason many platforms provide two different * names in flash_platform_data: "name" and "type". Quite often name is @@ -231,7 +229,7 @@ static int m25p_probe(struct spi_device *spi) ppdata.of_node = spi->dev.of_node; - return mtd_device_parse_register(&flash->mtd, NULL, &ppdata, + return mtd_device_parse_register(&nor->mtd, NULL, &ppdata, data ? data->parts : NULL, data ? data->nr_parts : 0); } @@ -242,7 +240,7 @@ static int m25p_remove(struct spi_device *spi) struct m25p *flash = spi_get_drvdata(spi); /* Clean up MTD stuff. 
*/ - return mtd_device_unregister(&flash->mtd); + return mtd_device_unregister(&flash->spi_nor.mtd); } /* diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index ad9f73ae6eea..19f61783cb56 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -259,7 +259,6 @@ static struct fsl_qspi_devtype_data imx6ul_data = { #define FSL_QSPI_MAX_CHIP 4 struct fsl_qspi { - struct mtd_info mtd[FSL_QSPI_MAX_CHIP]; struct spi_nor nor[FSL_QSPI_MAX_CHIP]; void __iomem *iobase; void __iomem *ahb_addr; @@ -888,7 +887,7 @@ static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs) int ret; dev_dbg(nor->dev, "%dKiB at 0x%08x:0x%08x\n", - nor->mtd->erasesize / 1024, q->chip_base_addr, (u32)offs); + nor->mtd.erasesize / 1024, q->chip_base_addr, (u32)offs); ret = fsl_qspi_runcmd(q, nor->erase_opcode, offs, 0); if (ret) @@ -1013,9 +1012,8 @@ static int fsl_qspi_probe(struct platform_device *pdev) i *= 2; nor = &q->nor[i]; - mtd = &q->mtd[i]; + mtd = &nor->mtd; - nor->mtd = mtd; nor->dev = dev; nor->priv = q; @@ -1086,7 +1084,7 @@ last_init_failed: /* skip the holes */ if (!q->has_second_chip) i *= 2; - mtd_device_unregister(&q->mtd[i]); + mtd_device_unregister(&q->nor[i].mtd); } mutex_failed: mutex_destroy(&q->lock); @@ -1106,7 +1104,7 @@ static int fsl_qspi_remove(struct platform_device *pdev) /* skip the holes */ if (!q->has_second_chip) i *= 2; - mtd_device_unregister(&q->mtd[i]); + mtd_device_unregister(&q->nor[i].mtd); } /* disable the hardware */ diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c index ce6a478a02a5..0f6b452574bd 100644 --- a/drivers/mtd/spi-nor/nxp-spifi.c +++ b/drivers/mtd/spi-nor/nxp-spifi.c @@ -60,7 +60,6 @@ struct nxp_spifi { struct clk *clk_reg; void __iomem *io_base; void __iomem *flash_base; - struct mtd_info mtd; struct spi_nor nor; bool memory_mode; u32 mcmd; @@ -331,7 +330,6 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi, writel(ctrl, spifi->io_base + SPIFI_CTRL); - spifi->nor.mtd = &spifi->mtd; spifi->nor.dev = spifi->dev; spifi->nor.priv = spifi; spifi->nor.read = nxp_spifi_read; @@ -364,7 +362,7 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi, } ppdata.of_node = np; - ret = mtd_device_parse_register(&spifi->mtd, NULL, &ppdata, NULL, 0); + ret = mtd_device_parse_register(&spifi->nor.mtd, NULL, &ppdata, NULL, 0); if (ret) { dev_err(spifi->dev, "mtd device parse failed\n"); return ret; @@ -453,7 +451,7 @@ static int nxp_spifi_remove(struct platform_device *pdev) { struct nxp_spifi *spifi = platform_get_drvdata(pdev); - mtd_device_unregister(&spifi->mtd); + mtd_device_unregister(&spifi->nor.mtd); clk_disable_unprepare(spifi->clk_spifi); clk_disable_unprepare(spifi->clk_reg); diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index d98180e73307..834a06049ca2 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -265,7 +265,7 @@ static int spi_nor_wait_till_ready(struct spi_nor *nor) */ static int erase_chip(struct spi_nor *nor) { - dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10)); + dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd.size >> 10)); return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0); } @@ -373,7 +373,7 @@ erase_err: static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len) { - struct mtd_info *mtd = nor->mtd; + struct mtd_info *mtd = &nor->mtd; uint32_t offset = ofs; uint8_t status_old, status_new; int ret = 0; @@ -407,7 +407,7 @@ static int stm_lock(struct 
spi_nor *nor, loff_t ofs, uint64_t len) static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) { - struct mtd_info *mtd = nor->mtd; + struct mtd_info *mtd = &nor->mtd; uint32_t offset = ofs; uint8_t status_old, status_new; int ret = 0; @@ -1004,7 +1004,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) { const struct flash_info *info = NULL; struct device *dev = nor->dev; - struct mtd_info *mtd = nor->mtd; + struct mtd_info *mtd = &nor->mtd; struct device_node *np = dev->of_node; int ret; int i; diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 85a24baea093..495433d3f56d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -162,7 +162,7 @@ struct mtd_info; * @priv: the private data */ struct spi_nor { - struct mtd_info *mtd; + struct mtd_info mtd; struct mutex lock; struct device *dev; u32 page_size; -- cgit v1.2.3 From a9cadf72bfb7185a680eb7599b9bda65d1515b9f Mon Sep 17 00:00:00 2001 From: Ezequiel García Date: Fri, 21 Aug 2015 15:47:28 -0300 Subject: mtd: pxa3xx_nand: Remove unused platform-data flash specification The driver supports board files specificating the flash device, by passing a pxa3xx_nand_flash struct (with flash parameters) in the platform data struct. Currently this support is not being used by any board file. Moreover, we'd like to deprecate such usage in favor of using the device table in nand_ids.c. So let's remove the ad-hoc flash specification. Signed-off-by: Ezequiel Garcia Acked-by: Robert Jarzmik Signed-off-by: Brian Norris --- drivers/mtd/nand/pxa3xx_nand.c | 33 ++++++++++++++++++++++----- include/linux/platform_data/mtd-nand-pxa3xx.h | 27 ---------------------- 2 files changed, 27 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 740983a34626..165112c1a7d1 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -252,6 +252,30 @@ static bool use_dma = 1; module_param(use_dma, bool, 0444); MODULE_PARM_DESC(use_dma, "enable DMA for data transferring to/from NAND HW"); +struct pxa3xx_nand_timing { + unsigned int tCH; /* Enable signal hold time */ + unsigned int tCS; /* Enable signal setup time */ + unsigned int tWH; /* ND_nWE high duration */ + unsigned int tWP; /* ND_nWE pulse time */ + unsigned int tRH; /* ND_nRE high duration */ + unsigned int tRP; /* ND_nRE pulse width */ + unsigned int tR; /* ND_nWE high to ND_nRE low for read */ + unsigned int tWHR; /* ND_nWE high to ND_nRE low for status read */ + unsigned int tAR; /* ND_ALE low to ND_nRE low delay */ +}; + +struct pxa3xx_nand_flash { + char *name; + uint32_t chip_id; + unsigned int page_per_block; /* Pages per block (PG_PER_BLK) */ + unsigned int page_size; /* Page size in bytes (PAGE_SZ) */ + unsigned int flash_width; /* Width of Flash memory (DWIDTH_M) */ + unsigned int dfc_width; /* Width of flash controller(DWIDTH_C) */ + unsigned int num_blocks; /* Number of physical blocks in Flash */ + + struct pxa3xx_nand_timing *timing; /* NAND Flash timing */ +}; + static struct pxa3xx_nand_timing timing[] = { { 40, 80, 60, 100, 80, 100, 90000, 400, 40, }, { 10, 0, 20, 40, 30, 40, 11123, 110, 10, }, @@ -1491,19 +1515,16 @@ static int pxa3xx_nand_scan(struct mtd_info *mtd) return -EINVAL; } - num = ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1; + num = ARRAY_SIZE(builtin_flash_types) - 1; for (i = 0; i < num; i++) { - if (i < pdata->num_flash) - f = pdata->flash + i; - else - f = 
&builtin_flash_types[i - pdata->num_flash + 1]; + f = &builtin_flash_types[i + 1]; /* find the chip in default list */ if (f->chip_id == id) break; } - if (i >= (ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1)) { + if (i >= (ARRAY_SIZE(builtin_flash_types) - 1)) { dev_err(&info->pdev->dev, "ERROR!! flash not defined!!!\n"); return -EINVAL; diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h index ac4ea2e641c7..394d15597dc7 100644 --- a/include/linux/platform_data/mtd-nand-pxa3xx.h +++ b/include/linux/platform_data/mtd-nand-pxa3xx.h @@ -4,30 +4,6 @@ #include #include -struct pxa3xx_nand_timing { - unsigned int tCH; /* Enable signal hold time */ - unsigned int tCS; /* Enable signal setup time */ - unsigned int tWH; /* ND_nWE high duration */ - unsigned int tWP; /* ND_nWE pulse time */ - unsigned int tRH; /* ND_nRE high duration */ - unsigned int tRP; /* ND_nRE pulse width */ - unsigned int tR; /* ND_nWE high to ND_nRE low for read */ - unsigned int tWHR; /* ND_nWE high to ND_nRE low for status read */ - unsigned int tAR; /* ND_ALE low to ND_nRE low delay */ -}; - -struct pxa3xx_nand_flash { - char *name; - uint32_t chip_id; - unsigned int page_per_block; /* Pages per block (PG_PER_BLK) */ - unsigned int page_size; /* Page size in bytes (PAGE_SZ) */ - unsigned int flash_width; /* Width of Flash memory (DWIDTH_M) */ - unsigned int dfc_width; /* Width of flash controller(DWIDTH_C) */ - unsigned int num_blocks; /* Number of physical blocks in Flash */ - - struct pxa3xx_nand_timing *timing; /* NAND Flash timing */ -}; - /* * Current pxa3xx_nand controller has two chip select which * both be workable. @@ -63,9 +39,6 @@ struct pxa3xx_nand_platform_data { const struct mtd_partition *parts[NUM_CHIP_SELECT]; unsigned int nr_parts[NUM_CHIP_SELECT]; - - const struct pxa3xx_nand_flash * flash; - size_t num_flash; }; extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info); -- cgit v1.2.3 From 30035e45753b708e7d47a98398500ca005e02b86 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 12:52:04 -0400 Subject: string: provide strscpy() The strscpy() API is intended to be used instead of strlcpy(), and instead of most uses of strncpy(). - Unlike strlcpy(), it doesn't read from memory beyond (src + size). - Unlike strlcpy() or strncpy(), the API provides an easy way to check for destination buffer overflow: an -E2BIG error return value. - The provided implementation is robust in the face of the source buffer being asynchronously changed during the copy, unlike the current implementation of strlcpy(). - Unlike strncpy(), the destination buffer will be NUL-terminated if the string in the source buffer is too long. - Also unlike strncpy(), the destination buffer will not be updated beyond the NUL termination, avoiding strncpy's behavior of zeroing the entire tail end of the destination buffer. (A memset() after the strscpy() can be used if this behavior is desired.) - The implementation should be reasonably performant on all platforms since it uses the asm/word-at-a-time.h API rather than simple byte copy. Kernel-to-kernel string copy is not considered to be performance critical in any case. 
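A minimal usage sketch (hypothetical caller, not part of this patch), showing the overflow check the changelog describes:

    char buf[16];
    ssize_t len;

    len = strscpy(buf, src, sizeof(buf));
    if (len == -E2BIG)
            return -EINVAL; /* src didn't fit; buf holds a truncated, NUL-terminated copy */

    /* on success, len is the number of characters copied, excluding the NUL */
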
Signed-off-by: Chris Metcalf --- include/linux/string.h | 3 ++ lib/string.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index a8d90db9c4b0..9ef7795e65e4 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -25,6 +25,9 @@ extern char * strncpy(char *,const char *, __kernel_size_t); #ifndef __HAVE_ARCH_STRLCPY size_t strlcpy(char *, const char *, size_t); #endif +#ifndef __HAVE_ARCH_STRSCPY +ssize_t __must_check strscpy(char *, const char *, size_t); +#endif #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif diff --git a/lib/string.c b/lib/string.c index 13d1e84ddb80..8dbb7b1eab50 100644 --- a/lib/string.c +++ b/lib/string.c @@ -27,6 +27,10 @@ #include #include +#include +#include +#include + #ifndef __HAVE_ARCH_STRNCASECMP /** * strncasecmp - Case insensitive, length-limited string comparison @@ -146,6 +150,90 @@ size_t strlcpy(char *dest, const char *src, size_t size) EXPORT_SYMBOL(strlcpy); #endif +#ifndef __HAVE_ARCH_STRSCPY +/** + * strscpy - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. + * The routine returns the number of characters copied (not including + * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. + * The behavior is undefined if the string buffers overlap. + * The destination buffer is always NUL terminated, unless it's zero-sized. + * + * Preferred to strlcpy() since the API doesn't require reading memory + * from the src string beyond the specified "count" bytes, and since + * the return value is easier to error-check than strlcpy()'s. + * In addition, the implementation is robust to the string changing out + * from underneath it, unlike the current strlcpy() implementation. + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() + * with an overflow test, then just memset() the tail of the dest buffer. + */ +ssize_t strscpy(char *dest, const char *src, size_t count) +{ + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + size_t max = count; + long res = 0; + + if (count == 0) + return -E2BIG; + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* + * If src is unaligned, don't cross a page boundary, + * since we don't know if the next page is mapped. + */ + if ((long)src & (sizeof(long) - 1)) { + size_t limit = PAGE_SIZE - ((long)src & (PAGE_SIZE - 1)); + if (limit < max) + max = limit; + } +#else + /* If src or dest is unaligned, don't do word-at-a-time. */ + if (((long) dest | (long) src) & (sizeof(long) - 1)) + max = 0; +#endif + + while (max >= sizeof(unsigned long)) { + unsigned long c, data; + + c = *(unsigned long *)(src+res); + *(unsigned long *)(dest+res) = c; + if (has_zero(c, &data, &constants)) { + data = prep_zero_mask(c, data, &constants); + data = create_zero_mask(data); + return res + find_zero(data); + } + res += sizeof(unsigned long); + count -= sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (count) { + char c; + + c = src[res]; + dest[res] = c; + if (!c) + return res; + res++; + count--; + } + + /* Hit buffer length without finding a NUL; force NUL-termination. 
*/ + if (res) + dest[res-1] = '\0'; + + return -E2BIG; +} +EXPORT_SYMBOL(strscpy); +#endif + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another -- cgit v1.2.3 From 11bff0b70cc003b995cf4c2acec4adab90957b02 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 3 Sep 2015 18:35:36 +0200 Subject: mtd: spi-nor: Decouple SPI NOR's device_node from controller device The problem this patch addresses is that SPI NOR flash devices attached to a dedicated SPI NOR controller cannot read their properties from the associated struct device_node. A couple of facts first: 1) Each SPI NOR flash has a struct spi_nor associated with it. 2) Each SPI NOR flash has certain device properties associated with it, for example the OF property 'm25p,fast-read' is a good pick. These properties are used by the SPI NOR core to select which opcodes are sent to such a SPI NOR flash. These properties come from spi_nor.dev->of_node. The problem is that for SPI NOR controllers, the struct spi_nor.dev element points to the struct device of the SPI NOR controller, not to the SPI NOR flash. Therefore, the associated dev->of_node is also that of the controller, so the SPI NOR core code ends up parsing the SPI NOR controller's properties, not the properties of the SPI NOR flash. Note: The m25p80 driver is not affected, because the controller and the flash are the same device, so the associated device_node of the controller and the flash are the same. This patch adjusts the SPI NOR core such that the device_node is not picked from spi_nor.dev directly, but from a new separate spi_nor.flash_node element. This lets the SPI NOR controller drivers set up a different spi_nor.flash_node element for each SPI NOR flash. This patch also fixes the controller drivers to be compatible with this modification and to correctly set the spi_nor.flash_node element.
This patch is inspired by 5844feeaa4154d1c46d3462c7a4653d22356d8b4 mtd: nand: add common DT init code Signed-off-by: Marek Vasut Signed-off-by: Brian Norris --- drivers/mtd/devices/m25p80.c | 1 + drivers/mtd/spi-nor/fsl-quadspi.c | 1 + drivers/mtd/spi-nor/nxp-spifi.c | 1 + drivers/mtd/spi-nor/spi-nor.c | 2 +- include/linux/mtd/spi-nor.h | 2 ++ 5 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index b6bc9a2a5f87..05bf0d71abea 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -200,6 +200,7 @@ static int m25p_probe(struct spi_device *spi) nor->read_reg = m25p80_read_reg; nor->dev = &spi->dev; + nor->flash_node = spi->dev.of_node; nor->priv = flash; spi_set_drvdata(spi, flash); diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index f28dcc1cd63f..f72483247c75 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -1013,6 +1013,7 @@ static int fsl_qspi_probe(struct platform_device *pdev) mtd = &nor->mtd; nor->dev = dev; + nor->flash_node = np; nor->priv = q; /* fill the hooks */ diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c index 0f6b452574bd..ce7bf6b2916b 100644 --- a/drivers/mtd/spi-nor/nxp-spifi.c +++ b/drivers/mtd/spi-nor/nxp-spifi.c @@ -331,6 +331,7 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi, writel(ctrl, spifi->io_base + SPIFI_CTRL); spifi->nor.dev = spifi->dev; + spifi->nor.flash_node = np; spifi->nor.priv = spifi; spifi->nor.read = nxp_spifi_read; spifi->nor.write = nxp_spifi_write; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 55c67a4ea231..22cca87db9ca 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1005,7 +1005,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) const struct flash_info *info = NULL; struct device *dev = nor->dev; struct mtd_info *mtd = &nor->mtd; - struct device_node *np = dev->of_node; + struct device_node *np = nor->flash_node; int ret; int i; diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 495433d3f56d..6379e107f2e5 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -134,6 +134,7 @@ struct mtd_info; * @mtd: point to a mtd_info structure * @lock: the lock for the read/write/erase/lock/unlock operations * @dev: point to a spi device, or a spi nor controller device. + * @flash_node: point to a device node describing this flash instance. * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes * @erase_opcode: the opcode for erasing a sector @@ -165,6 +166,7 @@ struct spi_nor { struct mtd_info mtd; struct mutex lock; struct device *dev; + struct device_node *flash_node; u32 page_size; u8 addr_width; u8 erase_opcode; -- cgit v1.2.3 From 61528d888adf9470db73aaadf1ff9ca35942262a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 3 Sep 2015 18:35:37 +0200 Subject: mtd: nand: Rename nand_chip .dn to .flash_node Use a more descriptive name for the device_node element in struct nand_chip . This name matches the element name used for device_node property of a flash in the spi-nor framework. 
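For driver code the conversion is a mechanical rename (hypothetical fragment, mirroring the brcmnand hunk below):

    chip->dn = dev->of_node;            /* before */
    chip->flash_node = dev->of_node;    /* after */
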
Signed-off-by: Marek Vasut Signed-off-by: Brian Norris --- drivers/mtd/nand/brcmnand/brcmnand.c | 5 +++-- drivers/mtd/nand/nand_base.c | 4 ++-- include/linux/mtd/nand.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index fddb795eeb71..048e4e030fc8 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -1792,7 +1792,8 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) memset(cfg, 0, sizeof(*cfg)); - ret = of_property_read_u32(chip->dn, "brcm,nand-oob-sector-size", + ret = of_property_read_u32(chip->flash_node, + "brcm,nand-oob-sector-size", &oob_sector); if (ret) { /* Use detected size */ @@ -1899,7 +1900,7 @@ static int brcmnand_init_cs(struct brcmnand_host *host) mtd = &host->mtd; chip = &host->chip; - chip->dn = dn; + chip->flash_node = dn; chip->priv = host; mtd->priv = chip; mtd->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "brcmnand.%d", diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index ceb68ca8277a..51f7816d67c9 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3846,8 +3846,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, struct nand_flash_dev *type; int ret; - if (chip->dn) { - ret = nand_dt_init(mtd, chip, chip->dn); + if (chip->flash_node) { + ret = nand_dt_init(mtd, chip, chip->flash_node); if (ret) return ret; } diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 272f42952f34..3140b3d94838 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -544,7 +544,7 @@ struct nand_buffers { * flash device * @IO_ADDR_W: [BOARDSPECIFIC] address to write the 8 I/O lines of the * flash device. - * @dn: [BOARDSPECIFIC] device node describing this instance + * @flash_node: [BOARDSPECIFIC] device node describing this instance * @read_byte: [REPLACEABLE] read one byte from the chip * @read_word: [REPLACEABLE] read one word from the chip * @write_byte: [REPLACEABLE] write a single byte to the chip on the @@ -647,7 +647,7 @@ struct nand_chip { void __iomem *IO_ADDR_R; void __iomem *IO_ADDR_W; - struct device_node *dn; + struct device_node *flash_node; uint8_t (*read_byte)(struct mtd_info *mtd); u16 (*read_word)(struct mtd_info *mtd); -- cgit v1.2.3 From f9f3ce835ddce3c669eee869253105f88819888b Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Wed, 19 Aug 2015 15:26:44 +0530 Subject: mtd: spi-nor: Zap unneeded write_enable from write_reg The 'write_enable' argument is unused and unneeded, so remove it from the API. 
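The resulting change to the callback signature, summarized from the hunks below:

    /* before */
    int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
                     int write_enable);
    /* after */
    int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
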
Signed-off-by: Jagan Teki Cc: David Woodhouse Cc: Han Xu [Brian: fixed for nxp-spifi.c] Signed-off-by: Brian Norris --- drivers/mtd/devices/m25p80.c | 3 +-- drivers/mtd/spi-nor/fsl-quadspi.c | 3 +-- drivers/mtd/spi-nor/nxp-spifi.c | 3 +-- drivers/mtd/spi-nor/spi-nor.c | 16 ++++++++-------- include/linux/mtd/spi-nor.h | 3 +-- 5 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 05bf0d71abea..4b5d7a4655fd 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -61,8 +61,7 @@ static int m25p_cmdsz(struct spi_nor *nor) return 1 + nor->addr_width; } -static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len, - int wr_en) +static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) { struct m25p *flash = nor->priv; struct spi_device *spi = flash->spi; diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index f72483247c75..2954f89fc8be 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -797,8 +797,7 @@ static int fsl_qspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) return 0; } -static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len, - int write_enable) +static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) { struct fsl_qspi *q = nor->priv; int ret; diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c index ce7bf6b2916b..9e82098ae644 100644 --- a/drivers/mtd/spi-nor/nxp-spifi.c +++ b/drivers/mtd/spi-nor/nxp-spifi.c @@ -149,8 +149,7 @@ static int nxp_spifi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) return nxp_spifi_wait_for_cmd(spifi); } -static int nxp_spifi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, - int len, int write_enable) +static int nxp_spifi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) { struct nxp_spifi *spifi = nor->priv; u32 cmd; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 3694f6697344..8818d4325d20 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -145,7 +145,7 @@ static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor) static inline int write_sr(struct spi_nor *nor, u8 val) { nor->cmd_buf[0] = val; - return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0); + return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1); } /* @@ -154,7 +154,7 @@ static inline int write_sr(struct spi_nor *nor, u8 val) */ static inline int write_enable(struct spi_nor *nor) { - return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); + return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0); } /* @@ -162,7 +162,7 @@ static inline int write_enable(struct spi_nor *nor) */ static inline int write_disable(struct spi_nor *nor) { - return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0, 0); + return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0); } static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd) @@ -188,7 +188,7 @@ static inline int set_4byte(struct spi_nor *nor, const struct flash_info *info, write_enable(nor); cmd = enable ? 
SPINOR_OP_EN4B : SPINOR_OP_EX4B; - status = nor->write_reg(nor, cmd, NULL, 0, 0); + status = nor->write_reg(nor, cmd, NULL, 0); if (need_wren) write_disable(nor); @@ -196,7 +196,7 @@ static inline int set_4byte(struct spi_nor *nor, const struct flash_info *info, default: /* Spansion style */ nor->cmd_buf[0] = enable << 7; - return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0); + return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1); } } static inline int spi_nor_sr_ready(struct spi_nor *nor) @@ -267,7 +267,7 @@ static int erase_chip(struct spi_nor *nor) { dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd.size >> 10)); - return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0); + return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0); } static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops) @@ -893,7 +893,7 @@ static int write_sr_cr(struct spi_nor *nor, u16 val) nor->cmd_buf[0] = val & 0xff; nor->cmd_buf[1] = (val >> 8); - return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 2, 0); + return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 2); } static int spansion_quad_enable(struct spi_nor *nor) @@ -935,7 +935,7 @@ static int micron_quad_enable(struct spi_nor *nor) /* set EVCR, enable quad I/O */ nor->cmd_buf[0] = val & ~EVCR_QUAD_EN_MICRON; - ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1, 0); + ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1); if (ret < 0) { dev_err(nor->dev, "error while writing EVCR register\n"); return ret; diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 6379e107f2e5..e9c912d73141 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -186,8 +186,7 @@ struct spi_nor { int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, u8 *buf, size_t len); int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); - int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len, - int write_enable); + int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); int (*read)(struct spi_nor *nor, loff_t from, size_t len, size_t *retlen, u_char *read_buf); -- cgit v1.2.3 From e0f5f3afd2cffa96291cd852056d83ff4e2e99c7 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 14 Aug 2015 17:23:09 +0100 Subject: sched/fair: Make load tracking frequency scale-invariant Apply frequency scaling correction factor to per-entity load tracking to make it frequency invariant. Currently, load appears bigger when the CPU is running slower which affects load-balancing decisions. Each segment of the sched_avg.load_sum geometric series is now scaled by the current frequency so that the sched_avg.load_avg of each sched entity will be invariant from frequency scaling. Moreover, cfs_rq.runnable_load_sum is scaled by the current frequency as well. 
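Numerically, using the scale() helper introduced below and assuming SCHED_CAPACITY_SHIFT == 10 (i.e. full capacity == 1024, as elsewhere in the scheduler):

    #define scale(v, s)     ((v) * (s) >> SCHED_CAPACITY_SHIFT)

    /*
     * A CPU running at half of its maximum frequency reports an
     * arch_scale_freq_capacity() of about 512, so a full 1024-unit
     * segment now contributes scale(1024, 512) == 512 to load_sum.
     */
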
Signed-off-by: Dietmar Eggemann Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vincent Guittot Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: daniel.lezcano@linaro.org Cc: mturquette@baylibre.com Cc: pang.xunlei@zte.com.cn Cc: rjw@rjwysocki.net Cc: sgurrappadi@nvidia.com Cc: yuyang.du@intel.com Link: http://lkml.kernel.org/r/1439569394-11974-2-git-send-email-morten.rasmussen@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- kernel/sched/fair.c | 27 +++++++++++++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..c8d923ba429d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1177,9 +1177,9 @@ struct load_weight { /* * The load_avg/util_avg accumulates an infinite geometric series. - * 1) load_avg factors the amount of time that a sched_entity is - * runnable on a rq into its weight. For cfs_rq, it is the aggregated - * such weights of all runnable and blocked sched_entities. + * 1) load_avg factors frequency scaling into the amount of time that a + * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the + * aggregated such weights of all runnable and blocked sched_entities. * 2) util_avg factors frequency scaling into the amount of time * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. * For cfs_rq, it is the aggregated such times of all runnable and diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 47ece22e7ae1..86cb27cae4b7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2515,6 +2515,8 @@ static u32 __compute_runnable_contrib(u64 n) return contrib + runnable_avg_yN_sum[n]; } +#define scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) + /* * We can represent the historical contribution to runnable average as the * coefficients of a geometric series. To do this we sub-divide our runnable @@ -2547,9 +2549,9 @@ static __always_inline int __update_load_avg(u64 now, int cpu, struct sched_avg *sa, unsigned long weight, int running, struct cfs_rq *cfs_rq) { - u64 delta, periods; + u64 delta, scaled_delta, periods; u32 contrib; - int delta_w, decayed = 0; + int delta_w, scaled_delta_w, decayed = 0; unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu); delta = now - sa->last_update_time; @@ -2585,13 +2587,16 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, * period and accrue it. 
*/ delta_w = 1024 - delta_w; + scaled_delta_w = scale(delta_w, scale_freq); if (weight) { - sa->load_sum += weight * delta_w; - if (cfs_rq) - cfs_rq->runnable_load_sum += weight * delta_w; + sa->load_sum += weight * scaled_delta_w; + if (cfs_rq) { + cfs_rq->runnable_load_sum += + weight * scaled_delta_w; + } } if (running) - sa->util_sum += delta_w * scale_freq >> SCHED_CAPACITY_SHIFT; + sa->util_sum += scaled_delta_w; delta -= delta_w; @@ -2608,23 +2613,25 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, /* Efficiently calculate \sum (1..n_period) 1024*y^i */ contrib = __compute_runnable_contrib(periods); + contrib = scale(contrib, scale_freq); if (weight) { sa->load_sum += weight * contrib; if (cfs_rq) cfs_rq->runnable_load_sum += weight * contrib; } if (running) - sa->util_sum += contrib * scale_freq >> SCHED_CAPACITY_SHIFT; + sa->util_sum += contrib; } /* Remainder of delta accrued against u_0` */ + scaled_delta = scale(delta, scale_freq); if (weight) { - sa->load_sum += weight * delta; + sa->load_sum += weight * scaled_delta; if (cfs_rq) - cfs_rq->runnable_load_sum += weight * delta; + cfs_rq->runnable_load_sum += weight * scaled_delta; } if (running) - sa->util_sum += delta * scale_freq >> SCHED_CAPACITY_SHIFT; + sa->util_sum += scaled_delta; sa->period_contrib += delta; -- cgit v1.2.3 From e3279a2e6d697e00e74f905851ee7cf532f72b2d Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sat, 15 Aug 2015 00:04:41 +0100 Subject: sched/fair: Make utilization tracking CPU scale-invariant Besides the existing frequency scale-invariance correction factor, apply CPU scale-invariance correction factor to utilization tracking to compensate for any differences in compute capacity. This could be due to micro-architectural differences (i.e. instructions per seconds) between cpus in HMP systems (e.g. big.LITTLE), and/or differences in the current maximum frequency supported by individual cpus in SMP systems. In the existing implementation utilization isn't comparable between cpus as it is relative to the capacity of each individual CPU. Each segment of the sched_avg.util_sum geometric series is now scaled by the CPU performance factor too so the sched_avg.util_avg of each sched entity will be invariant from the particular CPU of the HMP/SMP system on which the sched entity is scheduled. With this patch, the utilization of a CPU stays relative to the max CPU performance of the fastest CPU in the system. In contrast to utilization (sched_avg.util_sum), load (sched_avg.load_sum) should not be scaled by compute capacity. The utilization metric is based on running time which only makes sense when cpus are _not_ fully utilized (utilization cannot go beyond 100% even if more tasks are added), where load is runnable time which isn't limited by the capacity of the CPU and therefore is a better metric for overloaded scenarios. If we run two nice-0 busy loops on two cpus with different compute capacity their load should be similar since their compute demands are the same. We have to assume that the compute demand of any task running on a fully utilized CPU (no spare cycles = 100% utilization) is high and the same no matter of the compute capacity of its current CPU, hence we shouldn't scale load by CPU capacity. 
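Per accrued segment, the two correction factors now combine as sketched here (illustrative only; it mirrors the hunks below):

    scaled_delta = scale(delta, scale_freq);        /* load: frequency-invariant */
    /* util additionally applies the CPU capacity factor: */
    sa->util_sum += scale(scaled_delta, scale_cpu); /* freq- and CPU-invariant */
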
Signed-off-by: Dietmar Eggemann Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/55CE7409.1000700@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched/fair.c | 7 ++++--- kernel/sched/sched.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c8d923ba429d..bd38b3ee9e83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1180,7 +1180,7 @@ struct load_weight { * 1) load_avg factors frequency scaling into the amount of time that a * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the * aggregated such weights of all runnable and blocked sched_entities. - * 2) util_avg factors frequency scaling into the amount of time + * 2) util_avg factors frequency and cpu scaling into the amount of time * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. * For cfs_rq, it is the aggregated such times of all runnable and * blocked sched_entities. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 102cdf1e4e97..573dc98c6248 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2553,6 +2553,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, u32 contrib; int delta_w, scaled_delta_w, decayed = 0; unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu); + unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu); delta = now - sa->last_update_time; /* @@ -2596,7 +2597,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, } } if (running) - sa->util_sum += scaled_delta_w; + sa->util_sum += scale(scaled_delta_w, scale_cpu); delta -= delta_w; @@ -2620,7 +2621,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, cfs_rq->runnable_load_sum += weight * contrib; } if (running) - sa->util_sum += contrib; + sa->util_sum += scale(contrib, scale_cpu); } /* Remainder of delta accrued against u_0` */ @@ -2631,7 +2632,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, cfs_rq->runnable_load_sum += weight * scaled_delta; } if (running) - sa->util_sum += scaled_delta; + sa->util_sum += scale(scaled_delta, scale_cpu); sa->period_contrib += delta; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c0726d5fd6a3..167ab4844ee6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1398,7 +1398,7 @@ unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) static __always_inline unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) { - if ((sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1)) + if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1)) return sd->smt_gain / sd->span_weight; return SCHED_CAPACITY_SCALE; -- cgit v1.2.3 From b0e878759452314676fbdd71df4ac67e7d08de5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Aug 2015 14:06:07 +0200 Subject: perf/abi: Document some more aspects of the perf ABI Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 105 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h 
index 092a0e8a479a..d61ee4459eee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -140,27 +140,60 @@ struct hw_perf_event { }; #endif }; + /* + * If the event is a per task event, this will point to the task in + * question. See the comment in perf_event_alloc(). + */ struct task_struct *target; + +/* + * hw_perf_event::state flags; used to track the PERF_EF_* state. + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + int state; + + /* + * The last observed hardware counter value, updated with a + * local64_cmpxchg() such that pmu::read() can be called nested. + */ local64_t prev_count; + + /* + * The period to start the next sample with. + */ u64 sample_period; + + /* + * The period we started this sample with. + */ u64 last_period; + + /* + * However much is left of the current period; note that this is + * a full 64bit value and allows for generation of periods longer + * than hardware might allow. + */ local64_t period_left; + + /* + * State for throttling the event, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 interrupts_seq; u64 interrupts; + /* + * State for freq target events, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 freq_time_stamp; u64 freq_count_stamp; #endif }; -/* - * hw_perf_event::state flags - */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 - struct perf_event; /* @@ -210,7 +243,19 @@ struct pmu { /* * Try and initialize the event for this PMU. - * Should return -ENOENT when the @event doesn't match this PMU. + * + * Returns: + * -ENOENT -- @event is not for this PMU + * + * -ENODEV -- @event is for this PMU but PMU not present + * -EBUSY -- @event is for this PMU but PMU temporarily unavailable + * -EINVAL -- @event is for this PMU but @event is not valid + * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported + * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges + * + * 0 -- @event is for this PMU and valid + * + * Other error return values are allowed. */ int (*event_init) (struct perf_event *event); @@ -221,27 +266,61 @@ struct pmu { void (*event_mapped) (struct perf_event *event); /*optional*/ void (*event_unmapped) (struct perf_event *event); /*optional*/ + /* + * Flags for ->add()/->del()/ ->start()/->stop(). There are + * matching hw_perf_event::state flags. + */ #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ /* - * Adds/Removes a counter to/from the PMU, can be done inside - * a transaction, see the ->*_txn() methods. + * Adds/Removes a counter to/from the PMU, can be done inside a + * transaction, see the ->*_txn() methods. + * + * The add/del callbacks will reserve all hardware resources required + * to service the event, this includes any counter constraint + * scheduling etc. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on. + * + * ->add() called without PERF_EF_START should result in the same state + * as ->add() followed by ->stop(). + * + * ->del() must always PERF_EF_UPDATE stop an event. If it calls + * ->stop() that must deal with already being stopped without + * PERF_EF_UPDATE. 
*/ int (*add) (struct perf_event *event, int flags); void (*del) (struct perf_event *event, int flags); /* - * Starts/Stops a counter present on the PMU. The PMI handler - * should stop the counter when perf_event_overflow() returns - * !0. ->start() will be used to continue. + * Starts/Stops a counter present on the PMU. + * + * The PMI handler should stop the counter when perf_event_overflow() + * returns !0. ->start() will be used to continue. + * + * Also used to change the sample period. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on -- will be called from NMI context when the PMU generates + * NMIs. + * + * ->stop() with PERF_EF_UPDATE will read the counter and update + * period/count values like ->read() would. + * + * ->start() with PERF_EF_RELOAD will reprogram the counter + * value, must be preceded by a ->stop() with PERF_EF_UPDATE. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); /* * Updates the counter value of the event. + * + * For sampling capable PMUs this will also update the software period + * hw_perf_event::period_left field. */ void (*read) (struct perf_event *event); -- cgit v1.2.3 From fbbe07011581990ef74dfac06dc8511b1a14badb Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:45 -0700 Subject: perf/core: Add a 'flags' parameter to the PMU transactional interfaces Currently, the PMU interface allows reading only one counter at a time. But some PMUs, like the 24x7 counters in Power, support reading several counters at once. To leverage this functionality, extend the transaction interface to support a "transaction type". The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input.
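Schematically, every architecture applies the same change; a rough sketch of the pattern, where the example_* names and the bare per-CPU flag word are illustrative stand-ins for each driver's real cpu_hw_events state:

#include <linux/percpu.h>
#include <linux/perf_event.h>

static DEFINE_PER_CPU(unsigned int, example_txn_flags);

static void example_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
	/* A transaction must not already be in flight. */
	WARN_ON_ONCE(__this_cpu_read(example_txn_flags));
	__this_cpu_write(example_txn_flags, txn_flags);

	if (txn_flags & ~PERF_PMU_TXN_ADD)
		return;		/* not an ADD transaction: nothing to prepare */

	perf_pmu_disable(pmu);
	/* ... record the state the deferred schedulability test needs ... */
}

static int example_pmu_commit_txn(struct pmu *pmu)
{
	unsigned int txn_flags = __this_cpu_read(example_txn_flags);

	__this_cpu_write(example_txn_flags, 0);
	if (txn_flags & ~PERF_PMU_TXN_ADD)
		return 0;	/* non-ADD transactions succeed trivially for now */

	/* ... perform the deferred schedulability test here ... */
	perf_pmu_enable(pmu);
	return 0;
}

->cancel_txn() follows the same save-check-clear shape, undoing whatever ->start_txn() set up.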
Signed-off-by: Sukadev Bhattiprolu [peterz: s390 compile fix] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Michael Ellerman Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-3-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/powerpc/perf/core-book3s.c | 30 +++++++++++++++++++++++++++++- arch/s390/kernel/perf_cpum_cf.c | 30 +++++++++++++++++++++++++++++- arch/sparc/kernel/perf_event.c | 25 ++++++++++++++++++++++++- arch/x86/kernel/cpu/perf_event.c | 32 +++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/perf_event.h | 1 + include/linux/perf_event.h | 14 +++++++++++--- kernel/events/core.c | 31 ++++++++++++++++++++++++++++--- 7 files changed, 153 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0382f3f1095..c84074185c80 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -49,6 +49,7 @@ struct cpu_hw_events { unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned int group_flag; + unsigned int txn_flags; int n_txn_start; /* BHRB bits */ @@ -1586,11 +1587,21 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; @@ -1604,6 +1615,14 @@ static void power_pmu_start_txn(struct pmu *pmu) static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1621,7 +1640,15 @@ static int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; + cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1633,6 +1660,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 56fdad479115..19138be412d6 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -72,6 +72,7 @@ struct cpu_hw_events { atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned int flags; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 
.ctr_set = { @@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { }, .state = 0, .flags = 0, + .txn_flags = 0, }; static int get_counter_set(u64 event) @@ -572,11 +574,21 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) /* * Start group events scheduling transaction. * Set flags to perform a single test at commit time. + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void cpumf_pmu_start_txn(struct pmu *pmu) +static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw->flags |= PERF_EVENT_TXN; cpuhw->tx_state = cpuhw->state; @@ -589,8 +601,16 @@ static void cpumf_pmu_start_txn(struct pmu *pmu) */ static void cpumf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int txn_flags; struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + WARN_ON(cpuhw->tx_state != cpuhw->state); cpuhw->flags &= ~PERF_EVENT_TXN; @@ -607,6 +627,13 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); u64 state; + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + /* check if the updated state can be scheduled */ state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); state >>= CPUMF_LCCTL_ENABLE_SHIFT; @@ -614,6 +641,7 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) return -EPERM; cpuhw->flags &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index e3f89c7e5062..2c0984d146ec 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -109,6 +109,7 @@ struct cpu_hw_events { int enabled; unsigned int group_flag; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -1494,10 +1495,16 @@ static int sparc_pmu_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1510,6 +1517,14 @@ static void sparc_pmu_start_txn(struct pmu *pmu) static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1528,6 +1543,13 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) if (!sparc_pmu) return -EINVAL; + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in 
flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (check_excludes(cpuc->event, 0, n)) return -EINVAL; @@ -1535,6 +1557,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66dd3fe99b82..dc77ef470e0e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1748,9 +1748,21 @@ static inline void x86_pmu_read(struct perf_event *event) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void x86_pmu_start_txn(struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */ + + cpuc->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); __this_cpu_write(cpu_hw_events.n_txn, 0); @@ -1763,6 +1775,16 @@ static void x86_pmu_start_txn(struct pmu *pmu) */ static void x86_pmu_cancel_txn(struct pmu *pmu) { + unsigned int txn_flags; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + txn_flags = cpuc->txn_flags; + cpuc->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* * Truncate collected array by the number of events added in this @@ -1786,6 +1808,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu) int assign[X86_PMC_IDX_MAX]; int n, ret; + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (!x86_pmu_initialized()) @@ -1802,6 +1831,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; + cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 5edf6d868fc1..c99c93b9e348 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -196,6 +196,7 @@ struct cpu_hw_events { int n_excl; /* the number of exclusive events */ unsigned int group_flag; + unsigned int txn_flags; int is_fake; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d61ee4459eee..ea3b5dd21a4c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -201,6 +201,8 @@ struct perf_event; */ #define PERF_EVENT_TXN 0x1 +#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ + /** * pmu::capabilities flags */ @@ -331,20 +333,26 @@ struct pmu { * * Start the transaction, after this ->add() doesn't need to * do schedulability tests. + * + * Optional. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu, unsigned int txn_flags); /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. 
On error the transaction is kept * open until ->cancel_txn() is called. + * + * Optional. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. + * + * Optional. */ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* * Will return the value for perf_event_mmap_page::index for this event, diff --git a/kernel/events/core.c b/kernel/events/core.c index 76e64be9bfb5..c80cee82959f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1914,7 +1914,7 @@ group_sched_in(struct perf_event *group_event, if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - pmu->start_txn(pmu); + pmu->start_txn(pmu, PERF_PMU_TXN_ADD); if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); @@ -7267,24 +7267,49 @@ static void perf_pmu_nop_void(struct pmu *pmu) { } +static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags) +{ +} + static int perf_pmu_nop_int(struct pmu *pmu) { return 0; } -static void perf_pmu_start_txn(struct pmu *pmu) +DEFINE_PER_CPU(unsigned int, nop_txn_flags); + +static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) { + __this_cpu_write(nop_txn_flags, flags); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); } static int perf_pmu_commit_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return 0; + perf_pmu_enable(pmu); return 0; } static void perf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_enable(pmu); } @@ -7523,7 +7548,7 @@ got_cpu_context: pmu->commit_txn = perf_pmu_commit_txn; pmu->cancel_txn = perf_pmu_cancel_txn; } else { - pmu->start_txn = perf_pmu_nop_void; + pmu->start_txn = perf_pmu_nop_txn; pmu->commit_txn = perf_pmu_nop_int; pmu->cancel_txn = perf_pmu_nop_void; } -- cgit v1.2.3 From 4a00c16e552ea5e71756cd29cd2df7557ec9cac4 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:51 -0700 Subject: perf/core: Define PERF_PMU_TXN_READ interface Define a new PERF_PMU_TXN_READ interface to read a group of counters at once.

	pmu->start_txn()		// Initialize before first event

	for each event in group
		pmu->read(event);	// Queue each event to be read

	rc = pmu->commit_txn()		// Read/update all queued counters

Note that we use this interface with all PMUs. PMUs that implement this interface use the ->read() operation to _queue_ the counters to be read and use ->commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore ->start_txn() and ->commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra.
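A PMU that can batch reads would then have ->read() merely queue the counter while a READ transaction is open, and do the actual hardware access once in ->commit_txn(). A sketch of that shape, where the ex_* helpers and per-CPU state are hypothetical stand-ins (the Power 24x7 driver later in this series is the real user):

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/perf_event.h>

/* Hypothetical hardware accessors; a real driver reads its own counters. */
static void ex_read_one(struct perf_event *event);
static int ex_read_many(struct perf_event **events, int n);

struct ex_cpu_state {
	unsigned int txn_flags;
	struct perf_event *queued[8];	/* events awaiting a batched read */
	int nr_queued;
};
static DEFINE_PER_CPU(struct ex_cpu_state, ex_cpu_state);

static void example_pmu_read(struct perf_event *event)
{
	struct ex_cpu_state *st = this_cpu_ptr(&ex_cpu_state);

	if (st->txn_flags & PERF_PMU_TXN_READ) {
		/* READ transaction open: defer until ->commit_txn(). */
		if (!WARN_ON_ONCE(st->nr_queued >= ARRAY_SIZE(st->queued)))
			st->queued[st->nr_queued++] = event;
		return;
	}
	ex_read_one(event);	/* no transaction: read this counter now */
}

static int example_pmu_commit_txn(struct pmu *pmu)
{
	struct ex_cpu_state *st = this_cpu_ptr(&ex_cpu_state);
	unsigned int txn_flags = st->txn_flags;

	st->txn_flags = 0;
	if (txn_flags & PERF_PMU_TXN_READ) {
		/* One hardware/hypervisor operation for all queued counters. */
		int ret = ex_read_many(st->queued, st->nr_queued);

		st->nr_queued = 0;
		return ret;
	}
	return 0;	/* ADD-transaction handling as in the previous patch */
}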
Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-9-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ea3b5dd21a4c..b83cea932f74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -202,6 +202,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags diff --git a/kernel/events/core.c b/kernel/events/core.c index ade04dfab368..55b0f7cf2614 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3199,6 +3199,7 @@ static void __perf_event_read(void *info) struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct pmu *pmu = event->pmu; /* * If this is a task context, we need to check whether it is @@ -3217,18 +3218,31 @@ static void __perf_event_read(void *info) } update_event_times(event); - if (event->state == PERF_EVENT_STATE_ACTIVE) - event->pmu->read(event); + if (event->state != PERF_EVENT_STATE_ACTIVE) + goto unlock; - if (!data->group) + if (!data->group) { + pmu->read(event); + data->ret = 0; goto unlock; + } + + pmu->start_txn(pmu, PERF_PMU_TXN_READ); + + pmu->read(event); list_for_each_entry(sub, &event->sibling_list, group_entry) { update_event_times(sub); - if (sub->state == PERF_EVENT_STATE_ACTIVE) + if (sub->state == PERF_EVENT_STATE_ACTIVE) { + /* + * Use sibling's PMU rather than @event's since the + * sibling could be on a different (e.g. software) PMU. + */ sub->pmu->read(sub); + } } - data->ret = 0; + + data->ret = pmu->commit_txn(pmu); unlock: raw_spin_unlock(&ctx->lock); -- cgit v1.2.3 From 8f3e5684d3fbd91ead283916676fa3dac22615e5 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:53 -0700 Subject: perf/core: Drop PERF_EVENT_TXN We currently use the PERF_EVENT_TXN flag to determine if we are in the middle of a transaction. If in a transaction, we defer the schedulability checks from the pmu->add() operation to the pmu->commit_txn() operation. Now that we have "transaction types" (PERF_PMU_TXN_ADD, PERF_PMU_TXN_READ) we can use the type to determine if we are in a transaction and drop the PERF_EVENT_TXN flag. When PERF_EVENT_TXN is dropped, the cpuhw->group_flag on some architectures becomes unused, so drop that field as well. This is an extension of the Powerpc patch from Peter Zijlstra to s390, Sparc and x86 architectures.
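The resulting change at every ->add() site is mechanical; schematically:

	/* Before: a dedicated flag tracked "transaction in progress". */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		goto nocheck;

	/* After: the type saved by ->start_txn() carries the same fact. */
	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
		goto nocheck;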
Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-11-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/powerpc/perf/core-book3s.c | 6 +----- arch/s390/kernel/perf_cpum_cf.c | 5 +---- arch/sparc/kernel/perf_event.c | 6 +----- arch/x86/kernel/cpu/perf_event.c | 7 ++----- arch/x86/kernel/cpu/perf_event.h | 1 - include/linux/perf_event.h | 2 -- 6 files changed, 5 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index c84074185c80..d1e65ce545b3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -48,7 +48,6 @@ struct cpu_hw_events { unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; - unsigned int group_flag; unsigned int txn_flags; int n_txn_start; @@ -1442,7 +1441,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuhw->group_flag & PERF_EVENT_TXN) + if (cpuhw->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) @@ -1603,7 +1602,6 @@ static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -1624,7 +1622,6 @@ static void power_pmu_cancel_txn(struct pmu *pmu) if (txn_flags & ~PERF_PMU_TXN_ADD) return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1659,7 +1656,6 @@ static int power_pmu_commit_txn(struct pmu *pmu) for (i = cpuhw->n_txn_start; i < n; ++i) cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuhw->group_flag &= ~PERF_EVENT_TXN; cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 19138be412d6..cb774ff6e749 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -536,7 +536,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) * For group events transaction, the authorization check is * done in cpumf_pmu_commit_txn(). */ - if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD)) if (validate_ctr_auth(&event->hw)) return -EPERM; @@ -590,7 +590,6 @@ static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->flags |= PERF_EVENT_TXN; cpuhw->tx_state = cpuhw->state; } @@ -613,7 +612,6 @@ static void cpumf_pmu_cancel_txn(struct pmu *pmu) WARN_ON(cpuhw->tx_state != cpuhw->state); - cpuhw->flags &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -640,7 +638,6 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) if ((state & cpuhw->info.auth_ctl) != state) return -EPERM; - cpuhw->flags &= ~PERF_EVENT_TXN; cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2c0984d146ec..b0da5aedb336 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -108,7 +108,6 @@ struct cpu_hw_events { /* Enabled/disable state. 
*/ int enabled; - unsigned int group_flag; unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -1380,7 +1379,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuc->event, n0, 1)) @@ -1506,7 +1505,6 @@ static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; } /* @@ -1526,7 +1524,6 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) if (txn_flags & ~PERF_PMU_TXN_ADD) return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1556,7 +1553,6 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) if (sparc_check_constraints(cpuc->event, cpuc->events, n)) return -EAGAIN; - cpuc->group_flag &= ~PERF_EVENT_TXN; cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index dc77ef470e0e..4562cf070c27 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1175,7 +1175,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) * skip the schedulability test here, it will be performed * at commit time (->commit_txn) as a whole. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1326,7 +1326,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * XXX assumes any ->del() called during a TXN will only be on * an event added during that same TXN. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) return; /* @@ -1764,7 +1764,6 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); __this_cpu_write(cpu_hw_events.n_txn, 0); } @@ -1785,7 +1784,6 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) if (txn_flags & ~PERF_PMU_TXN_ADD) return; - __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* * Truncate collected array by the number of events added in this * transaction. See x86_pmu_add() and x86_pmu_*_txn(). @@ -1830,7 +1828,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - cpuc->group_flag &= ~PERF_EVENT_TXN; cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c99c93b9e348..953a0e4e3284 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -195,7 +195,6 @@ struct cpu_hw_events { int n_excl; /* the number of exclusive events */ - unsigned int group_flag; unsigned int txn_flags; int is_fake; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b83cea932f74..d841d33bcdc9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -199,8 +199,6 @@ struct perf_event; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_EVENT_TXN 0x1 - #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ -- cgit v1.2.3 From 445b0eb058f5f31c844a731cb82e7441d0d9e578 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 27 Aug 2015 04:36:14 +0200 Subject: ACPI / property: Add support for data-only subnodes In some cases, the information expressed via device properties is hierarchical by nature. For example, the properties of a composite device consisting of multiple semi-dependent components may need to be represented in the form of a tree of property data sets corresponding to specific components of the device. Unfortunately, using ACPI device objects for this purpose turns out to be problematic, mostly due to the assumption made by some operating systems (that platform firmware generally needs to work with) that each device object in the ACPI namespace represents a device requiring a separate driver. That assumption leads to complications which reportedly are impractically difficult to overcome and a different approach is needed for the sake of interoperability. The approach implemented here is based on extending _DSD via pointers (links) to additional ACPI objects returning data packages formatted in accordance with the _DSD formatting rules defined by Section 6.2.5 of ACPI 6. Those additional objects are referred to as data-only subnodes of the device object containing the _DSD pointing to them. The links to them need to be located in a separate section of the _DSD data package following UUID dbb8e3e6-5886-4ba6-8795-1319f52a966b referred to as the Hierarchical Data Extension UUID as defined in [1]. Each of them is represented by a package of two strings. The first string in that package (the key) is regarded as the name of the data-only subnode pointed to by the link. The second string in it (the target) is expected to hold the ACPI namespace path (possibly utilizing the usual ACPI namespace search rules) of an ACPI object evaluating to a data package extending the _DSD. The device properties initialization code follows those links, creates a struct acpi_data_node object for each of them to store the data returned by the ACPI object pointed to by it and processes those data recursively (which may lead to the creation of more struct acpi_data_node objects if the returned data package contains the Hierarchical Data Extension UUID section with more links in it). All of the struct acpi_data_node objects are present until the the ACPI device object containing the _DSD with links to them is deleted and they are deleted along with that object. [1]: http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.pdf Signed-off-by: Rafael J. 
Wysocki Tested-by: Mika Westerberg --- drivers/acpi/property.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++- include/acpi/acpi_bus.h | 9 ++++ include/linux/fwnode.h | 1 + 3 files changed, 142 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 8163b4bd7a61..17c436de376b 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -24,6 +24,115 @@ static const u8 prp_uuid[16] = { 0x14, 0xd8, 0xff, 0xda, 0xba, 0x6e, 0x8c, 0x4d, 0x8a, 0x91, 0xbc, 0x9b, 0xbf, 0x4a, 0xa3, 0x01 }; +/* ACPI _DSD data subnodes UUID: dbb8e3e6-5886-4ba6-8795-1319f52a966b */ +static const u8 ads_uuid[16] = { + 0xe6, 0xe3, 0xb8, 0xdb, 0x86, 0x58, 0xa6, 0x4b, + 0x87, 0x95, 0x13, 0x19, 0xf5, 0x2a, 0x96, 0x6b +}; + +static bool acpi_enumerate_nondev_subnodes(acpi_handle scope, + const union acpi_object *desc, + struct acpi_device_data *data); +static bool acpi_extract_properties(const union acpi_object *desc, + struct acpi_device_data *data); + +static bool acpi_nondev_subnode_ok(acpi_handle scope, + const union acpi_object *link, + struct list_head *list) +{ + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; + struct acpi_data_node *dn; + acpi_handle handle; + acpi_status status; + + dn = kzalloc(sizeof(*dn), GFP_KERNEL); + if (!dn) + return false; + + dn->name = link->package.elements[0].string.pointer; + dn->fwnode.type = FWNODE_ACPI_DATA; + INIT_LIST_HEAD(&dn->data.subnodes); + + status = acpi_get_handle(scope, link->package.elements[1].string.pointer, + &handle); + if (ACPI_FAILURE(status)) + goto fail; + + status = acpi_evaluate_object_typed(handle, NULL, NULL, &buf, + ACPI_TYPE_PACKAGE); + if (ACPI_FAILURE(status)) + goto fail; + + if (acpi_extract_properties(buf.pointer, &dn->data)) + dn->data.pointer = buf.pointer; + + if (acpi_enumerate_nondev_subnodes(scope, buf.pointer, &dn->data)) + dn->data.pointer = buf.pointer; + + if (dn->data.pointer) { + list_add_tail(&dn->sibling, list); + return true; + } + + acpi_handle_debug(handle, "Invalid properties/subnodes data, skipping\n"); + + fail: + ACPI_FREE(buf.pointer); + kfree(dn); + return false; +} + +static int acpi_add_nondev_subnodes(acpi_handle scope, + const union acpi_object *links, + struct list_head *list) +{ + bool ret = false; + int i; + + for (i = 0; i < links->package.count; i++) { + const union acpi_object *link; + + link = &links->package.elements[i]; + /* Only two elements allowed, both must be strings. */ + if (link->package.count == 2 + && link->package.elements[0].type == ACPI_TYPE_STRING + && link->package.elements[1].type == ACPI_TYPE_STRING + && acpi_nondev_subnode_ok(scope, link, list)) + ret = true; + } + + return ret; +} + +static bool acpi_enumerate_nondev_subnodes(acpi_handle scope, + const union acpi_object *desc, + struct acpi_device_data *data) +{ + int i; + + /* Look for the ACPI data subnodes UUID. */ + for (i = 0; i < desc->package.count; i += 2) { + const union acpi_object *uuid, *links; + + uuid = &desc->package.elements[i]; + links = &desc->package.elements[i + 1]; + + /* + * The first element must be a UUID and the second one must be + * a package. 
+ */ + if (uuid->type != ACPI_TYPE_BUFFER || uuid->buffer.length != 16 + || links->type != ACPI_TYPE_PACKAGE) + break; + + if (memcmp(uuid->buffer.pointer, ads_uuid, sizeof(ads_uuid))) + continue; + + return acpi_add_nondev_subnodes(scope, links, &data->subnodes); + } + + return false; +} static bool acpi_property_value_ok(const union acpi_object *value) { @@ -147,6 +256,8 @@ void acpi_init_properties(struct acpi_device *adev) acpi_status status; bool acpi_of = false; + INIT_LIST_HEAD(&adev->data.subnodes); + /* * Check if ACPI_DT_NAMESPACE_HID is present and in that case we fill in * Device Tree compatible properties for this device. @@ -167,7 +278,11 @@ void acpi_init_properties(struct acpi_device *adev) adev->data.pointer = buf.pointer; if (acpi_of) acpi_init_of_compatible(adev); - } else { + } + if (acpi_enumerate_nondev_subnodes(adev->handle, buf.pointer, &adev->data)) + adev->data.pointer = buf.pointer; + + if (!adev->data.pointer) { acpi_handle_debug(adev->handle, "Invalid _DSD data, skipping\n"); ACPI_FREE(buf.pointer); } @@ -178,8 +293,24 @@ void acpi_init_properties(struct acpi_device *adev) ACPI_DT_NAMESPACE_HID " requires 'compatible' property\n"); } +static void acpi_destroy_nondev_subnodes(struct list_head *list) +{ + struct acpi_data_node *dn, *next; + + if (list_empty(list)) + return; + + list_for_each_entry_safe_reverse(dn, next, list, sibling) { + acpi_destroy_nondev_subnodes(&dn->data.subnodes); + list_del(&dn->sibling); + ACPI_FREE((void *)dn->data.pointer); + kfree(dn); + } +} + void acpi_free_properties(struct acpi_device *adev) { + acpi_destroy_nondev_subnodes(&adev->data.subnodes); ACPI_FREE((void *)adev->data.pointer); adev->data.of_compatible = NULL; adev->data.pointer = NULL; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 5ba8fb64f664..79cfee646d6b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -343,6 +343,7 @@ struct acpi_device_data { const union acpi_object *pointer; const union acpi_object *properties; const union acpi_object *of_compatible; + struct list_head subnodes; }; struct acpi_gpio_mapping; @@ -378,6 +379,14 @@ struct acpi_device { void (*remove)(struct acpi_device *); }; +/* Non-device subnode */ +struct acpi_data_node { + const char *name; + struct fwnode_handle fwnode; + struct acpi_device_data data; + struct list_head sibling; +}; + static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) { bool ret = false; diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 0408545bce42..b08d6ba5c1e6 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -16,6 +16,7 @@ enum fwnode_type { FWNODE_INVALID = 0, FWNODE_OF, FWNODE_ACPI, + FWNODE_ACPI_DATA, FWNODE_PDATA, }; -- cgit v1.2.3 From 3a7a2ab839ad18c2d542b40f4a647c98d068e55a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Aug 2015 04:40:05 +0200 Subject: ACPI / property: Extend fwnode_property_* to data-only subnodes Modify is_acpi_node() to return "true" for ACPI data-only subnodes as well as for ACPI device objects and change the name of to_acpi_node() to to_acpi_device_node() so it is clear that it covers ACPI device objects only. Accordingly, introduce to_acpi_data_node() to cover data-only subnodes in an analogous way. With that, make the fwnode_property_* family of functions work with ACPI data-only subnodes introduced previously. Signed-off-by: Rafael J.
Wysocki Tested-by: Mika Westerberg --- drivers/acpi/property.c | 138 ++++++++++++++++++++++++++++++++++++------------ drivers/base/property.c | 16 +++--- drivers/gpio/gpiolib.c | 6 +-- include/acpi/acpi_bus.h | 21 +++++++- include/linux/acpi.h | 53 +++++++++++++------ 5 files changed, 173 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 333f9146d19e..e78551726acb 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -19,6 +19,11 @@ #include "internal.h" +static int acpi_data_get_property_array(struct acpi_device_data *data, + const char *name, + acpi_object_type type, + const union acpi_object **obj); + /* ACPI _DSD device properties UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301 */ static const u8 prp_uuid[16] = { 0x14, 0xd8, 0xff, 0xda, 0xba, 0x6e, 0x8c, 0x4d, @@ -191,8 +196,8 @@ static void acpi_init_of_compatible(struct acpi_device *adev) const union acpi_object *of_compatible; int ret; - ret = acpi_dev_get_property_array(adev, "compatible", ACPI_TYPE_STRING, - &of_compatible); + ret = acpi_data_get_property_array(&adev->data, "compatible", + ACPI_TYPE_STRING, &of_compatible); if (ret) { ret = acpi_dev_get_property(adev, "compatible", ACPI_TYPE_STRING, &of_compatible); @@ -320,8 +325,8 @@ void acpi_free_properties(struct acpi_device *adev) } /** - * acpi_dev_get_property - return an ACPI property with given name - * @adev: ACPI device to get property + * acpi_data_get_property - return an ACPI property with given name + * @data: ACPI device data object to get the property from * @name: Name of the property * @type: Expected property type * @obj: Location to store the property value (if not %NULL) @@ -330,26 +335,27 @@ void acpi_free_properties(struct acpi_device *adev) * object at the location pointed to by @obj if found. * * Callers must not attempt to free the returned objects. These objects will be - * freed by the ACPI core automatically during the removal of @adev. + * freed by the ACPI core automatically during the removal of @data. * * Return: %0 if property with @name has been found (success), * %-EINVAL if the arguments are invalid, * %-ENODATA if the property doesn't exist, * %-EPROTO if the property value type doesn't match @type. */ -int acpi_dev_get_property(struct acpi_device *adev, const char *name, - acpi_object_type type, const union acpi_object **obj) +static int acpi_data_get_property(struct acpi_device_data *data, + const char *name, acpi_object_type type, + const union acpi_object **obj) { const union acpi_object *properties; int i; - if (!adev || !name) + if (!data || !name) return -EINVAL; - if (!adev->data.pointer || !adev->data.properties) + if (!data->pointer || !data->properties) return -ENODATA; - properties = adev->data.properties; + properties = data->properties; for (i = 0; i < properties->package.count; i++) { const union acpi_object *propname, *propvalue; const union acpi_object *property; @@ -370,11 +376,50 @@ int acpi_dev_get_property(struct acpi_device *adev, const char *name, } return -ENODATA; } + +/** + * acpi_dev_get_property - return an ACPI property with given name. + * @adev: ACPI device to get the property from. + * @name: Name of the property. + * @type: Expected property type. + * @obj: Location to store the property value (if not %NULL). + */ +int acpi_dev_get_property(struct acpi_device *adev, const char *name, + acpi_object_type type, const union acpi_object **obj) +{ + return adev ?
acpi_data_get_property(&adev->data, name, type, obj) : -EINVAL; +} EXPORT_SYMBOL_GPL(acpi_dev_get_property); +static struct acpi_device_data *acpi_device_data_of_node(struct fwnode_handle *fwnode) +{ + if (fwnode->type == FWNODE_ACPI) { + struct acpi_device *adev = to_acpi_device_node(fwnode); + return &adev->data; + } else if (fwnode->type == FWNODE_ACPI_DATA) { + struct acpi_data_node *dn = to_acpi_data_node(fwnode); + return &dn->data; + } + return NULL; +} + /** - * acpi_dev_get_property_array - return an ACPI array property with given name - * @adev: ACPI device to get property + * acpi_node_prop_get - return an ACPI property with given name. + * @fwnode: Firmware node to get the property from. + * @propname: Name of the property. + * @valptr: Location to store a pointer to the property value (if not %NULL). + */ +int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname, + void **valptr) +{ + return acpi_data_get_property(acpi_device_data_of_node(fwnode), + propname, ACPI_TYPE_ANY, + (const union acpi_object **)valptr); +} + +/** + * acpi_data_get_property_array - return an ACPI array property with given name + * @data: ACPI data object to get the property from * @name: Name of the property * @type: Expected type of array elements * @obj: Location to store a pointer to the property value (if not NULL) @@ -383,7 +428,7 @@ EXPORT_SYMBOL_GPL(acpi_dev_get_property); * ACPI object at the location pointed to by @obj if found. * * Callers must not attempt to free the returned objects. Those objects will be - * freed by the ACPI core automatically during the removal of @adev. + * freed by the ACPI core automatically during the removal of @data. * * Return: %0 if array property (package) with @name has been found (success), * %-EINVAL if the arguments are invalid, @@ -391,14 +436,15 @@ EXPORT_SYMBOL_GPL(acpi_dev_get_property); * %-EPROTO if the property is not a package or the type of its elements * doesn't match @type.
*/ -int acpi_dev_get_property_array(struct acpi_device *adev, const char *name, - acpi_object_type type, - const union acpi_object **obj) +static int acpi_data_get_property_array(struct acpi_device_data *data, + const char *name, + acpi_object_type type, + const union acpi_object **obj) { const union acpi_object *prop; int ret, i; - ret = acpi_dev_get_property(adev, name, ACPI_TYPE_PACKAGE, &prop); + ret = acpi_data_get_property(data, name, ACPI_TYPE_PACKAGE, &prop); if (ret) return ret; @@ -413,7 +459,6 @@ int acpi_dev_get_property_array(struct acpi_device *adev, const char *name, return 0; } -EXPORT_SYMBOL_GPL(acpi_dev_get_property_array); /** * acpi_dev_get_property_reference - returns handle to the referenced object @@ -518,15 +563,9 @@ int acpi_dev_get_property_reference(struct acpi_device *adev, } EXPORT_SYMBOL_GPL(acpi_dev_get_property_reference); -int acpi_dev_prop_get(struct acpi_device *adev, const char *propname, - void **valptr) -{ - return acpi_dev_get_property(adev, propname, ACPI_TYPE_ANY, - (const union acpi_object **)valptr); -} - -int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, - enum dev_prop_type proptype, void *val) +static int acpi_data_prop_read_single(struct acpi_device_data *data, + const char *propname, + enum dev_prop_type proptype, void *val) { const union acpi_object *obj; int ret; @@ -535,7 +574,7 @@ int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, return -EINVAL; if (proptype >= DEV_PROP_U8 && proptype <= DEV_PROP_U64) { - ret = acpi_dev_get_property(adev, propname, ACPI_TYPE_INTEGER, &obj); + ret = acpi_data_get_property(data, propname, ACPI_TYPE_INTEGER, &obj); if (ret) return ret; @@ -560,7 +599,7 @@ int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, break; } } else if (proptype == DEV_PROP_STRING) { - ret = acpi_dev_get_property(adev, propname, ACPI_TYPE_STRING, &obj); + ret = acpi_data_get_property(data, propname, ACPI_TYPE_STRING, &obj); if (ret) return ret; @@ -571,6 +610,12 @@ int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, return ret; } +int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, + enum dev_prop_type proptype, void *val) +{ + return adev ? acpi_data_prop_read_single(&adev->data, propname, proptype, val) : -EINVAL; +} + static int acpi_copy_property_array_u8(const union acpi_object *items, u8 *val, size_t nval) { @@ -647,20 +692,22 @@ static int acpi_copy_property_array_string(const union acpi_object *items, return 0; } -int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, - enum dev_prop_type proptype, void *val, size_t nval) +static int acpi_data_prop_read(struct acpi_device_data *data, + const char *propname, + enum dev_prop_type proptype, + void *val, size_t nval) { const union acpi_object *obj; const union acpi_object *items; int ret; if (val && nval == 1) { - ret = acpi_dev_prop_read_single(adev, propname, proptype, val); + ret = acpi_data_prop_read_single(data, propname, proptype, val); if (!ret) return ret; } - ret = acpi_dev_get_property_array(adev, propname, ACPI_TYPE_ANY, &obj); + ret = acpi_data_get_property_array(data, propname, ACPI_TYPE_ANY, &obj); if (ret) return ret; @@ -696,3 +743,28 @@ int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, } return ret; } + +int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, + enum dev_prop_type proptype, void *val, size_t nval) +{ + return adev ? 
acpi_data_prop_read(&adev->data, propname, proptype, val, nval) : -EINVAL; +} + +/** + * acpi_node_prop_read - retrieve the value of an ACPI property with given name. + * @fwnode: Firmware node to get the property from. + * @propname: Name of the property. + * @proptype: Expected property type. + * @val: Location to store the property value (if not %NULL). + * @nval: Size of the array pointed to by @val. + * + * If @val is %NULL, return the number of array elements comprising the value + * of the property. Otherwise, read at most @nval values to the array at the + * location pointed to by @val. + */ +int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, + enum dev_prop_type proptype, void *val, size_t nval) +{ + return acpi_data_prop_read(acpi_device_data_of_node(fwnode), + propname, proptype, val, nval); +} diff --git a/drivers/base/property.c b/drivers/base/property.c index 2d75366c61e0..ca118169a6c5 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -134,7 +134,7 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname) if (is_of_node(fwnode)) return of_property_read_bool(to_of_node(fwnode), propname); else if (is_acpi_node(fwnode)) - return !acpi_dev_prop_get(to_acpi_node(fwnode), propname, NULL); + return !acpi_node_prop_get(fwnode, propname, NULL); return !!pset_prop_get(to_pset(fwnode), propname); } @@ -298,8 +298,8 @@ EXPORT_SYMBOL_GPL(device_property_read_string); _ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_, \ _type_, _val_, _nval_); \ else if (is_acpi_node(_fwnode_)) \ - _ret_ = acpi_dev_prop_read(to_acpi_node(_fwnode_), _propname_, \ - _proptype_, _val_, _nval_); \ + _ret_ = acpi_node_prop_read(_fwnode_, _propname_, _proptype_, \ + _val_, _nval_); \ else if (is_pset(_fwnode_)) \ _ret_ = pset_prop_read_array(to_pset(_fwnode_), _propname_, \ _proptype_, _val_, _nval_); \ @@ -440,8 +440,8 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode, propname, val, nval) : of_property_count_strings(to_of_node(fwnode), propname); else if (is_acpi_node(fwnode)) - return acpi_dev_prop_read(to_acpi_node(fwnode), propname, - DEV_PROP_STRING, val, nval); + return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING, + val, nval); else if (is_pset(fwnode)) return pset_prop_read_array(to_pset(fwnode), propname, DEV_PROP_STRING, val, nval); @@ -470,8 +470,8 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode, if (is_of_node(fwnode)) return of_property_read_string(to_of_node(fwnode), propname, val); else if (is_acpi_node(fwnode)) - return acpi_dev_prop_read(to_acpi_node(fwnode), propname, - DEV_PROP_STRING, val, 1); + return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING, + val, 1); return pset_prop_read_array(to_pset(fwnode), propname, DEV_PROP_STRING, val, 1); @@ -495,7 +495,7 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev, } else if (IS_ENABLED(CONFIG_ACPI)) { struct acpi_device *node; - node = acpi_get_next_child(dev, to_acpi_node(child)); + node = acpi_get_next_child(dev, to_acpi_device_node(child)); if (node) return acpi_fwnode_handle(node); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 980c1f87866a..f43e808a49d9 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2083,11 +2083,11 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, &flags); if (!IS_ERR(desc)) active_low = flags & OF_GPIO_ACTIVE_LOW; - } else if (is_acpi_node(fwnode)) { + } else if (is_acpi_device_node(fwnode)) { struct 
acpi_gpio_info info; - desc = acpi_get_gpiod_by_index(to_acpi_node(fwnode), propname, 0, - &info); + desc = acpi_get_gpiod_by_index(to_acpi_device_node(fwnode), + propname, 0, &info); if (!IS_ERR(desc)) active_low = info.active_low; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e0d7c193d6e0..e234725eadc7 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -424,16 +424,33 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) } static inline bool is_acpi_node(struct fwnode_handle *fwnode) +{ + return fwnode && (fwnode->type == FWNODE_ACPI + || fwnode->type == FWNODE_ACPI_DATA); +} + +static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) { return fwnode && fwnode->type == FWNODE_ACPI; } -static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode) +static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) { - return is_acpi_node(fwnode) ? + return is_acpi_device_node(fwnode) ? container_of(fwnode, struct acpi_device, fwnode) : NULL; } +static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) +{ + return fwnode && fwnode->type == FWNODE_ACPI_DATA; +} + +static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) +{ + return is_acpi_data_node(fwnode) ? + container_of(fwnode, struct acpi_data_node, fwnode) : NULL; +} + static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) { return &adev->fwnode; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..6be94ba4e980 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -49,7 +49,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) return adev ? adev->handle : NULL; } -#define ACPI_COMPANION(dev) to_acpi_node((dev)->fwnode) +#define ACPI_COMPANION(dev) to_acpi_device_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? 
\ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) @@ -69,7 +69,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) static inline bool has_acpi_companion(struct device *dev) { - return is_acpi_node(dev->fwnode); + return is_acpi_device_node(dev->fwnode); } static inline void acpi_preset_companion(struct device *dev, @@ -461,7 +461,22 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode) return false; } -static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode) +static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) +{ + return false; +} + +static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) +{ + return false; +} + +static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) { return NULL; } @@ -743,17 +758,16 @@ struct acpi_reference_args { #ifdef CONFIG_ACPI int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); -int acpi_dev_get_property_array(struct acpi_device *adev, const char *name, - acpi_object_type type, - const union acpi_object **obj); int acpi_dev_get_property_reference(struct acpi_device *adev, const char *name, size_t index, struct acpi_reference_args *args); -int acpi_dev_prop_get(struct acpi_device *adev, const char *propname, - void **valptr); +int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname, + void **valptr); int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val); +int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, + enum dev_prop_type proptype, void *val, size_t nval); int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); @@ -766,13 +780,7 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, { return -ENXIO; } -static inline int acpi_dev_get_property_array(struct acpi_device *adev, - const char *name, - acpi_object_type type, - const union acpi_object **obj) -{ - return -ENXIO; -} + static inline int acpi_dev_get_property_reference(struct acpi_device *adev, const char *name, const char *cells_name, size_t index, struct acpi_reference_args *args) @@ -780,6 +788,13 @@ static inline int acpi_dev_get_property_reference(struct acpi_device *adev, return -ENXIO; } +static inline int acpi_node_prop_get(struct fwnode_handle *fwnode, + const char *propname, + void **valptr) +{ + return -ENXIO; +} + static inline int acpi_dev_prop_get(struct acpi_device *adev, const char *propname, void **valptr) @@ -795,6 +810,14 @@ static inline int acpi_dev_prop_read_single(struct acpi_device *adev, return -ENXIO; } +static inline int acpi_node_prop_read(struct fwnode_handle *fwnode, + const char *propname, + enum dev_prop_type proptype, + void *val, size_t nval) +{ + return -ENXIO; +} + static inline int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, -- cgit v1.2.3 From 504a33749971c36c54ba5ccb1364872dee1f17a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Aug 2015 04:42:33 +0200 Subject: ACPI / property: Extend device_get_next_child_node() to data-only nodes Make device_get_next_child_node() work with ACPI data-only subnodes introduced previously. 
Namely, replace acpi_get_next_child() with acpi_get_next_subnode() that can handle (and return) child device objects as well as child data-only subnodes of the given device and modify the ACPI part of the GPIO subsystem to handle data-only subnodes returned by it. To that end, introduce acpi_node_get_gpiod() taking a struct fwnode_handle pointer as the first argument. That argument may point to an ACPI device object as well as to a data-only subnode and the function should do the right thing (ie. look for the matching GPIO descriptor correctly) in either case. Next, modify fwnode_get_named_gpiod() to use acpi_node_get_gpiod() instead of acpi_get_gpiod_by_index() which automatically causes devm_get_gpiod_from_child() to work with ACPI data-only subnodes that may be returned by device_get_next_child_node() which in turn is required by the users of that function (the gpio_keys_polled and gpio-leds drivers). Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg Acked-by: Linus Walleij --- drivers/acpi/property.c | 88 ++++++++++++++++++++++++++++++++++++++++----- drivers/acpi/scan.c | 20 ----------- drivers/base/property.c | 6 +--- drivers/gpio/gpiolib-acpi.c | 58 +++++++++++++++++++++++++++--- drivers/gpio/gpiolib.c | 5 ++- drivers/gpio/gpiolib.h | 10 +++++- include/linux/acpi.h | 17 +++++---- 7 files changed, 153 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index e78551726acb..14654435c295 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -461,9 +461,9 @@ static int acpi_data_get_property_array(struct acpi_device_data *data, } /** - * acpi_dev_get_property_reference - returns handle to the referenced object - * @adev: ACPI device to get property - * @name: Name of the property + * acpi_data_get_property_reference - returns handle to the referenced object + * @data: ACPI device data object containing the property + * @propname: Name of the property * @index: Index of the reference to return * @args: Location to store the returned reference with optional arguments * @@ -477,16 +477,16 @@ static int acpi_data_get_property_array(struct acpi_device_data *data, * * Return: %0 on success, negative error code on failure. */ -int acpi_dev_get_property_reference(struct acpi_device *adev, - const char *name, size_t index, - struct acpi_reference_args *args) +static int acpi_data_get_property_reference(struct acpi_device_data *data, + const char *propname, size_t index, + struct acpi_reference_args *args) { const union acpi_object *element, *end; const union acpi_object *obj; struct acpi_device *device; int ret, idx = 0; - ret = acpi_dev_get_property(adev, name, ACPI_TYPE_ANY, &obj); + ret = acpi_data_get_property(data, propname, ACPI_TYPE_ANY, &obj); if (ret) return ret; @@ -561,7 +561,23 @@ int acpi_dev_get_property_reference(struct acpi_device *adev, return -EPROTO; } -EXPORT_SYMBOL_GPL(acpi_dev_get_property_reference); + +/** + * acpi_node_get_property_reference - get a handle to the referenced object. + * @fwnode: Firmware node to get the property from. + * @propname: Name of the property. + * @index: Index of the reference to return. + * @args: Location to store the returned reference with optional arguments. + */ +int acpi_node_get_property_reference(struct fwnode_handle *fwnode, + const char *name, size_t index, + struct acpi_reference_args *args) +{ + struct acpi_device_data *data = acpi_device_data_of_node(fwnode); + + return data ? 
acpi_data_get_property_reference(data, name, index, args) : -EINVAL; +} +EXPORT_SYMBOL_GPL(acpi_node_get_property_reference); static int acpi_data_prop_read_single(struct acpi_device_data *data, const char *propname, @@ -768,3 +784,59 @@ int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, return acpi_data_prop_read(acpi_device_data_of_node(fwnode), propname, proptype, val, nval); } + +/** + * acpi_get_next_subnode - Return the next child node handle for a device. + * @dev: Device to find the next child node for. + * @child: Handle to one of the device's child nodes or a null handle. + */ +struct fwnode_handle *acpi_get_next_subnode(struct device *dev, + struct fwnode_handle *child) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + struct list_head *head, *next; + + if (!adev) + return NULL; + + if (!child || child->type == FWNODE_ACPI) { + head = &adev->children; + if (list_empty(head)) + goto nondev; + + if (child) { + adev = to_acpi_device_node(child); + next = adev->node.next; + if (next == head) { + child = NULL; + goto nondev; + } + adev = list_entry(next, struct acpi_device, node); + } else { + adev = list_first_entry(head, struct acpi_device, node); + } + return acpi_fwnode_handle(adev); + } + + nondev: + if (!child || child->type == FWNODE_ACPI_DATA) { + struct acpi_data_node *dn; + + head = &adev->data.subnodes; + if (list_empty(head)) + return NULL; + + if (child) { + dn = to_acpi_data_node(child); + next = dn->sibling.next; + if (next == head) + return NULL; + + dn = list_entry(next, struct acpi_data_node, sibling); + } else { + dn = list_first_entry(head, struct acpi_data_node, sibling); + } + return &dn->fwnode; + } + return NULL; +} diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 01136b879038..d1ce377db3e9 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -695,26 +695,6 @@ int acpi_device_add(struct acpi_device *device, return result; } -struct acpi_device *acpi_get_next_child(struct device *dev, - struct acpi_device *child) -{ - struct acpi_device *adev = ACPI_COMPANION(dev); - struct list_head *head, *next; - - if (!adev) - return NULL; - - head = &adev->children; - if (list_empty(head)) - return NULL; - - if (!child) - return list_first_entry(head, struct acpi_device, node); - - next = child->node.next; - return next == head ? 
NULL : list_entry(next, struct acpi_device, node); -} - /* -------------------------------------------------------------------------- Device Enumeration -------------------------------------------------------------------------- */ diff --git a/drivers/base/property.c b/drivers/base/property.c index ca118169a6c5..660ecec60bdf 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -493,11 +493,7 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev, if (node) return &node->fwnode; } else if (IS_ENABLED(CONFIG_ACPI)) { - struct acpi_device *node; - - node = acpi_get_next_child(dev, to_acpi_device_node(child)); - if (node) - return acpi_fwnode_handle(node); + return acpi_get_next_subnode(dev, child); } return NULL; } diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a38cf16a5828..69a83626f1ae 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -453,7 +453,7 @@ static int acpi_gpio_resource_lookup(struct acpi_gpio_lookup *lookup, return 0; } -static int acpi_gpio_property_lookup(struct acpi_device *adev, +static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode, const char *propname, int index, struct acpi_gpio_lookup *lookup) { @@ -461,10 +461,16 @@ static int acpi_gpio_property_lookup(struct acpi_device *adev, int ret; memset(&args, 0, sizeof(args)); - ret = acpi_dev_get_property_reference(adev, propname, index, &args); - if (ret && !acpi_get_driver_gpio_data(adev, propname, index, &args)) - return ret; + ret = acpi_node_get_property_reference(fwnode, propname, index, &args); + if (ret) { + struct acpi_device *adev = to_acpi_device_node(fwnode); + if (!adev) + return ret; + + if (!acpi_get_driver_gpio_data(adev, propname, index, &args)) + return ret; + } /* * The property was found and resolved, so need to lookup the GPIO based * on returned args. @@ -518,7 +524,8 @@ struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, if (propname) { dev_dbg(&adev->dev, "GPIO: looking up %s\n", propname); - ret = acpi_gpio_property_lookup(adev, propname, index, &lookup); + ret = acpi_gpio_property_lookup(acpi_fwnode_handle(adev), + propname, index, &lookup); if (ret) return ERR_PTR(ret); @@ -534,6 +541,47 @@ struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, return ret ? ERR_PTR(ret) : lookup.desc; } +/** + * acpi_node_get_gpiod() - get a GPIO descriptor from ACPI resources + * @fwnode: pointer to an ACPI firmware node to get the GPIO information from + * @propname: Property name of the GPIO + * @index: index of GpioIo/GpioInt resource (starting from %0) + * @info: info pointer to fill in (optional) + * + * If @fwnode is an ACPI device object, call %acpi_get_gpiod_by_index() for it. + * Otherwise (ie. it is a data-only non-device object), use the property-based + * GPIO lookup to get to the GPIO resource with the relevant information and use + * that to obtain the GPIO descriptor to return. 
+ */ +struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, + const char *propname, int index, + struct acpi_gpio_info *info) +{ + struct acpi_gpio_lookup lookup; + struct acpi_device *adev; + int ret; + + adev = to_acpi_device_node(fwnode); + if (adev) + return acpi_get_gpiod_by_index(adev, propname, index, info); + + if (!is_acpi_data_node(fwnode)) + return ERR_PTR(-ENODEV); + + if (!propname) + return ERR_PTR(-EINVAL); + + memset(&lookup, 0, sizeof(lookup)); + lookup.index = index; + + ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); + if (ret) + return ERR_PTR(ret); + + ret = acpi_gpio_resource_lookup(&lookup, info); + return ret ? ERR_PTR(ret) : lookup.desc; +} + /** * acpi_dev_gpio_irq_get() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f43e808a49d9..7d61b506c42f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2083,11 +2083,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, &flags); if (!IS_ERR(desc)) active_low = flags & OF_GPIO_ACTIVE_LOW; - } else if (is_acpi_device_node(fwnode)) { + } else if (is_acpi_node(fwnode)) { struct acpi_gpio_info info; - desc = acpi_get_gpiod_by_index(to_acpi_device_node(fwnode), - propname, 0, &info); + desc = acpi_node_get_gpiod(fwnode, propname, 0, &info); if (!IS_ERR(desc)) active_low = info.active_low; } diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index bf343004b008..e69c7157cdad 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -42,6 +42,9 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip); struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, const char *propname, int index, struct acpi_gpio_info *info); +struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, + const char *propname, int index, + struct acpi_gpio_info *info); int acpi_gpio_count(struct device *dev, const char *con_id); #else @@ -60,7 +63,12 @@ acpi_get_gpiod_by_index(struct acpi_device *adev, const char *propname, { return ERR_PTR(-ENOSYS); } - +static inline struct gpio_desc * +acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname, + int index, struct acpi_gpio_info *info) +{ + return ERR_PTR(-ENXIO); +} static inline int acpi_gpio_count(struct device *dev, const char *con_id) { return -ENODEV; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6be94ba4e980..865d948c60e6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -758,9 +758,9 @@ struct acpi_reference_args { #ifdef CONFIG_ACPI int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); -int acpi_dev_get_property_reference(struct acpi_device *adev, - const char *name, size_t index, - struct acpi_reference_args *args); +int acpi_node_get_property_reference(struct fwnode_handle *fwnode, + const char *name, size_t index, + struct acpi_reference_args *args); int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname, void **valptr); @@ -771,8 +771,8 @@ int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); -struct acpi_device *acpi_get_next_child(struct device *dev, - struct acpi_device *child); +struct fwnode_handle *acpi_get_next_subnode(struct device *dev, + struct fwnode_handle *subnode); 
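With the pieces above in place, a driver can walk a device's firmware child nodes without caring whether they are DT nodes, ACPI device objects, or ACPI data-only subnodes, and request a GPIO from each of them. A minimal sketch of that pattern, assuming a hypothetical driver and an illustrative "reset-gpios" property name (descriptor release is omitted for brevity):

#include <linux/gpio/consumer.h>
#include <linux/property.h>

static int example_claim_child_gpios(struct device *dev)
{
	struct fwnode_handle *child = NULL;

	while ((child = device_get_next_child_node(dev, child))) {
		/* resolves through acpi_node_get_gpiod() on ACPI systems */
		struct gpio_desc *desc =
			fwnode_get_named_gpiod(child, "reset-gpios");

		if (IS_ERR(desc))
			return PTR_ERR(desc);
	}
	return 0;
}
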
#else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, @@ -781,7 +781,7 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, return -ENXIO; } -static inline int acpi_dev_get_property_reference(struct acpi_device *adev, +static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, const char *name, const char *cells_name, size_t index, struct acpi_reference_args *args) { @@ -826,12 +826,11 @@ static inline int acpi_dev_prop_read(struct acpi_device *adev, return -ENXIO; } -static inline struct acpi_device *acpi_get_next_child(struct device *dev, - struct acpi_device *child) +static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, + struct fwnode_handle *subnode) { return NULL; } - #endif #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From f0489a5ef4d011e29f78021ad13a543e8769d619 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 4 Sep 2015 13:47:23 +0530 Subject: PM / OPP: Rename opp init/free table routines The free-table routines are the opposite of the init-table ones, and must be named to make that clear. The opposite of 'init' is 'exit', but that name doesn't really suit here. Replace 'init' with 'add' and 'free' with 'remove'. Reported-by: Pavel Machek Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-imx/mach-imx6q.c | 2 +- drivers/base/power/opp.c | 41 ++++++++++++++++++------------------ drivers/cpufreq/arm_big_little.h | 2 +- drivers/cpufreq/arm_big_little_dt.c | 4 ++-- drivers/cpufreq/cpufreq-dt.c | 6 +++--- drivers/cpufreq/exynos5440-cpufreq.c | 6 +++--- drivers/cpufreq/imx6q-cpufreq.c | 6 +++--- drivers/cpufreq/mt8173-cpufreq.c | 6 +++--- include/linux/pm_opp.h | 16 +++++++------- 9 files changed, 44 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 9602cc12d2f1..3286eec91d92 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -350,7 +350,7 @@ static void __init imx6q_opp_init(void) return; } - if (of_init_opp_table(cpu_dev)) { + if (of_add_opp_table(cpu_dev)) { pr_warn("failed to init OPP table\n"); goto put_node; } diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index c3a738652d13..9f0a2929821b 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -828,8 +828,8 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * The opp is made available by default and it can be controlled using * dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove. * - * NOTE: "dynamic" parameter impacts OPPs added by the of_init_opp_table and - * freed by of_free_opp_table. + * NOTE: "dynamic" parameter impacts OPPs added by the of_add_opp_table and + * freed by of_remove_opp_table. * * Locking: The internal device_opp and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks @@ -1213,7 +1213,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); #ifdef CONFIG_OF /** - * of_free_opp_table() - Free OPP table entries created from static DT entries + * of_remove_opp_table() - Free OPP table entries created from static DT entries * @dev: device pointer used to lookup device OPPs. * * Free OPPs created using static entries present in DT.
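At a call site the new pairing reads naturally. A hedged sketch of a probe path under the renamed API (the setup helper is purely illustrative, and the following commit renames these routines once more with a dev_pm_opp_of_ prefix):

#include <linux/pm_opp.h>

static int example_probe(struct device *dev)
{
	int ret;

	ret = of_add_opp_table(dev);		/* was of_init_opp_table() */
	if (ret)
		return ret;

	ret = example_setup_cpufreq(dev);	/* hypothetical setup step */
	if (ret)
		of_remove_opp_table(dev);	/* was of_free_opp_table() */

	return ret;
}
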
@@ -1224,7 +1224,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. */ -void of_free_opp_table(struct device *dev) +void of_remove_opp_table(struct device *dev) { struct device_opp *dev_opp; struct dev_pm_opp *opp, *tmp; @@ -1259,9 +1259,9 @@ void of_free_opp_table(struct device *dev) unlock: mutex_unlock(&dev_opp_list_lock); } -EXPORT_SYMBOL_GPL(of_free_opp_table); +EXPORT_SYMBOL_GPL(of_remove_opp_table); -void of_cpumask_free_opp_table(cpumask_var_t cpumask) +void of_cpumask_remove_opp_table(cpumask_var_t cpumask) { struct device *cpu_dev; int cpu; @@ -1276,10 +1276,10 @@ void of_cpumask_free_opp_table(cpumask_var_t cpumask) continue; } - of_free_opp_table(cpu_dev); + of_remove_opp_table(cpu_dev); } } -EXPORT_SYMBOL_GPL(of_cpumask_free_opp_table); +EXPORT_SYMBOL_GPL(of_cpumask_remove_opp_table); /* Returns opp descriptor node for a device, caller must do of_node_put() */ static struct device_node *_of_get_opp_desc_node(struct device *dev) @@ -1295,8 +1295,7 @@ static struct device_node *_of_get_opp_desc_node(struct device *dev) } /* Initializes OPP tables based on new bindings */ -static int _of_init_opp_table_v2(struct device *dev, - struct device_node *opp_np) +static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) { struct device_node *np; struct device_opp *dev_opp; @@ -1338,13 +1337,13 @@ static int _of_init_opp_table_v2(struct device *dev, return 0; free_table: - of_free_opp_table(dev); + of_remove_opp_table(dev); return ret; } /* Initializes OPP tables based on old-deprecated bindings */ -static int _of_init_opp_table_v1(struct device *dev) +static int _of_add_opp_table_v1(struct device *dev) { const struct property *prop; const __be32 *val; @@ -1381,7 +1380,7 @@ static int _of_init_opp_table_v1(struct device *dev) } /** - * of_init_opp_table() - Initialize opp table from device tree + * of_add_opp_table() - Initialize opp table from device tree * @dev: device pointer used to lookup device OPPs. * * Register the initial OPP table with the OPP library for given device. @@ -1403,7 +1402,7 @@ static int _of_init_opp_table_v1(struct device *dev) * -ENODATA when empty 'operating-points' property is found * -EINVAL when invalid entries are found in opp-v2 table */ -int of_init_opp_table(struct device *dev) +int of_add_opp_table(struct device *dev) { struct device_node *opp_np; int ret; @@ -1418,17 +1417,17 @@ int of_init_opp_table(struct device *dev) * Try old-deprecated bindings for backward compatibility with * older dtbs. 
*/ - return _of_init_opp_table_v1(dev); + return _of_add_opp_table_v1(dev); } - ret = _of_init_opp_table_v2(dev, opp_np); + ret = _of_add_opp_table_v2(dev, opp_np); of_node_put(opp_np); return ret; } -EXPORT_SYMBOL_GPL(of_init_opp_table); +EXPORT_SYMBOL_GPL(of_add_opp_table); -int of_cpumask_init_opp_table(cpumask_var_t cpumask) +int of_cpumask_add_opp_table(cpumask_var_t cpumask) { struct device *cpu_dev; int cpu, ret = 0; @@ -1443,20 +1442,20 @@ int of_cpumask_init_opp_table(cpumask_var_t cpumask) continue; } - ret = of_init_opp_table(cpu_dev); + ret = of_add_opp_table(cpu_dev); if (ret) { pr_err("%s: couldn't find opp table for cpu:%d, %d\n", __func__, cpu, ret); /* Free all other OPPs */ - of_cpumask_free_opp_table(cpumask); + of_cpumask_remove_opp_table(cpumask); break; } } return ret; } -EXPORT_SYMBOL_GPL(of_cpumask_init_opp_table); +EXPORT_SYMBOL_GPL(of_cpumask_add_opp_table); /* Required only for V1 bindings, as v2 can manage it from DT itself */ int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index a211f7db9d32..d7ac320ead23 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -28,7 +28,7 @@ struct cpufreq_arm_bL_ops { /* * This must set opp table for cpu_dev in a similar way as done by - * of_init_opp_table(). + * of_add_opp_table(). */ int (*init_opp_table)(struct device *cpu_dev); diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c index 36d91dba2965..47ac0ff54e3a 100644 --- a/drivers/cpufreq/arm_big_little_dt.c +++ b/drivers/cpufreq/arm_big_little_dt.c @@ -54,7 +54,7 @@ static int dt_init_opp_table(struct device *cpu_dev) return -ENOENT; } - ret = of_init_opp_table(cpu_dev); + ret = of_add_opp_table(cpu_dev); of_node_put(np); return ret; @@ -82,7 +82,7 @@ static struct cpufreq_arm_bL_ops dt_bL_ops = { .name = "dt-bl", .get_transition_latency = dt_get_transition_latency, .init_opp_table = dt_init_opp_table, - .free_opp_table = of_free_opp_table, + .free_opp_table = of_remove_opp_table, }; static int generic_bL_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 7c0d70e2a861..df8333bb4f56 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -238,7 +238,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * * OPPs might be populated at runtime, don't check for error here */ - of_cpumask_init_opp_table(policy->cpus); + of_cpumask_add_opp_table(policy->cpus); /* * But we need OPP table to function so if it is not there let's @@ -368,7 +368,7 @@ out_free_cpufreq_table: out_free_priv: kfree(priv); out_free_opp: - of_cpumask_free_opp_table(policy->cpus); + of_cpumask_remove_opp_table(policy->cpus); out_node_put: of_node_put(np); out_put_reg_clk: @@ -385,7 +385,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - of_cpumask_free_opp_table(policy->related_cpus); + of_cpumask_remove_opp_table(policy->related_cpus); clk_put(policy->clk); if (!IS_ERR(priv->cpu_reg)) regulator_put(priv->cpu_reg); diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c index 21a90ed7f3d8..fc46813d3b83 100644 --- a/drivers/cpufreq/exynos5440-cpufreq.c +++ b/drivers/cpufreq/exynos5440-cpufreq.c @@ -360,7 +360,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) goto err_put_node; } 
- ret = of_init_opp_table(dvfs_info->dev); + ret = of_add_opp_table(dvfs_info->dev); if (ret) { dev_err(dvfs_info->dev, "failed to init OPP table: %d\n", ret); goto err_put_node; @@ -424,7 +424,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) err_free_table: dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); err_free_opp: - of_free_opp_table(dvfs_info->dev); + of_remove_opp_table(dvfs_info->dev); err_put_node: of_node_put(np); dev_err(&pdev->dev, "%s: failed initialization\n", __func__); @@ -435,7 +435,7 @@ static int exynos_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&exynos_driver); dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); - of_free_opp_table(dvfs_info->dev); + of_remove_opp_table(dvfs_info->dev); return 0; } diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 380a90d3c57e..3802765aeb9f 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -202,7 +202,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) */ num = dev_pm_opp_get_opp_count(cpu_dev); if (num < 0) { - ret = of_init_opp_table(cpu_dev); + ret = of_add_opp_table(cpu_dev); if (ret < 0) { dev_err(cpu_dev, "failed to init OPP table: %d\n", ret); goto put_reg; @@ -312,7 +312,7 @@ free_freq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); out_free_opp: if (free_opp) - of_free_opp_table(cpu_dev); + of_remove_opp_table(cpu_dev); put_reg: if (!IS_ERR(arm_reg)) regulator_put(arm_reg); @@ -340,7 +340,7 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) cpufreq_unregister_driver(&imx6q_cpufreq_driver); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); if (free_opp) - of_free_opp_table(cpu_dev); + of_remove_opp_table(cpu_dev); regulator_put(arm_reg); if (!IS_ERR(pu_reg)) regulator_put(pu_reg); diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index 49caed293a3b..519e83146e86 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -344,7 +344,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) /* Both presence and absence of sram regulator are valid cases. 
*/ sram_reg = regulator_get_exclusive(cpu_dev, "sram"); - ret = of_init_opp_table(cpu_dev); + ret = of_add_opp_table(cpu_dev); if (ret) { pr_warn("no OPP table for cpu%d\n", cpu); goto out_free_resources; @@ -378,7 +378,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) return 0; out_free_opp_table: - of_free_opp_table(cpu_dev); + of_remove_opp_table(cpu_dev); out_free_resources: if (!IS_ERR(proc_reg)) @@ -404,7 +404,7 @@ static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) if (!IS_ERR(info->inter_clk)) clk_put(info->inter_clk); - of_free_opp_table(info->cpu_dev); + of_remove_opp_table(info->cpu_dev); } static int mtk_cpufreq_init(struct cpufreq_policy *policy) diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index e817722ee3f0..20adcb4ce443 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -132,28 +132,28 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -int of_init_opp_table(struct device *dev); -void of_free_opp_table(struct device *dev); -int of_cpumask_init_opp_table(cpumask_var_t cpumask); -void of_cpumask_free_opp_table(cpumask_var_t cpumask); +int of_add_opp_table(struct device *dev); +void of_remove_opp_table(struct device *dev); +int of_cpumask_add_opp_table(cpumask_var_t cpumask); +void of_cpumask_remove_opp_table(cpumask_var_t cpumask); int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); #else -static inline int of_init_opp_table(struct device *dev) +static inline int of_add_opp_table(struct device *dev) { return -EINVAL; } -static inline void of_free_opp_table(struct device *dev) +static inline void of_remove_opp_table(struct device *dev) { } -static inline int of_cpumask_init_opp_table(cpumask_var_t cpumask) +static inline int of_cpumask_add_opp_table(cpumask_var_t cpumask) { return -ENOSYS; } -static inline void of_cpumask_free_opp_table(cpumask_var_t cpumask) +static inline void of_cpumask_remove_opp_table(cpumask_var_t cpumask) { } -- cgit v1.2.3 From 8f8d37b2537a28b5b2e3cb60dfc85a2a1303f99b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 4 Sep 2015 13:47:24 +0530 Subject: PM / OPP: Prefix exported opp routines with dev_pm_opp_ That's the naming convention followed in most of opp core, but few routines didn't follow this, fix them. Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar Acked-by: Shawn Guo Signed-off-by: Rafael J. 
Wysocki --- arch/arm/mach-imx/mach-imx6q.c | 2 +- drivers/base/power/opp.c | 41 ++++++++++++++++++------------------ drivers/cpufreq/arm_big_little.h | 2 +- drivers/cpufreq/arm_big_little_dt.c | 4 ++-- drivers/cpufreq/cpufreq-dt.c | 10 ++++----- drivers/cpufreq/exynos5440-cpufreq.c | 6 +++--- drivers/cpufreq/imx6q-cpufreq.c | 6 +++--- drivers/cpufreq/mt8173-cpufreq.c | 6 +++--- include/linux/pm_opp.h | 24 ++++++++++----------- 9 files changed, 51 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 3286eec91d92..3878494bd118 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -350,7 +350,7 @@ static void __init imx6q_opp_init(void) return; } - if (of_add_opp_table(cpu_dev)) { + if (dev_pm_opp_of_add_table(cpu_dev)) { pr_warn("failed to init OPP table\n"); goto put_node; } diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 9f0a2929821b..aeff1cfb46f2 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -828,8 +828,8 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * The opp is made available by default and it can be controlled using * dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove. * - * NOTE: "dynamic" parameter impacts OPPs added by the of_add_opp_table and - * freed by of_remove_opp_table. + * NOTE: "dynamic" parameter impacts OPPs added by the dev_pm_opp_of_add_table + * and freed by dev_pm_opp_of_remove_table. * * Locking: The internal device_opp and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks @@ -1213,7 +1213,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); #ifdef CONFIG_OF /** - * of_remove_opp_table() - Free OPP table entries created from static DT entries + * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT + * entries * @dev: device pointer used to lookup device OPPs. * * Free OPPs created using static entries present in DT. @@ -1224,7 +1225,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. 
*/ -void of_remove_opp_table(struct device *dev) +void dev_pm_opp_of_remove_table(struct device *dev) { struct device_opp *dev_opp; struct dev_pm_opp *opp, *tmp; @@ -1259,9 +1260,9 @@ void of_remove_opp_table(struct device *dev) unlock: mutex_unlock(&dev_opp_list_lock); } -EXPORT_SYMBOL_GPL(of_remove_opp_table); +EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); -void of_cpumask_remove_opp_table(cpumask_var_t cpumask) +void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) { struct device *cpu_dev; int cpu; @@ -1276,10 +1277,10 @@ void of_cpumask_remove_opp_table(cpumask_var_t cpumask) continue; } - of_remove_opp_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); } } -EXPORT_SYMBOL_GPL(of_cpumask_remove_opp_table); +EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table); /* Returns opp descriptor node for a device, caller must do of_node_put() */ static struct device_node *_of_get_opp_desc_node(struct device *dev) @@ -1337,7 +1338,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) return 0; free_table: - of_remove_opp_table(dev); + dev_pm_opp_of_remove_table(dev); return ret; } @@ -1380,7 +1381,7 @@ static int _of_add_opp_table_v1(struct device *dev) } /** - * of_add_opp_table() - Initialize opp table from device tree + * dev_pm_opp_of_add_table() - Initialize opp table from device tree * @dev: device pointer used to lookup device OPPs. * * Register the initial OPP table with the OPP library for given device. @@ -1402,7 +1403,7 @@ static int _of_add_opp_table_v1(struct device *dev) * -ENODATA when empty 'operating-points' property is found * -EINVAL when invalid entries are found in opp-v2 table */ -int of_add_opp_table(struct device *dev) +int dev_pm_opp_of_add_table(struct device *dev) { struct device_node *opp_np; int ret; @@ -1425,9 +1426,9 @@ int of_add_opp_table(struct device *dev) return ret; } -EXPORT_SYMBOL_GPL(of_add_opp_table); +EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table); -int of_cpumask_add_opp_table(cpumask_var_t cpumask) +int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask) { struct device *cpu_dev; int cpu, ret = 0; @@ -1442,23 +1443,23 @@ int of_cpumask_add_opp_table(cpumask_var_t cpumask) continue; } - ret = of_add_opp_table(cpu_dev); + ret = dev_pm_opp_of_add_table(cpu_dev); if (ret) { pr_err("%s: couldn't find opp table for cpu:%d, %d\n", __func__, cpu, ret); /* Free all other OPPs */ - of_cpumask_remove_opp_table(cpumask); + dev_pm_opp_of_cpumask_remove_table(cpumask); break; } } return ret; } -EXPORT_SYMBOL_GPL(of_cpumask_add_opp_table); +EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table); /* Required only for V1 bindings, as v2 can manage it from DT itself */ -int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { struct device_list_opp *list_dev; struct device_opp *dev_opp; @@ -1496,7 +1497,7 @@ out_rcu_read_unlock: return 0; } -EXPORT_SYMBOL_GPL(set_cpus_sharing_opps); +EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus); /* * Works only for OPP v2 bindings. @@ -1504,7 +1505,7 @@ EXPORT_SYMBOL_GPL(set_cpus_sharing_opps); * cpumask should be already set to mask of cpu_dev->id. * Returns -ENOENT if operating-points-v2 bindings aren't supported. 
*/ -int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { struct device_node *np, *tmp_np; struct device *tcpu_dev; @@ -1554,5 +1555,5 @@ put_cpu_node: of_node_put(np); return ret; } -EXPORT_SYMBOL_GPL(of_get_cpus_sharing_opps); +EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus); #endif diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index d7ac320ead23..b88889d9387e 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -28,7 +28,7 @@ struct cpufreq_arm_bL_ops { /* * This must set opp table for cpu_dev in a similar way as done by - * of_add_opp_table(). + * dev_pm_opp_of_add_table(). */ int (*init_opp_table)(struct device *cpu_dev); diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c index 47ac0ff54e3a..16ddeefe9443 100644 --- a/drivers/cpufreq/arm_big_little_dt.c +++ b/drivers/cpufreq/arm_big_little_dt.c @@ -54,7 +54,7 @@ static int dt_init_opp_table(struct device *cpu_dev) return -ENOENT; } - ret = of_add_opp_table(cpu_dev); + ret = dev_pm_opp_of_add_table(cpu_dev); of_node_put(np); return ret; @@ -82,7 +82,7 @@ static struct cpufreq_arm_bL_ops dt_bL_ops = { .name = "dt-bl", .get_transition_latency = dt_get_transition_latency, .init_opp_table = dt_init_opp_table, - .free_opp_table = of_remove_opp_table, + .free_opp_table = dev_pm_opp_of_remove_table, }; static int generic_bL_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index df8333bb4f56..90d64081ddb3 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -216,7 +216,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) } /* Get OPP-sharing information from "operating-points-v2" bindings */ - ret = of_get_cpus_sharing_opps(cpu_dev, policy->cpus); + ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus); if (ret) { /* * operating-points-v2 not supported, fallback to old method of @@ -238,7 +238,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * * OPPs might be populated at runtime, don't check for error here */ - of_cpumask_add_opp_table(policy->cpus); + dev_pm_opp_of_cpumask_add_table(policy->cpus); /* * But we need OPP table to function so if it is not there let's @@ -261,7 +261,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * OPP tables are initialized only for policy->cpu, do it for * others as well. 
*/ - ret = set_cpus_sharing_opps(cpu_dev, policy->cpus); + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); if (ret) dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); @@ -368,7 +368,7 @@ out_free_cpufreq_table: out_free_priv: kfree(priv); out_free_opp: - of_cpumask_remove_opp_table(policy->cpus); + dev_pm_opp_of_cpumask_remove_table(policy->cpus); out_node_put: of_node_put(np); out_put_reg_clk: @@ -385,7 +385,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - of_cpumask_remove_opp_table(policy->related_cpus); + dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); clk_put(policy->clk); if (!IS_ERR(priv->cpu_reg)) regulator_put(priv->cpu_reg); diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c index fc46813d3b83..c0f3373706f4 100644 --- a/drivers/cpufreq/exynos5440-cpufreq.c +++ b/drivers/cpufreq/exynos5440-cpufreq.c @@ -360,7 +360,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) goto err_put_node; } - ret = of_add_opp_table(dvfs_info->dev); + ret = dev_pm_opp_of_add_table(dvfs_info->dev); if (ret) { dev_err(dvfs_info->dev, "failed to init OPP table: %d\n", ret); goto err_put_node; @@ -424,7 +424,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) err_free_table: dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); err_free_opp: - of_remove_opp_table(dvfs_info->dev); + dev_pm_opp_of_remove_table(dvfs_info->dev); err_put_node: of_node_put(np); dev_err(&pdev->dev, "%s: failed initialization\n", __func__); @@ -435,7 +435,7 @@ static int exynos_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&exynos_driver); dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); - of_remove_opp_table(dvfs_info->dev); + dev_pm_opp_of_remove_table(dvfs_info->dev); return 0; } diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 3802765aeb9f..84fbc8e8fc56 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -202,7 +202,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) */ num = dev_pm_opp_get_opp_count(cpu_dev); if (num < 0) { - ret = of_add_opp_table(cpu_dev); + ret = dev_pm_opp_of_add_table(cpu_dev); if (ret < 0) { dev_err(cpu_dev, "failed to init OPP table: %d\n", ret); goto put_reg; @@ -312,7 +312,7 @@ free_freq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); out_free_opp: if (free_opp) - of_remove_opp_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); put_reg: if (!IS_ERR(arm_reg)) regulator_put(arm_reg); @@ -340,7 +340,7 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) cpufreq_unregister_driver(&imx6q_cpufreq_driver); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); if (free_opp) - of_remove_opp_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); regulator_put(arm_reg); if (!IS_ERR(pu_reg)) regulator_put(pu_reg); diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index 519e83146e86..83001dc5b646 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -344,7 +344,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) /* Both presence and absence of sram regulator are valid cases. 
*/ sram_reg = regulator_get_exclusive(cpu_dev, "sram"); - ret = of_add_opp_table(cpu_dev); + ret = dev_pm_opp_of_add_table(cpu_dev); if (ret) { pr_warn("no OPP table for cpu%d\n", cpu); goto out_free_resources; @@ -378,7 +378,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) return 0; out_free_opp_table: - of_remove_opp_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); out_free_resources: if (!IS_ERR(proc_reg)) @@ -404,7 +404,7 @@ static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) if (!IS_ERR(info->inter_clk)) clk_put(info->inter_clk); - of_remove_opp_table(info->cpu_dev); + dev_pm_opp_of_remove_table(info->cpu_dev); } static int mtk_cpufreq_init(struct cpufreq_policy *policy) diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 20adcb4ce443..9a2e50337af9 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -132,37 +132,37 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -int of_add_opp_table(struct device *dev); -void of_remove_opp_table(struct device *dev); -int of_cpumask_add_opp_table(cpumask_var_t cpumask); -void of_cpumask_remove_opp_table(cpumask_var_t cpumask); -int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); -int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_of_add_table(struct device *dev); +void dev_pm_opp_of_remove_table(struct device *dev); +int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask); +void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask); +int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else -static inline int of_add_opp_table(struct device *dev) +static inline int dev_pm_opp_of_add_table(struct device *dev) { return -EINVAL; } -static inline void of_remove_opp_table(struct device *dev) +static inline void dev_pm_opp_of_remove_table(struct device *dev) { } -static inline int of_cpumask_add_opp_table(cpumask_var_t cpumask) +static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask) { return -ENOSYS; } -static inline void of_cpumask_remove_opp_table(cpumask_var_t cpumask) +static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) { } -static inline int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { return -ENOSYS; } -static inline int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { return -ENOSYS; } -- cgit v1.2.3 From c40a2c8817e42273a4627c48c884b805475a733f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 7 Sep 2015 16:05:38 +0200 Subject: CDC: common parser for extra headers CDC drivers all implement their own parser for the extra headers. This patch fixes the code duplication by introducing a single common parser in usbnet. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller
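A hedged sketch of how a CDC driver would consume the shared parser instead of open-coding its own descriptor walk (the bind wrapper and error policy are illustrative; real callers pass the interface's extra descriptor bytes):

#include <linux/usb.h>
#include <linux/usb/cdc.h>

static int example_bind(struct usb_interface *intf, u8 *buf, int buflen)
{
	struct usb_cdc_parsed_header hdr;
	int cnt;

	/* one call replaces the per-driver switch over CS_INTERFACE blobs */
	cnt = cdc_parse_cdc_header(&hdr, intf, buf, buflen);
	if (cnt < 0)
		return cnt;	/* duplicate header/ether/MDLM descriptors */

	if (!hdr.usb_cdc_union_desc)
		return -EINVAL;	/* this hypothetical driver needs a union */

	/* hdr.usb_cdc_ether_desc, hdr.phonet_magic_present, etc. as needed */
	return 0;
}
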
--- drivers/net/usb/usbnet.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/cdc.h | 47 ++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 include/linux/usb/cdc.h (limited to 'include/linux') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index b4cf10781348..060918f49fea 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -42,6 +42,7 @@ #include #include #include +#include <linux/usb/cdc.h> #include #include #include @@ -1962,6 +1963,143 @@ out: return err; } +int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, + struct usb_interface *intf, + u8 *buffer, + int buflen) +{ + /* duplicates are ignored */ + struct usb_cdc_union_desc *union_header = NULL; + + /* duplicates are not tolerated */ + struct usb_cdc_header_desc *header = NULL; + struct usb_cdc_ether_desc *ether = NULL; + struct usb_cdc_mdlm_detail_desc *detail = NULL; + struct usb_cdc_mdlm_desc *desc = NULL; + + unsigned int elength; + int cnt = 0; + + memset(hdr, 0x00, sizeof(struct usb_cdc_parsed_header)); + hdr->phonet_magic_present = false; + while (buflen > 0) { + elength = buffer[0]; + if (!elength) { + dev_err(&intf->dev, "skipping garbage byte\n"); + elength = 1; + goto next_desc; + } + if (buffer[1] != USB_DT_CS_INTERFACE) { + dev_err(&intf->dev, "skipping garbage\n"); + goto next_desc; + } + + switch (buffer[2]) { + case USB_CDC_UNION_TYPE: /* we've found it */ + if (elength < sizeof(struct usb_cdc_union_desc)) + goto next_desc; + if (union_header) { + dev_err(&intf->dev, "More than one union descriptor, skipping ...\n"); + goto next_desc; + } + union_header = (struct usb_cdc_union_desc *)buffer; + break; + case USB_CDC_COUNTRY_TYPE: + if (elength < sizeof(struct usb_cdc_country_functional_desc)) + goto next_desc; + hdr->usb_cdc_country_functional_desc = + (struct usb_cdc_country_functional_desc *)buffer; + break; + case USB_CDC_HEADER_TYPE: + if (elength != sizeof(struct usb_cdc_header_desc)) + goto next_desc; + if (header) + return -EINVAL; + header = (struct usb_cdc_header_desc *)buffer; + break; + case USB_CDC_ACM_TYPE: + if (elength < sizeof(struct usb_cdc_acm_descriptor)) + goto next_desc; + hdr->usb_cdc_acm_descriptor = + (struct usb_cdc_acm_descriptor *)buffer; + break; + case USB_CDC_ETHERNET_TYPE: + if (elength != sizeof(struct usb_cdc_ether_desc)) + goto next_desc; + if (ether) + return -EINVAL; + ether = (struct usb_cdc_ether_desc *)buffer; + break; + case USB_CDC_CALL_MANAGEMENT_TYPE: + if (elength < sizeof(struct usb_cdc_call_mgmt_descriptor)) + goto next_desc; + hdr->usb_cdc_call_mgmt_descriptor = + (struct usb_cdc_call_mgmt_descriptor *)buffer; + break; + case USB_CDC_DMM_TYPE: + if (elength < sizeof(struct usb_cdc_dmm_desc)) + goto next_desc; + hdr->usb_cdc_dmm_desc = + (struct usb_cdc_dmm_desc *)buffer; + break; + case USB_CDC_MDLM_TYPE: + if (elength < sizeof(struct usb_cdc_mdlm_desc)) + goto next_desc; + if (desc) + return -EINVAL; + desc = (struct usb_cdc_mdlm_desc *)buffer; + break; + case USB_CDC_MDLM_DETAIL_TYPE: + if (elength < sizeof(struct usb_cdc_mdlm_detail_desc)) + goto next_desc; + if (detail) + return -EINVAL; + detail = (struct usb_cdc_mdlm_detail_desc *)buffer; + break; + case USB_CDC_NCM_TYPE: + if (elength < sizeof(struct usb_cdc_ncm_desc)) + goto next_desc; + hdr->usb_cdc_ncm_desc = (struct usb_cdc_ncm_desc *)buffer; + break; + case USB_CDC_MBIM_TYPE: + if (elength < sizeof(struct usb_cdc_mbim_desc)) + goto next_desc; + + hdr->usb_cdc_mbim_desc = (struct usb_cdc_mbim_desc *)buffer; + break; + case
USB_CDC_MBIM_EXTENDED_TYPE: + if (elength < sizeof(struct usb_cdc_mbim_extended_desc)) + break; + hdr->usb_cdc_mbim_extended_desc = + (struct usb_cdc_mbim_extended_desc *)buffer; + break; + case CDC_PHONET_MAGIC_NUMBER: + hdr->phonet_magic_present = true; + break; + default: + /* + * there are LOTS more CDC descriptors that + * could legitimately be found here. + */ + dev_dbg(&intf->dev, "Ignoring descriptor: type %02x, length %u\n", + buffer[2], elength); + goto next_desc; + } + cnt++; +next_desc: + buflen -= elength; + buffer += elength; + } + hdr->usb_cdc_union_desc = union_header; + hdr->usb_cdc_header_desc = header; + hdr->usb_cdc_mdlm_detail_desc = detail; + hdr->usb_cdc_mdlm_desc = desc; + hdr->usb_cdc_ether_desc = ether; + return cnt; +} + +EXPORT_SYMBOL(cdc_parse_cdc_header); + /* * The function can't be called inside suspend/resume callback, * otherwise deadlock will be caused. diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h new file mode 100644 index 000000000000..cd8f2e1c3bdf --- /dev/null +++ b/include/linux/usb/cdc.h @@ -0,0 +1,47 @@ +/* + * USB CDC common helpers + * + * Copyright (c) 2015 Oliver Neukum + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <uapi/linux/usb/cdc.h> + +/* + * unofficial magic numbers + */ + +#define CDC_PHONET_MAGIC_NUMBER 0xAB + +/* + * parsing CDC headers + */ + +struct usb_cdc_parsed_header { + struct usb_cdc_union_desc *usb_cdc_union_desc; + struct usb_cdc_header_desc *usb_cdc_header_desc; + + struct usb_cdc_call_mgmt_descriptor *usb_cdc_call_mgmt_descriptor; + struct usb_cdc_acm_descriptor *usb_cdc_acm_descriptor; + struct usb_cdc_country_functional_desc *usb_cdc_country_functional_desc; + struct usb_cdc_network_terminal_desc *usb_cdc_network_terminal_desc; + struct usb_cdc_ether_desc *usb_cdc_ether_desc; + struct usb_cdc_dmm_desc *usb_cdc_dmm_desc; + struct usb_cdc_mdlm_desc *usb_cdc_mdlm_desc; + struct usb_cdc_mdlm_detail_desc *usb_cdc_mdlm_detail_desc; + struct usb_cdc_obex_desc *usb_cdc_obex_desc; + struct usb_cdc_ncm_desc *usb_cdc_ncm_desc; + struct usb_cdc_mbim_desc *usb_cdc_mbim_desc; + struct usb_cdc_mbim_extended_desc *usb_cdc_mbim_extended_desc; + + bool phonet_magic_present; +}; + + +int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, + struct usb_interface *intf, + u8 *buffer, + int buflen); -- cgit v1.2.3 From ad1e7b97b3adb91d46f3adb70a7867a50fc274cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Sep 2015 13:25:03 -0700 Subject: cdc: Fix build warning. In file included from drivers/usb/gadget/function/u_serial.h:16:0, from drivers/usb/gadget/function/f_acm.c:23: >> include/linux/usb/cdc.h:47:5: warning: 'struct usb_interface' declared inside parameter list int buflen); ^ >> include/linux/usb/cdc.h:47:5: warning: its scope is only this definition or declaration, which is probably not what you want Reported-by: kbuild test robot Signed-off-by: David S.
Miller --- include/linux/usb/cdc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index cd8f2e1c3bdf..959d0c838113 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -40,7 +40,7 @@ struct usb_cdc_parsed_header { bool phonet_magic_present; }; - +struct usb_interface; int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, struct usb_interface *intf, u8 *buffer, -- cgit v1.2.3 From b84ee0d7f375ed7840c7c110d46eac24cf94b2a2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 16 Sep 2015 11:10:16 +1000 Subject: cdc: add header guards Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/linux/usb/cdc.h | 4 ++++ include/uapi/linux/usb/cdc.h | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 959d0c838113..b5706f94ee9e 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -7,6 +7,8 @@ * modify it under the terms of the GNU General Public License * version 2 as published by the Free Software Foundation. */ +#ifndef __LINUX_USB_CDC_H +#define __LINUX_USB_CDC_H #include @@ -45,3 +47,5 @@ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, struct usb_interface *intf, u8 *buffer, int buflen); + +#endif /* __LINUX_USB_CDC_H */ diff --git a/include/uapi/linux/usb/cdc.h b/include/uapi/linux/usb/cdc.h index b6a9cdd6e096..e2bc417b243b 100644 --- a/include/uapi/linux/usb/cdc.h +++ b/include/uapi/linux/usb/cdc.h @@ -6,8 +6,8 @@ * firmware based USB peripherals. */ -#ifndef __LINUX_USB_CDC_H -#define __LINUX_USB_CDC_H +#ifndef __UAPI_LINUX_USB_CDC_H +#define __UAPI_LINUX_USB_CDC_H #include @@ -444,4 +444,4 @@ struct usb_cdc_ncm_ndp_input_size { #define USB_CDC_NCM_CRC_NOT_APPENDED 0x00 #define USB_CDC_NCM_CRC_APPENDED 0x01 -#endif /* __LINUX_USB_CDC_H */ +#endif /* __UAPI_LINUX_USB_CDC_H */ -- cgit v1.2.3 From a6f5f0dd4e21191ce35030dd4d6421e1cca10ee4 Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Tue, 15 Sep 2015 10:32:46 -0700 Subject: PM / sleep: Report interrupt that caused system wakeup Add a sysfs attribute, /sys/power/pm_wakeup_irq, reporting the IRQ number of the first wakeup interrupt (that is, the first interrupt from an IRQ line armed for system wakeup) seen by the kernel during the most recent system suspend/resume cycle. This feature will be useful for system wakeup diagnostics of spurious wakeup interrupts. Signed-off-by: Alexandra Yates [ rjw: Fixed up pm_wakeup_irq definition ] Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-power | 12 ++++++++++++ drivers/base/power/wakeup.c | 12 ++++++++++++ include/linux/suspend.h | 3 +++ kernel/irq/pm.c | 2 +- kernel/power/main.c | 17 +++++++++++++++++ 5 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index f4551816329e..50b368d490b5 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -256,3 +256,15 @@ Description: Writing a "1" enables this printing while writing a "0" disables it. The default value is "0". Reading from this file will display the current value. 
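Ahead of the new ABI entry below, a hedged sketch of how userspace might consume the attribute after a resume (plain C; when no wakeup interrupt was recorded, the read fails, since the show handler added further down returns -ENODATA):

#include <stdio.h>

int main(void)
{
	unsigned int irq;
	FILE *f = fopen("/sys/power/pm_wakeup_irq", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%u", &irq) == 1)
		printf("last wakeup IRQ: %u\n", irq);
	else
		printf("no wakeup IRQ recorded\n");	/* read gave -ENODATA */
	fclose(f);
	return 0;
}
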
+ +What: /sys/power/pm_wakeup_irq +Date: April 2015 +Contact: Alexandra Yates +Description: + The /sys/power/pm_wakeup_irq file reports to user space the IRQ + number of the first wakeup interrupt (that is, the first + interrupt from an IRQ line armed for system wakeup) seen by the + kernel during the most recent system suspend/resume cycle. + + This output is useful for system wakeup diagnostics of spurious + wakeup interrupts. diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 51f15bc15774..3b361ecfaffc 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -25,6 +25,9 @@ */ bool events_check_enabled __read_mostly; +/* First wakeup IRQ seen by the kernel in the last cycle. */ +unsigned int pm_wakeup_irq __read_mostly; + /* If set and the system is suspending, terminate the suspend. */ static bool pm_abort_suspend __read_mostly; @@ -868,6 +871,15 @@ EXPORT_SYMBOL_GPL(pm_system_wakeup); void pm_wakeup_clear(void) { pm_abort_suspend = false; + pm_wakeup_irq = 0; +} + +void pm_system_irq_wakeup(unsigned int irq_number) +{ + if (pm_wakeup_irq == 0) { + pm_wakeup_irq = irq_number; + pm_system_wakeup(); + } } /** diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5efe743ce1e8..33aaf9a9596c 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -387,10 +387,12 @@ extern int unregister_pm_notifier(struct notifier_block *nb); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; +extern unsigned int pm_wakeup_irq; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_wakeup_clear(void); +extern void pm_system_irq_wakeup(unsigned int irq_number); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); @@ -440,6 +442,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} static inline void pm_wakeup_clear(void) {} +static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 21c62617a35a..e80c4400118a 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -21,7 +21,7 @@ bool irq_pm_check_wakeup(struct irq_desc *desc) desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; desc->depth++; irq_disable(desc); - pm_system_wakeup(); + pm_system_irq_wakeup(irq_desc_get_irq(desc)); return true; } return false; diff --git a/kernel/power/main.c b/kernel/power/main.c index 63d395b5df93..b2dd4d999900 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -272,6 +272,22 @@ static inline void pm_print_times_init(void) { pm_print_times_enabled = !!initcall_debug; } + +static ssize_t pm_wakeup_irq_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_wakeup_irq ? 
sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA; +} + +static ssize_t pm_wakeup_irq_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + return -EINVAL; +} +power_attr(pm_wakeup_irq); + #else /* !CONFIG_PM_SLEEP_DEBUG */ static inline void pm_print_times_init(void) {} #endif /* CONFIG_PM_SLEEP_DEBUG */ @@ -604,6 +620,7 @@ static struct attribute * g[] = { #endif #ifdef CONFIG_PM_SLEEP_DEBUG &pm_print_times_attr.attr, + &pm_wakeup_irq_attr.attr, #endif #endif #ifdef CONFIG_FREEZER -- cgit v1.2.3 From 1ed1328792ff46e4bb86a3d7f7be2971f4549f6c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2015 12:53:17 -0400 Subject: sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem Note: This commit was originally committed as d59cfc09c32a but got reverted by 0c986253b939 due to the performance regression from the percpu_rwsem write down/up operations added to cgroup task migration path. percpu_rwsem changes which alleviate the performance issue are pending for v4.4-rc1 merge window. Re-apply. The cgroup side of threadgroup locking uses signal_struct->group_rwsem to synchronize against threadgroup changes. This per-process rwsem adds small overhead to thread creation, exit and exec paths, forces cgroup code paths to do lock-verify-unlock-retry dance in a couple places and makes it impossible to atomically perform operations across multiple processes. This patch replaces signal_struct->group_rwsem with a global percpu_rwsem cgroup_threadgroup_rwsem which is cheaper on the reader side and contained in cgroups proper. This patch converts one-to-one. This does make writer side heavier and lower the granularity; however, cgroup process migration is a fairly cold path, we do want to optimize thread operations over it and cgroup migration operations don't take enough time for the lower granularity to matter. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 27 ++++++++++++++-- include/linux/init_task.h | 8 ----- include/linux/sched.h | 12 ------- kernel/cgroup.c | 77 ++++++++++++--------------------------------- kernel/fork.c | 4 --- 5 files changed, 45 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8492721b39be..4d8fcf2187dc 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -473,8 +473,31 @@ struct cgroup_subsys { unsigned int depends_on; }; -void cgroup_threadgroup_change_begin(struct task_struct *tsk); -void cgroup_threadgroup_change_end(struct task_struct *tsk); +extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + +/** + * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_begin() and allows cgroup operations to + * synchronize against threadgroup changes using a percpu_rw_semaphore. + */ +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + percpu_down_read(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_end(). Counterpart of + * cgroup_threadcgroup_change_begin(). 
+ */ +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) +{ + percpu_up_read(&cgroup_threadgroup_rwsem); +} #else /* CONFIG_CGROUPS */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e38681f4912d..d0b380ee7d67 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,13 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#ifdef CONFIG_CGROUPS -#define INIT_GROUP_RWSEM(sig) \ - .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), -#else -#define INIT_GROUP_RWSEM(sig) -#endif - #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -64,7 +57,6 @@ extern struct fs_struct init_fs; INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ - INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..a4ab9daa387c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -762,18 +762,6 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif -#ifdef CONFIG_CGROUPS - /* - * group_rwsem prevents new tasks from entering the threadgroup and - * member tasks from exiting,a more specifically, setting of - * PF_EXITING. fork and exit paths are protected with this rwsem - * using threadgroup_change_begin/end(). Users which require - * threadgroup to remain stable should use threadgroup_[un]lock() - * which also takes care of exec path. Currently, cgroup is the - * only user. - */ - struct rw_semaphore group_rwsem; -#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2c9eae6ad970..115091efa889 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,8 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); */ static DEFINE_SPINLOCK(release_agent_path_lock); +struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + #define cgroup_assert_mutex_or_rcu_locked() \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&cgroup_mutex), \ @@ -871,48 +874,6 @@ static struct css_set *find_css_set(struct css_set *old_cset, return cset; } -void cgroup_threadgroup_change_begin(struct task_struct *tsk) -{ - down_read(&tsk->signal->group_rwsem); -} - -void cgroup_threadgroup_change_end(struct task_struct *tsk) -{ - up_read(&tsk->signal->group_rwsem); -} - -/** - * threadgroup_lock - lock threadgroup - * @tsk: member task of the threadgroup to lock - * - * Lock the threadgroup @tsk belongs to. No new task is allowed to enter - * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or - * change ->group_leader/pid. This is useful for cases where the threadgroup - * needs to stay stable across blockable operations. - * - * fork and exit explicitly call threadgroup_change_{begin|end}() for - * synchronization. While held, no new task will be added to threadgroup - * and no existing live task will have its PF_EXITING set. - * - * de_thread() does threadgroup_change_{begin|end}() when a non-leader - * sub-thread becomes a new leader. - */ -static void threadgroup_lock(struct task_struct *tsk) -{ - down_write(&tsk->signal->group_rwsem); -} - -/** - * threadgroup_unlock - unlock threadgroup - * @tsk: member task of the threadgroup to unlock - * - * Reverse threadgroup_lock(). 
- */ -static inline void threadgroup_unlock(struct task_struct *tsk) -{ - up_write(&tsk->signal->group_rwsem); -} - static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) { struct cgroup *root_cgrp = kf_root->kn->priv; @@ -2113,9 +2074,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, lockdep_assert_held(&css_set_rwsem); /* - * We are synchronized through threadgroup_lock() against PF_EXITING - * setting such that we can't race against cgroup_exit() changing the - * css_set to init_css_set and dropping the old one. + * We are synchronized through cgroup_threadgroup_rwsem against + * PF_EXITING setting such that we can't race against cgroup_exit() + * changing the css_set to init_css_set and dropping the old one. */ WARN_ON_ONCE(tsk->flags & PF_EXITING); old_cset = task_css_set(tsk); @@ -2172,10 +2133,11 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) * @src_cset and add it to @preloaded_csets, which should later be cleaned * up by cgroup_migrate_finish(). * - * This function may be called without holding threadgroup_lock even if the - * target is a process. Threads may be created and destroyed but as long - * as cgroup_mutex is not dropped, no new css_set can be put into play and - * the preloaded css_sets are guaranteed to cover all migrations. + * This function may be called without holding cgroup_threadgroup_rwsem + * even if the target is a process. Threads may be created and destroyed + * but as long as cgroup_mutex is not dropped, no new css_set can be put + * into play and the preloaded css_sets are guaranteed to cover all + * migrations. */ static void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, @@ -2278,7 +2240,7 @@ err: * @threadgroup: whether @leader points to the whole process or a single task * * Migrate a process or task denoted by @leader to @cgrp. If migrating a - * process, the caller must be holding threadgroup_lock of @leader. The + * process, the caller must be holding cgroup_threadgroup_rwsem. The * caller is also responsible for invoking cgroup_migrate_add_src() and * cgroup_migrate_prepare_dst() on the targets before invoking this * function and following up with cgroup_migrate_finish(). @@ -2406,7 +2368,7 @@ out_release_tset: * @leader: the task or the leader of the threadgroup to be attached * @threadgroup: attach the whole threadgroup? * - * Call holding cgroup_mutex and threadgroup_lock of @leader. + * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. */ static int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup) @@ -2528,7 +2490,7 @@ retry_find_task: get_task_struct(tsk); rcu_read_unlock(); - threadgroup_lock(tsk); + percpu_down_write(&cgroup_threadgroup_rwsem); if (threadgroup) { if (!thread_group_leader(tsk)) { /* @@ -2538,7 +2500,7 @@ retry_find_task: * try again; this is * "double-double-toil-and-trouble-check locking". */ - threadgroup_unlock(tsk); + percpu_up_write(&cgroup_threadgroup_rwsem); put_task_struct(tsk); goto retry_find_task; } @@ -2548,7 +2510,7 @@ retry_find_task: if (!ret) ret = cgroup_attach_task(cgrp, tsk, threadgroup); - threadgroup_unlock(tsk); + percpu_up_write(&cgroup_threadgroup_rwsem); put_task_struct(tsk); out_unlock_cgroup: @@ -2751,17 +2713,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; last_task = task; - threadgroup_lock(task); + percpu_down_write(&cgroup_threadgroup_rwsem); /* raced against de_thread() from another thread? 
*/ if (!thread_group_leader(task)) { - threadgroup_unlock(task); + percpu_up_write(&cgroup_threadgroup_rwsem); put_task_struct(task); continue; } ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); - threadgroup_unlock(task); + percpu_up_write(&cgroup_threadgroup_rwsem); put_task_struct(task); if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) @@ -5083,6 +5045,7 @@ int __init cgroup_init(void) unsigned long key; int ssid, err; + BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); diff --git a/kernel/fork.c b/kernel/fork.c index 2845623fb582..7d5f0f118a63 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1149,10 +1149,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); sched_autogroup_fork(sig); -#ifdef CONFIG_CGROUPS - init_rwsem(&sig->group_rwsem); -#endif - sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; -- cgit v1.2.3 From 7b7d1968e4c8d8392e8e63906d45d0bcad079037 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Thu, 17 Sep 2015 05:23:20 +0000 Subject: regmap: irq: add support for chips who have separate unmask registers Some chips have separate unmask registers from mask registers for some consideration of concurrency SMP write performance. And this patch adds a flag for it. An user will be CSR SiRFSoC ARM chips. Signed-off-by: Guo Zeng Signed-off-by: Barry Song Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 31 ++++++++++++++++++++++++++++--- include/linux/regmap.h | 3 +++ 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 38d1f72d869c..e65eec2e7def 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -63,6 +63,7 @@ static void regmap_irq_sync_unlock(struct irq_data *data) struct regmap *map = d->map; int i, ret; u32 reg; + u32 unmask_offset; if (d->chip->runtime_pm) { ret = pm_runtime_get_sync(map->dev); @@ -79,12 +80,28 @@ static void regmap_irq_sync_unlock(struct irq_data *data) for (i = 0; i < d->chip->num_regs; i++) { reg = d->chip->mask_base + (i * map->reg_stride * d->irq_reg_stride); - if (d->chip->mask_invert) + if (d->chip->mask_invert) { ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], ~d->mask_buf[i]); - else + } else if (d->chip->unmask_base) { + /* set mask with mask_base register */ + ret = regmap_update_bits(d->map, reg, + d->mask_buf_def[i], ~d->mask_buf[i]); + if (ret < 0) + dev_err(d->map->dev, + "Failed to sync unmasks in %x\n", + reg); + unmask_offset = d->chip->unmask_base - + d->chip->mask_base; + /* clear mask with unmask_base register */ + ret = regmap_update_bits(d->map, + reg + unmask_offset, + d->mask_buf_def[i], + d->mask_buf[i]); + } else { ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], d->mask_buf[i]); + } if (ret != 0) dev_err(d->map->dev, "Failed to sync masks in %x\n", reg); @@ -339,6 +356,7 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, int i; int ret = -ENOMEM; u32 reg; + u32 unmask_offset; if (chip->num_regs <= 0) return -EINVAL; @@ -420,7 +438,14 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, if (chip->mask_invert) ret = regmap_update_bits(map, reg, d->mask_buf[i], ~d->mask_buf[i]); - else + else if 
(d->chip->unmask_base) { + unmask_offset = d->chip->unmask_base - + d->chip->mask_base; + ret = regmap_update_bits(d->map, + reg + unmask_offset, + d->mask_buf[i], + d->mask_buf[i]); + } else ret = regmap_update_bits(map, reg, d->mask_buf[i], d->mask_buf[i]); if (ret != 0) { diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..f98fe9f5faa2 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -800,6 +800,8 @@ struct regmap_irq { * * @status_base: Base status register address. * @mask_base: Base mask register address. + * @unmask_base: Base unmask register address. for chips who have + * separate mask and unmask registers * @ack_base: Base ack address. If zero then the chip is clear on read. * Using zero value is possible with @use_ack bit. * @wake_base: Base address for wake enables. If zero unsupported. @@ -820,6 +822,7 @@ struct regmap_irq_chip { unsigned int status_base; unsigned int mask_base; + unsigned int unmask_base; unsigned int ack_base; unsigned int wake_base; unsigned int irq_reg_stride; -- cgit v1.2.3 From a650fdd9427f1f5236f83d2d8137bea9b452fa53 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Thu, 17 Sep 2015 05:23:21 +0000 Subject: regmap: irq: add ack_invert flag for chips using cleared bits as ack An user will be CSR SiRFSoC ARM chips. Signed-off-by: Guo Zeng Signed-off-by: Barry Song Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 12 ++++++++++-- include/linux/regmap.h | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index e65eec2e7def..8d16db533527 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -133,7 +133,11 @@ static void regmap_irq_sync_unlock(struct irq_data *data) if (d->mask_buf[i] && (d->chip->ack_base || d->chip->use_ack)) { reg = d->chip->ack_base + (i * map->reg_stride * d->irq_reg_stride); - ret = regmap_write(map, reg, d->mask_buf[i]); + /* some chips ack by write 0 */ + if (d->chip->ack_invert) + ret = regmap_write(map, reg, ~d->mask_buf[i]); + else + ret = regmap_write(map, reg, d->mask_buf[i]); if (ret != 0) dev_err(d->map->dev, "Failed to ack 0x%x: %d\n", reg, ret); @@ -470,7 +474,11 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, if (d->status_buf[i] && (chip->ack_base || chip->use_ack)) { reg = chip->ack_base + (i * map->reg_stride * d->irq_reg_stride); - ret = regmap_write(map, reg, + if (chip->ack_invert) + ret = regmap_write(map, reg, + ~(d->status_buf[i] & d->mask_buf[i])); + else + ret = regmap_write(map, reg, d->status_buf[i] & d->mask_buf[i]); if (ret != 0) { dev_err(map->dev, "Failed to ack 0x%x: %d\n", diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f98fe9f5faa2..f36c9f96d32d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -809,6 +809,7 @@ struct regmap_irq { * @init_ack_masked: Ack all masked interrupts once during initalization. * @mask_invert: Inverted mask register: cleared bits are masked out. * @use_ack: Use @ack register even if it is zero. + * @ack_invert: Inverted ack register: cleared bits for ack. * @wake_invert: Inverted wake register: cleared bits are wake enabled. * @runtime_pm: Hold a runtime PM lock on the device when accessing it. 
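To illustrate how the unmask_base and ack_invert additions from these two patches combine in practice, a chip description might look like the sketch below; the chip name, register addresses, and IRQ layout are hypothetical, not taken from any real driver.

        static const struct regmap_irq example_irqs[] = {
                { .reg_offset = 0, .mask = BIT(0) },    /* hypothetical IRQ line */
        };

        static const struct regmap_irq_chip example_irq_chip = {
                .name        = "example",
                .status_base = 0x10,    /* hypothetical register addresses */
                .mask_base   = 0x14,    /* write 1 here to mask an IRQ */
                .unmask_base = 0x18,    /* separate register: write 1 to unmask */
                .ack_base    = 0x1c,
                .ack_invert  = true,    /* this chip acks with cleared bits */
                .irqs        = example_irqs,
                .num_irqs    = ARRAY_SIZE(example_irqs),
                .num_regs    = 1,
        };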
* @@ -829,6 +830,7 @@ struct regmap_irq_chip { bool init_ack_masked:1; bool mask_invert:1; bool use_ack:1; + bool ack_invert:1; bool wake_invert:1; bool runtime_pm:1; -- cgit v1.2.3 From c15f6ed3a18f10cdc33f64906ab353f17a6df114 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 17 Aug 2015 11:52:54 +0800 Subject: spi: bitbang: Replace spinlock with mutex chipselect (in the case of spi-gpio: spi_gpio_chipselect, which calls gpiod_set_raw_value_cansleep) can sleep, so we should not hold a spinlock while calling it from spi_bitbang_setup. This issue was introduced by this commit, which converted spi-gpio to cansleep variants: d9dda5a191 "spi: spi-gpio: Use 'cansleep' variants to access GPIO" Replacing the lock variable with a mutex fixes the issue: this is safe, as all instances where the lock is used are called from contexts that can sleep. Finally, update spi-ppc4xx and spi-s3c24xx to use mutex functions, as they directly hold the lock for a similar purpose. Signed-off-by: Nicolas Boichat Signed-off-by: Mark Brown --- drivers/spi/spi-bitbang.c | 17 +++++++---------- drivers/spi/spi-ppc4xx.c | 4 ++-- drivers/spi/spi-s3c24xx.c | 4 ++-- include/linux/spi/spi_bitbang.h | 2 +- 4 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c index 840a4984d365..ef43ef507c9a 100644 --- a/drivers/spi/spi-bitbang.c +++ b/drivers/spi/spi-bitbang.c @@ -180,7 +180,6 @@ int spi_bitbang_setup(struct spi_device *spi) { struct spi_bitbang_cs *cs = spi->controller_state; struct spi_bitbang *bitbang; - unsigned long flags; bitbang = spi_master_get_devdata(spi->master); @@ -210,12 +209,12 @@ int spi_bitbang_setup(struct spi_device *spi) */ /* deselect chip (low or high) */ - spin_lock_irqsave(&bitbang->lock, flags); + mutex_lock(&bitbang->lock); if (!bitbang->busy) { bitbang->chipselect(spi, BITBANG_CS_INACTIVE); ndelay(cs->nsecs); } - spin_unlock_irqrestore(&bitbang->lock, flags); + mutex_unlock(&bitbang->lock); return 0; } @@ -255,13 +254,12 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t) static int spi_bitbang_prepare_hardware(struct spi_master *spi) { struct spi_bitbang *bitbang; - unsigned long flags; bitbang = spi_master_get_devdata(spi); - spin_lock_irqsave(&bitbang->lock, flags); + mutex_lock(&bitbang->lock); bitbang->busy = 1; - spin_unlock_irqrestore(&bitbang->lock, flags); + mutex_unlock(&bitbang->lock); return 0; } @@ -378,13 +376,12 @@ static int spi_bitbang_transfer_one(struct spi_master *master, static int spi_bitbang_unprepare_hardware(struct spi_master *spi) { struct spi_bitbang *bitbang; - unsigned long flags; bitbang = spi_master_get_devdata(spi); - spin_lock_irqsave(&bitbang->lock, flags); + mutex_lock(&bitbang->lock); bitbang->busy = 0; - spin_unlock_irqrestore(&bitbang->lock, flags); + mutex_unlock(&bitbang->lock); return 0; } @@ -427,7 +424,7 @@ int spi_bitbang_start(struct spi_bitbang *bitbang) if (!master || !bitbang->chipselect) return -EINVAL; - spin_lock_init(&bitbang->lock); + mutex_init(&bitbang->lock); if (!master->mode_bits) master->mode_bits = SPI_CPOL | SPI_CPHA | bitbang->flags; diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 54fb984a3e17..dd3d0a218d8b 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -210,12 +210,12 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) if (in_8(&hw->regs->cdm) != cdm) out_8(&hw->regs->cdm, cdm); - spin_lock(&hw->bitbang.lock); +
mutex_lock(&hw->bitbang.lock); if (!hw->bitbang.busy) { hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE); /* Need to ndelay here? */ } - spin_unlock(&hw->bitbang.lock); + mutex_unlock(&hw->bitbang.lock); return 0; } diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c index f36bc320a807..4e7d1bfed7e6 100644 --- a/drivers/spi/spi-s3c24xx.c +++ b/drivers/spi/spi-s3c24xx.c @@ -198,12 +198,12 @@ static int s3c24xx_spi_setup(struct spi_device *spi) if (ret) return ret; - spin_lock(&hw->bitbang.lock); + mutex_lock(&hw->bitbang.lock); if (!hw->bitbang.busy) { hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE); /* need to ndelay for 0.5 clocktick ? */ } - spin_unlock(&hw->bitbang.lock); + mutex_unlock(&hw->bitbang.lock); return 0; } diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 85578d4be034..154788ed218c 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -4,7 +4,7 @@ #include struct spi_bitbang { - spinlock_t lock; + struct mutex lock; u8 busy; u8 use_dma; u8 flags; /* extra spi->mode support */ -- cgit v1.2.3 From 54552d03023cfd485cedf8d7471d1554139d58aa Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 2 Sep 2015 14:21:29 +0200 Subject: ieee802154: 6lowpan: check on valid 802.15.4 frame This patch adds frame control checks to check if the received frame is something which could contain a 6LoWPAN packet. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 25 +++++++++++++++++++++++++ include/net/mac802154.h | 15 +++++++++++++++ net/ieee802154/6lowpan/rx.c | 8 ++++++++ 3 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 1dc1f4ed4001..db01492814d3 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -205,6 +205,31 @@ enum { IEEE802154_SCAN_IN_PROGRESS = 0xfc, }; +/* frame control handling */ +#define IEEE802154_FCTL_FTYPE 0x0003 +#define IEEE802154_FCTL_INTRA_PAN 0x0040 + +#define IEEE802154_FTYPE_DATA 0x0001 + +/* + * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA + * @fc: frame control bytes in little-endian byteorder + */ +static inline int ieee802154_is_data(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE802154_FCTL_FTYPE)) == + cpu_to_le16(IEEE802154_FTYPE_DATA); +} + +/** + * ieee802154_is_intra_pan - check if intra pan id communication + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_intra_pan(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN); +} + /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: diff --git a/include/net/mac802154.h b/include/net/mac802154.h index b7f99615224b..32bd7c0467d4 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -249,6 +249,21 @@ struct ieee802154_ops { const bool on); }; +/** + * ieee802154_get_fc_from_skb - get the frame control field from an skb + * @skb: skb where the frame control field will be get from + */ +static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) +{ + /* return some invalid fc on failure */ + if (unlikely(skb->mac_len < 2)) { + WARN_ON(1); + return cpu_to_le16(0); + } + + return (__force __le16)__get_unaligned_memmove16(skb_mac_header(skb)); +} + /** * ieee802154_be64_to_le64 - copies and convert be64 to le64 * @le64_dst: le64 destination pointer diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c index 
45ce121369c2..f7eb091203a8 100644 --- a/net/ieee802154/6lowpan/rx.c +++ b/net/ieee802154/6lowpan/rx.c @@ -11,6 +11,7 @@ #include #include +#include #include #include "6lowpan_i.h" @@ -280,6 +281,13 @@ static inline bool lowpan_is_reserved(u8 dispatch) */ static inline bool lowpan_rx_h_check(struct sk_buff *skb) { + __le16 fc = ieee802154_get_fc_from_skb(skb); + + /* check on ieee802154 conform 6LoWPAN header */ + if (!ieee802154_is_data(fc) || + !ieee802154_is_intra_pan(fc)) + return false; + /* check if we can dereference the dispatch */ if (unlikely(!skb->len)) return false; -- cgit v1.2.3 From b07461a8e45b7a62ef7fb46e4f6ada66f63406a8 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Thu, 17 Sep 2015 10:09:37 -0500 Subject: PCI/AER: Clear error status registers during enumeration and restore AER errors might be recorded when powering-on devices. These errors can be ignored, so firmware usually clears them before the OS enumerates devices. However, firmware is not involved when devices are added via hotplug, so the OS may discover power-up errors that should be ignored. The same may happen when powering up devices when resuming after suspend. Clear the AER error status registers during enumeration and resume. [bhelgaas: changelog, remove repetitive comments] Signed-off-by: Taku Izumi Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 3 +++ drivers/pci/pcie/aer/aerdrv_core.c | 28 ++++++++++++++++++++++++++++ drivers/pci/probe.c | 3 +++ include/linux/aer.h | 5 +++++ 4 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6a9a1116f1eb..62ecf45a4230 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "pci.h" const char *pci_power_names[] = { @@ -1099,6 +1100,8 @@ void pci_restore_state(struct pci_dev *dev) pci_restore_ats_state(dev); pci_restore_vc_state(dev); + pci_cleanup_aer_error_status_regs(dev); + pci_restore_config_space(dev); pci_restore_pcix_state(dev); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 9803e3d039fe..fba785e9df75 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -74,6 +74,34 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + int pos; + u32 status; + int port_type; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + if (!pos) + return -EIO; + + port_type = pci_pcie_type(dev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT) { + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); + } + + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} + /** * add_error_device - list device to be handled * @e_info: pointer to error info diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0b2be174d981..8cd9710498e4 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "pci.h" @@ -1621,6 +1622,8 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Enable ACS P2P upstream forwarding */ 
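The helper above relies on AER status registers being RW1C ("write 1 to clear"): reading a status dword and writing the same value back clears exactly the bits that were set, without disturbing the rest. A compact sketch of that idiom, with a hypothetical wrapper name:

        /* Sketch only: the RW1C clear pattern applied to one status register. */
        static void example_rw1c_clear(struct pci_dev *dev, int reg)
        {
                u32 status;

                pci_read_config_dword(dev, reg, &status);
                pci_write_config_dword(dev, reg, status);  /* clears the set bits */
        }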
pci_enable_acs(dev); + + pci_cleanup_aer_error_status_regs(dev); } static void pci_set_msi_domain(struct pci_dev *dev) diff --git a/include/linux/aer.h b/include/linux/aer.h index 4fef65e57023..744b997d6a94 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -42,6 +42,7 @@ struct aer_capability_regs { int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev); #else static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) { @@ -55,6 +56,10 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + return -EINVAL; +} #endif void cper_print_aer(struct pci_dev *dev, int cper_severity, -- cgit v1.2.3 From f81c11af617ca4bad5028e9e431feae8d4166bc7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Sep 2015 17:50:08 +0100 Subject: of/pci: Add of_pci_check_probe_only to parse "linux,pci-probe-only" Both pci-host-generic and Pseries parse the "linux,pci-probe-only" property to engage the PCI_PROBE_ONLY mode, and both have a subtle bug that can be triggered if the property has no parameter. Provide a generic, safe implementation that can be used by both. [bhelgaas: fold in #include ] Signed-off-by: Marc Zyngier Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring --- drivers/of/of_pci.c | 26 ++++++++++++++++++++++++++ include/linux/of_pci.h | 3 +++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c index 5751dc5b6494..d0c8902dfbf4 100644 --- a/drivers/of/of_pci.c +++ b/drivers/of/of_pci.c @@ -5,6 +5,7 @@ #include #include #include +#include static inline int __of_pci_pci_compare(struct device_node *node, unsigned int data) @@ -117,6 +118,31 @@ int of_get_pci_domain_nr(struct device_node *node) } EXPORT_SYMBOL_GPL(of_get_pci_domain_nr); +/** + * of_pci_check_probe_only - Setup probe only mode if linux,pci-probe-only + * is present and valid + */ +void of_pci_check_probe_only(void) +{ + u32 val; + int ret; + + ret = of_property_read_u32(of_chosen, "linux,pci-probe-only", &val); + if (ret) { + if (ret == -ENODATA || ret == -EOVERFLOW) + pr_warn("linux,pci-probe-only without valid value, ignoring\n"); + return; + } + + if (val) + pci_add_flags(PCI_PROBE_ONLY); + else + pci_clear_flags(PCI_PROBE_ONLY); + + pr_info("PCI: PROBE_ONLY %sabled\n", val ? 
"en" : "dis"); +} +EXPORT_SYMBOL_GPL(of_pci_check_probe_only); + /** * of_pci_dma_configure - Setup DMA configuration * @dev: ptr to pci_dev struct of the PCI device diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 29fd3fe1c035..38c0533a3359 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -17,6 +17,7 @@ int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); void of_pci_dma_configure(struct pci_dev *pci_dev); +void of_pci_check_probe_only(void); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -53,6 +54,8 @@ of_get_pci_domain_nr(struct device_node *node) } static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } + +static inline void of_pci_check_probe_only(void) { } #endif #if defined(CONFIG_OF_ADDRESS) -- cgit v1.2.3 From fb884253a919148677c5bf347ffb62c539370440 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:03:49 -0500 Subject: netfilter: Remove !CONFIG_NETFITLER definition of nf_hook_thresh The !CONFIG_NETFILTER definition of nf_hook_thresh calls okfn when the CONFIG_NETFITLER defintion does not, making it buggy. As the !CONFIG_NETFILTER defintion of nf_hook_thresh is not used remove it rather than fix it. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netfilter.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 36a652531791..1abac85ec907 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -344,15 +344,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) #define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) -static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, - struct sock *sk, - struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sock *sk, struct sk_buff *), int thresh) -{ - return okfn(sk, skb); -} static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, -- cgit v1.2.3 From b11b1f652dccde707d568f4012b01a8ec5bd5f57 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:03:50 -0500 Subject: netfilter: Store net in nf_hook_state Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 5 ++++- include/linux/netfilter_ingress.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 1abac85ec907..889ac0e11f01 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,6 +54,7 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; + struct net *net; struct list_head *hook_list; int (*okfn)(struct sock *, struct sk_buff *); }; @@ -65,6 +66,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *indev, struct net_device *outdev, struct sock *sk, + struct net *net, int (*okfn)(struct sock *, struct sk_buff *)) { p->hook = hook; @@ -73,6 +75,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->in = indev; p->out = outdev; p->sk = sk; + p->net = net; p->hook_list = hook_list; p->okfn = okfn; } @@ -181,7 +184,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct nf_hook_state state; nf_hook_state_init(&state, hook_list, hook, thresh, - pf, indev, outdev, sk, okfn); + pf, indev, outdev, sk, net, okfn); return nf_hook_slow(skb, &state); } return 1; diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index cb0727fe2b3d..187feabe557c 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -17,7 +17,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, - skb->dev, NULL, NULL); + skb->dev, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } -- cgit v1.2.3 From 7a7735044e1eab1415697b3139e758d24a600099 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:03:51 -0500 Subject: netfilter: Pass net to nf_hook_thresh Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netfilter.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 889ac0e11f01..042148dc1e22 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -170,6 +170,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, @@ -177,7 +178,6 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - struct net *net = dev_net(indev ? indev : outdev); struct list_head *hook_list = &net->nf.hooks[pf][hook]; if (nf_hook_list_active(hook_list, pf, hook)) { @@ -195,7 +195,8 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, struct net_device *outdev, int (*okfn)(struct sock *, struct sk_buff *)) { - return nf_hook_thresh(pf, hook, sk, skb, indev, outdev, okfn, INT_MIN); + struct net *net = dev_net(indev ? 
indev : outdev); + return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -221,7 +222,8 @@ NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - int ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh); + struct net *net = dev_net(in ? in : out); + int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); if (ret == 1) ret = okfn(sk, skb); return ret; @@ -232,10 +234,11 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), bool cond) { + struct net *net = dev_net(in ? in : out); int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) ret = okfn(sk, skb); return ret; } -- cgit v1.2.3 From 2b4aa3cec4873005a0d5155395b34641584b3a4e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:07 -0500 Subject: net: Remove dev_queue_xmit_sk A function with weird arguments that it will never use, kept only to accommodate a netfilter callback prototype, sits in the very core of the networking stack. Frankly it does not make sense and it causes a lot of confusion as to why arguments that are never used are being passed to the function. As I am preparing to make a second change to the arguments of the okfn, even the names stop making sense. As I have removed the two callers of this function, remove this confusion from the networking stack. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +----- net/core/dev.c | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 88a00694eda5..e664f87c8e4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2213,11 +2213,7 @@ int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb); -static inline int dev_queue_xmit(struct sk_buff *skb) -{ - return dev_queue_xmit_sk(skb->sk, skb); -} +int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); diff --git a/net/core/dev.c b/net/core/dev.c index 877c84834d81..dcf9ff913925 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3143,11 +3143,11 @@ out: return rc; } -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) +int dev_queue_xmit(struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit_sk); +EXPORT_SYMBOL(dev_queue_xmit); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { -- cgit v1.2.3 From 04eb44890e5bb3cc855e5c0f18a05eb7311364b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:15 -0500 Subject: bridge: Add br_netif_receive_skb remove netif_receive_skb_sk netif_receive_skb_sk is only called once in the bridge code; replace it with a bridge-specific function that calls netif_receive_skb. Signed-off-by: "Eric W.
Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +----- net/bridge/br_input.c | 7 ++++++- net/core/dev.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e664f87c8e4c..97ab5c9a7069 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2985,11 +2985,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb); -static inline int netif_receive_skb(struct sk_buff *skb) -{ - return netif_receive_skb_sk(skb->sk, skb); -} +int netif_receive_skb(struct sk_buff *skb); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f921a5dce22d..2359c041e27c 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -26,6 +26,11 @@ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; EXPORT_SYMBOL(br_should_route_hook); +static int br_netif_receive_skb(struct sock *sk, struct sk_buff *skb) +{ + return netif_receive_skb(skb); +} + static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; @@ -57,7 +62,7 @@ static int br_pass_frame_up(struct sk_buff *skb) return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, indev, NULL, - netif_receive_skb_sk); + br_netif_receive_skb); } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, diff --git a/net/core/dev.c b/net/core/dev.c index dcf9ff913925..7db9b012dfb7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3982,13 +3982,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) +int netif_receive_skb(struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb_sk); +EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending * Called with irqs disabled. -- cgit v1.2.3 From 29a26a56803855a79dbd028cd61abee56237d6e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:16 -0500 Subject: netfilter: Pass struct net into the netfilter hooks Pass a network namespace parameter into the netfilter hooks. At the call site of the netfilter hooks the path a packet is taking through the network stack is well known which allows the network namespace to be easily and reliabily. This allows the replacement of magic code like "dev_net(state->in?:state->out)" that appears at the start of most netfilter hooks with "state->net". In almost all cases the network namespace passed in is derived from the first network device passed in, guaranteeing those paths will not see any changes in practice. 
The exceptions are: xfrm/xfrm_output.c:xfrm_output_resume() xs_net(skb_dst(skb)->xfrm) ipvs/ip_vs_xmit.c:ip_vs_nat_send_or_cont() ip_vs_conn_net(cp) ipvs/ip_vs_xmit.c:ip_vs_send_or_cont() ip_vs_conn_net(cp) ipv4/raw.c:raw_send_hdrinc() sock_net(sk) ipv6/ip6_output.c:ip6_xmit() sock_net(sk) ipv6/ndisc.c:ndisc_send_skb() dev_net(skb->dev) not dev_net(dst->dev) ipv6/raw.c:raw6_send_hdrinc() sock_net(sk) br_netfilter_hooks.c:br_nf_pre_routing_finish() dev_net(skb->dev) before skb->dev is set to nf_bridge->physindev In all cases these exceptions seem to be a better expression for the network namespace the packet is being processed in than the historic "dev_net(in?in:out)". I am documenting them in case something odd pops up and someone starts trying to track down what happened. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/vrf.c | 7 ++++--- include/linux/netfilter.h | 27 ++++++++++++--------------- net/bridge/br_forward.c | 13 +++++++------ net/bridge/br_input.c | 13 +++++++------ net/bridge/br_multicast.c | 4 ++-- net/bridge/br_netfilter_hooks.c | 15 ++++++++------- net/bridge/br_netfilter_ipv6.c | 7 ++++--- net/bridge/br_stp_bpdu.c | 4 ++-- net/decnet/dn_neigh.c | 15 +++++++++------ net/decnet/dn_nsp_in.c | 4 ++-- net/decnet/dn_route.c | 24 ++++++++++++------------ net/ipv4/arp.c | 10 ++++++---- net/ipv4/ip_forward.c | 5 +++-- net/ipv4/ip_input.c | 8 ++++---- net/ipv4/ip_output.c | 22 +++++++++++++--------- net/ipv4/ipmr.c | 4 ++-- net/ipv4/raw.c | 5 +++-- net/ipv4/xfrm4_input.c | 4 ++-- net/ipv4/xfrm4_output.c | 6 ++++-- net/ipv6/ip6_input.c | 8 ++++---- net/ipv6/ip6_output.c | 15 ++++++++------- net/ipv6/ip6mr.c | 4 ++-- net/ipv6/mcast.c | 7 ++++--- net/ipv6/ndisc.c | 4 ++-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 +- net/ipv6/output_core.c | 6 ++++-- net/ipv6/raw.c | 2 +- net/ipv6/xfrm6_input.c | 4 ++-- net/ipv6/xfrm6_output.c | 6 ++++-- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- net/xfrm/xfrm_output.c | 3 ++- 31 files changed, 142 insertions(+), 120 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 8c9ab5ebea23..979a4db9c6bc 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -298,14 +298,15 @@ err: static int vrf_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(dev); - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, vrf_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 042148dc1e22..295f2650b5dc 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -190,12 +190,11 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return 1; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, - struct sk_buff *skb, struct net_device *indev, - struct net_device *outdev, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sock *, struct sk_buff *)) { - struct net *net = dev_net(indev ?
indev : outdev); return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); } @@ -217,12 +216,11 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, +NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - struct net *net = dev_net(in ? in : out); int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); if (ret == 1) ret = okfn(sk, skb); @@ -230,11 +228,10 @@ NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, } static inline int -NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), bool cond) { - struct net *net = dev_net(in ? in : out); int ret; if (!cond || @@ -244,11 +241,11 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, } static inline int -NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, +NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN); + return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); } /* Call setsockopt() */ @@ -348,11 +345,11 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) -#define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, - struct sk_buff *skb, struct net_device *indev, - struct net_device *outdev, +#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) +#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sock *, struct sk_buff *)) { return 1; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index fa7bfced888e..2dd2a23ce707 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -67,8 +67,9 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb, - NULL, skb->dev, + struct net *net = dev_net(skb->dev); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, + net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); } @@ -92,8 +93,8 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) return; } - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(skb->dev), NULL, skb,NULL, skb->dev, br_forward_finish); } @@ -114,8 +115,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb, - indev, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, + dev_net(indev), NULL, skb, indev, 
skb->dev, br_forward_finish); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 2359c041e27c..78fa7acd836e 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,8 +60,8 @@ static int br_pass_frame_up(struct sk_buff *skb) if (!skb) return NET_RX_DROP; - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, - indev, NULL, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(indev), NULL, skb, indev, NULL, br_netif_receive_skb); } @@ -283,8 +283,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) } /* Deliver packet to local host only */ - if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, - skb->dev, NULL, br_handle_local_finish)) { + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ } else { *pskb = skb; @@ -308,8 +309,8 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 66efdc21f548..b4d858a18eb6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -829,8 +829,8 @@ static void __br_multicast_send_query(struct net_bridge *br, if (port) { skb->dev = port->dev; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c1127908e23a..7886c9d7e23d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -394,7 +394,7 @@ bridged_dnat: nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, + net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -414,7 +414,7 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -512,7 +512,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, skb->protocol = htons(ETH_P_IP); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish); @@ -539,6 +539,7 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net *net = dev_net(skb->dev); struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { @@ -560,7 +561,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb, in, skb->dev, br_forward_finish, 1); return 0; } @@ -627,7 +628,7 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, + 
NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, brnf_get_logical_dev(skb, state->in), parent, br_nf_forward_finish); @@ -662,7 +663,7 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_ACCEPT; } *d = state->in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, state->in, state->out, br_nf_forward_finish); return NF_STOLEN; @@ -842,7 +843,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, + NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, NULL, realoutdev, br_nf_dev_queue_xmit); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 77383bfe7ea3..5d19361ad5d3 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -166,6 +166,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; @@ -189,7 +190,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, + net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -208,7 +209,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -237,7 +238,7 @@ unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 3017a396cdef..8e2e8c352198 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -59,8 +59,8 @@ static void br_send_bpdu(struct net_bridge_port *p, skb_reset_mac_header(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(p->dev), NULL, skb, NULL, skb->dev, br_send_bpdu_finish); } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 4507b188fc51..305ab2fe25cd 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -246,8 +246,9 @@ static int dn_long_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } /* @@ -286,8 +287,9 @@ static int dn_short_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, 
sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } /* @@ -327,8 +329,9 @@ static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb) diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index a321eac9fd0c..e7b0605ca34a 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -814,8 +814,8 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, + &init_net, NULL, skb, skb->dev, NULL, dn_nsp_rx_packet); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 03227ffd19ce..fefcd2e85ef9 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -573,8 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb) ptr++; cb->hops = *ptr++; /* Visit Count */ - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, + &init_net, NULL, skb, skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -601,8 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb) ptr += 2; cb->hops = *ptr & 0x3f; - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, + &init_net, NULL, skb, skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -706,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type switch (flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_HELO: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_route_ptp_hello); case DN_RT_PKT_L1RT: case DN_RT_PKT_L2RT: return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_route_discard); case DN_RT_PKT_ERTH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_neigh_router_hello); case DN_RT_PKT_EEDH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_neigh_endnode_hello); } } else { @@ -770,8 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; cb->hops = 0; - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb, - NULL, dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, + &init_net, sk, skb, NULL, dev, dn_to_neigh_output); error: @@ -819,8 +819,8 @@ static int dn_forward(struct sk_buff *skb) if (rt->rt_flags & RTCF_DOREDIRECT) cb->rt_flags |= DN_RT_F_IE; - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb, - dev, skb->dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, + &init_net, NULL, skb, dev, skb->dev, dn_to_neigh_output); drop: diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 3632e98eb0f9..ae71e9ade5f9 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -632,8 +632,9 @@ static int arp_xmit_finish(struct sock *sk, struct sk_buff *skb) void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. 
*/ - NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb, - NULL, skb->dev, arp_xmit_finish); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, + dev_net(skb->dev), NULL, skb, NULL, skb->dev, + arp_xmit_finish); } EXPORT_SYMBOL(arp_xmit); @@ -897,8 +898,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb, - dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, + dev_net(dev), NULL, skb, dev, NULL, + arp_process); consumeskb: consume_skb(skb); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 95235c813f18..0a3c45a2e757 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -146,8 +146,9 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); - return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, - skb->dev, rt->dst.dev, ip_forward_finish); + return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, + net, NULL, skb, skb->dev, rt->dst.dev, + ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index cc242b9501d9..991d082c7312 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -254,8 +254,8 @@ int ip_local_deliver(struct sk_buff *skb) return 0; } - return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip_local_deliver_finish); } @@ -453,8 +453,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, - dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + net, NULL, skb, dev, NULL, ip_rcv_finish); csum_error: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fc550e97daac..4c9532259a7f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -97,12 +97,14 @@ EXPORT_SYMBOL(ip_send_check); static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); struct iphdr *iph = ip_hdr(skb); iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL, - skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst(skb)->dev, + dst_output); } int __ip_local_out(struct sk_buff *skb) @@ -322,7 +324,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, - sk, newskb, NULL, newskb->dev, + net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } @@ -337,12 +339,14 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb, - NULL, newskb->dev, dev_loopback_xmit); + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, newskb, NULL, newskb->dev, + dev_loopback_xmit); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, - skb->dev, ip_finish_output, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -356,8 +360,8 @@ int ip_output(struct sock *sk, struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, 
NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dfe4e8ec6c3a..a88c0c5374ff 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1788,8 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, - skb->dev, dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dev, ipmr_forward_finish); return; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 09ab5bb6913a..2045b1aaa6ef 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -411,8 +411,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, - NULL, rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, rt->dst.dev, + dst_output); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 60b032f58ccc..5093000d3d5e 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -52,8 +52,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 737f5e395a6a..e4a85199e015 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -96,8 +96,10 @@ static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) int xfrm4_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, - NULL, skb_dst(skb)->dev, __xfrm4_output, + struct net *net = dev_net(skb_dst(skb)->dev); + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c628dba477d4..583cf959c23d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -183,8 +183,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. 
*/ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, - dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + net, NULL, skb, dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -278,8 +278,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_input_finish); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8cab909b181e..96e76ddd4a44 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -83,7 +83,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - sk, newskb, NULL, newskb->dev, + net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -142,8 +142,8 @@ int ip6_output(struct sock *sk, struct sk_buff *skb) return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } @@ -223,8 +223,9 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, dst->dev, dst_output); + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, dst->dev, + dst_output); } skb->dev = dst->dev; @@ -511,8 +512,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, - skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 3e3085b37a91..e830942b2090 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2064,8 +2064,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, - skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dev, ip6mr_forward2_finish); out_free: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5b3f2841acf6..124338a39e29 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1645,7 +1645,7 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net->ipv6.igmp_sk, skb, NULL, skb->dev, + net, net->ipv6.igmp_sk, skb, NULL, skb->dev, dst_output); out: if (!err) { @@ -2008,8 +2008,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } skb_dst_set(skb, dst); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, skb->dev, dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb->dev, + dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 349ac1b022b6..dd2b08d7c8d1 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -463,8 +463,8 @@ static void ndisc_send_skb(struct sk_buff *skb, idev = __in6_dev_get(dst->dev); 
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, dst->dev, + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 6d9c0b3d5b8c..6b576be3c83e 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->net, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 8178f72fe90d..9cc9127fb5e7 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -140,6 +140,7 @@ EXPORT_SYMBOL(ip6_dst_hoplimit); static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); int len; len = skb->len - sizeof(struct ipv6hdr); @@ -148,8 +149,9 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst(skb)->dev, + dst_output); } int __ip6_local_out(struct sk_buff *skb) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5aa461302716..dc65ec198f7c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -654,7 +654,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, goto error_fault; IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 74bd17882a2f..0eaab1fa6be5 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,8 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b0fcd6c09837..431ae2c22234 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -168,7 +168,9 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, - NULL, skb_dst(skb)->dev, __xfrm6_output, + struct net *net = dev_net(skb_dst(skb)->dev); + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b8e5544af87f..65c996c14bca 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -573,7 +573,7 @@ static inline int 
ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NF_HOOK(pf, NF_INET_LOCAL_OUT, ip_vs_conn_net(cp), NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; @@ -595,7 +595,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NF_HOOK(pf, NF_INET_LOCAL_OUT, ip_vs_conn_net(cp), NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 57a50f6ce28c..c21f1a02ce13 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -131,6 +131,7 @@ out: int xfrm_output_resume(struct sk_buff *skb, int err) { + struct net *net = xs_net(skb_dst(skb)->xfrm); while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset(skb); @@ -142,7 +143,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) return dst_output(skb->sk, skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, skb->sk, skb, + NF_INET_POST_ROUTING, net, skb->sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; -- cgit v1.2.3 From 0c4b51f0054ce85c0ec578ab818f0631834573eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:18 -0500 Subject: netfilter: Pass net into okfn This is immediately motivated by the bridge code that chains functions that call into netfilter. Without passing net into the okfns, the bridge code would need to guess about the best expression for the network namespace to process packets in. As net is frequently one of the first things computed in continuation functions after netfilter has done its job, passing in the desired network namespace is in many cases a code simplification. To support this change, the function dst_output_okfn is introduced to simplify passing dst_output as an okfn. For the moment dst_output_okfn just silently drops the struct net. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S.
Miller --- drivers/net/vrf.c | 2 +- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 26 ++++++++++++++------------ include/linux/netfilter_bridge.h | 2 +- include/net/dn_neigh.h | 6 +++--- include/net/dst.h | 4 ++++ include/net/ipv6.h | 2 +- include/net/netfilter/br_netfilter.h | 2 +- net/bridge/br_forward.c | 5 ++--- net/bridge/br_input.c | 7 ++++--- net/bridge/br_netfilter_hooks.c | 21 +++++++++------------ net/bridge/br_netfilter_ipv6.c | 3 +-- net/bridge/br_private.h | 6 +++--- net/bridge/br_stp_bpdu.c | 3 ++- net/core/dev.c | 4 +++- net/decnet/dn_neigh.c | 8 ++++---- net/decnet/dn_nsp_in.c | 3 ++- net/decnet/dn_route.c | 6 +++--- net/ipv4/arp.c | 7 +++---- net/ipv4/ip_forward.c | 3 +-- net/ipv4/ip_input.c | 7 ++----- net/ipv4/ip_output.c | 4 ++-- net/ipv4/ipmr.c | 4 ++-- net/ipv4/raw.c | 2 +- net/ipv4/xfrm4_input.c | 3 ++- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/ip6_input.c | 5 ++--- net/ipv6/ip6_output.c | 7 ++++--- net/ipv6/ip6mr.c | 3 +-- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 2 +- net/ipv6/output_core.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/xfrm6_output.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- net/netfilter/nf_queue.c | 2 +- net/xfrm/xfrm_output.c | 12 ++++++------ 37 files changed, 95 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 979a4db9c6bc..637e9fd1e14c 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -253,7 +253,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) } /* modelled after ip_finish_output2 */ -static int vrf_finish_output(struct sock *sk, struct sk_buff *skb) +static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97ab5c9a7069..b791405958b4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2212,7 +2212,7 @@ int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); -int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 295f2650b5dc..0b4d4560f33d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -56,7 +56,7 @@ struct nf_hook_state { struct sock *sk; struct net *net; struct list_head *hook_list; - int (*okfn)(struct sock *, struct sk_buff *); + int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, @@ -67,7 +67,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *outdev, struct sock *sk, struct net *net, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; p->thresh = thresh; @@ -175,7 +175,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sock *, struct sk_buff *), + int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { struct list_head *hook_list = 
&net->nf.hooks[pf][hook]; @@ -193,7 +193,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); } @@ -219,31 +219,33 @@ static inline int NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sock *, struct sk_buff *), int thresh) + int (*okfn)(struct net *, struct sock *, struct sk_buff *), + int thresh) { int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); if (ret == 1) - ret = okfn(sk, skb); + ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sock *, struct sk_buff *), bool cond) + int (*okfn)(struct net *, struct sock *, struct sk_buff *), + bool cond) { int ret; if (!cond || ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) - ret = okfn(sk, skb); + ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); } @@ -345,12 +347,12 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) -#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) +#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(net, sk, skb) +#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(net, sk, skb) static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return 1; } diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2437b8a5d7a9..2ed40c402b5e 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -17,7 +17,7 @@ enum nf_br_hook_priorities { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) { diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h index d0424269313f..5e902fc3f4eb 100644 --- a/include/net/dn_neigh.h +++ b/include/net/dn_neigh.h @@ -18,11 +18,11 @@ struct dn_neigh { void dn_neigh_init(void); void dn_neigh_cleanup(void); -int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb); -int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb); +int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb); +int dn_neigh_endnode_hello(struct net *net, struct sock *sk, 
struct sk_buff *skb); void dn_neigh_pointopoint_hello(struct sk_buff *skb); int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n); -int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb); +int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb); extern struct neigh_table dn_neigh_table; diff --git a/include/net/dst.h b/include/net/dst.h index c72e58474e52..df0481a07029 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -458,6 +458,10 @@ static inline int dst_output(struct sock *sk, struct sk_buff *skb) { return skb_dst(skb)->output(sk, skb); } +static inline int dst_output_okfn(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return dst_output(sk, skb); +} /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 711cca428cc8..384a93cf07d6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -807,7 +807,7 @@ static inline u8 ip6_tclass(__be32 flowinfo) int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); -int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb); +int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb); /* * upper-layer output functions diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index d4c6b5f30acd..8fe266504900 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -31,7 +31,7 @@ static inline void nf_bridge_push_encap_header(struct sk_buff *skb) skb->network_header -= len; } -int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb); +int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb); static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 2dd2a23ce707..48afca729ed7 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,7 +35,7 @@ static inline int should_deliver(const struct net_bridge_port *p, p->state == BR_STATE_FORWARDING; } -int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { if (!is_skb_forwardable(skb->dev, skb)) goto drop; @@ -65,9 +65,8 @@ drop: } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); -int br_forward_finish(struct sock *sk, struct sk_buff *skb) +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 78fa7acd836e..223f4040d9df 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -26,7 +26,8 @@ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; EXPORT_SYMBOL(br_should_route_hook); -static int br_netif_receive_skb(struct sock *sk, struct sk_buff *skb) +static int +br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { return netif_receive_skb(skb); } @@ -125,7 +126,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } /* note: already called with rcu_read_lock */ -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; struct 
net_bridge_port *p = br_port_get_rcu(skb->dev); @@ -213,7 +214,7 @@ drop: EXPORT_SYMBOL_GPL(br_handle_frame_finish); /* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb) +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7886c9d7e23d..e6e76bbdc82f 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -256,7 +256,7 @@ void nf_bridge_update_protocol(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) +int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) { struct neighbour *neigh; struct dst_entry *dst; @@ -273,7 +273,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) if (neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; - ret = br_handle_frame_finish(sk, skb); + ret = br_handle_frame_finish(net, sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and @@ -342,11 +342,10 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. */ -static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) +static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); - struct net *net = dev_net(dev); struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; @@ -536,10 +535,9 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, } /* PF_BRIDGE/FORWARD *************************************************/ -static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) +static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct net *net = dev_net(skb->dev); struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { @@ -692,7 +690,7 @@ static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff __skb_push(skb, data->encap_size); nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); } static int br_nf_push_frag_xmit_sk(struct sock *sk, struct sk_buff *skb) { @@ -728,17 +726,16 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; - struct net *net = dev_net(skb_dst(skb)->dev); mtu_reserved = nf_bridge_mtu_reduction(skb); if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); } nf_bridge = nf_bridge_info_get(skb); @@ -797,7 +794,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) } 
#endif nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); drop: kfree_skb(skb); return 0; @@ -887,7 +884,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge->physoutdev = NULL; - br_handle_frame_finish(NULL, skb); + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); } static int br_nf_dev_xmit(struct sk_buff *skb) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 5d19361ad5d3..e4dbbe44c724 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -161,12 +161,11 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, * for br_nf_pre_routing_finish(), same logic is used here but * equivalent IPv6 function ip6_route_input() called indirectly. */ -static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) +static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; struct net_device *dev = skb->dev; - struct net *net = dev_net(dev); const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 213baf7aaa93..74e99c75c8e4 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -413,10 +413,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb); +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); -int br_forward_finish(struct sock *sk, struct sk_buff *skb); +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb); void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2, bool unicast); @@ -434,7 +434,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); void br_manage_promisc(struct net_bridge *br); /* br_input.c */ -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8e2e8c352198..5881fbc114a9 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -30,7 +30,8 @@ #define LLC_RESERVE sizeof(struct llc_pdu_un) -static int br_send_bpdu_finish(struct sock *sk, struct sk_buff *skb) +static int br_send_bpdu_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { return dev_queue_xmit(skb); } diff --git a/net/core/dev.c b/net/core/dev.c index 7db9b012dfb7..00dccfac8939 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2915,9 +2915,11 @@ EXPORT_SYMBOL(xmit_recursion); /** * dev_loopback_xmit - loop back @skb + * @net: network namespace this loopback is happening in + * @sk: sk needed to be a netfilter okfn * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) +int dev_loopback_xmit(struct net *net, struct sock *sk, struct 
sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 305ab2fe25cd..482730cd8a56 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) return err; } -static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb) +static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -334,7 +334,7 @@ static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, dn_neigh_output_packet); } -int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb) +int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *) dst; @@ -378,7 +378,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb) /* * Ethernet router hello message received */ -int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) +int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; @@ -440,7 +440,7 @@ int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) /* * Endnode hello message received */ -int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb) +int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; struct neighbour *neigh; diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index e7b0605ca34a..7ac086d5c0c0 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -714,7 +714,8 @@ out: return ret; } -static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb) +static int dn_nsp_rx_packet(struct net *net, struct sock *sk2, + struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk = NULL; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index fefcd2e85ef9..e930321e2c1d 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb) * * Returns: result of input function if route is found, error code otherwise */ -static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb) +static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dn_skb_cb *cb; int err; @@ -610,7 +610,7 @@ drop_it: return NET_RX_DROP; } -static int dn_route_discard(struct sock *sk, struct sk_buff *skb) +static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb) { /* * I know we drop the packet here, but thats considered success in @@ -620,7 +620,7 @@ static int dn_route_discard(struct sock *sk, struct sk_buff *skb) return NET_RX_SUCCESS; } -static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb) +static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { dn_dev_hello(skb); dn_neigh_pointopoint_hello(skb); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ae71e9ade5f9..61ff5ea31283 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -621,7 +621,7 @@ out: } EXPORT_SYMBOL(arp_create); -static int arp_xmit_finish(struct sock *sk, struct sk_buff *skb) +static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return dev_queue_xmit(skb); } @@ -642,7 +642,7 @@ 
EXPORT_SYMBOL(arp_xmit); * Process an arp request. */ -static int arp_process(struct sock *sk, struct sk_buff *skb) +static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -654,7 +654,6 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) u16 dev_type = dev->type; int addr_type; struct neighbour *n; - struct net *net = dev_net(dev); bool is_garp = false; /* arp_rcv below verifies the ARP header and verifies the device @@ -865,7 +864,7 @@ out: static void parp_redo(struct sk_buff *skb) { - arp_process(NULL, skb); + arp_process(dev_net(skb->dev), NULL, skb); } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 0a3c45a2e757..d66cfb35ba74 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -61,9 +61,8 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) } -static int ip_forward_finish(struct sock *sk, struct sk_buff *skb) +static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 991d082c7312..7cc9f7bb7fb7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -188,10 +188,8 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } -static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb) +static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); - __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); @@ -311,10 +309,9 @@ drop: int sysctl_ip_early_demux __read_mostly = 1; EXPORT_SYMBOL(sysctl_ip_early_demux); -static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) +static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); - struct net *net = dev_net(skb->dev); struct rtable *rt; if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4c9532259a7f..09a6b7bb7ea3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -104,7 +104,7 @@ static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) ip_send_check(iph); return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, - dst_output); + dst_output_okfn); } int __ip_local_out(struct sk_buff *skb) @@ -266,7 +266,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb, return ret; } -static int ip_finish_output(struct sock *sk, struct sk_buff *skb) +static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a88c0c5374ff..cfcb996ec51b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1678,10 +1678,10 @@ static void ip_encap(struct net *net, struct sk_buff *skb, nf_reset(skb); } -static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) +static inline int ipmr_forward_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - struct net *net = dev_net(skb_dst(skb)->dev); IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2045b1aaa6ef..28ef8a913130 100644 --- a/net/ipv4/raw.c 
+++ b/net/ipv4/raw.c @@ -413,7 +413,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, - dst_output); + dst_output_okfn); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 5093000d3d5e..62e1e72db461 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -22,7 +22,8 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb) +static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { if (!skb_dst(skb)) { const struct iphdr *iph = ip_hdr(skb); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index e4a85199e015..28ae2048b93a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -80,7 +80,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } -static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) +static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 583cf959c23d..9075acf081dd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,7 +47,7 @@ #include #include -int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb) +int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -199,9 +199,8 @@ drop: */ -static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) +static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 96e76ddd4a44..d8d68e81d123 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -121,7 +121,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) return -EINVAL; } -static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || @@ -225,7 +225,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, - dst_output); + dst_output_okfn); } skb->dev = dst->dev; @@ -317,7 +317,8 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) +static inline int ip6_forward_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { skb_sender_cpu_clear(skb); return dst_output(sk, skb); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e830942b2090..5e5d16e7ce85 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1985,9 +1985,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif -static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb) +static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); 
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 124338a39e29..a8bf57ca74d3 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1646,7 +1646,7 @@ static void mld_sendpack(struct sk_buff *skb) err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, net->ipv6.igmp_sk, skb, NULL, skb->dev, - dst_output); + dst_output_okfn); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -2010,7 +2010,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) skb_dst_set(skb, dst); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb->dev, - dst_output); + dst_output_okfn); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index dd2b08d7c8d1..dde5a1e5875a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -465,7 +465,7 @@ static void ndisc_send_skb(struct sk_buff *skb, err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, - dst_output); + dst_output_okfn); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 9cc9127fb5e7..e77102c4f804 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -151,7 +151,7 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, - dst_output); + dst_output_okfn); } int __ip6_local_out(struct sk_buff *skb) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dc65ec198f7c..fec0151522a2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -655,7 +655,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, - NULL, rt->dst.dev, dst_output); + NULL, rt->dst.dev, dst_output_okfn); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 431ae2c22234..68a996f8a044 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -131,7 +131,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } -static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) +static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65c996c14bca..cc7299033af8 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -574,7 +574,7 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!skb->sk) skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, ip_vs_conn_net(cp), NULL, skb, - NULL, skb_dst(skb)->dev, dst_output); + NULL, skb_dst(skb)->dev, dst_output_okfn); } else ret = NF_ACCEPT; @@ -596,7 +596,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (!skb->sk) skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, ip_vs_conn_net(cp), NULL, skb, - NULL, skb_dst(skb)->dev, dst_output); + NULL, skb_dst(skb)->dev, dst_output_okfn); } else ret = NF_ACCEPT; return ret; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 96777f9a9350..9f3c3c25fa73 100644 --- 
a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -215,7 +215,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) case NF_ACCEPT: case NF_STOP: local_bh_disable(); - entry->state.okfn(entry->state.sk, skb); + entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index c21f1a02ce13..61ba99f61dc8 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -19,7 +19,7 @@ #include #include -static int xfrm_output2(struct sock *sk, struct sk_buff *skb); +static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -157,12 +157,12 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output_resume); -static int xfrm_output2(struct sock *sk, struct sk_buff *skb) +static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(skb, 1); } -static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) +static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; @@ -178,7 +178,7 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) int err; segs->next = NULL; - err = xfrm_output2(sk, segs); + err = xfrm_output2(net, sk, segs); if (unlikely(err)) { kfree_skb_list(nskb); @@ -197,7 +197,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) int err; if (skb_is_gso(skb)) - return xfrm_output_gso(sk, skb); + return xfrm_output_gso(net, sk, skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); @@ -208,7 +208,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) } } - return xfrm_output2(sk, skb); + return xfrm_output2(net, sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); -- cgit v1.2.3 From 58d607d3e52f2b15902f58a1161da9fb3b0f6d47 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Sep 2015 15:24:20 -0700 Subject: tcp: provide skb->hash to synack packets In commit b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit"), Tom provided an l4 hash to most outgoing TCP packets. We'd like to provide one as well for SYNACK packets, so that all packets of a given flow share the same txhash, to later enable the bonding driver to also use skb->hash to perform slave selection. Note that a SYNACK retransmit shuffles the tx hash, as Tom did in commit 265f94ff54d62 ("net: Recompute sk_txhash on negative routing advice") for established sockets. This has the nice effect of making TCP flows resilient to some kinds of black holes, even at the connection-establishment phase. Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Acked-by: Tom Herbert Signed-off-by: David S.
Miller --- include/linux/tcp.h | 1 + include/net/sock.h | 12 ++++++++---- net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 2 ++ net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 48c3696e8645..937b97893d5f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -113,6 +113,7 @@ struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; bool tfo_listener; + u32 txhash; u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ diff --git a/include/net/sock.h b/include/net/sock.h index 7aa78440559a..94dff7f566f5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1654,12 +1654,16 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); -static inline void sk_set_txhash(struct sock *sk) +static inline u32 net_tx_rndhash(void) { - sk->sk_txhash = prandom_u32(); + u32 v = prandom_u32(); + + return v ?: 1; +} - if (unlikely(!sk->sk_txhash)) - sk->sk_txhash = 1; +static inline void sk_set_txhash(struct sock *sk) +{ + sk->sk_txhash = net_tx_rndhash(); } static inline void sk_rethink_txhash(struct sock *sk) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a8f515bb19c4..a62e9c76d485 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6228,6 +6228,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_openreq_init_rwin(req, sk, dst); fastopen = !want_cookie && tcp_try_fastopen(sk, skb, req, &foc, dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 93898e093d4e..d671d742a239 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1276,8 +1276,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; + newsk->sk_txhash = tcp_rsk(req)->txhash; inet_csk(newsk)->icsk_ext_hdr_len = 0; - sk_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f9a8a12b62ee..d0ad3554c333 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2987,6 +2987,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, rcu_read_lock(); md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif + skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, foc) + sizeof(*th); @@ -3505,6 +3506,7 @@ int tcp_rtx_synack(struct sock *sk, struct request_sock *req) struct flowi fl; int res; + tcp_rsk(req)->txhash = net_tx_rndhash(); res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 97d9314ea361..f9c0e2640671 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; - sk_set_txhash(newsk); + newsk->sk_txhash = tcp_rsk(req)->txhash; /* Now IPv6 options... 
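A minimal userspace sketch of the txhash contract above may help: the hash is random but never 0 (0 means "no hash set"), and a retransmitted SYNACK picks a fresh value to route around a possible black hole. This is an illustration only; rand() stands in for prandom_u32() and the function name is invented, not kernel API.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* stand-in for net_tx_rndhash(): random, with 0 reserved for "unset" */
static uint32_t tx_rndhash_approx(void)
{
	uint32_t v = (uint32_t)rand();	/* the kernel uses prandom_u32() */

	return v ? v : 1;		/* the kernel spells this "v ?: 1" */
}

int main(void)
{
	uint32_t txhash;

	srand((unsigned int)time(NULL));
	txhash = tx_rndhash_approx();	/* chosen when the request sock is created */
	printf("synack txhash: %#x\n", txhash);
	txhash = tx_rndhash_approx();	/* reshuffled on SYNACK retransmit */
	printf("after retransmit: %#x\n", txhash);
	return 0;
}

Reserving 0 lets callers test sk->sk_txhash for truthiness instead of carrying a separate "hash valid" flag, which is why both the old sk_set_txhash() and the new net_tx_rndhash() force a nonzero result.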
-- cgit v1.2.3 From d6157e4f18173ad24441aa9ca04e7e9121a9b4c7 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 10 Aug 2015 16:41:45 +0200 Subject: ARM: at91: remove useless includes in platform_data/atmel.h include/linux/platform_data/atmel.h has a lot of useless includes, remove them. Signed-off-by: Alexandre Belloni Signed-off-by: Nicolas Ferre --- include/linux/platform_data/atmel.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..4d67a5e82c83 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -9,15 +9,7 @@ #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include /* * at91: 6 USARTs and one DBGU port (SAM9260) -- cgit v1.2.3 From fa128fd735bd236b6b04d3fedfed7a784137c185 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: jump_label: make static_key_enabled() work on static_key_true/false types too static_key_enabled() can be used on struct static_key but not on its wrapper types static_key_true and static_key_false. The function is useful for debugging and management of static keys. Update it so that it can be used for the wrapper types too. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra (Intel) Cc: Andrew Morton --- include/linux/jump_label.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8f6690..c9ca050de846 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -216,11 +216,6 @@ static inline int jump_label_apply_nops(struct module *mod) #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled -static inline bool static_key_enabled(struct static_key *key) -{ - return static_key_count(key) > 0; -} - static inline void static_key_enable(struct static_key *key) { int count = static_key_count(key); @@ -267,6 +262,17 @@ struct static_key_false { #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +extern bool ____wrong_branch_error(void); + +#define static_key_enabled(x) \ +({ \ + if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ + !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ + !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + ____wrong_branch_error(); \ + static_key_count((struct static_key *)x) > 0; \ +}) + #ifdef HAVE_JUMP_LABEL /* @@ -325,8 +331,6 @@ struct static_key_false { * See jump_label_type() / jump_label_init_type(). */ -extern bool ____wrong_branch_error(void); - #define static_branch_likely(x) \ ({ \ bool branch; \ -- cgit v1.2.3 From 49d1dc4b81797f88270832b11e9f73809e7e7209 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: cgroup: implement static_key based cgroup_subsys_enabled() and cgroup_subsys_on_dfl() Whether a subsys is enabled and attached to the default hierarchy seldom changes and may be tested in the hot paths. This patch implements static_key based cgroup_subsys_enabled() and cgroup_subsys_on_dfl() tests. The following patches will update the users and remove duplicate mechanisms. 
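The compile-time type check these tests inherit from static_key_enabled(), as reworked in the jump_label patch above, can be sketched in plain C. The following is an illustrative userspace approximation relying on the GCC/Clang statement-expression and __builtin_types_compatible_p extensions; the struct bodies and names here are simplified stand-ins, not the kernel definitions.

#include <stdio.h>

struct static_key { int enabled; };
struct static_key_true { struct static_key key; };
struct static_key_false { struct static_key key; };

int wrong_branch_error(void);	/* deliberately declared but never defined */

/* accept exactly the three key types; anything else calls the
 * undefined function and therefore fails at link time */
#define key_enabled(x)							\
({									\
	if (!__builtin_types_compatible_p(typeof(*(x)), struct static_key) &&	\
	    !__builtin_types_compatible_p(typeof(*(x)), struct static_key_true) && \
	    !__builtin_types_compatible_p(typeof(*(x)), struct static_key_false)) \
		wrong_branch_error();					\
	((struct static_key *)(x))->enabled > 0;			\
})

int main(void)
{
	struct static_key_true t = { { 1 } };

	printf("enabled: %d\n", key_enabled(&t));	/* prints "enabled: 1" */
	/* key_enabled(&(double){ 0 }); would fail to link */
	return 0;
}

Because the type test folds to a constant, the compiler drops the call to wrong_branch_error() for the three accepted types (build with -O1 or higher to guarantee the dead call is eliminated) and emits it, forcing a link error, for anything else, all at zero runtime cost.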
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup.h | 21 +++++++++++++++++++++ kernel/cgroup.c | 27 ++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index eb7ca55f72ef..c3a9f1eb4097 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -50,6 +51,26 @@ extern struct css_set init_css_set; #include #undef SUBSYS +#define SUBSYS(_x) \ + extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ + extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; +#include +#undef SUBSYS + +/** + * cgroup_subsys_enabled - fast test on whether a subsys is enabled + * @ss: subsystem in question + */ +#define cgroup_subsys_enabled(ss) \ + static_branch_likely(&ss ## _enabled_key) + +/** + * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy + * @ss: subsystem in question + */ +#define cgroup_subsys_on_dfl(ss) \ + static_branch_likely(&ss ## _on_dfl_key) + bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2cf0f79f1fc9..361938911bd1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -139,6 +139,27 @@ static const char *cgroup_subsys_name[] = { }; #undef SUBSYS +/* array of static_keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */ +#define SUBSYS(_x) \ + DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key); \ + DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key); \ + EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key); \ + EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key); +#include +#undef SUBSYS + +#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key, +static struct static_key_true *cgroup_subsys_enabled_key[] = { +#include +}; +#undef SUBSYS + +#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key, +static struct static_key_true *cgroup_subsys_on_dfl_key[] = { +#include +}; +#undef SUBSYS + /* * The default hierarchy, reserved for the subsystems that are otherwise * unattached - it never has more than a single cgroup, and all tasks are @@ -1319,9 +1340,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; - if (dst_root != &cgrp_dfl_root) { + if (dst_root == &cgrp_dfl_root) { + static_branch_enable(cgroup_subsys_on_dfl_key[ssid]); + } else { dst_root->cgrp.subtree_control |= 1 << ssid; cgroup_refresh_child_subsys_mask(&dst_root->cgrp); + static_branch_disable(cgroup_subsys_on_dfl_key[ssid]); } if (ss->bind) @@ -5483,6 +5507,7 @@ static int __init cgroup_disable(char *str) strcmp(token, ss->legacy_name)) continue; + static_branch_disable(cgroup_subsys_enabled_key[i]); ss->disabled = 1; printk(KERN_INFO "Disabling %s control group subsystem\n", ss->name); -- cgit v1.2.3 From fc5ed1e95410ad73b2ab8f33cd90eb3bcf6c98a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: cgroup: replace cgroup_subsys->disabled tests with cgroup_subsys_enabled() Replace cgroup_subsys->disabled tests in controllers with cgroup_subsys_enabled(). cgroup_subsys_enabled() requires literal subsys name as its parameter and thus can't be used for cgroup core which iterates through controllers. 
For cgroup core, introduce and use cgroup_ssid_enabled() which uses slower static_key_enabled() test and can be indexed by subsys ID. This leaves cgroup_subsys->disabled unused. Removed. Signed-off-by: Tejun Heo Acked-by: Zefan Li Cc: Johannes Weiner Cc: Michal Hocko --- include/linux/cgroup-defs.h | 1 - include/linux/hugetlb_cgroup.h | 4 +--- include/linux/memcontrol.h | 4 +--- kernel/cgroup.c | 28 +++++++++++++++++++++------- 4 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4d8fcf2187dc..c5d41c3c1f00 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -419,7 +419,6 @@ struct cgroup_subsys { struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int disabled; int early_init; /* diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index bcc853eccc85..7edd30515298 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -48,9 +48,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) static inline bool hugetlb_cgroup_disabled(void) { - if (hugetlb_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); } extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..9aa7820c2177 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -347,9 +347,7 @@ ino_t page_cgroup_ino(struct page *page); static inline bool mem_cgroup_disabled(void) { - if (memory_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(memory_cgrp_subsys); } /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 361938911bd1..5703ba791b3d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -224,6 +224,19 @@ static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], bool is_add); +/** + * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID + * @ssid: subsys ID of interest + * + * cgroup_subsys_enabled() can only be used with literal subsys names which + * is fine for individual subsystems but unsuitable for cgroup core. This + * is slower static_key_enabled() based test indexed by @ssid. 
+ */ +static bool cgroup_ssid_enabled(int ssid) +{ + return static_key_enabled(cgroup_subsys_enabled_key[ssid]); +} + /* IDR wrappers which synchronize using cgroup_idr_lock */ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) @@ -1482,7 +1495,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) for_each_subsys(ss, i) { if (strcmp(token, ss->legacy_name)) continue; - if (ss->disabled) + if (!cgroup_ssid_enabled(i)) continue; /* Mutually exclusive option 'all' + subsystem name */ @@ -1513,7 +1526,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) */ if (all_ss || (!one_ss && !opts->none && !opts->name)) for_each_subsys(ss, i) - if (!ss->disabled) + if (cgroup_ssid_enabled(i)) opts->subsys_mask |= (1 << i); /* @@ -2762,7 +2775,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, if (tok[0] == '\0') continue; for_each_subsys_which(ss, ssid, &tmp_ss_mask) { - if (ss->disabled || strcmp(tok + 1, ss->name)) + if (!cgroup_ssid_enabled(ssid) || + strcmp(tok + 1, ss->name)) continue; if (*tok == '+') { @@ -3320,7 +3334,7 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { int ret; - if (ss->disabled) + if (!cgroup_ssid_enabled(ss->id)) return 0; if (!cfts || cfts[0].name[0] == '\0') @@ -5082,7 +5096,7 @@ int __init cgroup_init(void) * disabled flag and cftype registration needs kmalloc, * both of which aren't available during early_init. */ - if (ss->disabled) + if (!cgroup_ssid_enabled(ssid)) continue; cgrp_dfl_root.subsys_mask |= 1 << ss->id; @@ -5217,7 +5231,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) for_each_subsys(ss, i) seq_printf(m, "%s\t%d\t%d\t%d\n", ss->legacy_name, ss->root->hierarchy_id, - atomic_read(&ss->root->nr_cgrps), !ss->disabled); + atomic_read(&ss->root->nr_cgrps), + cgroup_ssid_enabled(i)); mutex_unlock(&cgroup_mutex); return 0; @@ -5508,7 +5523,6 @@ static int __init cgroup_disable(char *str) continue; static_branch_disable(cgroup_subsys_enabled_key[i]); - ss->disabled = 1; printk(KERN_INFO "Disabling %s control group subsystem\n", ss->name); break; -- cgit v1.2.3 From 9e10a130d9b62af976d17d120c95f3650769312c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: cgroup: replace cgroup_on_dfl() tests in controllers with cgroup_subsys_on_dfl() cgroup_on_dfl() tests whether the cgroup's root is the default hierarchy; however, an individual controller is only interested in whether the controller is attached to the default hierarchy and never tests a cgroup which doesn't belong to the hierarchy that the controller is attached to. This patch replaces cgroup_on_dfl() tests in controllers with faster static_key based cgroup_subsys_on_dfl(). This leaves cgroup core as the only user of cgroup_on_dfl() and the function is moved from the header file to cgroup.c. 
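The distinction drawn above can be sketched in a few lines of userspace C; plain bools stand in for static keys and every name is a simplified stand-in. The literal-name form works by token pasting, so it cannot accept a runtime value, while the ID-indexed helper trades the patched branch for an ordinary table lookup.

#include <stdbool.h>
#include <stdio.h>

/* plain bools stand in for the per-subsystem static keys */
static bool memory_cgrp_enabled_key = true;
static bool cpuset_cgrp_enabled_key = false;

/* literal-name test: token pasting means "ss" must be a bare identifier */
#define subsys_enabled(ss)	(ss ## _cgrp_enabled_key)

enum { memory_cgrp_id, cpuset_cgrp_id, NR_SUBSYS };

/* the same keys, collected into a table indexable by subsystem ID */
static bool *enabled_key[NR_SUBSYS] = {
	[memory_cgrp_id] = &memory_cgrp_enabled_key,
	[cpuset_cgrp_id] = &cpuset_cgrp_enabled_key,
};

/* ID-indexed test: usable from code that iterates over all controllers */
static bool ssid_enabled(int ssid)
{
	return *enabled_key[ssid];
}

int main(void)
{
	int i;

	printf("memory: %d\n", subsys_enabled(memory));	/* literal name */
	for (i = 0; i < NR_SUBSYS; i++)			/* runtime ID */
		printf("ssid %d: %d\n", i, ssid_enabled(i));
	return 0;
}

This mirrors the trade-off in the patch: individual controllers use the fast literal-name test, while cgroup core pays for the slower static_key_enabled()-style lookup precisely because it must loop over controllers by ID.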
Signed-off-by: Tejun Heo Acked-by: Zefan Li Cc: Vivek Goyal Cc: Jens Axboe Cc: Johannes Weiner Cc: Michal Hocko --- block/blk-throttle.c | 2 +- block/cfq-iosched.c | 4 ++-- include/linux/cgroup.h | 58 -------------------------------------------------- kernel/cgroup.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/cpuset.c | 23 +++++++++++--------- mm/memcontrol.c | 4 ++-- 6 files changed, 76 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c75a2636dd40..2149a1ddbacf 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -369,7 +369,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd) * regardless of the position of the group in the hierarchy. */ sq->parent_sq = &td->service_queue; - if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent) + if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 04de88463a98..1f9093e901da 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1581,7 +1581,7 @@ static struct blkcg_policy_data *cfq_cpd_alloc(gfp_t gfp) static void cfq_cpd_init(struct blkcg_policy_data *cpd) { struct cfq_group_data *cgd = cpd_to_cfqgd(cpd); - unsigned int weight = cgroup_on_dfl(blkcg_root.css.cgroup) ? + unsigned int weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ? CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL; if (cpd_to_blkcg(cpd) == &blkcg_root) @@ -1599,7 +1599,7 @@ static void cfq_cpd_free(struct blkcg_policy_data *cpd) static void cfq_cpd_bind(struct blkcg_policy_data *cpd) { struct blkcg *blkcg = cpd_to_blkcg(cpd); - bool on_dfl = cgroup_on_dfl(blkcg_root.css.cgroup); + bool on_dfl = cgroup_subsys_on_dfl(io_cgrp_subsys); unsigned int weight = on_dfl ? CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL; if (blkcg == &blkcg_root) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c3a9f1eb4097..355bf2ed8867 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -433,64 +433,6 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } -/** - * cgroup_on_dfl - test whether a cgroup is on the default hierarchy - * @cgrp: the cgroup of interest - * - * The default hierarchy is the v2 interface of cgroup and this function - * can be used to test whether a cgroup is on the default hierarchy for - * cases where a subsystem should behave differnetly depending on the - * interface version. - * - * The set of behaviors which change on the default hierarchy are still - * being determined and the mount option is prefixed with __DEVEL__. - * - * List of changed behaviors: - * - * - Mount options "noprefix", "xattr", "clone_children", "release_agent" - * and "name" are disallowed. - * - * - When mounting an existing superblock, mount options should match. - * - * - Remount is disallowed. - * - * - rename(2) is disallowed. - * - * - "tasks" is removed. Everything should be at process granularity. Use - * "cgroup.procs" instead. - * - * - "cgroup.procs" is not sorted. pids will be unique unless they got - * recycled inbetween reads. - * - * - "release_agent" and "notify_on_release" are removed. Replacement - * notification mechanism will be implemented. - * - * - "cgroup.clone_children" is removed. - * - * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup - * and its descendants contain no task; otherwise, 1. 
The file also - * generates kernfs notification which can be monitored through poll and - * [di]notify when the value of the file changes. - * - * - cpuset: tasks will be kept in empty cpusets when hotplug happens and - * take masks of ancestors with non-empty cpus/mems, instead of being - * moved to an ancestor. - * - * - cpuset: a task can be moved into an empty cpuset, and again it takes - * masks of ancestors. - * - * - memcg: use_hierarchy is on by default and the cgroup file for the flag - * is not created. - * - * - blkcg: blk-throttle becomes properly hierarchical. - * - * - debug: disallowed on the default hierarchy. - */ -static inline bool cgroup_on_dfl(const struct cgroup *cgrp) -{ - return cgrp->root == &cgrp_dfl_root; -} - /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_has_tasks(struct cgroup *cgrp) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5703ba791b3d..c24f929fdeb7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -237,6 +237,64 @@ static bool cgroup_ssid_enabled(int ssid) return static_key_enabled(cgroup_subsys_enabled_key[ssid]); } +/** + * cgroup_on_dfl - test whether a cgroup is on the default hierarchy + * @cgrp: the cgroup of interest + * + * The default hierarchy is the v2 interface of cgroup and this function + * can be used to test whether a cgroup is on the default hierarchy for + * cases where a subsystem should behave differnetly depending on the + * interface version. + * + * The set of behaviors which change on the default hierarchy are still + * being determined and the mount option is prefixed with __DEVEL__. + * + * List of changed behaviors: + * + * - Mount options "noprefix", "xattr", "clone_children", "release_agent" + * and "name" are disallowed. + * + * - When mounting an existing superblock, mount options should match. + * + * - Remount is disallowed. + * + * - rename(2) is disallowed. + * + * - "tasks" is removed. Everything should be at process granularity. Use + * "cgroup.procs" instead. + * + * - "cgroup.procs" is not sorted. pids will be unique unless they got + * recycled inbetween reads. + * + * - "release_agent" and "notify_on_release" are removed. Replacement + * notification mechanism will be implemented. + * + * - "cgroup.clone_children" is removed. + * + * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup + * and its descendants contain no task; otherwise, 1. The file also + * generates kernfs notification which can be monitored through poll and + * [di]notify when the value of the file changes. + * + * - cpuset: tasks will be kept in empty cpusets when hotplug happens and + * take masks of ancestors with non-empty cpus/mems, instead of being + * moved to an ancestor. + * + * - cpuset: a task can be moved into an empty cpuset, and again it takes + * masks of ancestors. + * + * - memcg: use_hierarchy is on by default and the cgroup file for the flag + * is not created. + * + * - blkcg: blk-throttle becomes properly hierarchical. + * + * - debug: disallowed on the default hierarchy. 
+ */ +static bool cgroup_on_dfl(const struct cgroup *cgrp) +{ + return cgrp->root == &cgrp_dfl_root; +} + /* IDR wrappers which synchronize using cgroup_idr_lock */ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f0acff0f66c9..20eedd8098c0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -473,7 +473,8 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) /* On legacy hiearchy, we must be a subset of our parent cpuset. */ ret = -EACCES; - if (!cgroup_on_dfl(cur->css.cgroup) && !is_cpuset_subset(trial, par)) + if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && + !is_cpuset_subset(trial, par)) goto out; /* @@ -879,7 +880,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some CPUs. */ - if (cgroup_on_dfl(cp->css.cgroup) && cpumask_empty(new_cpus)) + if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && + cpumask_empty(new_cpus)) cpumask_copy(new_cpus, parent->effective_cpus); /* Skip the whole subtree if the cpumask remains the same. */ @@ -896,7 +898,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) cpumask_copy(cp->effective_cpus, new_cpus); spin_unlock_irq(&callback_lock); - WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && + WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); update_tasks_cpumask(cp); @@ -1135,7 +1137,8 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some MEMs. */ - if (cgroup_on_dfl(cp->css.cgroup) && nodes_empty(*new_mems)) + if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && + nodes_empty(*new_mems)) *new_mems = parent->effective_mems; /* Skip the whole subtree if the nodemask remains the same. 
*/ @@ -1152,7 +1155,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) cp->effective_mems = *new_mems; spin_unlock_irq(&callback_lock); - WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && + WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && !nodes_equal(cp->mems_allowed, cp->effective_mems)); update_tasks_nodemask(cp); @@ -1440,7 +1443,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; - if (!cgroup_on_dfl(css->cgroup) && + if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; @@ -1952,7 +1955,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpuset_inc(); spin_lock_irq(&callback_lock); - if (cgroup_on_dfl(cs->css.cgroup)) { + if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { cpumask_copy(cs->effective_cpus, parent->effective_cpus); cs->effective_mems = parent->effective_mems; } @@ -2029,7 +2032,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) mutex_lock(&cpuset_mutex); spin_lock_irq(&callback_lock); - if (cgroup_on_dfl(root_css->cgroup)) { + if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); top_cpuset.mems_allowed = node_possible_map; } else { @@ -2210,7 +2213,7 @@ retry: cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); mems_updated = !nodes_equal(new_mems, cs->effective_mems); - if (cgroup_on_dfl(cs->css.cgroup)) + if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) hotplug_update_tasks(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); else @@ -2241,7 +2244,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; - bool on_dfl = cgroup_on_dfl(top_cpuset.css.cgroup); + bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys); mutex_lock(&cpuset_mutex); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6ddaeba34e09..b35c4cc47a30 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -434,7 +434,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page) memcg = page->mem_cgroup; - if (!memcg || !cgroup_on_dfl(memcg->css.cgroup)) + if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) memcg = root_mem_cgroup; rcu_read_unlock(); @@ -5059,7 +5059,7 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) * guarantees that @root doesn't have any children, so turning it * on for the root memcg is enough. */ - if (cgroup_on_dfl(root_css->cgroup)) + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) root_mem_cgroup->use_hierarchy = true; else root_mem_cgroup->use_hierarchy = false; -- cgit v1.2.3 From 339e5b44eda2150baad183def6b7030fad5ec44e Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 18 Sep 2015 13:58:34 -0500 Subject: PCI: Add msi_controller setup_irqs() method for special multivector setup Add a msi_controller setup_irqs() method so MSI chip providers can implement their own multivector MSI setup. 
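For illustration, an MSI chip provider could wire up the new method roughly as in the sketch below, under the setup_irqs() signature this patch adds. The my_* names are hypothetical and the actual vector allocation is controller-specific.

#include <linux/msi.h>
#include <linux/pci.h>

/*
 * Hypothetical multivector setup: called once with the full vector
 * count, instead of the core invoking ->setup_irq() per descriptor.
 */
static int my_msi_setup_irqs(struct msi_controller *chip,
                             struct pci_dev *pdev, int nvec, int type)
{
        /* Multi-MSI needs a naturally aligned block of nvec vectors. */
        if (type == PCI_CAP_ID_MSI && nvec > 1) {
                /* reserve nvec contiguous vectors for pdev here */
        }

        /* compose and write the MSI message(s); return 0 on success */
        return -ENOSYS; /* placeholder: no real allocator in this sketch */
}

static struct msi_controller my_msi_chip = {
        .setup_irqs     = my_msi_setup_irqs,
};

With dev->bus->msi pointing at such a controller, the arch_setup_msi_irqs() change below diverts the whole request to the new hook.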
[bhelgaas: changelog] Signed-off-by: Lucas Stach Signed-off-by: Bjorn Helgaas Reviewed-by: Pratyush Anand --- drivers/pci/msi.c | 3 +++ include/linux/msi.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index d4497141d083..74319f497656 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -105,9 +105,12 @@ void __weak arch_teardown_msi_irq(unsigned int irq) int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { + struct msi_controller *chip = dev->bus->msi; struct msi_desc *entry; int ret; + if (chip && chip->setup_irqs) + return chip->setup_irqs(chip, dev, nvec, type); /* * If an architecture wants to support multiple MSI, it needs to * override arch_setup_msi_irqs() diff --git a/include/linux/msi.h b/include/linux/msi.h index ad939d0ba816..0be5db110bfa 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -163,6 +163,8 @@ struct msi_controller { int (*setup_irq)(struct msi_controller *chip, struct pci_dev *dev, struct msi_desc *desc); + int (*setup_irqs)(struct msi_controller *chip, struct pci_dev *dev, + int nvec, int type); void (*teardown_irq)(struct msi_controller *chip, unsigned int irq); }; -- cgit v1.2.3 From 97b59c3a91d5ee4777658ff2136d1fdf13bd23d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:54 -0500 Subject: netfilter: ebtables: Simplify the arguments to ebt_do_table Nearly everything of interest to ebt_do_table is already present in nf_hook_state. Simplify ebt_do_table by just passing in the skb, nf_hook_state, and the table. This makes the code easier to read and maintenance easier. To support this, create an nf_hook_state on the stack in ebt_broute (the only caller without an nf_hook_state already available). This new nf_hook_state adds no new computations to ebt_broute, but does use a few more bytes of stack. Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- net/bridge/netfilter/ebtable_broute.c | 8 ++++++-- net/bridge/netfilter/ebtable_filter.c | 6 ++---- net/bridge/netfilter/ebtable_nat.c | 6 ++---- net/bridge/netfilter/ebtables.c | 13 +++++++------ 5 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 8ca6d6464ea3..2ea517c7c6b9 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -111,9 +111,9 @@ struct ebt_table { extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table); +extern unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table); /* Used in the kernel match() functions */ #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d2cdf5d6e98c..ec94c6f1ae88 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -50,10 +50,14 @@ static const struct ebt_table broute_table = { static int ebt_broute(struct sk_buff *skb) { + struct nf_hook_state state; int ret; - ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, - dev_net(skb->dev)->xt.broute_table); + nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + + ret = ebt_do_table(skb, &state, state.net->xt.broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index ab20d6ed6e2f..118ce40ac181 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -60,16 +60,14 @@ static unsigned int ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static unsigned int ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index ad81a5a65644..56c3329d6c37 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -60,16 +60,14 @@ static unsigned int ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_nat); + return ebt_do_table(skb, state, state->net->xt.frame_nat); } static unsigned int ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_nat); + return ebt_do_table(skb, 
state, state->net->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 48b6b01295de..8d5a3975b963 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -183,10 +183,11 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table) +unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table) { + unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; @@ -199,8 +200,8 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct xt_action_param acpar; acpar.family = NFPROTO_BRIDGE; - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.hotdrop = false; acpar.hooknum = hook; @@ -220,7 +221,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, skb, in, out)) + if (ebt_basic_match(point, skb, state->in, state->out)) goto letscontinue; if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) -- cgit v1.2.3 From 6cb8ff3f1a535b1d8eb5ea318932513d08eb3da7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:55 -0500 Subject: inet netfilter: Remove hook from ip6t_do_table, arp_do_table, ipt_do_table The values of ops->hooknum and state->hook are guaranteed to be equal, making the hook argument to ip6t_do_table, arp_do_table, and ipt_do_table unnecessary. Remove the unnecessary hook argument. In the callers, use state->hook instead of ops->hooknum for clarity and to reduce the number of cachelines the callers touch. Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 1 - include/linux/netfilter_ipv4/ip_tables.h | 1 - include/linux/netfilter_ipv6/ip6_tables.h | 1 - net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 3 +-- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 5 ++--- net/ipv4/netfilter/iptable_mangle.c | 12 +++++------- net/ipv4/netfilter/iptable_nat.c | 3 +-- net/ipv4/netfilter/iptable_raw.c | 5 ++--- net/ipv4/netfilter/iptable_security.c | 5 ++--- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 3 +-- net/ipv6/netfilter/ip6table_mangle.c | 12 +++++------- net/ipv6/netfilter/ip6table_nat.c | 3 +-- net/ipv6/netfilter/ip6table_raw.c | 3 +-- net/ipv6/netfilter/ip6table_security.c | 3 +-- 17 files changed, 25 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index c22a7fb8d0df..6f074db2f23d 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,7 +53,6 @@ extern struct xt_table *arpt_register_table(struct net *net, const struct arpt_replace *repl); extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 4073510da485..aa598f942c01 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -64,7 +64,6 @@ struct ipt_error { extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index b40d2b635778..0f76e5c674f9 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -30,7 +30,6 @@ extern struct xt_table *ip6t_register_table(struct net *net, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8f87fc38ccde..10eb2b297450 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -247,10 +247,10 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) } unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index d217e4c19645..1352e12d4068 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -30,8 +30,7 @@ static unsigned int arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return arpt_do_table(skb, ops->hooknum, state, - state->net->ipv4.arptable_filter); + return arpt_do_table(skb, state, 
state->net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5d514eac4c31..2b049e135de8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -285,10 +285,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; /* Initializing verdict to NF_DROP keeps gcc happy. */ diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 32feff32b116..02d4c5395d6e 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -36,14 +36,13 @@ static unsigned int iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_filter); + return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4a5150fc9510..dc2ff6884999 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -58,8 +58,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, - state->net->ipv4.iptable_mangle); + ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); /* Reroute for ANY change. 
*/ if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); @@ -83,14 +82,13 @@ iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ipt_do_table(skb, ops->hooknum, state, + if (state->hook == NF_INET_POST_ROUTING) + return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_mangle); + return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 4f4c64f81169..8ff63ac1f0d6 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -33,8 +33,7 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, const struct nf_hook_state *state, struct nf_conn *ct) { - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.nat_table); + return ipt_do_table(skb, state, state->net->ipv4.nat_table); } static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 20126e469ffb..bbb0523d87de 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -23,14 +23,13 @@ static unsigned int iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_raw); + return ipt_do_table(skb, state, state->net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 82fefd609b85..b92417038705 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -40,14 +40,13 @@ static unsigned int iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_security); + return ipt_do_table(skb, state, state->net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cd9b401231d3..da6446b6e3f9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -314,10 +314,10 @@ ip6t_next_entry(const struct ip6t_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. 
*/ unsigned int verdict = NF_DROP; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 2449005fb5dc..a7327f61b90c 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -35,8 +35,7 @@ static unsigned int ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_filter); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a46dbf097d29..c2e061dcedf3 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -57,8 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, - state->net->ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -79,14 +78,13 @@ static unsigned int ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, state, + if (state->hook == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_mangle); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index a56451de127f..efa6754c4d06 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -35,8 +35,7 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, const struct nf_hook_state *state, struct nf_conn *ct) { - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_nat); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat); } static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 18e831e35782..fac6ad7c0a7c 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -22,8 +22,7 @@ static unsigned int ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_raw); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 83bc96ae5d73..96c94fc240c8 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -39,8 +39,7 @@ static unsigned int ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip6t_do_table(skb, ops->hooknum, state, - 
state->net->ipv6.ip6table_security); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; -- cgit v1.2.3 From 156c196f6038610770588a708b9e0f7df2ead74a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:58 -0500 Subject: netfilter: x_tables: Pass struct net in xt_action_param As xt_action_param lives on the stack this does not bloat any persistent data structures. This is a first step in making netfilter code that needs to know which network namespace it is executing in simpler. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 ++- include/net/netfilter/nf_tables.h | 1 + net/bridge/netfilter/ebtables.c | 1 + net/ipv4/netfilter/arp_tables.c | 1 + net/ipv4/netfilter/ip_tables.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + net/sched/act_ipt.c | 1 + net/sched/em_ipset.c | 1 + 8 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b006b719183f..c5577410c25d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -13,6 +13,7 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data + * @net network namespace through which the action was invoked * @in: input netdevice * @out: output netdevice * @fragoff: packet is a fragment, this is the data offset @@ -24,7 +25,6 @@ * Fields written to by extensions: * * @hotdrop: drop packet if we had inspection problems - * Network namespace obtainable using dev_net(in/out) */ struct xt_action_param { union { @@ -34,6 +34,7 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; + struct net *net; const struct net_device *in, *out; int fragoff; unsigned int thoff; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c0899f97ff8d..c0516529e8a0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -30,6 +30,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { pkt->skb = skb; + pkt->xt.net = state->net; pkt->in = pkt->xt.in = state->in; pkt->out = pkt->xt.out = state->out; pkt->hook = pkt->xt.hooknum = state->hook; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d5a3975b963..f46ca417bf2d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -200,6 +200,7 @@ unsigned int ebt_do_table(struct sk_buff *skb, struct xt_action_param acpar; acpar.family = NFPROTO_BRIDGE; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.hotdrop = false; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 10eb2b297450..2dad3e1c5f11 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -285,6 +285,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, */ e = get_entry(table_base, private->hook_entry[hook]); + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.hooknum = hook; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2b049e135de8..42d0946956db 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -315,6 +315,7 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; + acpar.net = 
state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV4; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index da6446b6e3f9..80e3bd72b715 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV6; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 99c9cc1c7af9..d05869646515 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -189,6 +189,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, * worry later - danger - this API seems to have changed * from earlier kernels */ + par.net = dev_net(skb->dev); par.in = skb->dev; par.out = NULL; par.hooknum = ipt->tcfi_hook; diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df0328ba6a48..c66ca9400ab4 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -95,6 +95,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); + acpar.net = em->net; acpar.in = indev ? indev : dev; acpar.out = dev; -- cgit v1.2.3 From 06198b34a3e09e06d9aecaa3727e0d37206cea77 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:06 -0500 Subject: netfilter: Pass priv instead of nf_hook_ops to netfilter hooks Only pass the void *priv parameter out of the nf_hook_ops. That is all any of the functions are interested in now, and by limiting what is passed it becomes simpler to change implementation details. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- include/net/netfilter/br_netfilter.h | 2 +- include/net/netfilter/nf_nat_l3proto.h | 32 +++++++++++++------------- include/net/netfilter/nf_tables.h | 3 +-- net/bridge/br_netfilter_hooks.c | 14 +++++------ net/bridge/br_netfilter_ipv6.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 4 ++-- net/bridge/netfilter/ebtable_nat.c | 4 ++-- net/bridge/netfilter/nf_tables_bridge.c | 4 ++-- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_nat.c | 18 +++++++-------- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 8 +++---- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 24 +++++++++---------- net/ipv4/netfilter/nf_tables_arp.c | 4 ++-- net/ipv4/netfilter/nf_tables_ipv4.c | 8 +++---- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 20 ++++++++-------- net/ipv4/netfilter/nft_chain_route_ipv4.c | 4 ++-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_nat.c | 18 +++++++-------- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 8 +++---- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 24 +++++++++---------- net/ipv6/netfilter/nf_tables_ipv6.c | 8 +++---- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 20 
++++++++-------- net/ipv6/netfilter/nft_chain_route_ipv6.c | 4 ++-- net/netfilter/core.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 24 +++++++++---------- net/netfilter/nf_tables_core.c | 4 ++-- net/netfilter/nf_tables_netdev.c | 4 ++-- security/selinux/hooks.c | 10 ++++---- security/smack/smack_netfilter.c | 4 ++-- 43 files changed, 156 insertions(+), 157 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0b4d4560f33d..987c74cd523c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -80,7 +80,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->okfn = okfn; } -typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, +typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 8fe266504900..c93c75fa41ad 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -46,7 +46,7 @@ void br_netfilter_enable(void); #if IS_ENABLED(CONFIG_IPV6) int br_validate_ipv6(struct sk_buff *skb); -unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, +unsigned int br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); #else diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index a3127325f624..aef3e5fc9fd9 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -43,31 +43,31 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum); -unsigned int nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +unsigned int nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); @@ -76,31 +76,31 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, unsigned int hdrlen); -unsigned int nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int 
(*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, +unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 42e239e55aa3..c9149cc0a02d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -816,8 +816,7 @@ int nft_register_basechain(struct nft_base_chain *basechain, void nft_unregister_basechain(struct nft_base_chain *basechain, unsigned int hook_nops); -unsigned int nft_do_chain(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops); +unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); /** * struct nft_table - nf_tables table diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index e6e76bbdc82f..e21e44c13e07 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -464,7 +464,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb) * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -486,7 +486,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(ops, skb, state); + return br_nf_pre_routing_ipv6(priv, skb, state); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -526,7 +526,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, +static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -570,7 +570,7 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. 
*/ -static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_ip(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -633,7 +633,7 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -801,7 +801,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_post_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -850,7 +850,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, +static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index e4dbbe44c724..c51cc3fd50d9 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -218,7 +218,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables. */ -unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, +unsigned int br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 118ce40ac181..f9242dffa65e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -57,14 +57,14 @@ static const struct ebt_table frame_filter = { }; static unsigned int -ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_filter); } static unsigned int -ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_filter); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 56c3329d6c37..4bbefe03ab58 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -57,14 +57,14 @@ static struct ebt_table frame_nat = { }; static unsigned int -ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_nat); } static unsigned int -ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_nat); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 318d825e4207..62f6b1b19589 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -87,7 +87,7 @@ static inline void 
nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, } static unsigned int -nft_do_chain_bridge(const struct nf_hook_ops *ops, +nft_do_chain_bridge(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -105,7 +105,7 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops, break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_bridge __read_mostly = { diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index af34fc9bdf69..85f2fdc360c2 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, +static unsigned int dnrmg_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 1352e12d4068..1897ee160920 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,7 +27,7 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +arptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return arpt_do_table(skb, state, state->net->ipv4.arptable_filter); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 69157d8eba95..3f32c03e8b2e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -507,7 +507,7 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(const struct nf_hook_ops *ops, +arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index dfab314981e9..d7021f28c3f0 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -299,7 +299,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 02d4c5395d6e..397ef2dd133e 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,7 +33,7 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT && diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dc2ff6884999..2d6fc911866f 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -78,7 +78,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. 
*/ static unsigned int -iptable_mangle_hook(const struct nf_hook_ops *ops, +iptable_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 8ff63ac1f0d6..3a2e4d830a0b 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -28,7 +28,7 @@ static const struct xt_table nf_nat_ipv4_table = { .af = NFPROTO_IPV4, }; -static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -36,32 +36,32 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, return ipt_do_table(skb, state, state->net->ipv4.nat_table); } -static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index bbb0523d87de..1ba02811acb0 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,7 +20,7 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. 
*/ static unsigned int -iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT && diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index b92417038705..f534e2f05bad 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,7 +37,7 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT && diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 15749cc5cf2b..752fb40adcf8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(const struct nf_hook_ops *ops, +static unsigned int ipv4_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -119,7 +119,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops, ct, ctinfo); } -static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -143,14 +143,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } -static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 8aea536d2e83..b246346ee849 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -61,7 +61,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 16da45a76dac..8593a9d88619 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,9 +255,9 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); unsigned int -nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -308,7 +308,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret = do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; @@ -345,9 +345,9 @@ oif_changed: 
EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); unsigned int -nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -355,7 +355,7 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -365,9 +365,9 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); unsigned int -nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -384,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -407,9 +407,9 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); unsigned int -nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -424,7 +424,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 883bbf83fe09..9d09d4f59545 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -15,7 +15,7 @@ #include static unsigned int -nft_do_chain_arp(const struct nf_hook_ops *ops, +nft_do_chain_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -23,7 +23,7 @@ nft_do_chain_arp(const struct nf_hook_ops *ops, nft_set_pktinfo(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_arp __read_mostly = { diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 805be5c9fcc3..ca9dc3c46c4f 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -18,7 +18,7 @@ #include #include -static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -26,10 +26,10 @@ static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv4(&pkt, skb, 
state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -41,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv4(ops, skb, state); + return nft_do_chain_ipv4(priv, skb, state); } struct nft_af_info nft_af_ipv4 __read_mostly = { diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index c3ffecf28d38..f5c66a7a4bf2 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -26,7 +26,7 @@ #include #include -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -35,35 +35,35 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv4(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv4 = { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 2a1e3d8a3e43..9f486b302108 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -21,7 +21,7 @@ #include #include -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 41451809b37c..c2356602158a 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -316,7 +316,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int 
ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index a7327f61b90c..8b277b983ca5 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,7 +32,7 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c2e061dcedf3..8745b592b2f6 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -75,7 +75,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index efa6754c4d06..abea175d5853 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -30,7 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -38,32 +38,32 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat); } -static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index fac6ad7c0a7c..9021963565c3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,7 +19,7 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. 
*/ static unsigned int -ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 96c94fc240c8..0d856fedfeb0 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,7 +36,7 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 339be1d59afc..dd83ad42f8f6 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(const struct nf_hook_ops *ops, +static unsigned int ipv6_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -131,7 +131,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv6_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -165,14 +165,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } -static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index a9c08520596b..a99baf63eccf 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -51,7 +51,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 8bc94907dbd9..357f57ba47e4 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -262,9 +262,9 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); unsigned int -nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -317,7 +317,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret 
= do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; @@ -353,9 +353,9 @@ oif_changed: EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); unsigned int -nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -363,7 +363,7 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -373,9 +373,9 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); unsigned int -nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -391,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -414,9 +414,9 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); unsigned int -nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -430,7 +430,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 41340b794f9b..120ea9131be0 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -16,7 +16,7 @@ #include #include -static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -26,10 +26,10 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv6_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -40,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv6(ops, skb, state); + return nft_do_chain_ipv6(priv, skb, state); } struct 
nft_af_info nft_af_ipv6 __read_mostly = { diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index e96feaefeb14..443cd306c0b0 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -24,7 +24,7 @@ #include #include -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -33,35 +33,35 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv6(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index d1bcd2ed7bcc..d42bbc1d7555 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -22,7 +22,7 @@ #include #include -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8e47f8113495..2e907335ee81 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -269,7 +269,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. 
--RR */ repeat: - verdict = (*elemp)->hook(*elemp, skb, state); + verdict = (*elemp)->hook((*elemp)->priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 40e3c85f83b5..1fa12edccbcc 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1311,7 +1311,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET); @@ -1322,7 +1322,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET); @@ -1336,7 +1336,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET6); @@ -1347,7 +1347,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET6); @@ -1847,7 +1847,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET); @@ -1858,7 +1858,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET); @@ -1871,7 +1871,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET6); @@ -1882,7 +1882,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET6); @@ -1901,7 +1901,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * and send them to ip_vs_in_icmp. 
*/ static unsigned int -ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; @@ -1917,12 +1917,12 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, ops->hooknum); + return ip_vs_in_icmp(skb, &r, state->hook); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; @@ -1940,7 +1940,7 @@ ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); + return ip_vs_in_icmp_v6(skb, &r, state->hook, &iphdr); } #endif diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e5c1f332e45e..f3695a497408 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -109,9 +109,9 @@ struct nft_jumpstack { }; unsigned int -nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) +nft_do_chain(struct nft_pktinfo *pkt, void *priv) { - const struct nft_chain *chain = ops->priv, *basechain = chain; + const struct nft_chain *chain = priv, *basechain = chain; const struct net *net = pkt->net; const struct nft_rule *rule; const struct nft_expr *expr, *last; diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index db416a3396e9..7b9c053ba750 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -89,7 +89,7 @@ static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, } static unsigned int -nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, +nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; @@ -106,7 +106,7 @@ nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_netdev __read_mostly = { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e4369d86e588..64340160f4ac 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4866,7 +4866,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv4_forward(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -4874,7 +4874,7 @@ static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv6_forward(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -4924,7 +4924,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -5099,7 +5099,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, 
+static unsigned int selinux_ipv4_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -5107,7 +5107,7 @@ static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv6_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c index a455cfc9ec1f..a9e41da05d28 100644 --- a/security/smack/smack_netfilter.c +++ b/security/smack/smack_netfilter.c @@ -21,7 +21,7 @@ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops, +static unsigned int smack_ipv6_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -38,7 +38,7 @@ static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops, } #endif /* IPV6 */ -static unsigned int smack_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int smack_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { -- cgit v1.2.3 From 4a07c222d3afb00e1113834fee38d23a8e5d71dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 17:54:22 -0400 Subject: cgroup: replace "cgroup.populated" with "cgroup.events" memcg already uses "memory.events" for event reporting and other controllers may need event reporting too. Let's standardize on "$SUBSYS.events" interface file for reporting events which don't happen too frequently and thus can share event notification. "cgroup.populated" is replaced with "populated" field in "cgroup.events" and documentation is updated accordingly. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner --- Documentation/cgroups/unified-hierarchy.txt | 15 ++++++++++----- include/linux/cgroup-defs.h | 2 +- kernel/cgroup.c | 17 +++++++++-------- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index e0975c2cf03d..176b940f8327 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -341,11 +341,11 @@ is riddled with issues. unnecessarily complicated and probably done this way because event delivery itself was expensive. -Unified hierarchy implements an interface file "cgroup.populated" -which can be used to monitor whether the cgroup's subhierarchy has -tasks in it or not. Its value is 0 if there is no task in the cgroup -and its descendants; otherwise, 1. poll and [id]notify events are -triggered when the value changes. +Unified hierarchy implements "populated" field in "cgroup.events" +interface file which can be used to monitor whether the cgroup's +subhierarchy has tasks in it or not. Its value is 0 if there is no +task in the cgroup and its descendants; otherwise, 1. poll and +[id]notify events are triggered when the value changes. This is significantly lighter and simpler and trivially allows delegating management of subhierarchy - subhierarchy monitoring can @@ -435,6 +435,11 @@ may be specified in any order and not all pairs have to be specified. the first entry in the file. Specific entries can use "default" as its value to indicate inheritance of the default value. +- For events which are not very high frequency, an interface file + "events" should be created which lists event key value pairs. 
+ Whenever a notifiable event happens, file modified event should be + generated on the file. + 5-4. Per-Controller Changes diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c5d41c3c1f00..d95cc88e9dc2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -226,7 +226,7 @@ struct cgroup { struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + struct kernfs_node *events_kn; /* kn for "cgroup.events" */ /* * The bitmask of subsystems enabled on the child cgroups. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c24f929fdeb7..75eba25d8dfd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -611,8 +611,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) if (!trigger) break; - if (cgrp->populated_kn) - kernfs_notify(cgrp->populated_kn); + if (cgrp->events_kn) + kernfs_notify(cgrp->events_kn); cgrp = cgroup_parent(cgrp); } while (cgrp); } @@ -3045,9 +3045,10 @@ err_undo_css: goto out_unlock; } -static int cgroup_populated_show(struct seq_file *seq, void *v) +static int cgroup_events_show(struct seq_file *seq, void *v) { - seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt); + seq_printf(seq, "populated %d\n", + (bool)seq_css(seq)->cgroup->populated_cnt); return 0; } @@ -3214,8 +3215,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) if (cft->write == cgroup_procs_write) cgrp->procs_kn = kn; - else if (cft->seq_show == cgroup_populated_show) - cgrp->populated_kn = kn; + else if (cft->seq_show == cgroup_events_show) + cgrp->events_kn = kn; return 0; } @@ -4388,9 +4389,9 @@ static struct cftype cgroup_dfl_base_files[] = { .write = cgroup_subtree_control_write, }, { - .name = "cgroup.populated", + .name = "cgroup.events", .flags = CFTYPE_NOT_ON_ROOT, - .seq_show = cgroup_populated_show, + .seq_show = cgroup_events_show, }, { } /* terminate */ }; -- cgit v1.2.3 From 7dbdb199d3bf88f043ea17e97113eb28d5b100bc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 17:54:23 -0400 Subject: cgroup: replace cftype->mode with CFTYPE_WORLD_WRITABLE cftype->mode allows controllers to give arbitrary permissions to interface knobs. Except for "cgroup.event_control", the existing uses are spurious. * Some explicitly specify S_IRUGO | S_IWUSR even though that's the default. * "cpuset.memory_pressure" specifies S_IRUGO while also setting a write callback which returns -EACCES. All it needs to do is simply not setting a write callback. "cgroup.event_control" uses cftype->mode to make the file world-writable. It's a misdesigned interface and we don't want controllers to be tweaking interface file permissions in general. This patch removes cftype->mode and all its spurious uses and implements CFTYPE_WORLD_WRITABLE for "cgroup.event_control" which is marked as compatibility-only. 
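As an illustration of the new scheme: file mode is now derived purely from which handlers a cftype provides, plus the compat flag for the one legacy world-writable case. A minimal sketch (the "demo" cftype and its handlers demo_ev_show, demo_ev_write and demo_stat_show are made up for the example and are not part of this patch):

        static struct cftype demo_files[] = {
                {
                        /* has a write handler and CFTYPE_WORLD_WRITABLE:
                         * mode is deduced as S_IRUGO | S_IWUGO
                         */
                        .name     = "demo.event_control",
                        .seq_show = demo_ev_show,
                        .write    = demo_ev_write,
                        .flags    = CFTYPE_WORLD_WRITABLE,
                },
                {
                        /* no write handler: deduced as S_IRUGO */
                        .name     = "demo.stat",
                        .seq_show = demo_stat_show,
                },
                { }     /* terminate */
        };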
Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 6 +----- kernel/cgroup.c | 19 +++++++------------ kernel/cpuset.c | 6 ------ mm/memcontrol.c | 3 +-- 4 files changed, 9 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index d95cc88e9dc2..10d814bcd487 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -76,6 +76,7 @@ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ @@ -324,11 +325,6 @@ struct cftype { */ char name[MAX_CFTYPE_NAME]; unsigned long private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; /* * The maximum length of string, excluding trailing nul, that can diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 75eba25d8dfd..5031edc6f077 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1139,23 +1139,21 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, * cgroup_file_mode - deduce file mode of a control file * @cft: the control file in question * - * returns cft->mode if ->mode is not 0 - * returns S_IRUGO|S_IWUSR if it has both a read and a write handler - * returns S_IRUGO if it has only a read handler - * returns S_IWUSR if it has only a write hander + * S_IRUGO for read, S_IWUSR for write. */ static umode_t cgroup_file_mode(const struct cftype *cft) { umode_t mode = 0; - if (cft->mode) - return cft->mode; - if (cft->read_u64 || cft->read_s64 || cft->seq_show) mode |= S_IRUGO; - if (cft->write_u64 || cft->write_s64 || cft->write) - mode |= S_IWUSR; + if (cft->write_u64 || cft->write_s64 || cft->write) { + if (cft->flags & CFTYPE_WORLD_WRITABLE) + mode |= S_IWUGO; + else + mode |= S_IWUSR; + } return mode; } @@ -4371,7 +4369,6 @@ static struct cftype cgroup_dfl_base_files[] = { .seq_show = cgroup_pidlist_show, .private = CGROUP_FILE_PROCS, .write = cgroup_procs_write, - .mode = S_IRUGO | S_IWUSR, }, { .name = "cgroup.controllers", @@ -4406,7 +4403,6 @@ static struct cftype cgroup_legacy_base_files[] = { .seq_show = cgroup_pidlist_show, .private = CGROUP_FILE_PROCS, .write = cgroup_procs_write, - .mode = S_IRUGO | S_IWUSR, }, { .name = "cgroup.clone_children", @@ -4426,7 +4422,6 @@ static struct cftype cgroup_legacy_base_files[] = { .seq_show = cgroup_pidlist_show, .private = CGROUP_FILE_TASKS, .write = cgroup_tasks_write, - .mode = S_IRUGO | S_IWUSR, }, { .name = "notify_on_release", diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 20eedd8098c0..312961ef3ccc 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1597,9 +1597,6 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, case FILE_MEMORY_PRESSURE_ENABLED: cpuset_memory_pressure_enabled = !!val; break; - case FILE_MEMORY_PRESSURE: - retval = -EACCES; - break; case FILE_SPREAD_PAGE: retval = update_flag(CS_SPREAD_PAGE, cs, val); break; @@ -1866,9 +1863,6 @@ static struct cftype files[] = { { .name = "memory_pressure", .read_u64 = cpuset_read_u64, - .write_u64 = cpuset_write_u64, - .private = FILE_MEMORY_PRESSURE, - .mode = S_IRUGO, }, { diff --git a/mm/memcontrol.c 
b/mm/memcontrol.c index b35c4cc47a30..e672f2689326 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4060,8 +4060,7 @@ static struct cftype mem_cgroup_legacy_files[] = { { .name = "cgroup.event_control", /* XXX: for compat */ .write = memcg_write_event_control, - .flags = CFTYPE_NO_PREFIX, - .mode = S_IWUGO, + .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE, }, { .name = "swappiness", -- cgit v1.2.3 From 6f60eade2433cb3a38687d5f8a4f44b92c6c51bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 17:54:23 -0400 Subject: cgroup: generalize obtaining the handles of and notifying cgroup files cgroup core handles creations and removals of cgroup interface files as described by cftypes. There are cases where the handle for a given file instance is necessary, for example, to generate a file modified event. Currently, this is handled by explicitly matching the callback method pointer and storing the file handle manually in cgroup_add_file(). While this simple approach works for cgroup core files, it can't for controller interface files. This patch generalizes cgroup interface file handle handling. struct cgroup_file is defined and each cftype can optionally tell cgroup core to store the file handle by setting ->file_offset. A file handle remains accessible as long as the containing css is accessible. Both "cgroup.procs" and "cgroup.events" are converted to use the new generic mechanism instead of hooking directly into cgroup_add_file(). Also, cgroup_file_notify() which takes a struct cgroup_file and generates a file modified event on it is added and replaces explicit kernfs_notify() invocations. This generalizes cgroup file handle handling and allows controllers to generate file modified notifications. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 26 ++++++++++++++++++++++++-- include/linux/cgroup.h | 13 +++++++++++++ kernel/cgroup.c | 26 +++++++++++++++++++------- 3 files changed, 56 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 10d814bcd487..df589a097539 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -83,6 +83,17 @@ enum { __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ }; +/* + * cgroup_file is the handle for a file instance created in a cgroup which + * is used, for example, to generate file changed notifications. This can + * be obtained by setting cftype->file_offset. + */ +struct cgroup_file { + /* do not access any fields from outside cgroup core */ + struct list_head node; /* anchored at css->files */ + struct kernfs_node *kn; +}; + /* * Per-subsystem/per-cgroup state maintained by the system. This is the * fundamental structural building block that controllers deal with. @@ -123,6 +134,9 @@ struct cgroup_subsys_state { */ u64 serial_nr; + /* all cgroup_files associated with this css */ + struct list_head files; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; @@ -226,8 +240,8 @@ struct cgroup { int populated_cnt; struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ - struct kernfs_node *events_kn; /* kn for "cgroup.events" */ + struct cgroup_file procs_file; /* handle for "cgroup.procs" */ + struct cgroup_file events_file; /* handle for "cgroup.events" */ /* * The bitmask of subsystems enabled on the child cgroups. 
@@ -335,6 +349,14 @@ struct cftype { /* CFTYPE_* flags */ unsigned int flags; + /* + * If non-zero, should contain the offset from the start of css to + * a struct cgroup_file field. cgroup will record the handle of + * the created file into it. The recorded handle can be used as + * long as the containing css remains accessible. + */ + unsigned int file_offset; + /* * Fields used for internal bookkeeping. Initialized automatically * during registration. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 355bf2ed8867..fb717f2cba5b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -490,6 +490,19 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +/** + * cgroup_file_notify - generate a file modified event for a cgroup_file + * @cfile: target cgroup_file + * + * @cfile must have been obtained by setting cftype->file_offset. + */ +static inline void cgroup_file_notify(struct cgroup_file *cfile) +{ + /* might not have been created due to one of the CFTYPE selector flags */ + if (cfile->kn) + kernfs_notify(cfile->kn); +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 06c9d1aeea9d..0be276ffe08a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -612,8 +612,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) if (!trigger) break; - if (cgrp->events_kn) - kernfs_notify(cgrp->events_kn); + cgroup_file_notify(&cgrp->events_file); + cgrp = cgroup_parent(cgrp); } while (cgrp); } @@ -1771,6 +1771,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->self.sibling); INIT_LIST_HEAD(&cgrp->self.children); + INIT_LIST_HEAD(&cgrp->self.files); INIT_LIST_HEAD(&cgrp->cset_links); INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); @@ -2562,7 +2563,7 @@ static int cgroup_procs_write_permission(struct task_struct *task, cgrp = cgroup_parent(cgrp); ret = -ENOMEM; - inode = kernfs_get_inode(sb, cgrp->procs_kn); + inode = kernfs_get_inode(sb, cgrp->procs_file.kn); if (inode) { ret = inode_permission(inode, MAY_WRITE); iput(inode); @@ -3253,10 +3254,14 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, return ret; } - if (cft->write == cgroup_procs_write) - cgrp->procs_kn = kn; - else if (cft->seq_show == cgroup_events_show) - cgrp->events_kn = kn; + if (cft->file_offset) { + struct cgroup_file *cfile = (void *)css + cft->file_offset; + + kernfs_get(kn); + cfile->kn = kn; + list_add(&cfile->node, &css->files); + } + return 0; } @@ -4408,6 +4413,7 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css, static struct cftype cgroup_dfl_base_files[] = { { .name = "cgroup.procs", + .file_offset = offsetof(struct cgroup, procs_file), .seq_start = cgroup_pidlist_start, .seq_next = cgroup_pidlist_next, .seq_stop = cgroup_pidlist_stop, @@ -4433,6 +4439,7 @@ static struct cftype cgroup_dfl_base_files[] = { { .name = "cgroup.events", .flags = CFTYPE_NOT_ON_ROOT, + .file_offset = offsetof(struct cgroup, events_file), .seq_show = cgroup_events_show, }, { } /* terminate */ @@ -4511,9 +4518,13 @@ static void css_free_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; + struct cgroup_file *cfile; percpu_ref_exit(&css->refcnt); + list_for_each_entry(cfile, &css->files, node) + kernfs_put(cfile->kn); + if (ss) { /* css free path */ int id = css->id; @@ 
-4618,6 +4629,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css->ss = ss; INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); + INIT_LIST_HEAD(&css->files); css->serial_nr = css_serial_nr_next++; if (cgroup_parent(cgrp)) { -- cgit v1.2.3 From 6b7bc0618ff1a333d2265131b124e966335d5dee Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Mon, 22 Jun 2015 13:02:04 +0000 Subject: spi: add transfer histogram statistics via sysfs report transfer sizes as a histogram via the following files: /sys/class/spi_master/spi*/statistics/transfer_bytes_histo_* /sys/class/spi_master/spi*/spi*.*/statistics/transfer_bytes_histo_* Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi.h | 4 ++++ 2 files changed, 65 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 3abb3903f2ad..73face0f6b9c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -123,6 +123,28 @@ SPI_STATISTICS_SHOW(bytes, "%llu"); SPI_STATISTICS_SHOW(bytes_rx, "%llu"); SPI_STATISTICS_SHOW(bytes_tx, "%llu"); +#define SPI_STATISTICS_TRANSFER_BYTES_HISTO(index, number) \ + SPI_STATISTICS_SHOW_NAME(transfer_bytes_histo##index, \ + "transfer_bytes_histo_" number, \ + transfer_bytes_histo[index], "%lu") +SPI_STATISTICS_TRANSFER_BYTES_HISTO(0, "0-1"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(1, "2-3"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(2, "4-7"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(3, "8-15"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(4, "16-31"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(5, "32-63"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(6, "64-127"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(7, "128-255"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(8, "256-511"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(9, "512-1023"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(10, "1024-2047"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(11, "2048-4095"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(12, "4096-8191"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(13, "8192-16383"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(14, "16384-32767"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(15, "32768-65535"); +SPI_STATISTICS_TRANSFER_BYTES_HISTO(16, "65536+"); + static struct attribute *spi_dev_attrs[] = { &dev_attr_modalias.attr, NULL, @@ -143,6 +165,23 @@ static struct attribute *spi_device_statistics_attrs[] = { &dev_attr_spi_device_bytes.attr, &dev_attr_spi_device_bytes_rx.attr, &dev_attr_spi_device_bytes_tx.attr, + &dev_attr_spi_device_transfer_bytes_histo0.attr, + &dev_attr_spi_device_transfer_bytes_histo1.attr, + &dev_attr_spi_device_transfer_bytes_histo2.attr, + &dev_attr_spi_device_transfer_bytes_histo3.attr, + &dev_attr_spi_device_transfer_bytes_histo4.attr, + &dev_attr_spi_device_transfer_bytes_histo5.attr, + &dev_attr_spi_device_transfer_bytes_histo6.attr, + &dev_attr_spi_device_transfer_bytes_histo7.attr, + &dev_attr_spi_device_transfer_bytes_histo8.attr, + &dev_attr_spi_device_transfer_bytes_histo9.attr, + &dev_attr_spi_device_transfer_bytes_histo10.attr, + &dev_attr_spi_device_transfer_bytes_histo11.attr, + &dev_attr_spi_device_transfer_bytes_histo12.attr, + &dev_attr_spi_device_transfer_bytes_histo13.attr, + &dev_attr_spi_device_transfer_bytes_histo14.attr, + &dev_attr_spi_device_transfer_bytes_histo15.attr, + &dev_attr_spi_device_transfer_bytes_histo16.attr, NULL, }; @@ -168,6 +207,23 @@ static struct attribute *spi_master_statistics_attrs[] = { &dev_attr_spi_master_bytes.attr, &dev_attr_spi_master_bytes_rx.attr, 
&dev_attr_spi_master_bytes_tx.attr, + &dev_attr_spi_master_transfer_bytes_histo0.attr, + &dev_attr_spi_master_transfer_bytes_histo1.attr, + &dev_attr_spi_master_transfer_bytes_histo2.attr, + &dev_attr_spi_master_transfer_bytes_histo3.attr, + &dev_attr_spi_master_transfer_bytes_histo4.attr, + &dev_attr_spi_master_transfer_bytes_histo5.attr, + &dev_attr_spi_master_transfer_bytes_histo6.attr, + &dev_attr_spi_master_transfer_bytes_histo7.attr, + &dev_attr_spi_master_transfer_bytes_histo8.attr, + &dev_attr_spi_master_transfer_bytes_histo9.attr, + &dev_attr_spi_master_transfer_bytes_histo10.attr, + &dev_attr_spi_master_transfer_bytes_histo11.attr, + &dev_attr_spi_master_transfer_bytes_histo12.attr, + &dev_attr_spi_master_transfer_bytes_histo13.attr, + &dev_attr_spi_master_transfer_bytes_histo14.attr, + &dev_attr_spi_master_transfer_bytes_histo15.attr, + &dev_attr_spi_master_transfer_bytes_histo16.attr, NULL, }; @@ -186,10 +242,15 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, struct spi_master *master) { unsigned long flags; + int l2len = min(fls(xfer->len), SPI_STATISTICS_HISTO_SIZE) - 1; + + if (l2len < 0) + l2len = 0; spin_lock_irqsave(&stats->lock, flags); stats->transfers++; + stats->transfer_bytes_histo[l2len]++; stats->bytes += xfer->len; if ((xfer->tx_buf) && diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 269e8afd3e2a..5b6fdc48eba7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -51,6 +51,8 @@ extern struct bus_type spi_bus_type; * @bytes_tx: number of bytes sent to device * @bytes_rx: number of bytes received from device * + * @transfer_bytes_histo: + * transfer bytes histogramm */ struct spi_statistics { spinlock_t lock; /* lock for the whole structure */ @@ -68,6 +70,8 @@ struct spi_statistics { unsigned long long bytes_rx; unsigned long long bytes_tx; +#define SPI_STATISTICS_HISTO_SIZE 17 + unsigned long transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; }; void spi_statistics_add_transfer_stats(struct spi_statistics *stats, -- cgit v1.2.3 From 5cfc5220a63b1008e7198fb4f91c3ef763e46657 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 18 Sep 2015 10:14:41 +0300 Subject: ARM: pxa: Remove unused clock_enable field from struct pxa2xx_spi_master Use for struct pxa2xx_spi_master clock_enable field was removed years ago from the pxa2xx-spi driver by the commit 2f1a74e5a2de ("[ARM] pxa: make pxa2xx_spi driver use ssp_request()/ssp_free()"). Therefore remove it from structure definition, documentation and from couple affected board files. Signed-off-by: Jarkko Nikula Acked-by: Mark Brown Signed-off-by: Robert Jarzmik --- Documentation/spi/pxa2xx | 6 ------ arch/arm/mach-pxa/hx4700.c | 1 - arch/arm/mach-pxa/icontrol.c | 2 -- arch/arm/mach-pxa/z2.c | 2 -- include/linux/spi/pxa2xx_spi.h | 1 - 5 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index 3352f97430e4..13a0b7fb192f 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx @@ -22,15 +22,10 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a found in include/linux/spi/pxa2xx_spi.h: struct pxa2xx_spi_master { - u32 clock_enable; u16 num_chipselect; u8 enable_dma; }; -The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the -corresponding SSP peripheral block in the "Clock Enable Register (CKEN"). See -the "PXA2xx Developer Manual" section "Clocks and Power Management". 
- The "pxa2xx_spi_master.num_chipselect" field is used to determine the number of slave device (chips) attached to this SPI master. @@ -57,7 +52,6 @@ static struct resource pxa_spi_nssp_resources[] = { }; static struct pxa2xx_spi_master pxa_nssp_master_info = { - .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */ .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ .enable_dma = 1, /* Enables NSSP DMA */ }; diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 5fb41ad6e3bc..1e0301a0dbbb 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -630,7 +630,6 @@ static struct spi_board_info tsc2046_board_info[] __initdata = { static struct pxa2xx_spi_master pxa_ssp2_master_info = { .num_chipselect = 1, - .clock_enable = CKEN_SSP2, .enable_dma = 1, }; diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index 9b0eb0252af6..a1869f9b6219 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -116,13 +116,11 @@ static struct spi_board_info mcp251x_board_info[] = { }; static struct pxa2xx_spi_master pxa_ssp3_spi_master_info = { - .clock_enable = CKEN_SSP3, .num_chipselect = 2, .enable_dma = 1 }; static struct pxa2xx_spi_master pxa_ssp4_spi_master_info = { - .clock_enable = CKEN_SSP4, .num_chipselect = 2, .enable_dma = 1 }; diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c index e1a121b36cfa..3deeca7bbe41 100644 --- a/arch/arm/mach-pxa/z2.c +++ b/arch/arm/mach-pxa/z2.c @@ -595,13 +595,11 @@ static struct spi_board_info spi_board_info[] __initdata = { }; static struct pxa2xx_spi_master pxa_ssp1_master_info = { - .clock_enable = CKEN_SSP, .num_chipselect = 1, .enable_dma = 1, }; static struct pxa2xx_spi_master pxa_ssp2_master_info = { - .clock_enable = CKEN_SSP2, .num_chipselect = 1, }; diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 6d36dacec4ba..9ec4c147abbc 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -23,7 +23,6 @@ struct dma_chan; /* device.platform_data for SSP controller devices */ struct pxa2xx_spi_master { - u32 clock_enable; u16 num_chipselect; u8 enable_dma; -- cgit v1.2.3 From 59796edcf21c7c19d58a223001f9ed13746c51c2 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 10 Sep 2015 10:17:59 +0300 Subject: mei: make modules.alias UUID information easier to read scripts/mod/file2alias.c:add_uuid() convert UUID into a single string which does not conform to the standard little endian UUID formatting. This patch changes add_uuid() to output same format as %pUL and modifies the mei driver to match the change. Signed-off-by: Prarit Bhargava Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 7 ++----- include/linux/mod_devicetable.h | 4 ---- scripts/mod/file2alias.c | 8 +++++--- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index eef1c6b46ad8..e294f70741a0 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -597,9 +597,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); size_t len; - len = snprintf(buf, PAGE_SIZE, "mei:%s:" MEI_CL_UUID_FMT ":", - cldev->name, MEI_CL_UUID_ARGS(uuid->b)); - + len = snprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid); return (len >= PAGE_SIZE) ? 
(PAGE_SIZE - 1) : len; } static DEVICE_ATTR_RO(modalias); @@ -631,8 +629,7 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name)) return -ENOMEM; - if (add_uevent_var(env, "MODALIAS=mei:%s:" MEI_CL_UUID_FMT ":", - cldev->name, MEI_CL_UUID_ARGS(uuid->b))) + if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:", cldev->name, uuid)) return -ENOMEM; return 0; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 688997a24aad..5e8a0ad22cbc 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -601,10 +601,6 @@ struct ipack_device_id { #define MEI_CL_MODULE_PREFIX "mei:" #define MEI_CL_NAME_SIZE 32 -#define MEI_CL_UUID_FMT "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" -#define MEI_CL_UUID_ARGS(_u) \ - _u[0], _u[1], _u[2], _u[3], _u[4], _u[5], _u[6], _u[7], \ - _u[8], _u[9], _u[10], _u[11], _u[12], _u[13], _u[14], _u[15] /** * struct mei_cl_device_id - MEI client device identifier diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 5f2088209132..fa79d113f34c 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -137,10 +137,12 @@ static inline void add_wildcard(char *str) static inline void add_uuid(char *str, uuid_le uuid) { int len = strlen(str); - int i; - for (i = 0; i < 16; i++) - sprintf(str + len + (i << 1), "%02x", uuid.b[i]); + sprintf(str + len, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.b[3], uuid.b[2], uuid.b[1], uuid.b[0], + uuid.b[5], uuid.b[4], uuid.b[7], uuid.b[6], + uuid.b[8], uuid.b[9], uuid.b[10], uuid.b[11], + uuid.b[12], uuid.b[13], uuid.b[14], uuid.b[15]); } /** -- cgit v1.2.3 From b26864cad1c9f66f4966726ba7bc81d2b9b8f990 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:01 +0300 Subject: mei: bus: add client protocol version to the device alias The device alias now looks like mei:S:uuid:N:* In that way we can bind different drivers to clients with different protocol versions if required. 
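With the version byte appended, a module alias matches strings of the form mei:name:uuid:version:*, so an id table can pin a specific protocol version instead of matching any. A minimal sketch with a made-up driver name and UUID (the patch itself only converts existing users to MEI_CL_VERSION_ANY):

        #define DEMO_DRIVER_NAME        "demo"
        #define DEMO_UUID UUID_LE(0x12345678, 0x9abc, 0xdef0, \
                                  0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0)

        static struct mei_cl_device_id demo_mei_tbl[] = {
                /* bind only to clients speaking protocol version 2 */
                { DEMO_DRIVER_NAME, DEMO_UUID, 0x02 },

                /* required last entry */
                { }
        };
        MODULE_DEVICE_TABLE(mei, demo_mei_tbl);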
Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 28 +++++++++++++++++++++------- drivers/nfc/microread/mei.c | 2 +- drivers/nfc/pn544/mei.c | 2 +- include/linux/mod_devicetable.h | 3 +++ scripts/mod/devicetable-offsets.c | 1 + scripts/mod/file2alias.c | 4 +++- 6 files changed, 30 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index d92017fa1630..38bc4380ad08 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -453,17 +453,26 @@ struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev, { const struct mei_cl_device_id *id; const uuid_le *uuid; + u8 version; + bool match; uuid = mei_me_cl_uuid(cldev->me_cl); + version = mei_me_cl_ver(cldev->me_cl); id = cldrv->id_table; while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) { if (!uuid_le_cmp(*uuid, id->uuid)) { + match = true; - if (!cldev->name[0]) - return id; + if (cldev->name[0]) + if (strncmp(cldev->name, id->name, + sizeof(id->name))) + match = false; - if (!strncmp(cldev->name, id->name, sizeof(id->name))) + if (id->version != MEI_CL_VERSION_ANY) + if (id->version != version) + match = false; + if (match) return id; } @@ -647,7 +656,8 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name)) return -ENOMEM; - if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:", cldev->name, uuid)) + if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:", + cldev->name, uuid, version)) return -ENOMEM; return 0; @@ -737,8 +747,10 @@ static bool mei_cl_dev_setup(struct mei_device *bus, mei_cl_dev_fixup(cldev); if (cldev->do_match) - dev_set_name(&cldev->dev, "mei:%s:%pUl", - cldev->name, mei_me_cl_uuid(cldev->me_cl)); + dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", + cldev->name, + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); return cldev->do_match == 1; } @@ -754,7 +766,9 @@ static int mei_cl_bus_dev_add(struct mei_cl_device *cldev) { int ret; - dev_dbg(cldev->bus->dev, "adding %pUL\n", mei_me_cl_uuid(cldev->me_cl)); + dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n", + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); ret = device_add(&cldev->dev); if (!ret) cldev->is_added = 1; diff --git a/drivers/nfc/microread/mei.c b/drivers/nfc/microread/mei.c index f9f5fc97cdd7..93328bd45110 100644 --- a/drivers/nfc/microread/mei.c +++ b/drivers/nfc/microread/mei.c @@ -67,7 +67,7 @@ static int microread_mei_remove(struct mei_cl_device *device) } static struct mei_cl_device_id microread_mei_tbl[] = { - { MICROREAD_DRIVER_NAME, MEI_NFC_UUID}, + { MICROREAD_DRIVER_NAME, MEI_NFC_UUID, MEI_CL_VERSION_ANY}, /* required last entry */ { } diff --git a/drivers/nfc/pn544/mei.c b/drivers/nfc/pn544/mei.c index 101a37e12efa..80f897b4a401 100644 --- a/drivers/nfc/pn544/mei.c +++ b/drivers/nfc/pn544/mei.c @@ -67,7 +67,7 @@ static int pn544_mei_remove(struct mei_cl_device *device) } static struct mei_cl_device_id pn544_mei_tbl[] = { - { PN544_DRIVER_NAME, MEI_NFC_UUID}, + { PN544_DRIVER_NAME, MEI_NFC_UUID, MEI_CL_VERSION_ANY}, /* required last entry */ { } diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 5e8a0ad22cbc..6975cbf1435b 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -601,11 +601,13 @@ struct ipack_device_id { #define MEI_CL_MODULE_PREFIX "mei:" #define MEI_CL_NAME_SIZE 32 +#define MEI_CL_VERSION_ANY 0xff /** * struct mei_cl_device_id - MEI client device 
identifier * @name: helper name * @uuid: client uuid + * @version: client protocol version * @driver_info: information used by the driver. * * identifies mei client device by uuid and name @@ -613,6 +615,7 @@ struct ipack_device_id { struct mei_cl_device_id { char name[MEI_CL_NAME_SIZE]; uuid_le uuid; + __u8 version; kernel_ulong_t driver_info; }; diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e70fcd12eeeb..5a6edacc85d9 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -185,6 +185,7 @@ int main(void) DEVID(mei_cl_device_id); DEVID_FIELD(mei_cl_device_id, name); DEVID_FIELD(mei_cl_device_id, uuid); + DEVID_FIELD(mei_cl_device_id, version); DEVID(rio_device_id); DEVID_FIELD(rio_device_id, did); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index fa79d113f34c..9bc2cfe0ee37 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1202,16 +1202,18 @@ static int do_cpu_entry(const char *filename, void *symval, char *alias) } ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry); -/* Looks like: mei:S:uuid */ +/* Looks like: mei:S:uuid:N:* */ static int do_mei_entry(const char *filename, void *symval, char *alias) { DEF_FIELD_ADDR(symval, mei_cl_device_id, name); DEF_FIELD_ADDR(symval, mei_cl_device_id, uuid); + DEF_FIELD(symval, mei_cl_device_id, version); sprintf(alias, MEI_CL_MODULE_PREFIX); sprintf(alias + strlen(alias), "%s:", (*name)[0] ? *name : "*"); add_uuid(alias, *uuid); + ADD(alias, ":", version != MEI_CL_VERSION_ANY, version); strcat(alias, ":*"); -- cgit v1.2.3 From baeacd0376975bee0fdf6378e1c24587ab0b6ad4 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:02 +0300 Subject: mei: bus: export uuid and protocol version to mei_cl bus drivers Export the uuid and the protocol version of the underlying me client for use by me client bus drivers.
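[ Editor's example, not part of the patch: a hypothetical bus driver could use the new accessors in its probe routine, e.g. to refuse clients that speak a protocol older than it supports:

	static int foo_probe(struct mei_cl_device *cldev,
			     const struct mei_cl_device_id *id)
	{
		const uuid_le *uuid = mei_cldev_uuid(cldev);
		u8 ver = mei_cldev_ver(cldev);

		dev_dbg(&cldev->dev, "me client %pUl, protocol version %u\n",
			uuid, ver);

		/* hypothetical policy: this driver needs version 2 or later */
		if (ver < 2)
			return -ENODEV;

		return 0;
	}
]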
Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 26 ++++++++++++++++++++++++++ include/linux/mei_cl_bus.h | 3 +++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 38bc4380ad08..53525ca50337 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -341,6 +341,32 @@ void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data) } EXPORT_SYMBOL_GPL(mei_cl_set_drvdata); +/** + * mei_cldev_uuid - return uuid of the underlying me client + * + * @cldev: mei client device + * + * Return: me client uuid + */ +const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev) +{ + return mei_me_cl_uuid(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_uuid); + +/** + * mei_cldev_ver - return protocol version of the underlying me client + * + * @cldev: mei client device + * + * Return: me client protocol version + */ +u8 mei_cldev_ver(const struct mei_cl_device *cldev) +{ + return mei_me_cl_ver(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_ver); + /** * mei_cl_enable_device - enable me client device * create connection with me client diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 0962b2ca628a..9834f832b098 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -85,6 +85,9 @@ int mei_cl_register_event_cb(struct mei_cl_device *device, #define MEI_CL_EVENT_TX 1 #define MEI_CL_EVENT_NOTIF 2 +const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); +u8 mei_cldev_ver(const struct mei_cl_device *cldev); + void *mei_cl_get_drvdata(const struct mei_cl_device *device); void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); -- cgit v1.2.3 From 01a14edeaf0456c28e2b9af3afdc0807ec6c20bd Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:03 +0300 Subject: mei: bus: export mei_cldev_enabled function Let a me client device driver query whether the device is connected and hence enabled.
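[ Editor's example, not part of the patch: a sketch of how a caller might fail fast before issuing I/O; foo_send_cmd is a made-up helper, and mei_cl_send is the bus send function as named at this point in the series:

	static ssize_t foo_send_cmd(struct mei_cl_device *cldev,
				    u8 *cmd, size_t len)
	{
		/* not connected yet, or already disconnected */
		if (!mei_cldev_enabled(cldev))
			return -ENODEV;

		return mei_cl_send(cldev, cmd, len);
	}
]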
Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 13 +++++++++++++ include/linux/mei_cl_bus.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 53525ca50337..4edf71acfed6 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -367,6 +367,19 @@ u8 mei_cldev_ver(const struct mei_cl_device *cldev) } EXPORT_SYMBOL_GPL(mei_cldev_ver); +/** + * mei_cldev_enabled - check whether the device is enabled + * + * @cldev: mei client device + * + * Return: true if me client is initialized and connected + */ +bool mei_cldev_enabled(struct mei_cl_device *cldev) +{ + return cldev->cl && mei_cl_is_connected(cldev->cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_enabled); + /** * mei_cl_enable_device - enable me client device * create connection with me client diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 9834f832b098..30d1c8b94a27 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -93,5 +93,6 @@ void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); int mei_cl_enable_device(struct mei_cl_device *device); int mei_cl_disable_device(struct mei_cl_device *device); +bool mei_cldev_enabled(struct mei_cl_device *cldev); #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit v1.2.3 From 893913822e829f7a37824f6041ff964076374191 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:04 +0300 Subject: mei: bus: complete variable rename of type struct mei_cl_device In commit 5c079ae11921 ("mei: bus: fix drivers and devices names confusion") we renamed the variables of type struct mei_cl_device to 'cldev', but a few places were left out, namely the mei_cl_bus.h header and the mei nfc drivers.
Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/mei_phy.c | 30 +++++++++++++++--------------- drivers/nfc/mei_phy.h | 4 ++-- drivers/nfc/microread/mei.c | 8 ++++---- drivers/nfc/pn544/mei.c | 8 ++++---- include/linux/mei_cl_bus.h | 29 ++++++++++++++--------------- 5 files changed, 39 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index 754a9bb0f58d..ecd2a0dce797 100644 --- a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -118,7 +118,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy) cmd.sub_command = MEI_NFC_SUBCMD_IF_VERSION; MEI_DUMP_NFC_HDR("version", &cmd.hdr); - r = mei_cl_send(phy->device, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); + r = mei_cl_send(phy->cldev, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); if (r < 0) { pr_err("Could not send IF version cmd\n"); return r; @@ -132,7 +132,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy) if (!reply) return -ENOMEM; - bytes_recv = mei_cl_recv(phy->device, (u8 *)reply, if_version_length); + bytes_recv = mei_cl_recv(phy->cldev, (u8 *)reply, if_version_length); if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { pr_err("Could not read IF version\n"); r = -EIO; @@ -186,13 +186,13 @@ static int mei_nfc_connect(struct nfc_mei_phy *phy) connect->vendor_id = phy->vendor_id; MEI_DUMP_NFC_HDR("connect request", &cmd->hdr); - r = mei_cl_send(phy->device, (u8 *)cmd, connect_length); + r = mei_cl_send(phy->cldev, (u8 *)cmd, connect_length); if (r < 0) { pr_err("Could not send connect cmd %d\n", r); goto err; } - bytes_recv = mei_cl_recv(phy->device, (u8 *)reply, connect_resp_length); + bytes_recv = mei_cl_recv(phy->cldev, (u8 *)reply, connect_resp_length); if (bytes_recv < 0) { r = bytes_recv; pr_err("Could not read connect response %d\n", r); @@ -238,7 +238,7 @@ static int mei_nfc_send(struct nfc_mei_phy *phy, u8 *buf, size_t length) MEI_DUMP_NFC_HDR("send", hdr); memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length); - err = mei_cl_send(phy->device, mei_buf, length + MEI_NFC_HEADER_SIZE); + err = mei_cl_send(phy->cldev, mei_buf, length + MEI_NFC_HEADER_SIZE); if (err < 0) goto out; @@ -278,7 +278,7 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length) struct mei_nfc_hdr *hdr; int received_length; - received_length = mei_cl_recv(phy->device, buf, length); + received_length = mei_cl_recv(phy->cldev, buf, length); if (received_length < 0) return received_length; @@ -296,7 +296,7 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length) } -static void nfc_mei_event_cb(struct mei_cl_device *device, u32 events, +static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events, void *context) { struct nfc_mei_phy *phy = context; @@ -337,7 +337,7 @@ static int nfc_mei_phy_enable(void *phy_id) if (phy->powered == 1) return 0; - r = mei_cl_enable_device(phy->device); + r = mei_cl_enable_device(phy->cldev); if (r < 0) { pr_err("Could not enable device %d\n", r); return r; @@ -355,7 +355,7 @@ static int nfc_mei_phy_enable(void *phy_id) goto err; } - r = mei_cl_register_event_cb(phy->device, BIT(MEI_CL_EVENT_RX), + r = mei_cl_register_event_cb(phy->cldev, BIT(MEI_CL_EVENT_RX), nfc_mei_event_cb, phy); if (r) { pr_err("Event cb registration failed %d\n", r); @@ -368,7 +368,7 @@ static int nfc_mei_phy_enable(void *phy_id) err: phy->powered = 0; - mei_cl_disable_device(phy->device); + mei_cl_disable_device(phy->cldev); return r; } @@ -378,7 +378,7 @@ static void nfc_mei_phy_disable(void 
*phy_id) pr_info("%s\n", __func__); - mei_cl_disable_device(phy->device); + mei_cl_disable_device(phy->cldev); phy->powered = 0; } @@ -390,7 +390,7 @@ struct nfc_phy_ops mei_phy_ops = { }; EXPORT_SYMBOL_GPL(mei_phy_ops); -struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device) +struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *cldev) { struct nfc_mei_phy *phy; @@ -398,9 +398,9 @@ struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device) if (!phy) return NULL; - phy->device = device; + phy->cldev = cldev; init_waitqueue_head(&phy->send_wq); - mei_cl_set_drvdata(device, phy); + mei_cl_set_drvdata(cldev, phy); return phy; } @@ -408,7 +408,7 @@ EXPORT_SYMBOL_GPL(nfc_mei_phy_alloc); void nfc_mei_phy_free(struct nfc_mei_phy *phy) { - mei_cl_disable_device(phy->device); + mei_cl_disable_device(phy->cldev); kfree(phy); } EXPORT_SYMBOL_GPL(nfc_mei_phy_free); diff --git a/drivers/nfc/mei_phy.h b/drivers/nfc/mei_phy.h index fbfa3e61738f..acd3a1fc69e6 100644 --- a/drivers/nfc/mei_phy.h +++ b/drivers/nfc/mei_phy.h @@ -13,7 +13,7 @@ /** * struct nfc_mei_phy * - * @device: mei device + * @cldev: mei client device * @hdev: nfc hci device * @send_wq: send completion wait queue @@ -28,7 +28,7 @@ * and prevents normal operation. */ struct nfc_mei_phy { - struct mei_cl_device *device; + struct mei_cl_device *cldev; struct nfc_hci_dev *hdev; wait_queue_head_t send_wq; diff --git a/drivers/nfc/microread/mei.c b/drivers/nfc/microread/mei.c index 93328bd45110..994871c02b7b 100644 --- a/drivers/nfc/microread/mei.c +++ b/drivers/nfc/microread/mei.c @@ -29,7 +29,7 @@ #define MICROREAD_DRIVER_NAME "microread" -static int microread_mei_probe(struct mei_cl_device *device, +static int microread_mei_probe(struct mei_cl_device *cldev, const struct mei_cl_device_id *id) { struct nfc_mei_phy *phy; @@ -37,7 +37,7 @@ static int microread_mei_probe(struct mei_cl_device *device, pr_info("Probing NFC microread\n"); - phy = nfc_mei_phy_alloc(device); + phy = nfc_mei_phy_alloc(cldev); if (!phy) { pr_err("Cannot allocate memory for microread mei phy.\n"); return -ENOMEM; @@ -55,9 +55,9 @@ static int microread_mei_probe(struct mei_cl_device *device, return 0; } -static int microread_mei_remove(struct mei_cl_device *device) +static int microread_mei_remove(struct mei_cl_device *cldev) { - struct nfc_mei_phy *phy = mei_cl_get_drvdata(device); + struct nfc_mei_phy *phy = mei_cl_get_drvdata(cldev); microread_remove(phy->hdev); diff --git a/drivers/nfc/pn544/mei.c b/drivers/nfc/pn544/mei.c index 80f897b4a401..2a2c9304e64c 100644 --- a/drivers/nfc/pn544/mei.c +++ b/drivers/nfc/pn544/mei.c @@ -27,7 +27,7 @@ #define PN544_DRIVER_NAME "pn544" -static int pn544_mei_probe(struct mei_cl_device *device, +static int pn544_mei_probe(struct mei_cl_device *cldev, const struct mei_cl_device_id *id) { struct nfc_mei_phy *phy; @@ -35,7 +35,7 @@ static int pn544_mei_probe(struct mei_cl_device *device, pr_info("Probing NFC pn544\n"); - phy = nfc_mei_phy_alloc(device); + phy = nfc_mei_phy_alloc(cldev); if (!phy) { pr_err("Cannot allocate memory for pn544 mei phy.\n"); return -ENOMEM; @@ -53,9 +53,9 @@ static int pn544_mei_probe(struct mei_cl_device *device, return 0; } -static int pn544_mei_remove(struct mei_cl_device *device) +static int pn544_mei_remove(struct mei_cl_device *cldev) { - struct nfc_mei_phy *phy = mei_cl_get_drvdata(device); + struct nfc_mei_phy *phy = mei_cl_get_drvdata(cldev); pr_info("Removing pn544\n"); diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 30d1c8b94a27..c364df750405 
100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -8,7 +8,7 @@ struct mei_cl_device; struct mei_device; -typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, +typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *cldev, u32 events, void *context); /** @@ -62,22 +62,21 @@ struct mei_cl_driver { const struct mei_cl_device_id *id_table; - int (*probe)(struct mei_cl_device *dev, + int (*probe)(struct mei_cl_device *cldev, const struct mei_cl_device_id *id); - int (*remove)(struct mei_cl_device *dev); + int (*remove)(struct mei_cl_device *cldev); }; -int __mei_cl_driver_register(struct mei_cl_driver *driver, - struct module *owner); -#define mei_cl_driver_register(driver) \ - __mei_cl_driver_register(driver, THIS_MODULE) +int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner); +#define mei_cl_driver_register(cldrv) \ + __mei_cl_driver_register(cldrv, THIS_MODULE) -void mei_cl_driver_unregister(struct mei_cl_driver *driver); +void mei_cl_driver_unregister(struct mei_cl_driver *cldrv); -ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length); -ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length); +ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -int mei_cl_register_event_cb(struct mei_cl_device *device, +int mei_cl_register_event_cb(struct mei_cl_device *cldev, unsigned long event_mask, mei_cl_event_cb_t read_cb, void *context); @@ -88,11 +87,11 @@ int mei_cl_register_event_cb(struct mei_cl_device *device, const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); u8 mei_cldev_ver(const struct mei_cl_device *cldev); -void *mei_cl_get_drvdata(const struct mei_cl_device *device); -void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); +void *mei_cl_get_drvdata(const struct mei_cl_device *cldev); +void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data); -int mei_cl_enable_device(struct mei_cl_device *device); -int mei_cl_disable_device(struct mei_cl_device *device); +int mei_cl_enable_device(struct mei_cl_device *cldev); +int mei_cl_disable_device(struct mei_cl_device *cldev); bool mei_cldev_enabled(struct mei_cl_device *cldev); #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit v1.2.3 From d49dc5e76fc917e5dfef76cb56fe3b3868deed5d Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:05 +0300 Subject: mei: bus: use mei_cldev_ prefix for the API functions Use the mei_cldev_ prefix for all mei client bus API functions in order to resolve a prefix conflict with the functions that handle client functionality, which are defined in client.c. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 67 +++++++++++++++++++++++---------------------- drivers/nfc/mei_phy.c | 25 +++++++++-------- drivers/nfc/microread/mei.c | 6 ++-- drivers/nfc/pn544/mei.c | 6 ++-- include/linux/mei_cl_bus.h | 33 +++++++++++----------- 5 files changed, 70 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 4edf71acfed6..0406e7201fe4 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -165,7 +165,7 @@ out: } /** - * mei_cl_send - me device send (write) + * mei_cldev_send - me device send (write) * * @cldev: me client device * @buf: buffer to send @@ -173,7 +173,7 @@ out: * * Return: written size in bytes or < 0 on error */ -ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t
length) +ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length) { struct mei_cl *cl = cldev->cl; @@ -182,10 +182,10 @@ ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length) return __mei_cl_send(cl, buf, length, 1); } -EXPORT_SYMBOL_GPL(mei_cl_send); +EXPORT_SYMBOL_GPL(mei_cldev_send); /** - * mei_cl_recv - client receive (read) + * mei_cldev_recv - client receive (read) * * @cldev: me client device * @buf: buffer to send @@ -193,7 +193,7 @@ EXPORT_SYMBOL_GPL(mei_cl_send); * * Return: read size in bytes of < 0 on error */ -ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) +ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) { struct mei_cl *cl = cldev->cl; @@ -202,15 +202,15 @@ ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) return __mei_cl_recv(cl, buf, length); } -EXPORT_SYMBOL_GPL(mei_cl_recv); +EXPORT_SYMBOL_GPL(mei_cldev_recv); /** - * mei_bus_event_work - dispatch rx event for a bus device + * mei_cl_bus_event_work - dispatch rx event for a bus device * and schedule new work * * @work: work */ -static void mei_bus_event_work(struct work_struct *work) +static void mei_cl_bus_event_work(struct work_struct *work) { struct mei_cl_device *cldev; @@ -272,7 +272,7 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) } /** - * mei_cl_register_event_cb - register event callback + * mei_cldev_register_event_cb - register event callback * * @cldev: me client devices * @event_cb: callback function @@ -283,9 +283,9 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) * -EALREADY if an callback is already registered * <0 on other errors */ -int mei_cl_register_event_cb(struct mei_cl_device *cldev, - unsigned long events_mask, - mei_cl_event_cb_t event_cb, void *context) +int mei_cldev_register_event_cb(struct mei_cl_device *cldev, + unsigned long events_mask, + mei_cldev_event_cb_t event_cb, void *context) { int ret; @@ -296,7 +296,7 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev, cldev->events_mask = events_mask; cldev->event_cb = event_cb; cldev->event_context = context; - INIT_WORK(&cldev->event_work, mei_bus_event_work); + INIT_WORK(&cldev->event_work, mei_cl_bus_event_work); if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) { ret = mei_cl_read_start(cldev->cl, 0, NULL); @@ -314,32 +314,32 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev, return 0; } -EXPORT_SYMBOL_GPL(mei_cl_register_event_cb); +EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb); /** - * mei_cl_get_drvdata - driver data getter + * mei_cldev_get_drvdata - driver data getter * * @cldev: mei client device * * Return: driver private data */ -void *mei_cl_get_drvdata(const struct mei_cl_device *cldev) +void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev) { return dev_get_drvdata(&cldev->dev); } -EXPORT_SYMBOL_GPL(mei_cl_get_drvdata); +EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata); /** - * mei_cl_set_drvdata - driver data setter + * mei_cldev_set_drvdata - driver data setter * * @cldev: mei client device * @data: data to store */ -void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data) +void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data) { dev_set_drvdata(&cldev->dev, data); } -EXPORT_SYMBOL_GPL(mei_cl_set_drvdata); +EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata); /** * mei_cldev_uuid - return uuid of the underlying me client @@ -381,14 +381,14 @@ bool mei_cldev_enabled(struct mei_cl_device *cldev) EXPORT_SYMBOL_GPL(mei_cldev_enabled); /** - * mei_cl_enable_device - enable me client device + * 
mei_cldev_enable_device - enable me client device * create connection with me client * * @cldev: me client device * * Return: 0 on success and < 0 on error */ -int mei_cl_enable_device(struct mei_cl_device *cldev) +int mei_cldev_enable(struct mei_cl_device *cldev) { struct mei_device *bus = cldev->bus; struct mei_cl *cl; @@ -428,17 +428,17 @@ out: return ret; } -EXPORT_SYMBOL_GPL(mei_cl_enable_device); +EXPORT_SYMBOL_GPL(mei_cldev_enable); /** - * mei_cl_disable_device - disable me client device + * mei_cldev_disable - disable me client device * disconnect form the me client * * @cldev: me client device * * Return: 0 on success and < 0 on error */ -int mei_cl_disable_device(struct mei_cl_device *cldev) +int mei_cldev_disable(struct mei_cl_device *cldev) { struct mei_device *bus; struct mei_cl *cl; @@ -476,7 +476,7 @@ out: mutex_unlock(&bus->device_lock); return err; } -EXPORT_SYMBOL_GPL(mei_cl_disable_device); +EXPORT_SYMBOL_GPL(mei_cldev_disable); /** * mei_cl_device_find - find matching entry in the driver id table @@ -663,14 +663,14 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, } static DEVICE_ATTR_RO(modalias); -static struct attribute *mei_cl_dev_attrs[] = { +static struct attribute *mei_cldev_attrs[] = { &dev_attr_name.attr, &dev_attr_uuid.attr, &dev_attr_version.attr, &dev_attr_modalias.attr, NULL, }; -ATTRIBUTE_GROUPS(mei_cl_dev); +ATTRIBUTE_GROUPS(mei_cldev); /** * mei_cl_device_uevent - me client bus uevent handler @@ -704,7 +704,7 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) static struct bus_type mei_cl_bus_type = { .name = "mei", - .dev_groups = mei_cl_dev_groups, + .dev_groups = mei_cldev_groups, .match = mei_cl_device_match, .probe = mei_cl_device_probe, .remove = mei_cl_device_remove, @@ -937,7 +937,8 @@ void mei_cl_bus_rescan(struct mei_device *bus) dev_dbg(bus->dev, "rescan end"); } -int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner) +int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, + struct module *owner) { int err; @@ -953,15 +954,15 @@ int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner) return 0; } -EXPORT_SYMBOL_GPL(__mei_cl_driver_register); +EXPORT_SYMBOL_GPL(__mei_cldev_driver_register); -void mei_cl_driver_unregister(struct mei_cl_driver *cldrv) +void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv) { driver_unregister(&cldrv->driver); pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name); } -EXPORT_SYMBOL_GPL(mei_cl_driver_unregister); +EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister); int __init mei_cl_bus_init(void) diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index ecd2a0dce797..83deda4bb4d6 100644 --- a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -118,7 +118,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy) cmd.sub_command = MEI_NFC_SUBCMD_IF_VERSION; MEI_DUMP_NFC_HDR("version", &cmd.hdr); - r = mei_cl_send(phy->cldev, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); + r = mei_cldev_send(phy->cldev, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); if (r < 0) { pr_err("Could not send IF version cmd\n"); return r; @@ -132,7 +132,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy) if (!reply) return -ENOMEM; - bytes_recv = mei_cl_recv(phy->cldev, (u8 *)reply, if_version_length); + bytes_recv = mei_cldev_recv(phy->cldev, (u8 *)reply, if_version_length); if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { pr_err("Could not read IF version\n"); r = -EIO; @@ -186,13 
+186,14 @@ static int mei_nfc_connect(struct nfc_mei_phy *phy) connect->vendor_id = phy->vendor_id; MEI_DUMP_NFC_HDR("connect request", &cmd->hdr); - r = mei_cl_send(phy->cldev, (u8 *)cmd, connect_length); + r = mei_cldev_send(phy->cldev, (u8 *)cmd, connect_length); if (r < 0) { pr_err("Could not send connect cmd %d\n", r); goto err; } - bytes_recv = mei_cl_recv(phy->cldev, (u8 *)reply, connect_resp_length); + bytes_recv = mei_cldev_recv(phy->cldev, (u8 *)reply, + connect_resp_length); if (bytes_recv < 0) { r = bytes_recv; pr_err("Could not read connect response %d\n", r); @@ -238,7 +239,7 @@ static int mei_nfc_send(struct nfc_mei_phy *phy, u8 *buf, size_t length) MEI_DUMP_NFC_HDR("send", hdr); memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length); - err = mei_cl_send(phy->cldev, mei_buf, length + MEI_NFC_HEADER_SIZE); + err = mei_cldev_send(phy->cldev, mei_buf, length + MEI_NFC_HEADER_SIZE); if (err < 0) goto out; @@ -278,7 +279,7 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length) struct mei_nfc_hdr *hdr; int received_length; - received_length = mei_cl_recv(phy->cldev, buf, length); + received_length = mei_cldev_recv(phy->cldev, buf, length); if (received_length < 0) return received_length; @@ -337,7 +338,7 @@ static int nfc_mei_phy_enable(void *phy_id) if (phy->powered == 1) return 0; - r = mei_cl_enable_device(phy->cldev); + r = mei_cldev_enable(phy->cldev); if (r < 0) { pr_err("Could not enable device %d\n", r); return r; @@ -355,7 +356,7 @@ static int nfc_mei_phy_enable(void *phy_id) goto err; } - r = mei_cl_register_event_cb(phy->cldev, BIT(MEI_CL_EVENT_RX), + r = mei_cldev_register_event_cb(phy->cldev, BIT(MEI_CL_EVENT_RX), nfc_mei_event_cb, phy); if (r) { pr_err("Event cb registration failed %d\n", r); @@ -368,7 +369,7 @@ static int nfc_mei_phy_enable(void *phy_id) err: phy->powered = 0; - mei_cl_disable_device(phy->cldev); + mei_cldev_disable(phy->cldev); return r; } @@ -378,7 +379,7 @@ static void nfc_mei_phy_disable(void *phy_id) pr_info("%s\n", __func__); - mei_cl_disable_device(phy->cldev); + mei_cldev_disable(phy->cldev); phy->powered = 0; } @@ -400,7 +401,7 @@ struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *cldev) phy->cldev = cldev; init_waitqueue_head(&phy->send_wq); - mei_cl_set_drvdata(cldev, phy); + mei_cldev_set_drvdata(cldev, phy); return phy; } @@ -408,7 +409,7 @@ EXPORT_SYMBOL_GPL(nfc_mei_phy_alloc); void nfc_mei_phy_free(struct nfc_mei_phy *phy) { - mei_cl_disable_device(phy->cldev); + mei_cldev_disable(phy->cldev); kfree(phy); } EXPORT_SYMBOL_GPL(nfc_mei_phy_free); diff --git a/drivers/nfc/microread/mei.c b/drivers/nfc/microread/mei.c index 994871c02b7b..3092501f26c4 100644 --- a/drivers/nfc/microread/mei.c +++ b/drivers/nfc/microread/mei.c @@ -57,7 +57,7 @@ static int microread_mei_remove(struct mei_cl_device *cldev) { - struct nfc_mei_phy *phy = mei_cl_get_drvdata(cldev); + struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev); microread_remove(phy->hdev); @@ -88,7 +88,7 @@ static int microread_mei_init(void) pr_debug(DRIVER_DESC ": %s\n", __func__); - r = mei_cl_driver_register(&microread_driver); + r = mei_cldev_driver_register(&microread_driver); if (r) { pr_err(MICROREAD_DRIVER_NAME ": driver registration failed\n"); return r; @@ -99,7 +99,7 @@ static int microread_mei_init(void) static void microread_mei_exit(void) { - mei_cl_driver_unregister(&microread_driver); + mei_cldev_driver_unregister(&microread_driver); } module_init(microread_mei_init); diff --git a/drivers/nfc/pn544/mei.c
b/drivers/nfc/pn544/mei.c index 2a2c9304e64c..46d0eb24eef9 100644 --- a/drivers/nfc/pn544/mei.c +++ b/drivers/nfc/pn544/mei.c @@ -55,7 +55,7 @@ static int pn544_mei_probe(struct mei_cl_device *cldev, static int pn544_mei_remove(struct mei_cl_device *cldev) { - struct nfc_mei_phy *phy = mei_cl_get_drvdata(cldev); + struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev); pr_info("Removing pn544\n"); @@ -88,7 +88,7 @@ static int pn544_mei_init(void) pr_debug(DRIVER_DESC ": %s\n", __func__); - r = mei_cl_driver_register(&pn544_driver); + r = mei_cldev_driver_register(&pn544_driver); if (r) { pr_err(PN544_DRIVER_NAME ": driver registration failed\n"); return r; @@ -99,7 +99,7 @@ static int pn544_mei_init(void) static void pn544_mei_exit(void) { - mei_cl_driver_unregister(&pn544_driver); + mei_cldev_driver_unregister(&pn544_driver); } module_init(pn544_mei_init); diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index c364df750405..e746919530f5 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -8,8 +8,8 @@ struct mei_cl_device; struct mei_device; -typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *cldev, - u32 events, void *context); +typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev, + u32 events, void *context); /** * struct mei_cl_device - MEI device handle @@ -45,7 +45,7 @@ struct mei_cl_device { char name[MEI_CL_NAME_SIZE]; struct work_struct event_work; - mei_cl_event_cb_t event_cb; + mei_cldev_event_cb_t event_cb; void *event_context; unsigned long events_mask; unsigned long events; @@ -67,18 +67,19 @@ struct mei_cl_driver { int (*remove)(struct mei_cl_device *cldev); }; -int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner); -#define mei_cl_driver_register(cldrv) \ - __mei_cl_driver_register(cldrv, THIS_MODULE) +int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, + struct module *owner); +#define mei_cldev_driver_register(cldrv) \ + __mei_cldev_driver_register(cldrv, THIS_MODULE) -void mei_cl_driver_unregister(struct mei_cl_driver *cldrv); +void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv); -ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length); -ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -int mei_cl_register_event_cb(struct mei_cl_device *cldev, - unsigned long event_mask, - mei_cl_event_cb_t read_cb, void *context); +int mei_cldev_register_event_cb(struct mei_cl_device *cldev, + unsigned long event_mask, + mei_cldev_event_cb_t read_cb, void *context); #define MEI_CL_EVENT_RX 0 #define MEI_CL_EVENT_TX 1 @@ -87,11 +88,11 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev, const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); u8 mei_cldev_ver(const struct mei_cl_device *cldev); -void *mei_cl_get_drvdata(const struct mei_cl_device *cldev); -void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data); +void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); +void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data); -int mei_cl_enable_device(struct mei_cl_device *cldev); -int mei_cl_disable_device(struct mei_cl_device *cldev); +int mei_cldev_enable(struct mei_cl_device *cldev); +int mei_cldev_disable(struct mei_cl_device *cldev); bool mei_cldev_enabled(struct mei_cl_device *cldev); #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit v1.2.3 From 
ff6f46483f0e14b829cd5a71bafcbb1c136a84b6 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 10 Sep 2015 15:06:20 -0700 Subject: spmi: Auto-populate driver.owner in spmi_driver_register() Populate the owner field of the spmi driver when spmi_driver_register() is called in a similar fashion to how other *_driver_register() functions do it. This saves driver writers from having to do this themselves. Cc: Andy Gross Cc: Gilad Avidov Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/spmi/spmi.c | 5 +++-- include/linux/spmi.h | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/spmi/spmi.c b/drivers/spmi/spmi.c index 11467e17bdd8..6b3da1bb0d63 100644 --- a/drivers/spmi/spmi.c +++ b/drivers/spmi/spmi.c @@ -560,12 +560,13 @@ EXPORT_SYMBOL_GPL(spmi_controller_remove); * This API will register the client driver with the SPMI framework. * It is typically called from the driver's module-init function. */ -int spmi_driver_register(struct spmi_driver *sdrv) +int __spmi_driver_register(struct spmi_driver *sdrv, struct module *owner) { sdrv->driver.bus = &spmi_bus_type; + sdrv->driver.owner = owner; return driver_register(&sdrv->driver); } -EXPORT_SYMBOL_GPL(spmi_driver_register); +EXPORT_SYMBOL_GPL(__spmi_driver_register); static void __exit spmi_exit(void) { diff --git a/include/linux/spmi.h b/include/linux/spmi.h index f84212cd3b7d..1396a255d2a2 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -153,7 +153,9 @@ static inline struct spmi_driver *to_spmi_driver(struct device_driver *d) return container_of(d, struct spmi_driver, driver); } -int spmi_driver_register(struct spmi_driver *sdrv); +#define spmi_driver_register(sdrv) \ + __spmi_driver_register(sdrv, THIS_MODULE) +int __spmi_driver_register(struct spmi_driver *sdrv, struct module *owner); /** * spmi_driver_unregister() - unregister an SPMI client driver -- cgit v1.2.3 From 66e8c57da6bf6b847a48a5a6fda59512f733ed78 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 25 Aug 2015 20:45:18 +0200 Subject: rcu: Change _wait_rcu_gp() to work around GCC bug 67055 Code like this in inline functions confuses some recent versions of gcc: const int n = const-expr; whatever_t array[n]; For more details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67055#c13 This compiler bug results in the following failure after 114b7fd4b (rcu: Create rcu_sync infrastructure): In file included from include/linux/rcupdate.h:429:0, from include/linux/rcu_sync.h:5, from kernel/rcu/sync.c:1: include/linux/rcutiny.h: In function 'rcu_barrier_sched': include/linux/rcutiny.h:55:20: internal compiler error: Segmentation fault static inline void rcu_barrier_sched(void) This commit therefore eliminates the constant local variable in favor of direct use of the expression. Reported-and-tested-by: Mark Salter Reported-by: Guenter Roeck Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff476515f716..581abf848566 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -230,12 +230,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, struct rcu_synchronize *rs_array); #define _wait_rcu_gp(checktiny, ...) 
\ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - const int __n = ARRAY_SIZE(__crcu_array); \ - struct rcu_synchronize __rs_array[__n]; \ - \ - __wait_rcu_gp(checktiny, __n, __crcu_array, __rs_array); \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ } while (0) #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) -- cgit v1.2.3 From 8203d6d0ee784cfb2ebf89053f7fe399abc867d7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Aug 2015 13:53:17 -0700 Subject: rcu: Use single-stage IPI algorithm for RCU expedited grace period The current preemptible-RCU expedited grace-period algorithm invokes synchronize_sched_expedited() to enqueue all tasks currently running in a preemptible-RCU read-side critical section, then waits for all the ->blkd_tasks lists to drain. This works, but results in both an IPI and a double context switch even on CPUs that do not happen to be running in a preemptible RCU read-side critical section. This commit implements a new algorithm that causes less OS jitter. This new algorithm IPIs all online CPUs that are not idle (from an RCU perspective), but refrains from self-IPIs. If a CPU receiving this IPI is not in a preemptible RCU read-side critical section (or is just now exiting one), it pushes quiescence up the rcu_node tree, otherwise, it sets a flag that will be handled by the upcoming outermost rcu_read_unlock(), which will then push quiescence up the tree. The expedited grace period must of course wait on any pre-existing blocked readers, and newly blocked readers must be queued carefully based on the state of both the normal and the expedited grace periods. This new queueing approach also avoids the need to update boost state, courtesy of the fact that blocked tasks are no longer ever migrated to the root rcu_node structure. Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 10 +- kernel/rcu/tree.c | 75 ++++++++---- kernel/rcu/tree_plugin.h | 296 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 311 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..7fa8c4d372e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1330,10 +1330,12 @@ struct sched_dl_entity { union rcu_special { struct { - bool blocked; - bool need_qs; - } b; - short s; + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + u8 pad; /* Otherwise the compiler can store garbage here. */ + } b; /* Bits. */ + u32 s; /* Set of bits. */ }; struct rcu_node; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8526896afea7..6e92bf4337bd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3461,18 +3461,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) } /* - * Return non-zero if there are any tasks in RCU read-side critical - * sections blocking the current preemptible-RCU expedited grace period. - * If there is no preemptible-RCU expedited grace period currently in - * progress, returns zero unconditionally. 
- */ -static int rcu_preempted_readers_exp(struct rcu_node *rnp) -{ - return rnp->exp_tasks != NULL; -} - -/* - * return non-zero if there is no RCU expedited grace period in progress + * Return non-zero if there is no RCU expedited grace period in progress * for the specified rcu_node structure, in other words, if all CPUs and * tasks covered by the specified rcu_node structure have done their bit * for the current expedited grace period. Works only for preemptible @@ -3482,7 +3471,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp) */ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) { - return !rcu_preempted_readers_exp(rnp) && + return rnp->exp_tasks == NULL && READ_ONCE(rnp->expmask) == 0; } @@ -3494,19 +3483,21 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) * recursively up the tree. (Calm down, calm down, we do the recursion * iteratively!) * - * Caller must hold the root rcu_node's exp_funnel_mutex. + * Caller must hold the root rcu_node's exp_funnel_mutex and the + * specified rcu_node structure's ->lock. */ -static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, - struct rcu_node *rnp, bool wake) +static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, + bool wake, unsigned long flags) + __releases(rnp->lock) { - unsigned long flags; unsigned long mask; - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); for (;;) { if (!sync_rcu_preempt_exp_done(rnp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (!rnp->expmask) + rcu_initiate_boost(rnp, flags); + else + raw_spin_unlock_irqrestore(&rnp->lock, flags); break; } if (rnp->parent == NULL) { @@ -3522,10 +3513,54 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, rnp = rnp->parent; raw_spin_lock(&rnp->lock); /* irqs already disabled */ smp_mb__after_unlock_lock(); + WARN_ON_ONCE(!(rnp->expmask & mask)); rnp->expmask &= ~mask; } } +/* + * Report expedited quiescent state for specified node. This is a + * lock-acquisition wrapper function for __rcu_report_exp_rnp(). + * + * Caller must hold the root rcu_node's exp_funnel_mutex. + */ +static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, + struct rcu_node *rnp, bool wake) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + __rcu_report_exp_rnp(rsp, rnp, wake, flags); +} + +/* + * Report expedited quiescent state for multiple CPUs, all covered by the + * specified leaf rcu_node structure. Caller must hold the root + * rcu_node's exp_funnel_mutex. + */ +static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, + unsigned long mask, bool wake) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + WARN_ON_ONCE((rnp->expmask & mask) != mask); + rnp->expmask &= ~mask; + __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */ +} + +/* + * Report expedited quiescent state for specified rcu_data (CPU). + * Caller must hold the root rcu_node's exp_funnel_mutex. + */ +static void __maybe_unused rcu_report_exp_rdp(struct rcu_state *rsp, + struct rcu_data *rdp, bool wake) +{ + rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake); +} + /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. 
*/ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp, diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 62d05413b7ba..6f7500f9387c 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -101,7 +101,6 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); static struct rcu_state *const rcu_state_p = &rcu_preempt_state; static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; -static int rcu_preempted_readers_exp(struct rcu_node *rnp); static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); @@ -114,6 +113,147 @@ static void __init rcu_bootup_announce(void) rcu_bootup_announce_oddness(); } +/* Flags for rcu_preempt_ctxt_queue() decision table. */ +#define RCU_GP_TASKS 0x8 +#define RCU_EXP_TASKS 0x4 +#define RCU_GP_BLKD 0x2 +#define RCU_EXP_BLKD 0x1 + +/* + * Queues a task preempted within an RCU-preempt read-side critical + * section into the appropriate location within the ->blkd_tasks list, + * depending on the states of any ongoing normal and expedited grace + * periods. The ->gp_tasks pointer indicates which element the normal + * grace period is waiting on (NULL if none), and the ->exp_tasks pointer + * indicates which element the expedited grace period is waiting on (again, + * NULL if none). If a grace period is waiting on a given element in the + * ->blkd_tasks list, it also waits on all subsequent elements. Thus, + * adding a task to the tail of the list blocks any grace period that is + * already waiting on one of the elements. In contrast, adding a task + * to the head of the list won't block any grace period that is already + * waiting on one of the elements. + * + * This queuing is imprecise, and can sometimes make an ongoing grace + * period wait for a task that is not strictly speaking blocking it. + * Given the choice, we needlessly block a normal grace period rather than + * blocking an expedited grace period. + * + * Note that an endless sequence of expedited grace periods still cannot + * indefinitely postpone a normal grace period. Eventually, all of the + * fixed number of preempted tasks blocking the normal grace period that are + * not also blocking the expedited grace period will resume and complete + * their RCU read-side critical sections. At that point, the ->gp_tasks + * pointer will equal the ->exp_tasks pointer, at which point the end of + * the corresponding expedited grace period will also be the end of the + * normal grace period. + */ +static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, + unsigned long flags) __releases(rnp->lock) +{ + int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) + + (rnp->exp_tasks ? RCU_EXP_TASKS : 0) + + (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) + + (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0); + struct task_struct *t = current; + + /* + * Decide where to queue the newly blocked task. In theory, + * this could be an if-statement. In practice, when I tried + * that, it was quite messy. + */ + switch (blkd_state) { + case 0: + case RCU_EXP_TASKS: + case RCU_EXP_TASKS + RCU_GP_BLKD: + case RCU_GP_TASKS: + case RCU_GP_TASKS + RCU_EXP_TASKS: + + /* + * Blocking neither GP, or first task blocking the normal + * GP but not blocking the already-waiting expedited GP. + * Queue at the head of the list to avoid unnecessarily + * blocking the already-waiting GPs. 
+ */ + list_add(&t->rcu_node_entry, &rnp->blkd_tasks); + break; + + case RCU_EXP_BLKD: + case RCU_GP_BLKD: + case RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + + /* + * First task arriving that blocks either GP, or first task + * arriving that blocks the expedited GP (with the normal + * GP already waiting), or a task arriving that blocks + * both GPs with both GPs already waiting. Queue at the + * tail of the list to avoid any GP waiting on any of the + * already queued tasks that are not blocking it. + */ + list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks); + break; + + case RCU_EXP_TASKS + RCU_EXP_BLKD: + case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD: + + /* + * Second or subsequent task blocking the expedited GP. + * The task either does not block the normal GP, or is the + * first task blocking the normal GP. Queue just after + * the first task blocking the expedited GP. + */ + list_add(&t->rcu_node_entry, rnp->exp_tasks); + break; + + case RCU_GP_TASKS + RCU_GP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD: + + /* + * Second or subsequent task blocking the normal GP. + * The task does not block the expedited GP. Queue just + * after the first task blocking the normal GP. + */ + list_add(&t->rcu_node_entry, rnp->gp_tasks); + break; + + default: + + /* Yet another exercise in excessive paranoia. */ + WARN_ON_ONCE(1); + break; + } + + /* + * We have now queued the task. If it was the first one to + * block either grace period, update the ->gp_tasks and/or + * ->exp_tasks pointers, respectively, to reference the newly + * blocked tasks. + */ + if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) + rnp->gp_tasks = &t->rcu_node_entry; + if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) + rnp->exp_tasks = &t->rcu_node_entry; + raw_spin_unlock(&rnp->lock); + + /* + * Report the quiescent state for the expedited GP. This expedited + * GP should not be able to end until we report, so there should be + * no need to check for a subsequent expedited GP. (Though we are + * still in a quiescent state in any case.) + */ + if (blkd_state & RCU_EXP_BLKD && + t->rcu_read_unlock_special.b.exp_need_qs) { + t->rcu_read_unlock_special.b.exp_need_qs = false; + rcu_report_exp_rdp(rdp->rsp, rdp, true); + } else { + WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs); + } + local_irq_restore(flags); +} + /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -167,42 +307,18 @@ static void rcu_preempt_note_context_switch(void) t->rcu_blocked_node = rnp; /* - * If this CPU has already checked in, then this task - * will hold up the next grace period rather than the - * current grace period. Queue the task accordingly. - * If the task is queued for the current grace period - * (i.e., this CPU has not yet passed through a quiescent - * state for the current grace period), then as long - * as that task remains queued, the current grace period - * cannot end. Note that there is some uncertainty as - * to exactly when the current grace period started. - * We take a conservative approach, which can result - * in unnecessarily waiting on tasks that started very - * slightly after the current grace period began. C'est - * la vie!!! - * - * But first, note that the current CPU must still be - * on line! 
+ * Verify the CPU's sanity, trace the preemption, and + * then queue the task as required based on the states + * of any ongoing and expedited grace periods. */ WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); - if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { - list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); - rnp->gp_tasks = &t->rcu_node_entry; - if (IS_ENABLED(CONFIG_RCU_BOOST) && - rnp->boost_tasks != NULL) - rnp->boost_tasks = rnp->gp_tasks; - } else { - list_add(&t->rcu_node_entry, &rnp->blkd_tasks); - if (rnp->qsmask & rdp->grpmask) - rnp->gp_tasks = &t->rcu_node_entry; - } trace_rcu_preempt_task(rdp->rsp->name, t->pid, (rnp->qsmask & rdp->grpmask) ? rnp->gpnum : rnp->gpnum + 1); - raw_spin_unlock_irqrestore(&rnp->lock, flags); + rcu_preempt_ctxt_queue(rnp, rdp, flags); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special.s) { @@ -272,6 +388,7 @@ void rcu_read_unlock_special(struct task_struct *t) unsigned long flags; struct list_head *np; bool drop_boost_mutex = false; + struct rcu_data *rdp; struct rcu_node *rnp; union rcu_special special; @@ -282,8 +399,8 @@ void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); /* - * If RCU core is waiting for this CPU to exit critical section, - * let it know that we have done so. Because irqs are disabled, + * If RCU core is waiting for this CPU to exit its critical section, + * report the fact that it has exited. Because irqs are disabled, * t->rcu_read_unlock_special cannot change. */ special = t->rcu_read_unlock_special; @@ -296,13 +413,32 @@ void rcu_read_unlock_special(struct task_struct *t) } } + /* + * Respond to a request for an expedited grace period, but only if + * we were not preempted, meaning that we were running on the same + * CPU throughout. If we were preempted, the exp_need_qs flag + * would have been cleared at the time of the first preemption, + * and the quiescent state would be reported when we were dequeued. + */ + if (special.b.exp_need_qs) { + WARN_ON_ONCE(special.b.blocked); + t->rcu_read_unlock_special.b.exp_need_qs = false; + rdp = this_cpu_ptr(rcu_state_p->rda); + rcu_report_exp_rdp(rcu_state_p, rdp, true); + if (!t->rcu_read_unlock_special.s) { + local_irq_restore(flags); + return; + } + } + /* Hardware IRQ handlers cannot block, complain if they get here. */ if (in_irq() || in_serving_softirq()) { lockdep_rcu_suspicious(__FILE__, __LINE__, "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); - pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n", + pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", t->rcu_read_unlock_special.s, t->rcu_read_unlock_special.b.blocked, + t->rcu_read_unlock_special.b.exp_need_qs, t->rcu_read_unlock_special.b.need_qs); local_irq_restore(flags); return; @@ -329,7 +465,7 @@ void rcu_read_unlock_special(struct task_struct *t) raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); - empty_exp = !rcu_preempted_readers_exp(rnp); + empty_exp = sync_rcu_preempt_exp_done(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); list_del_init(&t->rcu_node_entry); @@ -353,7 +489,7 @@ void rcu_read_unlock_special(struct task_struct *t) * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, * so we must take a snapshot of the expedited state. 
*/ - empty_exp_now = !rcu_preempted_readers_exp(rnp); + empty_exp_now = sync_rcu_preempt_exp_done(rnp); if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { trace_rcu_quiescent_state_report(TPS("preempt_rcu"), rnp->gpnum, @@ -535,28 +671,99 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +/* + * Remote handler for smp_call_function_single(). If there is an + * RCU read-side critical section in effect, request that the + * next rcu_read_unlock() record the quiescent state up the + * ->expmask fields in the rcu_node tree. Otherwise, immediately + * report the quiescent state. + */ +static void sync_rcu_exp_handler(void *info) +{ + struct rcu_data *rdp; + struct rcu_state *rsp = info; + struct task_struct *t = current; + + /* + * Within an RCU read-side critical section, request that the next + * rcu_read_unlock() report. Unless this RCU read-side critical + * section has already blocked, in which case it is already set + * up for the expedited grace period to wait on it. + */ + if (t->rcu_read_lock_nesting > 0 && + !t->rcu_read_unlock_special.b.blocked) { + t->rcu_read_unlock_special.b.exp_need_qs = true; + return; + } + + /* + * We are either exiting an RCU read-side critical section (negative + * values of t->rcu_read_lock_nesting) or are not in one at all + * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU + * read-side critical section that blocked before this expedited + * grace period started. Either way, we can immediately report + * the quiescent state. + */ + rdp = this_cpu_ptr(rsp->rda); + rcu_report_exp_rdp(rsp, rdp, true); +} + /* * Select the nodes that the upcoming expedited grace period needs * to wait for. */ -static void sync_rcu_exp_select_nodes(struct rcu_state *rsp) +static void sync_rcu_exp_select_cpus(struct rcu_state *rsp) { + int cpu; unsigned long flags; + unsigned long mask; + unsigned long mask_ofl_test; + unsigned long mask_ofl_ipi; + int ret; struct rcu_node *rnp; sync_exp_reset_tree(rsp); rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - rnp->expmask = 0; /* No per-CPU component yet. */ - if (!rcu_preempt_has_tasks(rnp)) { - /* FIXME: Want __rcu_report_exp_rnp() here. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - } else { + + /* Each pass checks a CPU for identity, offline, and idle. */ + mask_ofl_test = 0; + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + if (raw_smp_processor_id() == cpu || + cpu_is_offline(cpu) || + !(atomic_add_return(0, &rdtp->dynticks) & 0x1)) + mask_ofl_test |= rdp->grpmask; + } + mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; + + /* + * Need to wait for any blocked tasks as well. Note that + * additional blocking tasks will also block the expedited + * GP until such time as the ->expmask bits are cleared. + */ + if (rcu_preempt_has_tasks(rnp)) rnp->exp_tasks = rnp->blkd_tasks.next; - rcu_initiate_boost(rnp, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + + /* IPI the remaining CPUs for expedited quiescent state. */ + mask = 1; + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { + if (!(mask_ofl_ipi & mask)) + continue; + ret = smp_call_function_single(cpu, + sync_rcu_exp_handler, + rsp, 0); + if (!ret) + mask_ofl_ipi &= ~mask; } - rcu_report_exp_rnp(rsp, rnp, false); + /* Report quiescent states for those that went offline. 
*/ + mask_ofl_test |= mask_ofl_ipi; + if (mask_ofl_test) + rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); } } @@ -587,11 +794,8 @@ void synchronize_rcu_expedited(void) rcu_exp_gp_seq_start(rsp); - /* force all RCU readers onto ->blkd_tasks lists. */ - synchronize_sched_expedited(); - /* Initialize the rcu_node tree in preparation for the wait. */ - sync_rcu_exp_select_nodes(rsp); + sync_rcu_exp_select_cpus(rsp); /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); -- cgit v1.2.3 From b05b7c7cc0324524dcda7fa7c2be1255290ee416 Mon Sep 17 00:00:00 2001 From: Muhammad Hamza Farooq Date: Fri, 11 Sep 2015 16:42:38 +0200 Subject: ti-st: use worker instead of calling st_int_write in wake up The wake up method is called with the port lock held. The st_int_write method calls port->ops->write, which tries to acquire the lock again, causing the CPU to wait forever. The right way is to write the data to the port from a worker thread. Signed-off-by: Muhammad Hamza Farooq Signed-off-by: Jacob Siverskog Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 18 ++++++++++++++++-- include/linux/ti_wilink_st.h | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index c8c6a363069c..6e3af8b42cdd 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -460,6 +460,13 @@ static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb) * - TTY layer when write's finished * - st_write (in context of the protocol stack) */ +static void work_fn_write_wakeup(struct work_struct *work) +{ + struct st_data_s *st_gdata = container_of(work, struct st_data_s, + work_write_wakeup); + + st_tx_wakeup((void *)st_gdata); +} void st_tx_wakeup(struct st_data_s *st_data) { struct sk_buff *skb; @@ -812,8 +819,12 @@ static void st_tty_wakeup(struct tty_struct *tty) /* don't do an wakeup for now */ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - /* call our internal wakeup */ - st_tx_wakeup((void *)st_gdata); + /* + * schedule the internal wakeup instead of calling directly to + * avoid lockup (port->lock needed in tty->ops->write is + * already taken here + */ + schedule_work(&st_gdata->work_write_wakeup); } static void st_tty_flush_buffer(struct tty_struct *tty) @@ -881,6 +892,9 @@ int st_core_init(struct st_data_s **core_data) pr_err("unable to un-register ldisc"); return err; } + + INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup); + *core_data = st_gdata; return 0; } diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index d4217eff489f..0a0d56834c8e 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -158,6 +158,7 @@ struct st_data_s { unsigned long ll_state; void *kim_data; struct tty_struct *tty; + struct work_struct work_write_wakeup; }; /* -- cgit v1.2.3 From 2785968cd122b22b289db565b7438f2200984044 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 9 May 2015 17:47:52 +0200 Subject: can: headers: make header files self contained This patch adds the missing #includes to dev.h and led.h, so that they can be used without including further header files.
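[ Editor's note: "self contained" has a concrete test here; a translation unit consisting of nothing but the one include must compile on its own, e.g. this hypothetical check:

	/* hypothetical compile test: must build with no other #includes */
	#include <linux/can/dev.h>
]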
Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 3 ++- include/linux/can/led.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c3a9c8fc60fa..56dcadd83716 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -14,9 +14,10 @@ #define _CAN_DEV_H #include -#include #include #include +#include +#include /* * CAN mode diff --git a/include/linux/can/led.h b/include/linux/can/led.h index 146de4506d21..2746f7c2f87d 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -11,6 +11,7 @@ #include #include +#include enum can_led_event { CAN_LED_EVENT_OPEN, -- cgit v1.2.3 From 472912a2b5e2027efd58aa47f78acb2373675187 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 18:01:59 -0400 Subject: memcg: generate file modified notifications on "memory.events" cgroup core only recently grew generic notification support. Wire up "memory.events" so that it triggers a file modified event whenever its content changes. v2: Refreshed on top of mem_cgroup relocation. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Li Zefan --- include/linux/memcontrol.h | 4 ++++ mm/memcontrol.c | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9aa7820c2177..c83c699a6605 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -213,6 +213,9 @@ struct mem_cgroup { /* OOM-Killer disable */ int oom_kill_disable; + /* handle for "memory.events" */ + struct cgroup_file events_file; + /* protect arrays of thresholds */ struct mutex thresholds_lock; @@ -286,6 +289,7 @@ static inline void mem_cgroup_events(struct mem_cgroup *memcg, unsigned int nr) { this_cpu_add(memcg->stat->events[idx], nr); + cgroup_file_notify(&memcg->events_file); } bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e672f2689326..9f331402e502 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5202,6 +5202,7 @@ static struct cftype memory_files[] = { { .name = "events", .flags = CFTYPE_NOT_ON_ROOT, + .file_offset = offsetof(struct mem_cgroup, events_file), .seq_show = memory_events_show, }, { } /* terminate */ -- cgit v1.2.3 From 730a43fbc135e593cc3de3b1b895e49c05c8e2dc Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 3 Sep 2015 18:03:38 +0200 Subject: mtd: nand: add nand_check_erased helper functions Add two helper functions to help NAND controller drivers test whether a specific NAND region is erased or not. Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 128 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/nand.h | 5 ++ 2 files changed, 133 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 51f7816d67c9..37c0d9d62478 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1100,6 +1100,134 @@ out: } EXPORT_SYMBOL(nand_lock); +/** + * nand_check_erased_buf - check if a buffer contains (almost) only 0xff data + * @buf: buffer to test + * @len: buffer length + * @bitflips_threshold: maximum number of bitflips + * + * Check if a buffer contains only 0xff, which means the underlying region + * has been erased and is ready to be programmed. 
+ * The bitflips_threshold specify the maximum number of bitflips before + * considering the region is not erased. + * Note: The logic of this function has been extracted from the memweight + * implementation, except that nand_check_erased_buf function exit before + * testing the whole buffer if the number of bitflips exceed the + * bitflips_threshold value. + * + * Returns a positive number of bitflips less than or equal to + * bitflips_threshold, or -ERROR_CODE for bitflips in excess of the + * threshold. + */ +static int nand_check_erased_buf(void *buf, int len, int bitflips_threshold) +{ + const unsigned char *bitmap = buf; + int bitflips = 0; + int weight; + + for (; len && ((uintptr_t)bitmap) % sizeof(long); + len--, bitmap++) { + weight = hweight8(*bitmap); + bitflips += BITS_PER_BYTE - weight; + if (unlikely(bitflips > bitflips_threshold)) + return -EBADMSG; + } + + for (; len >= sizeof(long); + len -= sizeof(long), bitmap += sizeof(long)) { + weight = hweight_long(*((unsigned long *)bitmap)); + bitflips += BITS_PER_LONG - weight; + if (unlikely(bitflips > bitflips_threshold)) + return -EBADMSG; + } + + for (; len > 0; len--, bitmap++) { + weight = hweight8(*bitmap); + bitflips += BITS_PER_BYTE - weight; + if (unlikely(bitflips > bitflips_threshold)) + return -EBADMSG; + } + + return bitflips; +} + +/** + * nand_check_erased_ecc_chunk - check if an ECC chunk contains (almost) only + * 0xff data + * @data: data buffer to test + * @datalen: data length + * @ecc: ECC buffer + * @ecclen: ECC length + * @extraoob: extra OOB buffer + * @extraooblen: extra OOB length + * @bitflips_threshold: maximum number of bitflips + * + * Check if a data buffer and its associated ECC and OOB data contains only + * 0xff pattern, which means the underlying region has been erased and is + * ready to be programmed. + * The bitflips_threshold specify the maximum number of bitflips before + * considering the region as not erased. + * + * Note: + * 1/ ECC algorithms are working on pre-defined block sizes which are usually + * different from the NAND page size. When fixing bitflips, ECC engines will + * report the number of errors per chunk, and the NAND core infrastructure + * expect you to return the maximum number of bitflips for the whole page. + * This is why you should always use this function on a single chunk and + * not on the whole page. After checking each chunk you should update your + * max_bitflips value accordingly. + * 2/ When checking for bitflips in erased pages you should not only check + * the payload data but also their associated ECC data, because a user might + * have programmed almost all bits to 1 but a few. In this case, we + * shouldn't consider the chunk as erased, and checking ECC bytes prevent + * this case. + * 3/ The extraoob argument is optional, and should be used if some of your OOB + * data are protected by the ECC engine. + * It could also be used if you support subpages and want to attach some + * extra OOB data to an ECC chunk. + * + * Returns a positive number of bitflips less than or equal to + * bitflips_threshold, or -ERROR_CODE for bitflips in excess of the + * threshold. In case of success, the passed buffers are filled with 0xff. 
+ */ +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int bitflips_threshold) +{ + int data_bitflips = 0, ecc_bitflips = 0, extraoob_bitflips = 0; + + data_bitflips = nand_check_erased_buf(data, datalen, + bitflips_threshold); + if (data_bitflips < 0) + return data_bitflips; + + bitflips_threshold -= data_bitflips; + + ecc_bitflips = nand_check_erased_buf(ecc, ecclen, bitflips_threshold); + if (ecc_bitflips < 0) + return ecc_bitflips; + + bitflips_threshold -= ecc_bitflips; + + extraoob_bitflips = nand_check_erased_buf(extraoob, extraooblen, + bitflips_threshold); + if (extraoob_bitflips < 0) + return extraoob_bitflips; + + if (data_bitflips) + memset(data, 0xff, datalen); + + if (ecc_bitflips) + memset(ecc, 0xff, ecclen); + + if (extraoob_bitflips) + memset(extraoob, 0xff, extraooblen); + + return data_bitflips + ecc_bitflips + extraoob_bitflips; +} +EXPORT_SYMBOL(nand_check_erased_ecc_chunk); + /** * nand_read_page_raw - [INTERN] read raw page data without ecc * @mtd: mtd info structure diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3140b3d94838..c4d8e308f453 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1030,4 +1030,9 @@ struct nand_sdr_timings { /* get timing characteristics from ONFI timing mode. */ const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); + +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int threshold); #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 0f1c28ae74bb1a34d36fca2db5161611d58b3148 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 18 Sep 2015 11:36:14 -0700 Subject: tcp: usec resolution SYN/ACK RTT Currently SYN/ACK RTT is measured in jiffies. For LAN the SYN/ACK RTT is often measured as 0ms or sometimes 1ms, which would affect RTT estimation and min RTT sampling used by some congestion control. This patch improves SYN/ACK RTT to usec resolution if the platform supports it. While the timestamping of SYN/ACK is done in the request sock, the RTT measurement is carefully arranged to avoid storing another u64 timestamp in tcp_sock. For a regular handshake w/o SYNACK retransmission, the RTT is sampled right after the child socket is created and right before the request sock is released (tcp_check_req() in tcp_minisocks.c). For Fast Open the child socket is already created when the SYN/ACK was sent; the RTT is sampled in tcp_rcv_state_process() after processing the final ACK and right before the request socket is released. If the SYN/ACK was retransmitted or a SYN-cookie was used, we rely on TCP timestamps to measure the RTT. The sample is taken at the same place in tcp_rcv_state_process() after the timestamp values are validated in tcp_validate_incoming(). Note that we do not store the TS echo value in the request_sock for SYN-cookies, because the value is already stored in tp->rx_opt used by tcp_ack_update_rtt(). One side benefit is that the RTT measurement now happens before initializing congestion control (of the passive side). Therefore the congestion control can use the SYN/ACK RTT. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 1 + net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 29 ++++++++++++++--------------- net/ipv4/tcp_minisocks.c | 3 ++- net/ipv6/syncookies.c | 2 +- 6 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 937b97893d5f..fcb573be75d9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -112,11 +112,11 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; + struct skb_mstamp snt_synack; /* first SYNACK sent time */ bool tfo_listener; u32 txhash; u32 rcv_isn; u32 snt_isn; - u32 snt_synack; /* synack sent time */ u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# diff --git a/include/net/tcp.h b/include/net/tcp.h index 0cab28cd43a9..5cf9672c13e2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -565,6 +565,7 @@ bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ void tcp_resume_early_retransmit(struct sock *sk); void tcp_rearm_rto(struct sock *sk); +void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); /* tcp_timer.c */ diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d70b1f603692..6595affded20 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,7 +345,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->snt_synack.v64 = 0; treq->tfo_listener = false; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a62e9c76d485..497adf58a6b8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2953,21 +2953,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, } /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ -static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) +void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - long seq_rtt_us = -1L; + long rtt_us = -1L; - if (synack_stamp && !tp->total_retrans) - seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp); + if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) { + struct skb_mstamp now; - /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets - * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() - */ - if (!tp->srtt_us) - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); + skb_mstamp_get(&now); + rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); + } + + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L); } + static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -5706,7 +5706,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct request_sock *req; int queued = 0; bool acceptable; - u32 synack_stamp; tp->rx_opt.saw_tstamp = 0; @@ -5785,15 +5784,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (!acceptable) return 1; + if (!tp->srtt_us) + tcp_synack_rtt_meas(sk, req); + /* Once we leave TCP_SYN_RECV, we no longer need req * so release it. 
*/ if (req) { - synack_stamp = tcp_rsk(req)->snt_synack; tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { - synack_stamp = tp->lsndtime; /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); tcp_init_congestion_control(sk); @@ -5816,7 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_synack_rtt_meas(sk, synack_stamp); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -6027,7 +6026,7 @@ static void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_rsk(req)->snt_synack = tcp_time_stamp; + skb_mstamp_get(&tcp_rsk(req)->snt_synack); tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6d8795b066ac..10933d01b982 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -470,7 +470,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; - newtp->lsndtime = treq->snt_synack; + newtp->lsndtime = treq->snt_synack.stamp_jiffies; newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; @@ -760,6 +760,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; + tcp_synack_rtt_meas(child, req); inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); /* Warning: caller must not call reqsk_put(req); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0909f4e0d53c..2461b3ff9551 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -210,7 +210,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->snt_synack.v64 = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; -- cgit v1.2.3 From 79c452adb159dc9abc507ea13faec8d115a78758 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Fri, 18 Sep 2015 17:49:25 +0200 Subject: mtd: spi-nor: remove unused read_xfer/write_xfer hooks struct spi_nor_xfer_cfg and read_xfer/write_xfer hooks were never used by any driver. Do some cleanup by removing them. 
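For contrast, the hooks that stay behind in struct spi_nor are the ones controller drivers actually implement. A hedged sketch of wiring them up; my_spi_xfer() and the use of nor->priv as the controller context are illustrative assumptions, not taken from this patch:

	#include <linux/string.h>
	#include <linux/mtd/spi-nor.h>

	/* hypothetical controller transfer primitive */
	int my_spi_xfer(void *ctx, const u8 *out, size_t outlen,
			u8 *in, size_t inlen);

	static int my_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
	{
		/* one command byte out, len register bytes back */
		return my_spi_xfer(nor->priv, &opcode, 1, buf, len);
	}

	static int my_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
	{
		u8 cmd[SPI_NOR_MAX_CMD_SIZE];

		/* assumes len < SPI_NOR_MAX_CMD_SIZE, as users of cmd_buf do */
		cmd[0] = opcode;
		memcpy(cmd + 1, buf, len);
		return my_spi_xfer(nor->priv, cmd, len + 1, NULL, 0);
	}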
Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index e9c912d73141..672595a381c5 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -87,33 +87,6 @@ enum read_mode { SPI_NOR_QUAD, }; -/** - * struct spi_nor_xfer_cfg - Structure for defining a Serial Flash transfer - * @wren: command for "Write Enable", or 0x00 for not required - * @cmd: command for operation - * @cmd_pins: number of pins to send @cmd (1, 2, 4) - * @addr: address for operation - * @addr_pins: number of pins to send @addr (1, 2, 4) - * @addr_width: number of address bytes - * (3,4, or 0 for address not required) - * @mode: mode data - * @mode_pins: number of pins to send @mode (1, 2, 4) - * @mode_cycles: number of mode cycles (0 for mode not required) - * @dummy_cycles: number of dummy cycles (0 for dummy not required) - */ -struct spi_nor_xfer_cfg { - u8 wren; - u8 cmd; - u8 cmd_pins; - u32 addr; - u8 addr_pins; - u8 addr_width; - u8 mode; - u8 mode_pins; - u8 mode_cycles; - u8 dummy_cycles; -}; - #define SPI_NOR_MAX_CMD_SIZE 8 enum spi_nor_ops { SPI_NOR_OPS_READ = 0, @@ -144,14 +117,11 @@ struct mtd_info; * @flash_read: the mode of the read * @sst_write_second: used by the SST write operation * @flags: flag options for the current SPI-NOR (SNOR_F_*) - * @cfg: used by the read_xfer/write_xfer * @cmd_buf: used by the write_reg * @prepare: [OPTIONAL] do some preparations for the * read/write/erase/lock/unlock operations * @unprepare: [OPTIONAL] do some post work after the * read/write/erase/lock/unlock operations - * @read_xfer: [OPTIONAL] the read fundamental primitive - * @write_xfer: [OPTIONAL] the writefundamental primitive * @read_reg: [DRIVER-SPECIFIC] read out the register * @write_reg: [DRIVER-SPECIFIC] write data to the register * @read: [DRIVER-SPECIFIC] read data from the SPI NOR @@ -176,15 +146,10 @@ struct spi_nor { enum read_mode flash_read; bool sst_write_second; u32 flags; - struct spi_nor_xfer_cfg cfg; u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops); void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops); - int (*read_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, - u8 *buf, size_t len); - int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, - u8 *buf, size_t len); int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); -- cgit v1.2.3 From e76e397a7f99bffbd91fb21f96370cbb165b5bcd Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 16 Sep 2015 10:42:13 +0100 Subject: mfd: arizona: Add registers for ADC microphone detection The newer devices support using a software comparison to determine whether a 3/4 pole jack is present. Add the registers necessary for this. 
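How a codec driver might select the new mode, sketched rather than taken from the actual extcon-arizona code; the register name ARIZONA_ACCESSORY_DETECT_MODE_1 is an assumption here, while the mask, shift and mode values come from the hunks above:

	#include <linux/regmap.h>
	#include <linux/mfd/arizona/core.h>
	#include <linux/mfd/arizona/registers.h>

	static void demo_set_accdet_mode(struct arizona *arizona)
	{
		unsigned int mode = arizona->pdata.micd_software_compare ?
				    ARIZONA_ACCDET_MODE_ADC :
				    ARIZONA_ACCDET_MODE_MIC;

		regmap_update_bits(arizona->regmap,
				   ARIZONA_ACCESSORY_DETECT_MODE_1,
				   ARIZONA_ACCDET_MODE_MASK,
				   mode << ARIZONA_ACCDET_MODE_SHIFT);
	}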
Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- drivers/mfd/wm5110-tables.c | 2 ++ include/dt-bindings/mfd/arizona.h | 2 ++ include/linux/mfd/arizona/pdata.h | 3 +++ include/linux/mfd/arizona/registers.h | 6 +++--- 4 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index c4b9374efd76..5f604e00aaf1 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -1811,6 +1811,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_MIC_DETECT_1: case ARIZONA_MIC_DETECT_2: case ARIZONA_MIC_DETECT_3: + case ARIZONA_MIC_DETECT_4: case ARIZONA_MIC_DETECT_LEVEL_1: case ARIZONA_MIC_DETECT_LEVEL_2: case ARIZONA_MIC_DETECT_LEVEL_3: @@ -2847,6 +2848,7 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_ASYNC_SAMPLE_RATE_1_STATUS: case ARIZONA_ASYNC_SAMPLE_RATE_2_STATUS: case ARIZONA_MIC_DETECT_3: + case ARIZONA_MIC_DETECT_4: case ARIZONA_HP_CTRL_1L: case ARIZONA_HP_CTRL_1R: case ARIZONA_HEADPHONE_DETECT_2: diff --git a/include/dt-bindings/mfd/arizona.h b/include/dt-bindings/mfd/arizona.h index 7b2000cead43..c40f665e2712 100644 --- a/include/dt-bindings/mfd/arizona.h +++ b/include/dt-bindings/mfd/arizona.h @@ -107,5 +107,7 @@ #define ARIZONA_ACCDET_MODE_MIC 0 #define ARIZONA_ACCDET_MODE_HPL 1 #define ARIZONA_ACCDET_MODE_HPR 2 +#define ARIZONA_ACCDET_MODE_HPM 4 +#define ARIZONA_ACCDET_MODE_ADC 7 #endif diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 1dc385850ba2..92f31ffaf866 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -124,6 +124,9 @@ struct arizona_pdata { /** Channel to use for headphone detection */ unsigned int hpdet_channel; + /** Use software comparison to determine mic presence */ + bool micd_software_compare; + /** Extra debounce timeout used during initial mic detection (ms) */ unsigned int micd_detect_debounce; diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index fdd70b3c7418..ab219c9b0b08 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -2359,9 +2359,9 @@ #define ARIZONA_ACCDET_SRC_MASK 0x2000 /* ACCDET_SRC */ #define ARIZONA_ACCDET_SRC_SHIFT 13 /* ACCDET_SRC */ #define ARIZONA_ACCDET_SRC_WIDTH 1 /* ACCDET_SRC */ -#define ARIZONA_ACCDET_MODE_MASK 0x0003 /* ACCDET_MODE - [1:0] */ -#define ARIZONA_ACCDET_MODE_SHIFT 0 /* ACCDET_MODE - [1:0] */ -#define ARIZONA_ACCDET_MODE_WIDTH 2 /* ACCDET_MODE - [1:0] */ +#define ARIZONA_ACCDET_MODE_MASK 0x0007 /* ACCDET_MODE - [2:0] */ +#define ARIZONA_ACCDET_MODE_SHIFT 0 /* ACCDET_MODE - [2:0] */ +#define ARIZONA_ACCDET_MODE_WIDTH 3 /* ACCDET_MODE - [2:0] */ /* * R667 (0x29B) - Headphone Detect 1 -- cgit v1.2.3 From 1ce376897e02e623ba357594604ca655df61053b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 16 Sep 2015 10:42:14 +0100 Subject: mfd: arizona: Add register bits for general purpose switch The switch is typically used in conjunction with the MICDET clamp in order to suppress pops and clicks associated with jack insertion. 
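A plausible consumer of the new field, sketched: apply the value once at MFD probe time. ARIZONA_SW1_MODE_MASK is assumed; only the ARIZONA_GP_SWITCH_1 register itself appears in this patch:

	if (arizona->pdata.gpsw)
		regmap_update_bits(arizona->regmap, ARIZONA_GP_SWITCH_1,
				   ARIZONA_SW1_MODE_MASK,
				   arizona->pdata.gpsw);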
Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- drivers/mfd/wm5110-tables.c | 2 ++ include/linux/mfd/arizona/pdata.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 5f604e00aaf1..aae17d6997b1 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -1481,6 +1481,7 @@ static const struct reg_default wm5110_reg_default[] = { { 0x00000C04, 0xA101 }, /* R3076 - GPIO5 CTRL */ { 0x00000C0F, 0x0400 }, /* R3087 - IRQ CTRL 1 */ { 0x00000C10, 0x1000 }, /* R3088 - GPIO Debounce Config */ + { 0x00000C18, 0x0000 }, /* R3096 - GP Switch 1 */ { 0x00000C20, 0x8002 }, /* R3104 - Misc Pad Ctrl 1 */ { 0x00000C21, 0x8001 }, /* R3105 - Misc Pad Ctrl 2 */ { 0x00000C22, 0x0000 }, /* R3106 - Misc Pad Ctrl 3 */ @@ -2528,6 +2529,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_GPIO5_CTRL: case ARIZONA_IRQ_CTRL_1: case ARIZONA_GPIO_DEBOUNCE_CONFIG: + case ARIZONA_GP_SWITCH_1: case ARIZONA_MISC_PAD_CTRL_1: case ARIZONA_MISC_PAD_CTRL_2: case ARIZONA_MISC_PAD_CTRL_3: diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 92f31ffaf866..57b45caaea80 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -184,6 +184,9 @@ struct arizona_pdata { /** GPIO for primary IRQ (used for edge triggered emulation) */ int irq_gpio; + + /** General purpose switch control */ + unsigned int gpsw; }; #endif -- cgit v1.2.3 From b8ba9edb911102b452c815879f520bfe2713fab6 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 16 Sep 2015 10:42:15 +0100 Subject: mfd: arizona: Add TST_CAP bits for headphone detection On wm5110/8280 some additional settings are required to get accurate measurements at the top end of the headphone detection range. This patch adds the bits required for this. 
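Sketched usage during an impedance measurement; the 1 kOhm threshold and both selector values are illustrative assumptions, only the register and field defines come from this patch:

	static void demo_hp_set_test_cap(struct arizona *arizona,
					 unsigned int hp_ohms)
	{
		/* pick a larger test capacitance for high-impedance loads */
		unsigned int sel = hp_ohms >= 1000 ? 1 : 3;

		regmap_update_bits(arizona->regmap, ARIZONA_HP_TEST_CTRL_1,
				   ARIZONA_HP1_TST_CAP_SEL_MASK,
				   sel << ARIZONA_HP1_TST_CAP_SEL_SHIFT);
	}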
Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- drivers/mfd/wm5110-tables.c | 2 ++ include/linux/mfd/arizona/registers.h | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index aae17d6997b1..28f2ae30507a 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -1912,6 +1912,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_HP1_SHORT_CIRCUIT_CTRL: case ARIZONA_HP2_SHORT_CIRCUIT_CTRL: case ARIZONA_HP3_SHORT_CIRCUIT_CTRL: + case ARIZONA_HP_TEST_CTRL_1: case ARIZONA_AIF1_BCLK_CTRL: case ARIZONA_AIF1_TX_PIN_CTRL: case ARIZONA_AIF1_RX_PIN_CTRL: @@ -2857,6 +2858,7 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_INPUT_ENABLES_STATUS: case ARIZONA_OUTPUT_STATUS_1: case ARIZONA_RAW_OUTPUT_STATUS_1: + case ARIZONA_HP_TEST_CTRL_1: case ARIZONA_SLIMBUS_RX_PORT_STATUS: case ARIZONA_SLIMBUS_TX_PORT_STATUS: case ARIZONA_INTERRUPT_STATUS_1: diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index ab219c9b0b08..c7c11c900196 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -242,6 +242,7 @@ #define ARIZONA_HP1_SHORT_CIRCUIT_CTRL 0x4A0 #define ARIZONA_HP2_SHORT_CIRCUIT_CTRL 0x4A1 #define ARIZONA_HP3_SHORT_CIRCUIT_CTRL 0x4A2 +#define ARIZONA_HP_TEST_CTRL_1 0x4A4 #define ARIZONA_SPK_CTRL_2 0x4B5 #define ARIZONA_SPK_CTRL_3 0x4B6 #define ARIZONA_DAC_COMP_1 0x4DC @@ -3701,6 +3702,13 @@ #define ARIZONA_HP3_SC_ENA_SHIFT 12 /* HP3_SC_ENA */ #define ARIZONA_HP3_SC_ENA_WIDTH 1 /* HP3_SC_ENA */ +/* + * R1188 (0x4A4) HP Test Ctrl 1 + */ +#define ARIZONA_HP1_TST_CAP_SEL_MASK 0x0003 /* HP1_TST_CAP_SEL - [1:0] */ +#define ARIZONA_HP1_TST_CAP_SEL_SHIFT 0 /* HP1_TST_CAP_SEL - [1:0] */ +#define ARIZONA_HP1_TST_CAP_SEL_WIDTH 2 /* HP1_TST_CAP_SEL - [1:0] */ + /* * R1244 (0x4DC) - DAC comp 1 */ -- cgit v1.2.3 From 87a93e4eceb495f93e3f37b100334d2641765b6c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 18 Sep 2015 11:30:43 +0200 Subject: ieee802154: change needed headroom/tailroom This patch cleans up the needed_headroom, needed_tailroom and hard_header_len fields for wpan and lowpan interfaces. For wpan interfaces the worst-case MAC header length should be part of needed_headroom; currently it is set as hard_header_len, but hard_header_len should be set to the minimum header length which the xmit call assumes, and that is the minimum frame length of 802.15.4. The hard_header_len value is checked inside the send callback of AF_PACKET raw sockets. For lowpan interfaces, if fragmentation isn't needed the lowpan layer will call dev_hard_header for the 802.15.4 layer on the skb and queue it afterwards. This happens without a new skb allocation, so we need the same headroom and tailroom lengths as 802.15.4 inside the 802.15.4 6LoWPAN layer. As the minimum header length we assume the size of an IPv6 header.
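The allocation pattern this sizing feeds, mirroring lowpan_alloc_frag() in the diff below: size the skb with needed_headroom plus needed_tailroom, reserve the headroom up front, and leave the tailroom free for FCS/authentication bytes (the helper name here is hypothetical):

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	static struct sk_buff *demo_alloc_wpan_skb(struct net_device *wdev,
						   unsigned int payload)
	{
		struct sk_buff *skb;

		skb = alloc_skb(wdev->needed_headroom + payload +
				wdev->needed_tailroom, GFP_KERNEL);
		if (!skb)
			return NULL;

		skb_reserve(skb, wdev->needed_headroom); /* MAC header goes here */
		skb_reset_network_header(skb);
		return skb;
	}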
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 11 +++++++++++ include/net/6lowpan.h | 8 ++++++++ include/net/mac802154.h | 8 -------- net/6lowpan/nhc.h | 2 -- net/ieee802154/6lowpan/core.c | 14 +++++++++++--- net/ieee802154/6lowpan/tx.c | 12 ++++++++++-- net/ieee802154/header_ops.c | 2 +- net/mac802154/iface.c | 17 ++++++++++++++--- net/mac802154/tx.c | 3 --- 9 files changed, 55 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index db01492814d3..205ce4e1ac32 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -31,6 +31,17 @@ #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_FCS_LEN 2 +#define IEEE802154_MAX_AUTH_TAG_LEN 16 + +/* General MAC frame format: + * 2 bytes: Frame Control + * 1 byte: Sequence Number + * 20 bytes: Addressing fields + * 14 bytes: Auxiliary Security Header + */ +#define IEEE802154_MAX_HEADER_LEN (2 + 1 + 20 + 14) +#define IEEE802154_MIN_HEADER_LEN (IEEE802154_ACK_PSDU_LEN - \ + IEEE802154_FCS_LEN) #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index eeae5eb58754..c17f556644fc 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -61,6 +61,14 @@ #define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */ #define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */ +#define LOWPAN_NHC_MAX_ID_LEN 1 +/* Max IPHC Header len without IPv6 hdr specific inline data. + * Useful for getting the "extra" bytes we need at worst case compression. + * + * LOWPAN_IPHC + CID + LOWPAN_NHC_MAX_ID_LEN + */ +#define LOWPAN_IPHC_MAX_HEADER_LEN (2 + 1 + LOWPAN_NHC_MAX_ID_LEN) + /* * ipv6 address based on mac * second bit-flip (Universe/Local) is done according RFC2464 diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 32bd7c0467d4..2c478501ad14 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -23,14 +23,6 @@ #include -/* General MAC frame format: - * 2 bytes: Frame Control - * 1 byte: Sequence Number - * 20 bytes: Addressing fields - * 14 bytes: Auxiliary Security Header - */ -#define MAC802154_FRAME_HARD_HEADER_LEN (2 + 1 + 20 + 14) - /** * enum ieee802154_hw_addr_filt_flags - hardware address filtering flags * diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h index ed44938eb5de..c249f17fa37b 100644 --- a/net/6lowpan/nhc.h +++ b/net/6lowpan/nhc.h @@ -8,8 +8,6 @@ #include #include -#define LOWPAN_NHC_MAX_ID_LEN 1 - /** * LOWPAN_NHC - helper macro to generate nh id fields and lowpan_nhc struct * diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 9f0cfa598e3a..44420ed95574 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -104,9 +104,8 @@ static void lowpan_setup(struct net_device *ldev) ldev->addr_len = IEEE802154_ADDR_LEN; memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN); ldev->type = ARPHRD_6LOWPAN; - /* Frame Control + Sequence Number + Address fields + Security Header */ - ldev->hard_header_len = 2 + 1 + 20 + 14; - ldev->needed_tailroom = 2; /* FCS */ + /* We need an ipv6hdr as minimum len when calling xmit */ + ldev->hard_header_len = sizeof(struct ipv6hdr); ldev->mtu = IPV6_MIN_MTU; ldev->priv_flags |= IFF_NO_QUEUE; ldev->flags = IFF_BROADCAST | IFF_MULTICAST; @@ -156,6 +155,15 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, lowpan_dev_info(ldev)->wdev = wdev; 
/* Set the lowpan hardware address to the wpan hardware address. */ memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); + /* We need headroom for possible wpan_dev_hard_header call and tailroom + * for encryption/fcs handling. The lowpan interface will replace + * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN + * header has LOWPAN_IPHC_MAX_HEADER_LEN more bytes than the IPv6 + * header. + */ + ldev->needed_headroom = LOWPAN_IPHC_MAX_HEADER_LEN + + wdev->needed_headroom; + ldev->needed_tailroom = wdev->needed_tailroom; lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154); diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index 6067e064a3fe..7e0563eaea98 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -10,6 +10,7 @@ #include #include +#include #include "6lowpan_i.h" @@ -36,6 +37,13 @@ lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) sizeof(struct lowpan_addr_info)); } +/* This callback will be called from AF_PACKET and IPv6 stack, the AF_PACKET + * sockets gives an 8 byte array for addresses only! + * + * TODO I think AF_PACKET DGRAM (sending/receiving) RAW (sending) makes no + * sense here. We should disable it, the right use-case would be AF_INET6 + * RAW/DGRAM sockets. + */ int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) @@ -77,13 +85,13 @@ lowpan_alloc_frag(struct sk_buff *skb, int size, struct sk_buff *frag; int rc; - frag = alloc_skb(wdev->hard_header_len + wdev->needed_tailroom + size, + frag = alloc_skb(wdev->needed_headroom + wdev->needed_tailroom + size, GFP_ATOMIC); if (likely(frag)) { frag->dev = wdev; frag->priority = skb->priority; - skb_reserve(frag, wdev->hard_header_len); + skb_reserve(frag, wdev->needed_headroom); skb_reset_network_header(frag); *mac_cb(frag) = *mac_cb(skb); diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c index d8443b057022..c7439f0fbbdf 100644 --- a/net/ieee802154/header_ops.c +++ b/net/ieee802154/header_ops.c @@ -85,7 +85,7 @@ ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr) int ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr) { - u8 buf[MAC802154_FRAME_HARD_HEADER_LEN]; + u8 buf[IEEE802154_MAX_HEADER_LEN]; int pos = 2; int rc; struct ieee802154_hdr_fc *fc = &hdr->fc; diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 8afe26d72971..b5a0936ce514 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -537,8 +537,18 @@ static void ieee802154_if_setup(struct net_device *dev) dev->addr_len = IEEE802154_EXTENDED_ADDR_LEN; memset(dev->broadcast, 0xff, IEEE802154_EXTENDED_ADDR_LEN); - dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; - dev->needed_tailroom = 2 + 16; /* FCS + MIC */ + /* Let hard_header_len set to IEEE802154_MIN_HEADER_LEN. AF_PACKET + * will not send frames without any payload, but ack frames + * has no payload, so substract one that we can send a 3 bytes + * frame. The xmit callback assumes at least a hard header where two + * bytes fc and sequence field are set. + */ + dev->hard_header_len = IEEE802154_MIN_HEADER_LEN - 1; + /* The auth_tag header is for security and places in private payload + * room of mac frame which stucks between payload and FCS field. 
+ */ + dev->needed_tailroom = IEEE802154_MAX_AUTH_TAG_LEN + + IEEE802154_FCS_LEN; dev->mtu = IEEE802154_MTU; dev->tx_queue_len = 300; dev->flags = IFF_NOARP | IFF_BROADCAST; @@ -617,7 +627,8 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, if (!ndev) return ERR_PTR(-ENOMEM); - ndev->needed_headroom = local->hw.extra_tx_headroom; + ndev->needed_headroom = local->hw.extra_tx_headroom + + IEEE802154_MAX_HEADER_LEN; ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 7ed439172f30..66d7ecb7c56b 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -77,9 +77,6 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) put_unaligned_le16(crc, skb_put(skb, 2)); } - if (skb_cow_head(skb, local->hw.extra_tx_headroom)) - goto err_tx; - /* Stop the netif queue on each sub_if_data object. */ ieee802154_stop_queue(&local->hw); -- cgit v1.2.3 From 79750ac4257763ff595a8b2cdc7ba580f0b0c8e0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 21 Sep 2015 11:24:33 +0200 Subject: ieee802154: add helpers for frame control checks This patch introduces two static inline functions. The first gets the frame control field from an sk_buff. The second checks the acknowledgment request bit in the frame control field. Later we can introduce more functions to check the frame control fields. These will deprecate the current behaviour, which requires a host-byteorder conversion and manual bit handling. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 205ce4e1ac32..aca228b81464 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -25,6 +25,8 @@ #include #include +#include +#include #include #define IEEE802154_MTU 127 @@ -218,6 +220,7 @@ enum { /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 +#define IEEE802154_FCTL_ACKREQ 0x0020 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FTYPE_DATA 0x0001 @@ -232,6 +235,15 @@ static inline int ieee802154_is_data(__le16 fc) cpu_to_le16(IEEE802154_FTYPE_DATA); } +/** + * ieee802154_is_ackreq - check if acknowledgment request bit is set + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_ackreq(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_ACKREQ); +} + /** * ieee802154_is_intra_pan - check if intra pan id communication + @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From 7ac8bf9bc9ba82aea763ef30671a34c6a2a39922 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 22 Sep 2015 11:56:04 +0200 Subject: EDAC: Carve out debugfs functionality ... into a separate compilation unit and drop a couple of CONFIG_EDAC_DEBUG ifdefferies. Rename edac_create_debug_nodes() to edac_create_debugfs_nodes(), while at it. No functionality change.
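The idiom behind the split, restated compactly from the diff: debugfs.o is built only via edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o, while the header pairs the real prototype with a no-op inline stub so call sites such as edac_create_sysfs_mci_device() can call unconditionally:

	#ifdef CONFIG_EDAC_DEBUG
	int edac_create_debugfs_nodes(struct mem_ctl_info *mci);
	#else
	/* stub keeps callers free of #ifdef CONFIG_EDAC_DEBUG */
	static inline int edac_create_debugfs_nodes(struct mem_ctl_info *mci)
	{
		return 0;
	}
	#endif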
Cc: Signed-off-by: Borislav Petkov --- drivers/edac/Makefile | 2 + drivers/edac/debugfs.c | 103 ++++++++++++++++++++++++++++++++++++++++ drivers/edac/edac_core.h | 2 + drivers/edac/edac_mc_sysfs.c | 110 +------------------------------------------ drivers/edac/edac_module.h | 2 + include/linux/edac.h | 2 - 6 files changed, 110 insertions(+), 111 deletions(-) create mode 100644 drivers/edac/debugfs.c (limited to 'include/linux') diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index ae3c5f3ce405..dbf53e08bdd1 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -12,6 +12,8 @@ obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o +edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o + ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c new file mode 100644 index 000000000000..bcd558d5cb48 --- /dev/null +++ b/drivers/edac/debugfs.c @@ -0,0 +1,103 @@ +#include "edac_module.h" + +static struct dentry *edac_debugfs; + +static ssize_t edac_fake_inject_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + static enum hw_event_mc_err_type type; + u16 errcount = mci->fake_inject_count; + + if (!errcount) + errcount = 1; + + type = mci->fake_inject_ue ? HW_EVENT_ERR_UNCORRECTED + : HW_EVENT_ERR_CORRECTED; + + printk(KERN_DEBUG + "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", + errcount, + (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", + errcount > 1 ? "s" : "", + mci->fake_inject_layer[0], + mci->fake_inject_layer[1], + mci->fake_inject_layer[2] + ); + edac_mc_handle_error(type, mci, errcount, 0, 0, 0, + mci->fake_inject_layer[0], + mci->fake_inject_layer[1], + mci->fake_inject_layer[2], + "FAKE ERROR", "for EDAC testing only"); + + return count; +} + +static const struct file_operations debug_fake_inject_fops = { + .open = simple_open, + .write = edac_fake_inject_write, + .llseek = generic_file_llseek, +}; + +int __init edac_debugfs_init(void) +{ + edac_debugfs = debugfs_create_dir("edac", NULL); + if (IS_ERR(edac_debugfs)) { + edac_debugfs = NULL; + return -ENOMEM; + } + return 0; +} + +void edac_debugfs_exit(void) +{ + debugfs_remove(edac_debugfs); +} + +int edac_create_debugfs_nodes(struct mem_ctl_info *mci) +{ + struct dentry *d, *parent; + char name[80]; + int i; + + if (!edac_debugfs) + return -ENODEV; + + d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs); + if (!d) + return -ENOMEM; + parent = d; + + for (i = 0; i < mci->n_layers; i++) { + sprintf(name, "fake_inject_%s", + edac_layer_name[mci->layers[i].type]); + d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_layer[i]); + if (!d) + goto nomem; + } + + d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_ue); + if (!d) + goto nomem; + + d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_count); + if (!d) + goto nomem; + + d = debugfs_create_file("fake_inject", S_IWUSR, parent, + &mci->dev, + &debug_fake_inject_fops); + if (!d) + goto nomem; + + mci->debugfs = parent; + return 0; +nomem: + debugfs_remove(mci->debugfs); + return -ENOMEM; +} diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index ad42587c3f4d..4861542163d7 100644 --- 
a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -94,6 +94,8 @@ do { \ #define edac_dev_name(dev) (dev)->dev_name +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + /* * The following are the structures to provide for a generic * or abstract 'edac_device'. This set of structures and the diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 33df7d93c857..071f7fca5ff5 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -785,47 +785,6 @@ static ssize_t mci_max_location_show(struct device *dev, return p - data; } -#ifdef CONFIG_EDAC_DEBUG -static ssize_t edac_fake_inject_write(struct file *file, - const char __user *data, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct mem_ctl_info *mci = to_mci(dev); - static enum hw_event_mc_err_type type; - u16 errcount = mci->fake_inject_count; - - if (!errcount) - errcount = 1; - - type = mci->fake_inject_ue ? HW_EVENT_ERR_UNCORRECTED - : HW_EVENT_ERR_CORRECTED; - - printk(KERN_DEBUG - "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", - errcount, - (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", - errcount > 1 ? "s" : "", - mci->fake_inject_layer[0], - mci->fake_inject_layer[1], - mci->fake_inject_layer[2] - ); - edac_mc_handle_error(type, mci, errcount, 0, 0, 0, - mci->fake_inject_layer[0], - mci->fake_inject_layer[1], - mci->fake_inject_layer[2], - "FAKE ERROR", "for EDAC testing only"); - - return count; -} - -static const struct file_operations debug_fake_inject_fops = { - .open = simple_open, - .write = edac_fake_inject_write, - .llseek = generic_file_llseek, -}; -#endif - /* default Control file */ static DEVICE_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); @@ -896,71 +855,6 @@ static struct device_type mci_attr_type = { .release = mci_attr_release, }; -#ifdef CONFIG_EDAC_DEBUG -static struct dentry *edac_debugfs; - -int __init edac_debugfs_init(void) -{ - edac_debugfs = debugfs_create_dir("edac", NULL); - if (IS_ERR(edac_debugfs)) { - edac_debugfs = NULL; - return -ENOMEM; - } - return 0; -} - -void edac_debugfs_exit(void) -{ - debugfs_remove(edac_debugfs); -} - -static int edac_create_debug_nodes(struct mem_ctl_info *mci) -{ - struct dentry *d, *parent; - char name[80]; - int i; - - if (!edac_debugfs) - return -ENODEV; - - d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs); - if (!d) - return -ENOMEM; - parent = d; - - for (i = 0; i < mci->n_layers; i++) { - sprintf(name, "fake_inject_%s", - edac_layer_name[mci->layers[i].type]); - d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_layer[i]); - if (!d) - goto nomem; - } - - d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_ue); - if (!d) - goto nomem; - - d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_count); - if (!d) - goto nomem; - - d = debugfs_create_file("fake_inject", S_IWUSR, parent, - &mci->dev, - &debug_fake_inject_fops); - if (!d) - goto nomem; - - mci->debugfs = parent; - return 0; -nomem: - debugfs_remove(mci->debugfs); - return -ENOMEM; -} -#endif - /* * Create a new Memory Controller kobject instance, * mc under the 'mc' directory @@ -1039,9 +933,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, goto fail_unregister_dimm; #endif -#ifdef CONFIG_EDAC_DEBUG - edac_create_debug_nodes(mci); -#endif + edac_create_debugfs_nodes(mci); return 0; 
fail_unregister_dimm: diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 26ecc52e073d..79a6c6e20819 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -63,12 +63,14 @@ extern void *edac_align_ptr(void **p, unsigned size, int n_elems); #ifdef CONFIG_EDAC_DEBUG int edac_debugfs_init(void); void edac_debugfs_exit(void); +int edac_create_debugfs_nodes(struct mem_ctl_info *mci); #else static inline int edac_debugfs_init(void) { return -ENODEV; } static inline void edac_debugfs_exit(void) {} +static inline int edac_create_debugfs_nodes(struct mem_ctl_info *mci) { return 0; } #endif /* diff --git a/include/linux/edac.h b/include/linux/edac.h index da3b72e95db3..b3d87e5822f8 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -769,12 +769,10 @@ struct mem_ctl_info { /* the internal state of this controller instance */ int op_state; -#ifdef CONFIG_EDAC_DEBUG struct dentry *debugfs; u8 fake_inject_layer[EDAC_MAX_LAYERS]; u32 fake_inject_ue; u16 fake_inject_count; -#endif }; /* -- cgit v1.2.3 From 2a1d3ab8986d1b2f598ffc42351d94166fa0f022 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Sep 2015 11:01:10 +0200 Subject: genirq: Handle force threading of irqs with primary and thread handler Force threading of interrupts does not really deal with interrupts which are requested with a primary and a threaded handler. The current policy is to leave them alone and let the primary handler run in interrupt context, but we set the ONESHOT flag for those interrupts as well. Kohji Okuno debugged a problem with the SDHCI driver where the interrupt thread waits for a hardware interrupt to trigger, which can't work because the hardware interrupt is masked due to the ONESHOT flag being set. He proposed to set the ONESHOT flag only if the interrupt does not provide a thread handler. Though that does not work either because these interrupts can be shared. So the other interrupt would rightfully get the ONESHOT flag set and therefore the same situation would happen again. To deal with this properly, we need to force thread the primary handler of such interrupts as well. That means that the primary interrupt handler is treated as any other primary interrupt handler which is not marked IRQF_NO_THREAD. The threaded handler becomes a separate thread so the SDHCI flow logic can be handled gracefully. The same issue was reported against 4.1-rt.
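For reference, a minimal sketch of the handler pair this patch affects, with hypothetical device helpers; under forced threading both handlers now run in threads, the primary one waking the secondary via IRQ_WAKE_THREAD:

	#include <linux/interrupt.h>

	struct demo_chip;				/* hypothetical device context */
	bool demo_irq_pending(struct demo_chip *chip);	/* hypothetical helpers */
	void demo_mask_events(struct demo_chip *chip);
	void demo_handle_events(void *dev_id);

	static irqreturn_t demo_quick_check(int irq, void *dev_id)
	{
		struct demo_chip *chip = dev_id;

		if (!demo_irq_pending(chip))
			return IRQ_NONE;	/* shared line, not ours */

		demo_mask_events(chip);		/* quiesce the hardware */
		return IRQ_WAKE_THREAD;		/* defer to thread_fn */
	}

	static irqreturn_t demo_thread_fn(int irq, void *dev_id)
	{
		demo_handle_events(dev_id);	/* may sleep here */
		return IRQ_HANDLED;
	}

	/* at probe time: */
	ret = request_threaded_irq(irq, demo_quick_check, demo_thread_fn,
				   IRQF_SHARED, "demo", chip);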
Reported-and-tested-by: Kohji Okuno Reported-By: Michal Smucr Reported-and-tested-by: Nathan Sullivan Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509211058080.5606@nanos Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 2 + kernel/irq/manage.c | 158 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 119 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index be7e75c945e9..ad16809c8596 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -102,6 +102,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * @flags: flags (see IRQF_* above) * @thread_fn: interrupt handler function for threaded interrupts * @thread: thread pointer for threaded interrupts + * @secondary: pointer to secondary irqaction (force threading) * @thread_flags: flags related to @thread * @thread_mask: bitmask for keeping track of @thread activity * @dir: pointer to the proc/irq/NN/name entry @@ -113,6 +114,7 @@ struct irqaction { struct irqaction *next; irq_handler_t thread_fn; struct task_struct *thread; + struct irqaction *secondary; unsigned int irq; unsigned int flags; unsigned long thread_flags; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f9a59f6cabd2..0a63c2b20bdd 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -730,6 +730,12 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) return IRQ_NONE; } +static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) +{ + WARN(1, "Secondary action handler called for irq %d\n", irq); + return IRQ_NONE; +} + static int irq_wait_for_interrupt(struct irqaction *action) { set_current_state(TASK_INTERRUPTIBLE); @@ -756,7 +762,8 @@ static int irq_wait_for_interrupt(struct irqaction *action) static void irq_finalize_oneshot(struct irq_desc *desc, struct irqaction *action) { - if (!(desc->istate & IRQS_ONESHOT)) + if (!(desc->istate & IRQS_ONESHOT) || + action->handler == irq_forced_secondary_handler) return; again: chip_bus_lock(desc); @@ -910,6 +917,18 @@ static void irq_thread_dtor(struct callback_head *unused) irq_finalize_oneshot(desc, action); } +static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) +{ + struct irqaction *secondary = action->secondary; + + if (WARN_ON_ONCE(!secondary)) + return; + + raw_spin_lock_irq(&desc->lock); + __irq_wake_thread(desc, secondary); + raw_spin_unlock_irq(&desc->lock); +} + /* * Interrupt handler thread */ @@ -940,6 +959,8 @@ static int irq_thread(void *data) action_ret = handler_fn(desc, action); if (action_ret == IRQ_HANDLED) atomic_inc(&desc->threads_handled); + if (action_ret == IRQ_WAKE_THREAD) + irq_wake_secondary(desc, action); wake_threads_waitq(desc); } @@ -984,20 +1005,36 @@ void irq_wake_thread(unsigned int irq, void *dev_id) } EXPORT_SYMBOL_GPL(irq_wake_thread); -static void irq_setup_forced_threading(struct irqaction *new) +static int irq_setup_forced_threading(struct irqaction *new) { if (!force_irqthreads) - return; + return 0; if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) - return; + return 0; new->flags |= IRQF_ONESHOT; - if (!new->thread_fn) { - set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); - new->thread_fn = new->handler; - new->handler = irq_default_primary_handler; + /* + * Handle the case where we have a real primary handler and a + * thread handler. We force thread them as well by creating a + * secondary action. 
+ */ + if (new->handler != irq_default_primary_handler && new->thread_fn) { + /* Allocate the secondary action */ + new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!new->secondary) + return -ENOMEM; + new->secondary->handler = irq_forced_secondary_handler; + new->secondary->thread_fn = new->thread_fn; + new->secondary->dev_id = new->dev_id; + new->secondary->irq = new->irq; + new->secondary->name = new->name; } + /* Deal with the primary handler */ + set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); + new->thread_fn = new->handler; + new->handler = irq_default_primary_handler; + return 0; } static int irq_request_resources(struct irq_desc *desc) @@ -1017,6 +1054,48 @@ static void irq_release_resources(struct irq_desc *desc) c->irq_release_resources(d); } +static int +setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) +{ + struct task_struct *t; + struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; + + if (!secondary) { + t = kthread_create(irq_thread, new, "irq/%d-%s", irq, + new->name); + } else { + t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, + new->name); + param.sched_priority -= 1; + } + + if (IS_ERR(t)) + return PTR_ERR(t); + + sched_setscheduler_nocheck(t, SCHED_FIFO, &param); + + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code + * references an already freed task_struct. + */ + get_task_struct(t); + new->thread = t; + /* + * Tell the thread to set its affinity. This is + * important for shared interrupt handlers as we do + * not invoke setup_affinity() for the secondary + * handlers as everything is already set up. Even for + * interrupts marked with IRQF_NO_BALANCE this is + * correct as we want the thread to move to the cpu(s) + * on which the requesting code placed the interrupt. + */ + set_bit(IRQTF_AFFINITY, &new->thread_flags); + return 0; +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -1037,6 +1116,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!try_module_get(desc->owner)) return -ENODEV; + new->irq = irq; + /* * Check whether the interrupt nests into another interrupt * thread. @@ -1054,8 +1135,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ new->handler = irq_nested_primary_handler; } else { - if (irq_settings_can_thread(desc)) - irq_setup_forced_threading(new); + if (irq_settings_can_thread(desc)) { + ret = irq_setup_forced_threading(new); + if (ret) + goto out_mput; + } } /* @@ -1064,37 +1148,14 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * thread. */ if (new->thread_fn && !nested) { - struct task_struct *t; - static const struct sched_param param = { - .sched_priority = MAX_USER_RT_PRIO/2, - }; - - t = kthread_create(irq_thread, new, "irq/%d-%s", irq, - new->name); - if (IS_ERR(t)) { - ret = PTR_ERR(t); + ret = setup_irq_thread(new, irq, false); + if (ret) goto out_mput; + if (new->secondary) { + ret = setup_irq_thread(new->secondary, irq, true); + if (ret) + goto out_thread; } - - sched_setscheduler_nocheck(t, SCHED_FIFO, &param); - - /* - * We keep the reference to the task struct even if - * the thread dies to avoid that the interrupt code - * references an already freed task_struct. - */ - get_task_struct(t); - new->thread = t; - /* - * Tell the thread to set its affinity.
This is - * important for shared interrupt handlers as we do - * not invoke setup_affinity() for the secondary - * handlers as everything is already set up. Even for - * interrupts marked with IRQF_NO_BALANCE this is - * correct as we want the thread to move to the cpu(s) - * on which the requesting code placed the interrupt. - */ - set_bit(IRQTF_AFFINITY, &new->thread_flags); } if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -1267,7 +1328,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq, nmsk, omsk); } - new->irq = irq; *old_ptr = new; irq_pm_install_action(desc, new); @@ -1293,6 +1353,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ if (new->thread) wake_up_process(new->thread); + if (new->secondary) + wake_up_process(new->secondary->thread); register_irq_proc(irq, desc); new->dir = NULL; @@ -1323,6 +1385,13 @@ out_thread: kthread_stop(t); put_task_struct(t); } + if (new->secondary && new->secondary->thread) { + struct task_struct *t = new->secondary->thread; + + new->secondary->thread = NULL; + kthread_stop(t); + put_task_struct(t); + } out_mput: module_put(desc->owner); return ret; @@ -1430,9 +1499,14 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (action->thread) { kthread_stop(action->thread); put_task_struct(action->thread); + if (action->secondary && action->secondary->thread) { + kthread_stop(action->secondary->thread); + put_task_struct(action->secondary->thread); + } } module_put(desc->owner); + kfree(action->secondary); return action; } @@ -1576,8 +1650,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, retval = __setup_irq(irq, desc, action); chip_bus_sync_unlock(desc); - if (retval) + if (retval) { + kfree(action->secondary); kfree(action); + } #ifdef CONFIG_DEBUG_SHIRQ_FIXME if (!retval && (irqflags & IRQF_SHARED)) { -- cgit v1.2.3 From 69fb4dcada7704beeba86e3f401a66c726aa8504 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 1 Aug 2015 10:39:38 +0200 Subject: power: Add an axp20x-usb-power driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a driver for the usb power_supply bits of the axp20x PMICs. I initially started writing my own driver, before coming aware of Bruno Prémont's excellent earlier RFC with a driver for this. My driver was lacking CURRENT_MAX and VOLTAGE_MIN support Bruno's drvier has, so I've copied the code for those from his driver. Note that the AC-power-supply and battery charger bits will need separate drivers. Each one needs its own devictree child-node so that other devicetree nodes can reference the right power-supply, and thus each one will get its own mfd-cell / platform_device and platform-driver. Cc: Bruno Prémont Acked-by: Lee Jones Signed-off-by: Bruno Prémont Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/Kconfig | 7 ++ drivers/power/Makefile | 1 + drivers/power/axp20x_usb_power.c | 248 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/axp20x.h | 24 ++++ 4 files changed, 280 insertions(+) create mode 100644 drivers/power/axp20x_usb_power.c (limited to 'include/linux') diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index f8758d6febf8..914167e14d2e 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -472,6 +472,13 @@ config CHARGER_RT9455 help Say Y to enable support for Richtek RT9455 battery charger. 
+config AXP20X_POWER + tristate "AXP20x power supply driver" + depends on MFD_AXP20X + help + This driver provides support for the power supply features of + AXP20x PMIC. + source "drivers/power/reset/Kconfig" endif # POWER_SUPPLY diff --git a/drivers/power/Makefile b/drivers/power/Makefile index 5752ce818f51..fb4413e5dd37 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_GENERIC_ADC_BATTERY) += generic-adc-battery.o obj-$(CONFIG_PDA_POWER) += pda_power.o obj-$(CONFIG_APM_POWER) += apm_power.o +obj-$(CONFIG_AXP20X_POWER) += axp20x_usb_power.o obj-$(CONFIG_MAX8925_POWER) += max8925_power.o obj-$(CONFIG_WM831X_BACKUP) += wm831x_backup.o obj-$(CONFIG_WM831X_POWER) += wm831x_power.o diff --git a/drivers/power/axp20x_usb_power.c b/drivers/power/axp20x_usb_power.c new file mode 100644 index 000000000000..421a90b83567 --- /dev/null +++ b/drivers/power/axp20x_usb_power.c @@ -0,0 +1,248 @@ +/* + * AXP20x PMIC USB power supply status driver + * + * Copyright (C) 2015 Hans de Goede + * Copyright (C) 2014 Bruno Prémont + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRVNAME "axp20x-usb-power-supply" + +#define AXP20X_PWR_STATUS_VBUS_PRESENT BIT(5) +#define AXP20X_PWR_STATUS_VBUS_USED BIT(4) + +#define AXP20X_USB_STATUS_VBUS_VALID BIT(2) + +#define AXP20X_VBUS_VHOLD_uV(b) (4000000 + (((b) >> 3) & 7) * 100000) +#define AXP20X_VBUS_CLIMIT_MASK 3 +#define AXP20X_VBUC_CLIMIT_900mA 0 +#define AXP20X_VBUC_CLIMIT_500mA 1 +#define AXP20X_VBUC_CLIMIT_100mA 2 +#define AXP20X_VBUC_CLIMIT_NONE 3 + +#define AXP20X_ADC_EN1_VBUS_CURR BIT(2) +#define AXP20X_ADC_EN1_VBUS_VOLT BIT(3) + +#define AXP20X_VBUS_MON_VBUS_VALID BIT(3) + +struct axp20x_usb_power { + struct regmap *regmap; + struct power_supply *supply; +}; + +static irqreturn_t axp20x_usb_power_irq(int irq, void *devid) +{ + struct axp20x_usb_power *power = devid; + + power_supply_changed(power->supply); + + return IRQ_HANDLED; +} + +static int axp20x_usb_power_get_property(struct power_supply *psy, + enum power_supply_property psp, union power_supply_propval *val) +{ + struct axp20x_usb_power *power = power_supply_get_drvdata(psy); + unsigned int input, v; + int ret; + + switch (psp) { + case POWER_SUPPLY_PROP_VOLTAGE_MIN: + ret = regmap_read(power->regmap, AXP20X_VBUS_IPSOUT_MGMT, &v); + if (ret) + return ret; + + val->intval = AXP20X_VBUS_VHOLD_uV(v); + return 0; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + ret = axp20x_read_variable_width(power->regmap, + AXP20X_VBUS_V_ADC_H, 12); + if (ret < 0) + return ret; + + val->intval = ret * 1700; /* 1 step = 1.7 mV */ + return 0; + case POWER_SUPPLY_PROP_CURRENT_MAX: + ret = regmap_read(power->regmap, AXP20X_VBUS_IPSOUT_MGMT, &v); + if (ret) + return ret; + + switch (v & AXP20X_VBUS_CLIMIT_MASK) { + case AXP20X_VBUC_CLIMIT_100mA: + val->intval = 100000; + break; + case AXP20X_VBUC_CLIMIT_500mA: + val->intval = 500000; + break; + case AXP20X_VBUC_CLIMIT_900mA: + val->intval = 900000; + break; + case AXP20X_VBUC_CLIMIT_NONE: + val->intval = -1; + break; + } + return 0; + case POWER_SUPPLY_PROP_CURRENT_NOW: + ret = axp20x_read_variable_width(power->regmap, + AXP20X_VBUS_I_ADC_H, 12); + if (ret < 0) + return ret; + + val->intval = ret * 375; /* 1 
step = 0.375 mA */ + return 0; + default: + break; + } + + /* All the properties below need the input-status reg value */ + ret = regmap_read(power->regmap, AXP20X_PWR_INPUT_STATUS, &input); + if (ret) + return ret; + + switch (psp) { + case POWER_SUPPLY_PROP_HEALTH: + if (!(input & AXP20X_PWR_STATUS_VBUS_PRESENT)) { + val->intval = POWER_SUPPLY_HEALTH_UNKNOWN; + break; + } + + ret = regmap_read(power->regmap, AXP20X_USB_OTG_STATUS, &v); + if (ret) + return ret; + + if (!(v & AXP20X_USB_STATUS_VBUS_VALID)) { + val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE; + break; + } + + val->intval = POWER_SUPPLY_HEALTH_GOOD; + break; + case POWER_SUPPLY_PROP_PRESENT: + val->intval = !!(input & AXP20X_PWR_STATUS_VBUS_PRESENT); + break; + case POWER_SUPPLY_PROP_ONLINE: + val->intval = !!(input & AXP20X_PWR_STATUS_VBUS_USED); + break; + default: + return -EINVAL; + } + + return 0; +} + +static enum power_supply_property axp20x_usb_power_properties[] = { + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_ONLINE, + POWER_SUPPLY_PROP_VOLTAGE_MIN, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_MAX, + POWER_SUPPLY_PROP_CURRENT_NOW, +}; + +static const struct power_supply_desc axp20x_usb_power_desc = { + .name = "axp20x-usb", + .type = POWER_SUPPLY_TYPE_USB, + .properties = axp20x_usb_power_properties, + .num_properties = ARRAY_SIZE(axp20x_usb_power_properties), + .get_property = axp20x_usb_power_get_property, +}; + +static int axp20x_usb_power_probe(struct platform_device *pdev) +{ + struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent); + struct power_supply_config psy_cfg = {}; + struct axp20x_usb_power *power; + static const char * const irq_names[] = { "VBUS_PLUGIN", + "VBUS_REMOVAL", "VBUS_VALID", "VBUS_NOT_VALID" }; + int i, irq, ret; + + if (!of_device_is_available(pdev->dev.of_node)) + return -ENODEV; + + if (!axp20x) { + dev_err(&pdev->dev, "Parent drvdata not set\n"); + return -EINVAL; + } + + power = devm_kzalloc(&pdev->dev, sizeof(*power), GFP_KERNEL); + if (!power) + return -ENOMEM; + + power->regmap = axp20x->regmap; + + /* Enable vbus valid checking */ + ret = regmap_update_bits(power->regmap, AXP20X_VBUS_MON, + AXP20X_VBUS_MON_VBUS_VALID, AXP20X_VBUS_MON_VBUS_VALID); + if (ret) + return ret; + + /* Enable vbus voltage and current measurement */ + ret = regmap_update_bits(power->regmap, AXP20X_ADC_EN1, + AXP20X_ADC_EN1_VBUS_CURR | AXP20X_ADC_EN1_VBUS_VOLT, + AXP20X_ADC_EN1_VBUS_CURR | AXP20X_ADC_EN1_VBUS_VOLT); + if (ret) + return ret; + + psy_cfg.of_node = pdev->dev.of_node; + psy_cfg.drv_data = power; + + power->supply = devm_power_supply_register(&pdev->dev, + &axp20x_usb_power_desc, &psy_cfg); + if (IS_ERR(power->supply)) + return PTR_ERR(power->supply); + + /* Request irqs after registering, as irqs may trigger immediately */ + for (i = 0; i < ARRAY_SIZE(irq_names); i++) { + irq = platform_get_irq_byname(pdev, irq_names[i]); + if (irq < 0) { + dev_warn(&pdev->dev, "No IRQ for %s: %d\n", + irq_names[i], irq); + continue; + } + irq = regmap_irq_get_virq(axp20x->regmap_irqc, irq); + ret = devm_request_any_context_irq(&pdev->dev, irq, + axp20x_usb_power_irq, 0, DRVNAME, power); + if (ret < 0) + dev_warn(&pdev->dev, "Error requesting %s IRQ: %d\n", + irq_names[i], ret); + } + + return 0; +} + +static const struct of_device_id axp20x_usb_power_match[] = { + { .compatible = "x-powers,axp202-usb-power-supply" }, + { } +}; +MODULE_DEVICE_TABLE(of, axp20x_usb_power_match); + +static struct platform_driver axp20x_usb_power_driver = { + .probe = 
axp20x_usb_power_probe, + .driver = { + .name = DRVNAME, + .of_match_table = axp20x_usb_power_match, + }, +}; + +module_platform_driver(axp20x_usb_power_driver); + +MODULE_AUTHOR("Hans de Goede "); +MODULE_DESCRIPTION("AXP20x PMIC USB power supply status driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index cc8ad1e1a307..b24c771cebd5 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -11,6 +11,8 @@ #ifndef __LINUX_MFD_AXP20X_H #define __LINUX_MFD_AXP20X_H +#include + enum { AXP152_ID = 0, AXP202_ID, @@ -438,4 +440,26 @@ struct axp288_extcon_pdata { struct gpio_desc *gpio_mux_cntl; }; +/* generic helper function for reading 9-16 bit wide regs */ +static inline int axp20x_read_variable_width(struct regmap *regmap, + unsigned int reg, unsigned int width) +{ + unsigned int reg_val, result; + int err; + + err = regmap_read(regmap, reg, &reg_val); + if (err) + return err; + + result = reg_val << (width - 8); + + err = regmap_read(regmap, reg + 1, &reg_val); + if (err) + return err; + + result |= reg_val; + + return result; +} + #endif /* __LINUX_MFD_AXP20X_H */ -- cgit v1.2.3 From e3abc8ff0fc18b3925fd5d5c5fbd1613856f4e7c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 16 Aug 2015 11:13:22 +0300 Subject: mac80211: allow to transmit A-MSDU within A-MPDU Advertise the capability to send A-MSDU within A-MPDU in the AddBA request sent by mac80211. Let the driver know about the peer's capabilities. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/carl9170/main.c | 2 +- drivers/net/wireless/ath/wcn36xx/main.c | 2 +- drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c | 2 +- drivers/net/wireless/cw1200/sta.c | 2 +- drivers/net/wireless/cw1200/sta.h | 2 +- drivers/net/wireless/iwlegacy/4965-mac.c | 2 +- drivers/net/wireless/iwlegacy/4965.h | 2 +- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 2 +- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/net/wireless/mediatek/mt7601u/main.c | 3 ++- drivers/net/wireless/mwl8k.c | 2 +- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 4 +++- drivers/net/wireless/rt2x00/rt2800lib.c | 2 +- drivers/net/wireless/rt2x00/rt2800lib.h | 2 +- drivers/net/wireless/rtlwifi/core.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- include/linux/ieee80211.h | 1 + include/net/mac80211.h | 5 ++++- net/mac80211/agg-rx.c | 4 ++-- net/mac80211/agg-tx.c | 15 ++++++++++----- net/mac80211/driver-ops.h | 7 ++++--- net/mac80211/sta_info.h | 2 ++ net/mac80211/trace.h | 10 ++++++---- 27 files changed, 52 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 64674c955d44..b04e7694c105 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6144,7 +6144,7 @@ static int ath10k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = ath10k_vif_to_arvif(vif); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 172a9ff4aaab..a680a970b7f7 100644 ---
a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1659,7 +1659,7 @@ static int ath9k_htc_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, - u16 tid, u16 *ssn, u8 buf_size) + u16 tid, u16 *ssn, u8 buf_size, bool amsdu) { struct ath9k_htc_priv *priv = hw->priv; struct ath9k_htc_sta *ista; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index c27143ba9ffb..323eb33c3c6e 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1856,7 +1856,7 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, - u16 tid, u16 *ssn, u8 buf_size) + u16 tid, u16 *ssn, u8 buf_size, bool amsdu) { struct ath_softc *sc = hw->priv; struct ath_common *common = ath9k_hw_common(sc->sc_ah); diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 170c209f99b8..19d3d64416bf 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1415,7 +1415,7 @@ static int carl9170_op_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, - u16 tid, u16 *ssn, u8 buf_size) + u16 tid, u16 *ssn, u8 buf_size, bool amsdu) { struct ar9170 *ar = hw->priv; struct carl9170_sta_info *sta_info = (void *) sta->drv_priv; diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 900e72a089d8..7c169abdbafe 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -859,7 +859,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct wcn36xx *wcn = hw->priv; struct wcn36xx_sta *sta_priv = NULL; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index d2c5747e3ac9..bec2dc1ca2e4 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -820,7 +820,7 @@ brcms_ops_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct brcms_info *wl = hw->priv; struct scb *scb = &wl->wlc->pri_scb; diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index b86500b4418f..95a7fdb3cc1c 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -2137,7 +2137,7 @@ int cw1200_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { /* Aggregation is implemented fully in firmware, * including block ack negotiation. 
Do not allow diff --git a/drivers/net/wireless/cw1200/sta.h b/drivers/net/wireless/cw1200/sta.h index b7e386b7662b..bebb3379017f 100644 --- a/drivers/net/wireless/cw1200/sta.h +++ b/drivers/net/wireless/cw1200/sta.h @@ -111,7 +111,7 @@ int cw1200_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size); + u8 buf_size, bool amsdu); void cw1200_suspend_resume(struct cw1200_common *priv, struct wsm_suspend_resume *arg); diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 44fa422f255e..6656215a13a9 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -5984,7 +5984,7 @@ int il4965_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 * ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct il_priv *il = hw->priv; int ret = -EINVAL; diff --git a/drivers/net/wireless/iwlegacy/4965.h b/drivers/net/wireless/iwlegacy/4965.h index 3a57f71b8ed5..8ab8706f9422 100644 --- a/drivers/net/wireless/iwlegacy/4965.h +++ b/drivers/net/wireless/iwlegacy/4965.h @@ -184,7 +184,7 @@ void il4965_mac_update_tkip_key(struct ieee80211_hw *hw, int il4965_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 * ssn, - u8 buf_size); + u8 buf_size, bool amsdu); int il4965_mac_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); void diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 453f7c315ab5..b3ad34e8bf5a 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -731,7 +731,7 @@ static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); int ret = -EINVAL; diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index aa8c2b7f23c7..f70452c41d63 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -820,7 +820,7 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn, u8 buf_size) + u16 *ssn, u8 buf_size, bool amsdu) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 520bef80747f..2af2f3d0cc31 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1819,7 +1819,7 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { switch (action) { case IEEE80211_AMPDU_TX_START: diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index 169384b48b27..f715eee39851 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -335,7 +335,8 @@ static int 
mt7601u_set_rts_threshold(struct ieee80211_hw *hw, u32 value) static int mt76_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, - struct ieee80211_sta *sta, u16 tid, u16 *ssn, u8 buf_size) + struct ieee80211_sta *sta, u16 tid, u16 *ssn, u8 buf_size, + bool amsdu) { struct mt7601u_dev *dev = hw->priv; struct mt76_sta *msta = (struct mt76_sta *) sta->drv_priv; diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 9420fc61c2e6..30e3aaae32e2 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -5423,7 +5423,7 @@ static int mwl8k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { int i, rc = 0; diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 7e804324bfa7..b5bcc933a2a6 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -664,6 +664,7 @@ static int rsi_mac80211_set_key(struct ieee80211_hw *hw, * @tid: Traffic identifier. * @ssn: Pointer to ssn value. * @buf_size: Buffer size (for kernel version > 2.6.38). + * @amsdu: is AMSDU in AMPDU allowed * * Return: status: 0 on success, negative error code on failure. */ @@ -673,7 +674,8 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_sta *sta, unsigned short tid, unsigned short *ssn, - unsigned char buf_size) + unsigned char buf_size, + bool amsdu) { int status = -EOPNOTSUPP; struct rsi_hw *adapter = hw->priv; diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 9524564f873b..9733b31a780d 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -7937,7 +7937,7 @@ EXPORT_SYMBOL_GPL(rt2800_get_tsf); int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct rt2x00_sta *sta_priv = (struct rt2x00_sta *)sta->drv_priv; int ret = 0; diff --git a/drivers/net/wireless/rt2x00/rt2800lib.h b/drivers/net/wireless/rt2x00/rt2800lib.h index 1609b8a7f7eb..440790b92b19 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.h +++ b/drivers/net/wireless/rt2x00/rt2800lib.h @@ -220,7 +220,7 @@ u64 rt2800_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size); + u8 buf_size, bool amsdu); int rt2800_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey); void rt2800_disable_wpdma(struct rt2x00_dev *rt2x00dev); diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index 585d0883c7e5..c925a4dff599 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1373,7 +1373,7 @@ static int rtl_op_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct rtl_priv *rtlpriv = rtl_priv(hw); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index e819369d8f8f..ec7f6af3fab2 100644 --- 
a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5263,7 +5263,7 @@ static int wl1271_op_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size) + u8 buf_size, bool amsdu) { struct wl1271 *wl = hw->priv; struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vif); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cfa906f28b7a..19eb9ecd6cf3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1379,6 +1379,7 @@ struct ieee80211_ht_operation { /* block-ack parameters */ +#define IEEE80211_ADDBA_PARAM_AMSDU_MASK 0x0001 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C #define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 167864503138..f28cbc0988eb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3026,6 +3026,9 @@ enum ieee80211_reconfig_type { * buffer size of 8. Correct ways to retransmit #1 would be: * - TX: 1 or 18 or 81 * Even "189" would be wrong since 1 could be lost again. + * The @amsdu parameter is valid when the action is set to + * %IEEE80211_AMPDU_TX_OPERATIONAL and indicates the peer's ability + * to receive A-MSDU within A-MPDU. * * Returns a negative error code on failure. * The callback can sleep. @@ -3363,7 +3366,7 @@ struct ieee80211_ops { struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size); + u8 buf_size, bool amsdu); int (*get_survey)(struct ieee80211_hw *hw, int idx, struct survey_info *survey); void (*rfkill_poll)(struct ieee80211_hw *hw); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 5c564a68fb50..6ebe8611eca5 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -79,7 +79,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, (int)reason); if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, - &sta->sta, tid, NULL, 0)) + &sta->sta, tid, NULL, 0, false)) sdata_info(sta->sdata, "HW problem - can not stop rx aggregation for %pM tid %d\n", sta->sta.addr, tid); @@ -321,7 +321,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]); ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, - &sta->sta, tid, &start_seq_num, 0); + &sta->sta, tid, &start_seq_num, 0, false); ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", sta->sta.addr, tid, ret); if (ret) { diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c8ba2e77737c..a758eb84e8f0 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -97,7 +97,8 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; mgmt->u.action.u.addba_req.dialog_token = dialog_token; - capab = (u16)(1 << 1); /* bit 1 aggregation policy */ + capab = (u16)(1 << 0); /* bit 0 A-MSDU support */ + capab |= (u16)(1 << 1); /* bit 1 aggregation policy */ capab |= (u16)(tid << 2); /* bit 5:2 TID number */ capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ @@ -331,7 +332,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -EALREADY; ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, - &sta->sta, tid, NULL, 0); + &sta->sta, tid, NULL, 0, false); 
WARN_ON_ONCE(ret); return 0; } @@ -381,7 +382,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; ret = drv_ampdu_action(local, sta->sdata, action, - &sta->sta, tid, NULL, 0); + &sta->sta, tid, NULL, 0, false); /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { @@ -469,7 +470,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num, 0); + &sta->sta, tid, &start_seq_num, 0, false); if (ret) { ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", @@ -693,7 +694,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, - &sta->sta, tid, NULL, tid_tx->buf_size); + &sta->sta, tid, NULL, tid_tx->buf_size, + tid_tx->amsdu); /* * synchronize with TX path, while splicing the TX path @@ -918,8 +920,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, struct tid_ampdu_tx *tid_tx; u16 capab, tid; u8 buf_size; + bool amsdu; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); + amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; @@ -968,6 +972,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } tid_tx->buf_size = buf_size; + tid_tx->amsdu = amsdu; if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 157b20baf752..31482e2cd25f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -734,7 +734,7 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn, u8 buf_size) + u16 *ssn, u8 buf_size, bool amsdu) { int ret = -EOPNOTSUPP; @@ -744,11 +744,12 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; - trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); + trace_drv_ampdu_action(local, sdata, action, sta, tid, + ssn, buf_size, amsdu); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, - sta, tid, ssn, buf_size); + sta, tid, ssn, buf_size, amsdu); trace_drv_return_int(local, ret); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index b087c71ff7fe..d5ded8749ac4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -133,6 +133,7 @@ enum ieee80211_agg_stop_reason { * @buf_size: reorder buffer size at receiver * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent + * @amsdu: support A-MSDU within A-MPDU * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex.
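To make the ADDBA parameter-set layout concrete, here is a hedged, self-contained sketch (hypothetical helpers, not part of the patch) that packs the field the way ieee80211_send_addba_request() does above and parses it the way ieee80211_process_addba_resp() does:

#include <linux/ieee80211.h>
#include <linux/printk.h>

/* illustration only: bit 0 A-MSDU, bit 1 BA policy, 5:2 TID, 15:6 buf size */
static u16 example_pack_addba_params(bool amsdu, u8 tid, u16 buf_size)
{
	u16 capab = 0;

	if (amsdu)
		capab |= 1 << 0;		/* A-MSDU supported */
	capab |= 1 << 1;			/* immediate block-ack policy */
	capab |= (tid & 0xf) << 2;		/* TID number */
	capab |= (buf_size & 0x3ff) << 6;	/* max aggregation size */

	return capab;
}

static void example_parse_addba_params(u16 capab)
{
	bool amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
	u16 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
	u16 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;

	pr_info("amsdu=%d tid=%u buf_size=%u\n", amsdu, tid, buf_size);
}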
@@ -158,6 +159,7 @@ struct tid_ampdu_tx { u16 failed_bar_ssn; bool bar_pending; + bool amsdu; }; /** diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index b5960b948f60..314e3bd7fbdb 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -974,9 +974,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn, u8 buf_size), + u16 *ssn, u8 buf_size, bool amsdu), - TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size), + TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size, amsdu), TP_STRUCT__entry( LOCAL_ENTRY @@ -985,6 +985,7 @@ TRACE_EVENT(drv_ampdu_action, __field(u16, tid) __field(u16, ssn) __field(u8, buf_size) + __field(bool, amsdu) VIF_ENTRY ), @@ -996,12 +997,13 @@ TRACE_EVENT(drv_ampdu_action, __entry->tid = tid; __entry->ssn = ssn ? *ssn : 0; __entry->buf_size = buf_size; + __entry->amsdu = amsdu; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d", + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d amsdu:%d", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, - __entry->tid, __entry->buf_size + __entry->tid, __entry->buf_size, __entry->amsdu ) ); -- cgit v1.2.3 From 0edd5faeb07bfd3ec5402f9467e4c169dcd131e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Aug 2015 14:31:48 +0200 Subject: wireless: mark element IDs 8 and 9 reserved These were never used in the tree, and are marked as reserved in the IEEE 802.11 documentation (ANA). Signed-off-by: Johannes Berg --- drivers/net/wireless/ipw2x00/libipw_rx.c | 2 -- include/linux/ieee80211.h | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ipw2x00/libipw_rx.c b/drivers/net/wireless/ipw2x00/libipw_rx.c index a6877dd6ba73..cef7f7d79cd9 100644 --- a/drivers/net/wireless/ipw2x00/libipw_rx.c +++ b/drivers/net/wireless/ipw2x00/libipw_rx.c @@ -1091,8 +1091,6 @@ static const char *get_info_element_string(u16 id) MFIE_STRING(TIM); MFIE_STRING(IBSS_PARAMS); MFIE_STRING(COUNTRY); - MFIE_STRING(HP_PARAMS); - MFIE_STRING(HP_TABLE); MFIE_STRING(REQUEST); MFIE_STRING(CHALLENGE); MFIE_STRING(PWR_CONSTRAINT); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 19eb9ecd6cf3..f79a02a69d26 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1746,8 +1746,7 @@ enum ieee80211_eid { WLAN_EID_TIM = 5, WLAN_EID_IBSS_PARAMS = 6, WLAN_EID_COUNTRY = 7, - WLAN_EID_HP_PARAMS = 8, - WLAN_EID_HP_TABLE = 9, + /* 8, 9 reserved */ WLAN_EID_REQUEST = 10, WLAN_EID_QBSS_LOAD = 11, WLAN_EID_EDCA_PARAM_SET = 12, -- cgit v1.2.3 From 75ea8ca844fd35ce365e35e1617b239892d31f72 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Sun, 20 Sep 2015 00:13:37 +0200 Subject: power: charger-manager: comment spelling fixes By accident I stumbled over a few misspelled words in the charger-manager header file which this patch fixes. Namely: - Extcon rather than Exton - constraint rather than constratint - existence rather than existance - difference rather than diffential While at it also add a missing space before a closing comment star forward-slash. 
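For context on the @temp_diff field whose comment is corrected below: it acts as a hysteresis, so charging that was stopped by a temperature cutoff only restarts once the temperature has moved back by at least that amount. A hedged sketch of the idea (hypothetical helper, not charger-manager's actual code):

/* illustration only: restart hysteresis around the charging window */
static bool example_can_restart_charging(int temp, int temp_min, int temp_max,
					 int temp_diff)
{
	/* assumes the window is wider than 2 * temp_diff */
	return temp <= temp_max - temp_diff &&
	       temp >= temp_min + temp_diff;
}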
Signed-off-by: Marcel Ziswiler Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index eadf28cb2fc9..c4fa907c8f14 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -65,7 +65,7 @@ struct charger_cable { const char *extcon_name; const char *name; - /* The charger-manager use Exton framework*/ + /* The charger-manager use Extcon framework */ struct extcon_specific_cable_nb extcon_dev; struct work_struct wq; struct notifier_block nb; @@ -94,7 +94,7 @@ struct charger_cable { * the charger will be maintained with disabled state. * @cables: * the array of charger cables to enable/disable charger - * and set current limit according to constratint data of + * and set current limit according to constraint data of * struct charger_cable if only charger cable included * in the array of charger cables is attached/detached. * @num_cables: the number of charger cables. @@ -148,7 +148,7 @@ struct charger_regulator { * @polling_interval_ms: interval in millisecond at which * charger manager will monitor battery health * @battery_present: - * Specify where information for existance of battery can be obtained + * Specify where information for existence of battery can be obtained * @psy_charger_stat: the names of power-supply for chargers * @num_charger_regulator: the number of entries in charger_regulators * @charger_regulators: array of charger regulators @@ -156,7 +156,7 @@ struct charger_regulator { * @thermal_zone : the name of thermal zone for battery * @temp_min : Minimum battery temperature for charging. * @temp_max : Maximum battery temperature for charging. - * @temp_diff : Temperature diffential to restart charging. + * @temp_diff : Temperature difference to restart charging. * @measure_battery_temp: * true: measure battery temperature * false: measure ambient temperature -- cgit v1.2.3 From 4530eddb59494b89650d6bcd980fc7f7717ad80c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Sep 2015 15:00:19 -0400 Subject: cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader() It wasn't explicitly documented but, when a process is being migrated, cpuset and memcg depend on cgroup_taskset_first() returning the threadgroup leader; however, this approach is somewhat fragile and would no longer work for the planned multi-process migration. This patch introduces explicit cgroup_taskset_for_each_leader() which iterates over only the threadgroup leaders and replaces cgroup_taskset_first() usages for accessing the leader with it. This prepares both memcg and cpuset for multi-process migration. This patch also updates the documentation for cgroup_taskset_for_each() to clarify the iteration rules and removes comments mentioning task ordering in tasksets. v2: A previous patch which added a threadgroup leader test was dropped. Patch updated accordingly.
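A usage sketch of the new macro (hypothetical controller ->attach() callback, not part of the patch), doing per-process work exactly once per threadgroup, in the same shape as the cpuset conversion below:

#include <linux/cgroup.h>
#include <linux/sched.h>

static void example_attach(struct cgroup_subsys_state *css,
			   struct cgroup_taskset *tset)
{
	struct task_struct *leader;

	/* visits only threadgroup leaders; other threads are skipped */
	cgroup_taskset_for_each_leader(leader, tset) {
		struct mm_struct *mm = get_task_mm(leader);

		if (mm) {
			/* per-process work goes here */
			mmput(mm);
		}
	}
}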
Signed-off-by: Tejun Heo Acked-by: Zefan Li Acked-by: Michal Hocko Cc: Johannes Weiner --- include/linux/cgroup.h | 22 ++++++++++++++++++++++ kernel/cgroup.c | 11 ----------- kernel/cpuset.c | 9 ++++----- mm/memcontrol.c | 17 +++++++++++++++-- 4 files changed, 41 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fb717f2cba5b..e9c3eac074e2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -232,11 +232,33 @@ void css_task_iter_end(struct css_task_iter *it); * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor * @tset: taskset to iterate + * + * @tset may contain multiple tasks and they may belong to multiple + * processes. When there are multiple tasks in @tset, if a task of a + * process is in @tset, all tasks of the process are in @tset. Also, all + * are guaranteed to share the same source and destination csses. + * + * Iteration is not in any specific order. */ #define cgroup_taskset_for_each(task, tset) \ for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) +/** + * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset + * @leader: the loop cursor + * @tset: taskset to iterate + * + * Iterate threadgroup leaders of @tset. For single-task migrations, @tset + * may not contain any. + */ +#define cgroup_taskset_for_each_leader(leader, tset) \ + for ((leader) = cgroup_taskset_first((tset)); (leader); \ + (leader) = cgroup_taskset_next((tset))) \ + if ((leader) != (leader)->group_leader) \ + ; \ + else + /* * Inline functions. */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0be276ffe08a..7f4b85af03dc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2217,13 +2217,6 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, get_css_set(new_cset); rcu_assign_pointer(tsk->cgroups, new_cset); - - /* - * Use move_tail so that cgroup_taskset_first() still returns the - * leader after migration. This works because cgroup_migrate() - * ensures that the dst_cset of the leader is the first on the - * tset's dst_csets list. - */ list_move_tail(&tsk->cg_list, &new_cset->mg_tasks); /* @@ -2419,10 +2412,6 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader, if (!cset->mg_src_cgrp) goto next; - /* - * cgroup_taskset_first() must always return the leader. - * Take care to avoid disturbing the ordering. - */ list_move_tail(&task->cg_list, &cset->mg_tasks); if (list_empty(&cset->mg_node)) list_add_tail(&cset->mg_node, &tset.src_csets); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0b361a0b58f6..e4d999929903 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1488,7 +1488,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, /* static buf protected by cpuset_mutex */ static nodemask_t cpuset_attach_nodemask_to; struct task_struct *task; - struct task_struct *leader = cgroup_taskset_first(tset); + struct task_struct *leader; struct cpuset *cs = css_cs(css); struct cpuset *oldcs = cpuset_attach_old_cs; @@ -1514,12 +1514,11 @@ static void cpuset_attach(struct cgroup_subsys_state *css, } /* - * Change mm, possibly for multiple threads in a threadgroup. This - * is expensive and may sleep and should be moved outside migration - * path proper. + * Change mm for all threadgroup leaders. This is expensive and may + * sleep and should be moved outside migration path proper.
*/ cpuset_attach_nodemask_to = cs->effective_mems; - if (thread_group_leader(leader)) { + cgroup_taskset_for_each_leader(leader, tset) { struct mm_struct *mm = get_task_mm(leader); if (mm) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9f331402e502..33c8dad6830f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4828,7 +4828,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup *from; - struct task_struct *p; + struct task_struct *leader, *p; struct mm_struct *mm; unsigned long move_flags; int ret = 0; @@ -4842,7 +4842,20 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, if (!move_flags) return 0; - p = cgroup_taskset_first(tset); + /* + * Multi-process migrations only happen on the default hierarchy + * where charge immigration is not used. Perform charge + * immigration if @tset contains a leader and whine if there are + * multiple. + */ + p = NULL; + cgroup_taskset_for_each_leader(leader, tset) { + WARN_ON_ONCE(p); + p = leader; + } + if (!p) + return 0; + from = mem_cgroup_from_task(p); VM_BUG_ON(from == memcg); -- cgit v1.2.3 From 4ad2ddce1a096dea71d42969515d3a64f0d9246f Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Tue, 25 Aug 2015 21:10:05 +0200 Subject: usb: interface authorization: Declare authorized attribute The attribute authorized shows the authorization state for an interface. Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 447fe29b55b4..3deccab2ce0a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -178,6 +178,7 @@ struct usb_interface { unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ unsigned resetting_device:1; /* true: bandwidth alloc after reset */ + unsigned authorized:1; /* used for interface authorization */ struct device dev; /* interface specific device info */ struct device *usb_dev; -- cgit v1.2.3 From 6b2bd3c8c69c4817a9a2feb4597021d486c105f4 Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Tue, 25 Aug 2015 21:10:06 +0200 Subject: usb: interface authorization: Introduces the default interface authorization Interfaces are allowed per default. 
This can be disabled or enabled again by writing 0 or 1 to /sys/bus/usb/devices/usbX/interface_authorized_default Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/usb/core/message.c | 1 + include/linux/usb/hcd.h | 9 +++++++++ 3 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 4d64e5c499e1..83df1dde9c08 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -882,9 +882,53 @@ static ssize_t authorized_default_store(struct device *dev, } static DEVICE_ATTR_RW(authorized_default); +/* + * interface_authorized_default_show - show default authorization status + * for USB interfaces + * + * note: interface_authorized_default is the default value + * for initializing the authorized attribute of interfaces + */ +static ssize_t interface_authorized_default_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_device *usb_dev = to_usb_device(dev); + struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); + + return sprintf(buf, "%u\n", !!HCD_INTF_AUTHORIZED(hcd)); +} + +/* + * interface_authorized_default_store - store default authorization status + * for USB interfaces + * + * note: interface_authorized_default is the default value + * for initializing the authorized attribute of interfaces + */ +static ssize_t interface_authorized_default_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct usb_device *usb_dev = to_usb_device(dev); + struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); + int rc = count; + bool val; + + if (strtobool(buf, &val) != 0) + return -EINVAL; + + if (val) + set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); + else + clear_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); + + return rc; +} +static DEVICE_ATTR_RW(interface_authorized_default); + /* Group all the USB bus attributes */ static struct attribute *usb_bus_attrs[] = { &dev_attr_authorized_default.attr, + &dev_attr_interface_authorized_default.attr, NULL, }; @@ -2682,6 +2726,9 @@ int usb_add_hcd(struct usb_hcd *hcd, hcd->authorized_default = authorized_default; set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); + /* per default all interfaces are authorized */ + set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); + /* HC is in reset state, but accessible. Now do the one-time init, * bottom up so that hcds can customize the root hubs before hub_wq * starts talking to them. (Note, bus id is assigned early too.) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index f368d2053da5..3d25d89671d7 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1807,6 +1807,7 @@ free_interfaces: intfc = cp->intf_cache[i]; intf->altsetting = intfc->altsetting; intf->num_altsetting = intfc->num_altsetting; + intf->authorized = !!HCD_INTF_AUTHORIZED(hcd); kref_get(&intfc->ref); alt = usb_altnum_to_altsetting(intf, 0); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index d2784c10bfe2..36ce3d215cad 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -120,6 +120,7 @@ struct usb_hcd { #define HCD_FLAG_WAKEUP_PENDING 4 /* root hub is resuming? */ #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ +#define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit().
@@ -131,6 +132,14 @@ struct usb_hcd { #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) + /* + * Specifies if interfaces are authorized by default + * or they require explicit user space authorization; this bit is + * settable through /sys/class/usb_host/X/interface_authorized_default + */ +#define HCD_INTF_AUTHORIZED(hcd) \ + ((hcd)->flags & (1U << HCD_FLAG_INTF_AUTHORIZED)) + /* Flags that get set only during HCD registration or removal. */ unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ -- cgit v1.2.3 From ff8e2c560eca32043ed097099debac488a4bd99f Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Tue, 25 Aug 2015 21:10:11 +0200 Subject: usb: interface authorization: Use a flag for the default device authorization With this patch a flag instead of a variable is used for the default device authorization. Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 31 +++++++++++++++++++++---------- drivers/usb/core/usb.c | 2 +- include/linux/usb/hcd.h | 16 +++++++++------- 3 files changed, 31 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 83df1dde9c08..86b3d1190500 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -854,10 +854,10 @@ static ssize_t authorized_default_show(struct device *dev, { struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; - struct usb_hcd *usb_hcd; + struct usb_hcd *hcd; - usb_hcd = bus_to_hcd(usb_bus); - return snprintf(buf, PAGE_SIZE, "%u\n", usb_hcd->authorized_default); + hcd = bus_to_hcd(usb_bus); + return snprintf(buf, PAGE_SIZE, "%u\n", !!HCD_DEV_AUTHORIZED(hcd)); } static ssize_t authorized_default_store(struct device *dev, @@ -868,12 +868,16 @@ static ssize_t authorized_default_store(struct device *dev, unsigned val; struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; - struct usb_hcd *usb_hcd; + struct usb_hcd *hcd; - usb_hcd = bus_to_hcd(usb_bus); + hcd = bus_to_hcd(usb_bus); result = sscanf(buf, "%u\n", &val); if (result == 1) { - usb_hcd->authorized_default = val ? 1 : 0; + if (val) + set_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags); + else + clear_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags); + result = size; } else { result = -EINVAL; @@ -2720,10 +2724,17 @@ int usb_add_hcd(struct usb_hcd *hcd, dev_info(hcd->self.controller, "%s\n", hcd->product_desc); /* Keep old behaviour if authorized_default is not in [0, 1]. */ - if (authorized_default < 0 || authorized_default > 1) - hcd->authorized_default = hcd->wireless ? 
0 : 1; - else - hcd->authorized_default = authorized_default; + if (authorized_default < 0 || authorized_default > 1) { + if (hcd->wireless) + clear_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags); + else + set_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags); + } else { + if (authorized_default) + set_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags); + else + clear_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags); + } set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); /* per default all interfaces are authorized */ diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 8d5b2f4113cd..f8bbd0b6d9fe 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -510,7 +510,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, if (root_hub) /* Root hub always ok [and always wired] */ dev->authorized = 1; else { - dev->authorized = usb_hcd->authorized_default; + dev->authorized = !!HCD_DEV_AUTHORIZED(usb_hcd); dev->wusb = usb_bus_is_wusb(bus) ? 1 : 0; } return dev; diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 36ce3d215cad..4d147277b30d 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -58,12 +58,6 @@ * * Since "struct usb_bus" is so thin, you can't share much code in it. * This framework is a layer over that, and should be more sharable. - * - * @authorized_default: Specifies if new devices are authorized to - * connect by default or they require explicit - * user space authorization; this bit is settable - * through /sys/class/usb_host/X/authorized_default. - * For the rest is RO, so we don't lock to r/w it. */ /*-------------------------------------------------------------------------*/ @@ -121,6 +115,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEV_AUTHORIZED 8 /* authorize devices? */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -140,6 +135,14 @@ struct usb_hcd { #define HCD_INTF_AUTHORIZED(hcd) \ ((hcd)->flags & (1U << HCD_FLAG_INTF_AUTHORIZED)) + /* + * Specifies if devices are authorized by default + * or they require explicit user space authorization; this bit is + * settable through /sys/class/usb_host/X/authorized_default + */ +#define HCD_DEV_AUTHORIZED(hcd) \ + ((hcd)->flags & (1U << HCD_FLAG_DEV_AUTHORIZED)) + /* Flags that get set only during HCD registration or removal. */ unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ @@ -150,7 +153,6 @@ struct usb_hcd { * support the new root-hub polling mechanism. */ unsigned uses_new_polling:1; unsigned wireless:1; /* Wireless USB HCD */ - unsigned authorized_default:1; unsigned has_tt:1; /* Integrated TT in root hub */ unsigned amd_resume_bug:1; /* AMD remote wakeup quirk */ unsigned can_do_streams:1; /* HC supports streams */ -- cgit v1.2.3 From 7dc87ff8815ef43717c936faea79013855e3dbef Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Sep 2015 10:54:59 +0800 Subject: ARM: imx7d: add imx7d iomux-gpr field define Add imx7d iomux-gpr field define. 
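A hedged sketch of how a driver would typically consume these defines through the syscon regmap (hypothetical usage, not part of this patch; the "fsl,imx7d-iomuxc-gpr" compatible string is assumed):

#include <linux/err.h>
#include <linux/mfd/syscon.h>
#include <linux/mfd/syscon/imx7-iomuxc-gpr.h>
#include <linux/regmap.h>

/* illustration only: flip the ENET1 TX clock select bit in GPR1 */
static int example_select_enet1_tx_clk(void)
{
	struct regmap *gpr;

	gpr = syscon_regmap_lookup_by_compatible("fsl,imx7d-iomuxc-gpr");
	if (IS_ERR(gpr))
		return PTR_ERR(gpr);

	return regmap_update_bits(gpr, IOMUXC_GPR1,
				  IMX7D_GPR1_ENET1_TX_CLK_SEL_MASK,
				  IMX7D_GPR1_ENET1_TX_CLK_SEL_MASK);
}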
Signed-off-by: Fugang Duan Signed-off-by: Shawn Guo --- include/linux/mfd/syscon/imx7-iomuxc-gpr.h | 47 ++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/mfd/syscon/imx7-iomuxc-gpr.h (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h new file mode 100644 index 000000000000..4585d6105d68 --- /dev/null +++ b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_IMX7_IOMUXC_GPR_H +#define __LINUX_IMX7_IOMUXC_GPR_H + +#define IOMUXC_GPR0 0x00 +#define IOMUXC_GPR1 0x04 +#define IOMUXC_GPR2 0x08 +#define IOMUXC_GPR3 0x0c +#define IOMUXC_GPR4 0x10 +#define IOMUXC_GPR5 0x14 +#define IOMUXC_GPR6 0x18 +#define IOMUXC_GPR7 0x1c +#define IOMUXC_GPR8 0x20 +#define IOMUXC_GPR9 0x24 +#define IOMUXC_GPR10 0x28 +#define IOMUXC_GPR11 0x2c +#define IOMUXC_GPR12 0x30 +#define IOMUXC_GPR13 0x34 +#define IOMUXC_GPR14 0x38 +#define IOMUXC_GPR15 0x3c +#define IOMUXC_GPR16 0x40 +#define IOMUXC_GPR17 0x44 +#define IOMUXC_GPR18 0x48 +#define IOMUXC_GPR19 0x4c +#define IOMUXC_GPR20 0x50 +#define IOMUXC_GPR21 0x54 +#define IOMUXC_GPR22 0x58 + +/* For imx7d iomux gpr register field define */ +#define IMX7D_GPR1_IRQ_MASK (0x1 << 12) +#define IMX7D_GPR1_ENET1_TX_CLK_SEL_MASK (0x1 << 13) +#define IMX7D_GPR1_ENET2_TX_CLK_SEL_MASK (0x1 << 14) +#define IMX7D_GPR1_ENET_TX_CLK_SEL_MASK (0x3 << 13) +#define IMX7D_GPR1_ENET1_CLK_DIR_MASK (0x1 << 17) +#define IMX7D_GPR1_ENET2_CLK_DIR_MASK (0x1 << 18) +#define IMX7D_GPR1_ENET_CLK_DIR_MASK (0x3 << 17) + +#define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI (0x1 << 4) + +#endif /* __LINUX_IMX7_IOMUXC_GPR_H */ -- cgit v1.2.3 From cb334648a10c7fa6f0f163c22602f4dc1c6d56b4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:47 -0700 Subject: arcnet: Use normal kernel spacing style Standardized spacing is easier to read. git diff -w shows no differences. objdiff shows no differences. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- drivers/net/arcnet/arc-rawmode.c | 10 +- drivers/net/arcnet/arc-rimi.c | 30 ++-- drivers/net/arcnet/arcnet.c | 170 ++++++++++----------- drivers/net/arcnet/capmode.c | 36 ++--- drivers/net/arcnet/com20020-isa.c | 2 +- drivers/net/arcnet/com20020-pci.c | 2 +- drivers/net/arcnet/com20020.c | 38 ++--- drivers/net/arcnet/com20020_cs.c | 306 +++++++++++++++++++------------------- drivers/net/arcnet/com90io.c | 24 +-- drivers/net/arcnet/com90xx.c | 32 ++-- drivers/net/arcnet/rfc1051.c | 12 +- drivers/net/arcnet/rfc1201.c | 36 ++--- include/linux/arcdevice.h | 90 +++++------ 13 files changed, 394 insertions(+), 394 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index 705e6ce2eb90..49f5819a0367 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - "raw mode" packet encapsulation (no soft headers) - * + * * Written 1994-1999 by Avery Pennarun. * Derived from skeleton.c by Donald Becker. 
* @@ -109,7 +109,7 @@ static void rx(struct net_device *dev, int bufnum, skb_put(skb, length + ARC_HDR_SIZE); skb->dev = dev; - pkt = (struct archdr *) skb->data; + pkt = (struct archdr *)skb->data; skb_reset_mac_header(skb); skb_pull(skb, ARC_HDR_SIZE); @@ -136,7 +136,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, uint8_t daddr) { int hdr_size = ARC_HDR_SIZE; - struct archdr *pkt = (struct archdr *) skb_push(skb, hdr_size); + struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size); /* * Set the source hardware address. @@ -150,7 +150,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, /* see linux/net/ethernet/eth.c to see where I got the following */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - /* + /* * FIXME: fill in the last byte of the dest ipaddr here to better * comply with RFC1051 in "noarp" mode. */ @@ -192,7 +192,7 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, hard->offset[0] = ofs = 256 - length; BUGMSG(D_DURING, "prepare_tx: length=%d ofs=%d\n", - length,ofs); + length, ofs); lp->hw.copy_to_card(dev, bufnum, 0, hard, ARC_HDR_SIZE); lp->hw.copy_to_card(dev, bufnum, ofs, &pkt->soft, length); diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index b8b4c7ba884f..4644d46b6a5f 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - "RIM I" (entirely mem-mapped) cards - * + * * Written 1994-1999 by Avery Pennarun. * Written 1999-2000 by Martin Mares . * Derived from skeleton.c by Donald Becker. @@ -56,27 +56,27 @@ static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offse /* Handy defines for ARCnet specific stuff */ /* Amount of I/O memory used by the card */ -#define BUFFER_SIZE (512) -#define MIRROR_SIZE (BUFFER_SIZE*4) +#define BUFFER_SIZE (512) +#define MIRROR_SIZE (BUFFER_SIZE * 4) /* COM 9026 controller chip --> ARCnet register addresses */ -#define _INTMASK (ioaddr+0) /* writable */ -#define _STATUS (ioaddr+0) /* readable */ -#define _COMMAND (ioaddr+1) /* writable, returns random vals on read (?) */ -#define _RESET (ioaddr+8) /* software reset (on read) */ -#define _MEMDATA (ioaddr+12) /* Data port for IO-mapped memory */ -#define _ADDR_HI (ioaddr+15) /* Control registers for said */ -#define _ADDR_LO (ioaddr+14) -#define _CONFIG (ioaddr+2) /* Configuration register */ +#define _INTMASK (ioaddr + 0) /* writable */ +#define _STATUS (ioaddr + 0) /* readable */ +#define _COMMAND (ioaddr + 1) /* writable, returns random vals on read (?) 
*/ +#define _RESET (ioaddr + 8) /* software reset (on read) */ +#define _MEMDATA (ioaddr + 12) /* Data port for IO-mapped memory */ +#define _ADDR_HI (ioaddr + 15) /* Control registers for said */ +#define _ADDR_LO (ioaddr + 14) +#define _CONFIG (ioaddr + 2) /* Configuration register */ #undef ASTATUS #undef ACOMMAND #undef AINTMASK #define ASTATUS() readb(_STATUS) -#define ACOMMAND(cmd) writeb((cmd),_COMMAND) -#define AINTMASK(msk) writeb((msk),_INTMASK) -#define SETCONF() writeb(lp->config,_CONFIG) +#define ACOMMAND(cmd) writeb((cmd), _COMMAND) +#define AINTMASK(msk) writeb((msk), _INTMASK) +#define SETCONF() writeb(lp->config, _CONFIG) /* @@ -90,7 +90,7 @@ static int __init arcrimi_probe(struct net_device *dev) BUGLVL(D_NORMAL) printk("E-mail me if you actually test the RIM I driver, please!\n"); BUGLVL(D_NORMAL) printk("Given: node %02Xh, shmem %lXh, irq %d\n", - dev->dev_addr[0], dev->mem_start, dev->irq); + dev->dev_addr[0], dev->mem_start, dev->irq); if (dev->mem_start <= 0 || dev->irq <= 0) { BUGLVL(D_NORMAL) printk("No autoprobe for RIM I; you " diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 10f71c732b59..2a594d1c4b55 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - device-independent routines - * + * * Written 1997 by David Woodhouse. * Written 1994-1999 by Avery Pennarun. * Written 1999-2000 by Martin Mares . @@ -20,12 +20,12 @@ * modified by SRC, incorporated herein by reference. * * ********************** - * + * * The change log is now in a file called ChangeLog in this directory. * * Sources: * - Crynwr arcnet.com/arcether.com packet drivers. - * - arcnet.c v0.00 dated 1/1/94 and apparently by + * - arcnet.c v0.00 dated 1/1/94 and apparently by * Donald Becker - it didn't work :) * - skeleton.c v0.05 dated 11/16/93 by Donald Becker * (from Linux Kernel 1.1.45) @@ -69,8 +69,8 @@ static void arcnet_rx(struct net_device *dev, int bufnum); * arc_proto_default instead. It also must not be NULL; if you would like * to set it to NULL, set it to &arc_proto_null instead. 
*/ - struct ArcProto *arc_proto_map[256], *arc_proto_default, - *arc_bcast_proto, *arc_raw_proto; +struct ArcProto *arc_proto_map[256], *arc_proto_default, + *arc_bcast_proto, *arc_raw_proto; static struct ArcProto arc_proto_null = { @@ -136,8 +136,8 @@ static int __init arcnet_init(void) BUGLVL(D_DURING) printk("arcnet: struct sizes: %Zd %Zd %Zd %Zd %Zd\n", - sizeof(struct arc_hardware), sizeof(struct arc_rfc1201), - sizeof(struct arc_rfc1051), sizeof(struct arc_eth_encap), + sizeof(struct arc_hardware), sizeof(struct arc_rfc1201), + sizeof(struct arc_rfc1051), sizeof(struct arc_eth_encap), sizeof(struct archdr)); return 0; @@ -184,11 +184,11 @@ static void arcnet_dump_packet(struct net_device *dev, int bufnum, /* hw.copy_from_card expects IRQ context so take the IRQ lock to keep it single threaded */ - if(take_arcnet_lock) + if (take_arcnet_lock) spin_lock_irqsave(&lp->lock, flags); lp->hw.copy_from_card(dev, bufnum, 0, buf, 512); - if(take_arcnet_lock) + if (take_arcnet_lock) spin_unlock_irqrestore(&lp->lock, flags); /* if the offset[0] byte is nonzero, this is a 256-byte packet */ @@ -202,7 +202,7 @@ static void arcnet_dump_packet(struct net_device *dev, int bufnum, #else -#define arcnet_dump_packet(dev, bufnum, desc,take_arcnet_lock) do { } while (0) +#define arcnet_dump_packet(dev, bufnum, desc, take_arcnet_lock) do { } while (0) #endif @@ -233,7 +233,7 @@ void arcnet_unregister_proto(struct ArcProto *proto) /* * Add a buffer to the queue. Only the interrupt handler is allowed to do * this, unless interrupts are disabled. - * + * * Note: we don't check for a full queue, since there aren't enough buffers * to more than fill it. */ @@ -248,7 +248,7 @@ static void release_arcbuf(struct net_device *dev, int bufnum) BUGLVL(D_DURING) { BUGMSG(D_DURING, "release_arcbuf: freed #%d; buffer queue is now: ", bufnum); - for (i = lp->next_buf; i != lp->first_free_buf; i = (i+1) % 5) + for (i = lp->next_buf; i != lp->first_free_buf; i = (i + 1) % 5) BUGMSG2(D_DURING, "#%d ", lp->buf_queue[i]); BUGMSG2(D_DURING, "\n"); } @@ -284,7 +284,7 @@ static int get_arcbuf(struct net_device *dev) BUGLVL(D_DURING) { BUGMSG(D_DURING, "get_arcbuf: got #%d; buffer queue is now: ", buf); - for (i = lp->next_buf; i != lp->first_free_buf; i = (i+1) % 5) + for (i = lp->next_buf; i != lp->first_free_buf; i = (i + 1) % 5) BUGMSG2(D_DURING, "#%d ", lp->buf_queue[i]); BUGMSG2(D_DURING, "\n"); } @@ -346,7 +346,7 @@ struct net_device *alloc_arcdev(const char *name) dev = alloc_netdev(sizeof(struct arcnet_local), name && *name ? name : "arc%d", NET_NAME_UNKNOWN, arcdev_setup); - if(dev) { + if (dev) { struct arcnet_local *lp = netdev_priv(dev); spin_lock_init(&lp->lock); } @@ -367,7 +367,7 @@ int arcnet_open(struct net_device *dev) struct arcnet_local *lp = netdev_priv(dev); int count, newmtu, error; - BUGMSG(D_INIT,"opened."); + BUGMSG(D_INIT, "opened."); if (!try_module_get(lp->hw.owner)) return -ENODEV; @@ -431,24 +431,24 @@ int arcnet_open(struct net_device *dev) BUGMSG(D_NORMAL, "WARNING! 
Station address FF may confuse " "DOS networking programs!\n"); - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); if (ASTATUS() & RESETflag) { - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); ACOMMAND(CFLAGScmd | RESETclear); } - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); /* make sure we're ready to receive IRQ's. */ AINTMASK(0); udelay(1); /* give it time to set the mask before * we reset it again. (may not even be * necessary) */ - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); lp->intmask = NORXflag | RECONflag; AINTMASK(lp->intmask); - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); netif_start_queue(dev); @@ -489,29 +489,29 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, struct ArcProto *proto; BUGMSG(D_DURING, - "create header from %d to %d; protocol %d (%Xh); size %u.\n", - saddr ? *(uint8_t *) saddr : -1, - daddr ? *(uint8_t *) daddr : -1, + "create header from %d to %d; protocol %d (%Xh); size %u.\n", + saddr ? *(uint8_t *)saddr : -1, + daddr ? *(uint8_t *)daddr : -1, type, type, len); - if (skb->len!=0 && len != skb->len) + if (skb->len != 0 && len != skb->len) BUGMSG(D_NORMAL, "arcnet_header: Yikes! skb->len(%d) != len(%d)!\n", skb->len, len); - /* Type is host order - ? */ - if(type == ETH_P_ARCNET) { - proto = arc_raw_proto; - BUGMSG(D_DEBUG, "arc_raw_proto used. proto='%c'\n",proto->suffix); - _daddr = daddr ? *(uint8_t *) daddr : 0; - } + /* Type is host order - ? */ + if (type == ETH_P_ARCNET) { + proto = arc_raw_proto; + BUGMSG(D_DEBUG, "arc_raw_proto used. proto='%c'\n", proto->suffix); + _daddr = daddr ? *(uint8_t *)daddr : 0; + } else if (!daddr) { /* * if the dest addr isn't provided, we can't choose an encapsulation! * Store the packet type (eg. ETH_P_IP) for now, and we'll push on a * real header when we do rebuild_header. */ - *(uint16_t *) skb_push(skb, 2) = type; + *(uint16_t *)skb_push(skb, 2) = type; /* * XXX: Why not use skb->mac_len? */ @@ -522,7 +522,7 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, } else { /* otherwise, we can just add the header as usual. */ - _daddr = *(uint8_t *) daddr; + _daddr = *(uint8_t *)daddr; proto_num = lp->default_proto[_daddr]; proto = arc_proto_map[proto_num]; BUGMSG(D_DURING, "building header for %02Xh using protocol '%c'\n", @@ -538,7 +538,7 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, /* Called by the kernel in order to transmit a packet. 
*/ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, - struct net_device *dev) + struct net_device *dev) { struct arcnet_local *lp = netdev_priv(dev); struct archdr *pkt; @@ -550,14 +550,14 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, BUGMSG(D_DURING, "transmit requested (status=%Xh, txbufs=%d/%d, len=%d, protocol %x)\n", - ASTATUS(), lp->cur_tx, lp->next_tx, skb->len,skb->protocol); + ASTATUS(), lp->cur_tx, lp->next_tx, skb->len, skb->protocol); - pkt = (struct archdr *) skb->data; + pkt = (struct archdr *)skb->data; soft = &pkt->soft.rfc1201; proto = arc_proto_map[soft->proto]; BUGMSG(D_SKB_SIZE, "skb: transmitting %d bytes to %02X\n", - skb->len, pkt->hard.dest); + skb->len, pkt->hard.dest); BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "tx"); /* fits in one packet? */ @@ -572,7 +572,7 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, spin_lock_irqsave(&lp->lock, flags); AINTMASK(0); - if(lp->next_tx == -1) + if (lp->next_tx == -1) txbuf = get_arcbuf(dev); else { txbuf = -1; @@ -594,9 +594,9 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, if (proto->continue_tx && proto->continue_tx(dev, txbuf)) { - BUGMSG(D_NORMAL, - "bug! continue_tx finished the first time! " - "(proto='%c')\n", proto->suffix); + BUGMSG(D_NORMAL, + "bug! continue_tx finished the first time! " + "(proto='%c')\n", proto->suffix); } } retval = NETDEV_TX_OK; @@ -606,14 +606,14 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, freeskb = 0; } - BUGMSG(D_DEBUG, "%s: %d: %s, status: %x\n",__FILE__,__LINE__,__func__,ASTATUS()); + BUGMSG(D_DEBUG, "%s: %d: %s, status: %x\n", __FILE__, __LINE__, __func__, ASTATUS()); /* make sure we didn't ignore a TX IRQ while we were in here */ AINTMASK(0); - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); - lp->intmask |= TXFREEflag|EXCNAKflag; + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + lp->intmask |= TXFREEflag | EXCNAKflag; AINTMASK(lp->intmask); - BUGMSG(D_DEBUG, "%s: %d: %s, status: %x\n",__FILE__,__LINE__,__func__,ASTATUS()); + BUGMSG(D_DEBUG, "%s: %d: %s, status: %x\n", __FILE__, __LINE__, __func__, ASTATUS()); spin_unlock_irqrestore(&lp->lock, flags); if (freeskb) { @@ -649,7 +649,7 @@ static int go_tx(struct net_device *dev) lp->lasttrans_dest = lp->lastload_dest; lp->lastload_dest = 0; lp->excnak_pending = 0; - lp->intmask |= TXFREEflag|EXCNAKflag; + lp->intmask |= TXFREEflag | EXCNAKflag; return 1; } @@ -676,12 +676,12 @@ void arcnet_timeout(struct net_device *dev) /* make sure we didn't miss a TX or a EXC NAK IRQ */ AINTMASK(0); - lp->intmask |= TXFREEflag|EXCNAKflag; + lp->intmask |= TXFREEflag | EXCNAKflag; AINTMASK(lp->intmask); - + spin_unlock_irqrestore(&lp->lock, flags); - if (time_after(jiffies, lp->last_timeout + 10*HZ)) { + if (time_after(jiffies, lp->last_timeout + 10 * HZ)) { BUGMSG(D_EXTRA, "tx timed out%s (status=%Xh, intmask=%Xh, dest=%02Xh)\n", msg, status, lp->intmask, lp->lasttrans_dest); lp->last_timeout = jiffies; @@ -710,7 +710,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) lp = netdev_priv(dev); BUG_ON(!lp); - + spin_lock(&lp->lock); /* @@ -731,16 +731,16 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) boguscount = 5; do { status = ASTATUS(); - diagstatus = (status >> 8) & 0xFF; + diagstatus = (status >> 8) & 0xFF; BUGMSG(D_DEBUG, "%s: %d: %s: status=%x\n", - __FILE__,__LINE__,__func__,status); + __FILE__, __LINE__, __func__, status); didsomething = 0; /* * RESET flag was enabled - card is resetting and if RX is * disabled, it's NOT because we just got a packet. 
- * + * * The card is in an undefined state. Clear it out and start over. */ if (status & RESETflag) { @@ -751,14 +751,14 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) /* get out of the interrupt handler! */ break; } - /* + /* * RX is inhibited - we must have received something. Prepare to * receive into the next buffer. - * + * * We don't actually copy the received packet from the card until * after the transmit handler runs (and possibly launches the next * tx); this should improve latency slightly if we get both types - * of interrupts at once. + * of interrupts at once. */ recbuf = -1; if (status & lp->intmask & NORXflag) { @@ -775,27 +775,27 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) didsomething++; } - if((diagstatus & EXCNAKflag)) { + if ((diagstatus & EXCNAKflag)) { BUGMSG(D_DURING, "EXCNAK IRQ (diagstat=%Xh)\n", diagstatus); - ACOMMAND(NOTXcmd); /* disable transmit */ - lp->excnak_pending = 1; + ACOMMAND(NOTXcmd); /* disable transmit */ + lp->excnak_pending = 1; - ACOMMAND(EXCNAKclear); + ACOMMAND(EXCNAKclear); lp->intmask &= ~(EXCNAKflag); - didsomething++; - } + didsomething++; + } /* a transmit finished, and we're interested in it. */ if ((status & lp->intmask & TXFREEflag) || lp->timed_out) { - lp->intmask &= ~(TXFREEflag|EXCNAKflag); + lp->intmask &= ~(TXFREEflag | EXCNAKflag); BUGMSG(D_DURING, "TX IRQ (stat=%Xh)\n", status); if (lp->cur_tx != -1 && !lp->timed_out) { - if(!(status & TXACKflag)) { + if (!(status & TXACKflag)) { if (lp->lasttrans_dest != 0) { BUGMSG(D_EXTRA, "transmit was not acknowledged! " @@ -813,16 +813,16 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (lp->outgoing.proto && lp->outgoing.proto->ack_tx) { - int ackstatus; - if(status & TXACKflag) - ackstatus=2; - else if(lp->excnak_pending) - ackstatus=1; - else - ackstatus=0; - - lp->outgoing.proto - ->ack_tx(dev, ackstatus); + int ackstatus; + if (status & TXACKflag) + ackstatus = 2; + else if (lp->excnak_pending) + ackstatus = 1; + else + ackstatus = 0; + + lp->outgoing.proto + ->ack_tx(dev, ackstatus); } } if (lp->cur_tx != -1) @@ -842,11 +842,11 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (lp->outgoing.proto->continue_tx(dev, txbuf)) { /* that was the last segment */ dev->stats.tx_bytes += lp->outgoing.skb->len; - if(!lp->outgoing.proto->ack_tx) - { - dev_kfree_skb_irq(lp->outgoing.skb); - lp->outgoing.proto = NULL; - } + if (!lp->outgoing.proto->ack_tx) + { + dev_kfree_skb_irq(lp->outgoing.skb); + lp->outgoing.proto = NULL; + } } lp->next_tx = txbuf; } @@ -871,8 +871,8 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) BUGMSG(D_RECON, "Network reconfiguration detected (status=%Xh)\n", status); /* MYRECON bit is at bit 7 of diagstatus */ - if(diagstatus & 0x80) - BUGMSG(D_RECON,"Put out that recon myself\n"); + if (diagstatus & 0x80) + BUGMSG(D_RECON, "Put out that recon myself\n"); /* is the RECON info empty or old? 
*/ if (!lp->first_recon || !lp->last_recon || @@ -889,7 +889,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) BUGMSG(D_DURING, "recon: counter=%d, time=%lds, net=%d\n", lp->num_recons, - (lp->last_recon - lp->first_recon) / HZ, + (lp->last_recon - lp->first_recon) / HZ, lp->network_down); /* if network is marked up; @@ -911,7 +911,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) } } } else if (lp->network_down && - time_after(jiffies, lp->last_recon + HZ * 10)) { + time_after(jiffies, lp->last_recon + HZ * 10)) { if (lp->network_down) BUGMSG(D_NORMAL, "cabling restored?\n"); lp->first_recon = lp->last_recon = 0; @@ -920,7 +920,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) BUGMSG(D_DURING, "not recon: clearing counters anyway.\n"); } - if(didsomething) { + if (didsomething) { retval |= IRQ_HANDLED; } } @@ -934,7 +934,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) AINTMASK(0); udelay(1); AINTMASK(lp->intmask); - + spin_unlock(&lp->lock); return retval; } @@ -1007,7 +1007,7 @@ static void null_rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length) { BUGMSG(D_PROTO, - "rx: don't know how to deal with proto %02Xh from host %02Xh.\n", + "rx: don't know how to deal with proto %02Xh from host %02Xh.\n", pkthdr->soft.rfc1201.proto, pkthdr->hard.source); } diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index 42fce91b71fc..95a6cf07965c 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -71,12 +71,12 @@ static void rx(struct net_device *dev, int bufnum, /* use these variables to be sure we count in bytes, not in sizeof(struct archdr) */ - pktbuf=(char*)pkt; - pkthdrbuf=(char*)pkthdr; - memcpy(pktbuf, pkthdrbuf, ARC_HDR_SIZE+sizeof(pkt->soft.cap.proto)); - memcpy(pktbuf+ARC_HDR_SIZE+sizeof(pkt->soft.cap.proto)+sizeof(int), - pkthdrbuf+ARC_HDR_SIZE+sizeof(pkt->soft.cap.proto), - sizeof(struct archdr)-ARC_HDR_SIZE-sizeof(pkt->soft.cap.proto)); + pktbuf = (char *)pkt; + pkthdrbuf = (char *)pkthdr; + memcpy(pktbuf, pkthdrbuf, ARC_HDR_SIZE + sizeof(pkt->soft.cap.proto)); + memcpy(pktbuf + ARC_HDR_SIZE + sizeof(pkt->soft.cap.proto) + sizeof(int), + pkthdrbuf + ARC_HDR_SIZE + sizeof(pkt->soft.cap.proto), + sizeof(struct archdr) - ARC_HDR_SIZE - sizeof(pkt->soft.cap.proto)); if (length > sizeof(pkt->soft)) lp->hw.copy_from_card(dev, bufnum, ofs + sizeof(pkt->soft), @@ -101,10 +101,10 @@ static int build_header(struct sk_buff *skb, uint8_t daddr) { int hdr_size = ARC_HDR_SIZE; - struct archdr *pkt = (struct archdr *) skb_push(skb, hdr_size); + struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size); BUGMSG(D_PROTO, "Preparing header for cap packet %x.\n", - *((int*)&pkt->soft.cap.cookie[0])); + *((int *)&pkt->soft.cap.cookie[0])); /* * Set the source hardware address. * @@ -148,7 +148,7 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, lp->next_tx, lp->cur_tx, bufnum); BUGMSG(D_PROTO, "Sending for cap packet %x.\n", - *((int*)&pkt->soft.cap.cookie[0])); + *((int *)&pkt->soft.cap.cookie[0])); if (length > XMTU) { /* should never happen! other people already check for this. 
*/ @@ -166,7 +166,7 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, hard->offset[0] = ofs = 256 - length; BUGMSG(D_DURING, "prepare_tx: length=%d ofs=%d\n", - length,ofs); + length, ofs); /* Copy the arcnet-header + the protocol byte down: */ lp->hw.copy_to_card(dev, bufnum, 0, hard, ARC_HDR_SIZE); @@ -175,8 +175,8 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, /* Skip the extra integer we have written into it as a cookie but write the rest of the message: */ - lp->hw.copy_to_card(dev, bufnum, ofs+1, - ((unsigned char*)&pkt->soft.cap.mes),length-1); + lp->hw.copy_to_card(dev, bufnum, ofs + 1, + ((unsigned char *)&pkt->soft.cap.mes), length - 1); lp->lastload_dest = hard->dest; @@ -188,21 +188,21 @@ static int ack_tx(struct net_device *dev, int acked) struct arcnet_local *lp = netdev_priv(dev); struct sk_buff *ackskb; struct archdr *ackpkt; - int length=sizeof(struct arc_cap); + int length = sizeof(struct arc_cap); BUGMSG(D_DURING, "capmode: ack_tx: protocol: %x: result: %d\n", - lp->outgoing.skb->protocol, acked); + lp->outgoing.skb->protocol, acked); BUGLVL(D_SKB) arcnet_dump_skb(dev, lp->outgoing.skb, "ack_tx"); /* Now alloc a skb to send back up through the layers: */ - ackskb = alloc_skb(length + ARC_HDR_SIZE , GFP_ATOMIC); + ackskb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC); if (ackskb == NULL) { BUGMSG(D_NORMAL, "Memory squeeze, can't acknowledge.\n"); goto free_outskb; } - skb_put(ackskb, length + ARC_HDR_SIZE ); + skb_put(ackskb, length + ARC_HDR_SIZE); ackskb->dev = dev; skb_reset_mac_header(ackskb); @@ -212,10 +212,10 @@ static int ack_tx(struct net_device *dev, int acked) skb_copy_from_linear_data(lp->outgoing.skb, ackpkt, ARC_HDR_SIZE + sizeof(struct arc_cap)); ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */ - ackpkt->soft.cap.mes.ack=acked; + ackpkt->soft.cap.mes.ack = acked; BUGMSG(D_PROTO, "Ackknowledge for cap packet %x.\n", - *((int*)&ackpkt->soft.cap.cookie[0])); + *((int *)&ackpkt->soft.cap.cookie[0])); ackskb->protocol = cpu_to_be16(ETH_P_ARCNET); diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index 45c61a2c5fbd..d8746caf8e7a 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - COM20020 chipset support - * + * * Written 1997 by David Woodhouse. * Written 1994-1999 by Avery Pennarun. * Written 1999-2000 by Martin Mares . diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 96edc1346124..e0f489a117f1 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -1,7 +1,7 @@ /* * Linux ARCnet driver - COM20020 PCI support * Contemporary Controls PCI20 and SOHARD SH-ARC PCI - * + * * Written 1994-1999 by Avery Pennarun, * based on an ISA version by David Woodhouse. * Written 1999-2000 by Martin Mares . diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 1a8437842fbc..cd2a5ca56ce5 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - COM20020 chipset support - * + * * Written 1997 by David Woodhouse. * Written 1994-1999 by Avery Pennarun. * Written 1999 by Martin Mares . 
@@ -108,7 +108,7 @@ int com20020_check(struct net_device *dev) { SET_SUBADR(SUB_SETUP2); outb(lp->setup2, _XREG); - + /* must now write the magic "restart operation" command */ mdelay(1); outb(0x18, _COMMAND); @@ -117,7 +117,7 @@ int com20020_check(struct net_device *dev) lp->config = 0x21 | (lp->timeout << 3) | (lp->backplane << 2); /* set node ID to 0x42 (but transmitter is disabled, so it's okay) */ SETCONF; - outb(0x42, ioaddr + BUS_ALIGN*7); + outb(0x42, ioaddr + BUS_ALIGN * 7); status = ASTATUS(); @@ -129,7 +129,7 @@ int com20020_check(struct net_device *dev) /* Enable TX */ outb(0x39, _CONFIG); - outb(inb(ioaddr + BUS_ALIGN*8), ioaddr + BUS_ALIGN*7); + outb(inb(ioaddr + BUS_ALIGN * 8), ioaddr + BUS_ALIGN * 7); ACOMMAND(CFLAGScmd | RESETclear | CONFIGclear); @@ -193,7 +193,7 @@ int com20020_found(struct net_device *dev, int shared) lp->hw.close = com20020_close; if (!dev->dev_addr[0]) - dev->dev_addr[0] = inb(ioaddr + BUS_ALIGN*8); /* FIXME: do this some other way! */ + dev->dev_addr[0] = inb(ioaddr + BUS_ALIGN * 8); /* FIXME: do this some other way! */ SET_SUBADR(SUB_SETUP1); outb(lp->setup, _XREG); @@ -202,7 +202,7 @@ int com20020_found(struct net_device *dev, int shared) { SET_SUBADR(SUB_SETUP2); outb(lp->setup2, _XREG); - + /* must now write the magic "restart operation" command */ mdelay(1); outb(0x18, _COMMAND); @@ -232,7 +232,7 @@ int com20020_found(struct net_device *dev, int shared) BUGMSG(D_NORMAL, "Using extended timeout value of %d.\n", lp->timeout); BUGMSG(D_NORMAL, "Using CKP %d - data rate %s.\n", - lp->setup >> 1, + lp->setup >> 1, clockrates[3 - ((lp->setup2 & 0xF0) >> 4) + ((lp->setup & 0x0F) >> 1)]); if (register_netdev(dev)) { @@ -243,9 +243,9 @@ int com20020_found(struct net_device *dev, int shared) } -/* +/* * Do a hardware reset on the card, and set up necessary registers. - * + * * This should be called as little as possible, because it disrupts the * token on the network (causes a RECON) and requires a significant delay. 
* @@ -258,15 +258,15 @@ static int com20020_reset(struct net_device *dev, int really_reset) u_char inbyte; BUGMSG(D_DEBUG, "%s: %d: %s: dev: %p, lp: %p, dev->name: %s\n", - __FILE__,__LINE__,__func__,dev,lp,dev->name); + __FILE__, __LINE__, __func__, dev, lp, dev->name); BUGMSG(D_INIT, "Resetting %s (status=%02Xh)\n", dev->name, ASTATUS()); - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); lp->config = TXENcfg | (lp->timeout << 3) | (lp->backplane << 2); /* power-up defaults */ SETCONF; - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); if (really_reset) { /* reset the card */ @@ -274,22 +274,22 @@ static int com20020_reset(struct net_device *dev, int really_reset) mdelay(RESETtime * 2); /* COM20020 seems to be slower sometimes */ } /* clear flags & end reset */ - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); ACOMMAND(CFLAGScmd | RESETclear | CONFIGclear); /* verify that the ARCnet signature byte is present */ - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); com20020_copy_from_card(dev, 0, 0, &inbyte, 1); - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); if (inbyte != TESTvalue) { - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); BUGMSG(D_NORMAL, "reset failed: TESTvalue not present.\n"); return 1; } /* enable extended (512-byte) packets */ ACOMMAND(CONFIGcmd | EXTconf); - BUGMSG(D_DEBUG, "%s: %d: %s\n",__FILE__,__LINE__,__func__); + BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); /* done! return success. */ return 0; @@ -299,7 +299,7 @@ static int com20020_reset(struct net_device *dev, int really_reset) static void com20020_setmask(struct net_device *dev, int mask) { u_int ioaddr = dev->base_addr; - BUGMSG(D_DURING, "Setting mask to %x at %x\n",mask,ioaddr); + BUGMSG(D_DURING, "Setting mask to %x at %x\n", mask, ioaddr); AINTMASK(mask); } @@ -315,7 +315,7 @@ static int com20020_status(struct net_device *dev) { u_int ioaddr = dev->base_addr; - return ASTATUS() + (ADIAGSTATUS()<<8); + return ASTATUS() + (ADIAGSTATUS() << 8); } static void com20020_close(struct net_device *dev) diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 057d9582132a..f68752102379 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - COM20020 PCMCIA support - * + * * Written 1994-1999 by Avery Pennarun, * based on an ISA version by David Woodhouse. * Derived from ibmtr_cs.c by Steve Kipisz (pcmcia-cs 3.1.4) @@ -19,14 +19,14 @@ * Director, National Security Agency. This software may only be used * and distributed according to the terms of the GNU General Public License as * modified by SRC, incorporated herein by reference. 
- * + * * ********************** * Changes: * Arnaldo Carvalho de Melo - 08/08/2000 * - reorganize kmallocs in com20020_attach, checking all for failure * and releasing the previous allocations if one fails * ********************** - * + * * For more details, see drivers/net/arcnet.c * * ********************** @@ -53,33 +53,33 @@ static void regdump(struct net_device *dev) { #ifdef DEBUG - int ioaddr = dev->base_addr; - int count; - - netdev_dbg(dev, "register dump:\n"); - for (count = ioaddr; count < ioaddr + 16; count++) - { - if (!(count % 16)) - pr_cont("%04X:", count); - pr_cont(" %02X", inb(count)); - } - pr_cont("\n"); - - netdev_dbg(dev, "buffer0 dump:\n"); + int ioaddr = dev->base_addr; + int count; + + netdev_dbg(dev, "register dump:\n"); + for (count = ioaddr; count < ioaddr + 16; count++) + { + if (!(count % 16)) + pr_cont("%04X:", count); + pr_cont(" %02X", inb(count)); + } + pr_cont("\n"); + + netdev_dbg(dev, "buffer0 dump:\n"); /* set up the address register */ - count = 0; + count = 0; outb((count >> 8) | RDDATAflag | AUTOINCflag, _ADDR_HI); outb(count & 0xff, _ADDR_LO); - - for (count = 0; count < 256+32; count++) - { - if (!(count % 16)) - pr_cont("%04X:", count); - - /* copy the data */ - pr_cont(" %02X", inb(_MEMDATA)); - } - pr_cont("\n"); + + for (count = 0; count < 256 + 32; count++) + { + if (!(count % 16)) + pr_cont("%04X:", count); + + /* copy the data */ + pr_cont(" %02X", inb(_MEMDATA)); + } + pr_cont("\n"); #endif } @@ -114,169 +114,169 @@ static void com20020_detach(struct pcmcia_device *p_dev); static int com20020_probe(struct pcmcia_device *p_dev) { - struct com20020_dev *info; - struct net_device *dev; - struct arcnet_local *lp; + struct com20020_dev *info; + struct net_device *dev; + struct arcnet_local *lp; - dev_dbg(&p_dev->dev, "com20020_attach()\n"); + dev_dbg(&p_dev->dev, "com20020_attach()\n"); - /* Create new network device */ - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto fail_alloc_info; + /* Create new network device */ + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto fail_alloc_info; - dev = alloc_arcdev(""); - if (!dev) - goto fail_alloc_dev; + dev = alloc_arcdev(""); + if (!dev) + goto fail_alloc_dev; - lp = netdev_priv(dev); - lp->timeout = timeout; - lp->backplane = backplane; - lp->clockp = clockp; - lp->clockm = clockm & 3; - lp->hw.owner = THIS_MODULE; + lp = netdev_priv(dev); + lp->timeout = timeout; + lp->backplane = backplane; + lp->clockp = clockp; + lp->clockm = clockm & 3; + lp->hw.owner = THIS_MODULE; - /* fill in our module parameters as defaults */ - dev->dev_addr[0] = node; + /* fill in our module parameters as defaults */ + dev->dev_addr[0] = node; - p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8; - p_dev->resource[0]->end = 16; - p_dev->config_flags |= CONF_ENABLE_IRQ; + p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8; + p_dev->resource[0]->end = 16; + p_dev->config_flags |= CONF_ENABLE_IRQ; - info->dev = dev; - p_dev->priv = info; + info->dev = dev; + p_dev->priv = info; - return com20020_config(p_dev); + return com20020_config(p_dev); fail_alloc_dev: - kfree(info); + kfree(info); fail_alloc_info: - return -ENOMEM; + return -ENOMEM; } /* com20020_attach */ static void com20020_detach(struct pcmcia_device *link) { - struct com20020_dev *info = link->priv; - struct net_device *dev = info->dev; + struct com20020_dev *info = link->priv; + struct net_device *dev = info->dev; - dev_dbg(&link->dev, "detach...\n"); + dev_dbg(&link->dev, "detach...\n"); - dev_dbg(&link->dev, "com20020_detach\n"); 
+ dev_dbg(&link->dev, "com20020_detach\n"); - dev_dbg(&link->dev, "unregister...\n"); + dev_dbg(&link->dev, "unregister...\n"); - unregister_netdev(dev); + unregister_netdev(dev); - /* - * this is necessary because we register our IRQ separately - * from card services. - */ - if (dev->irq) - free_irq(dev->irq, dev); + /* + * this is necessary because we register our IRQ separately + * from card services. + */ + if (dev->irq) + free_irq(dev->irq, dev); - com20020_release(link); + com20020_release(link); - /* Unlink device structure, free bits */ - dev_dbg(&link->dev, "unlinking...\n"); - if (link->priv) - { - dev = info->dev; - if (dev) + /* Unlink device structure, free bits */ + dev_dbg(&link->dev, "unlinking...\n"); + if (link->priv) { - dev_dbg(&link->dev, "kfree...\n"); - free_netdev(dev); + dev = info->dev; + if (dev) + { + dev_dbg(&link->dev, "kfree...\n"); + free_netdev(dev); + } + dev_dbg(&link->dev, "kfree2...\n"); + kfree(info); } - dev_dbg(&link->dev, "kfree2...\n"); - kfree(info); - } } /* com20020_detach */ static int com20020_config(struct pcmcia_device *link) { - struct arcnet_local *lp; - struct com20020_dev *info; - struct net_device *dev; - int i, ret; - int ioaddr; + struct arcnet_local *lp; + struct com20020_dev *info; + struct net_device *dev; + int i, ret; + int ioaddr; + + info = link->priv; + dev = info->dev; - info = link->priv; - dev = info->dev; + dev_dbg(&link->dev, "config...\n"); - dev_dbg(&link->dev, "config...\n"); + dev_dbg(&link->dev, "com20020_config\n"); - dev_dbg(&link->dev, "com20020_config\n"); + dev_dbg(&link->dev, "baseport1 is %Xh\n", + (unsigned int)link->resource[0]->start); - dev_dbg(&link->dev, "baseport1 is %Xh\n", - (unsigned int) link->resource[0]->start); + i = -ENODEV; + link->io_lines = 16; - i = -ENODEV; - link->io_lines = 16; + if (!link->resource[0]->start) + { + for (ioaddr = 0x100; ioaddr < 0x400; ioaddr += 0x10) + { + link->resource[0]->start = ioaddr; + i = pcmcia_request_io(link); + if (i == 0) + break; + } + } + else + i = pcmcia_request_io(link); + + if (i != 0) + { + dev_dbg(&link->dev, "requestIO failed totally!\n"); + goto failed; + } + + ioaddr = dev->base_addr = link->resource[0]->start; + dev_dbg(&link->dev, "got ioaddr %Xh\n", ioaddr); + + dev_dbg(&link->dev, "request IRQ %d\n", + link->irq); + if (!link->irq) + { + dev_dbg(&link->dev, "requestIRQ failed totally!\n"); + goto failed; + } - if (!link->resource[0]->start) - { - for (ioaddr = 0x100; ioaddr < 0x400; ioaddr += 0x10) + dev->irq = link->irq; + + ret = pcmcia_enable_device(link); + if (ret) + goto failed; + + if (com20020_check(dev)) { - link->resource[0]->start = ioaddr; - i = pcmcia_request_io(link); - if (i == 0) - break; + regdump(dev); + goto failed; } - } - else - i = pcmcia_request_io(link); - - if (i != 0) - { - dev_dbg(&link->dev, "requestIO failed totally!\n"); - goto failed; - } - - ioaddr = dev->base_addr = link->resource[0]->start; - dev_dbg(&link->dev, "got ioaddr %Xh\n", ioaddr); - - dev_dbg(&link->dev, "request IRQ %d\n", - link->irq); - if (!link->irq) - { - dev_dbg(&link->dev, "requestIRQ failed totally!\n"); - goto failed; - } - - dev->irq = link->irq; - - ret = pcmcia_enable_device(link); - if (ret) - goto failed; - - if (com20020_check(dev)) - { - regdump(dev); - goto failed; - } - - lp = netdev_priv(dev); - lp->card_name = "PCMCIA COM20020"; - lp->card_flags = ARC_CAN_10MBIT; /* pretend all of them can 10Mbit */ - - SET_NETDEV_DEV(dev, &link->dev); - - i = com20020_found(dev, 0); /* calls register_netdev */ - - if (i != 0) { - 
dev_notice(&link->dev, - "com20020_found() failed\n"); - goto failed; - } - - netdev_dbg(dev, "port %#3lx, irq %d\n", - dev->base_addr, dev->irq); - return 0; + + lp = netdev_priv(dev); + lp->card_name = "PCMCIA COM20020"; + lp->card_flags = ARC_CAN_10MBIT; /* pretend all of them can 10Mbit */ + + SET_NETDEV_DEV(dev, &link->dev); + + i = com20020_found(dev, 0); /* calls register_netdev */ + + if (i != 0) { + dev_notice(&link->dev, + "com20020_found() failed\n"); + goto failed; + } + + netdev_dbg(dev, "port %#3lx, irq %d\n", + dev->base_addr, dev->irq); + return 0; failed: - dev_dbg(&link->dev, "com20020_config failed...\n"); - com20020_release(link); - return -ENODEV; + dev_dbg(&link->dev, "com20020_config failed...\n"); + com20020_release(link); + return -ENODEV; } /* com20020_config */ static void com20020_release(struct pcmcia_device *link) @@ -312,9 +312,9 @@ static int com20020_resume(struct pcmcia_device *link) static const struct pcmcia_device_id com20020_ids[] = { PCMCIA_DEVICE_PROD_ID12("Contemporary Control Systems, Inc.", - "PCM20 Arcnet Adapter", 0x59991666, 0x95dfffaf), + "PCM20 Arcnet Adapter", 0x59991666, 0x95dfffaf), PCMCIA_DEVICE_PROD_ID12("SoHard AG", - "SH ARC PCMCIA", 0xf8991729, 0x69dff0c7), + "SH ARC PCMCIA", 0xf8991729, 0x69dff0c7), PCMCIA_DEVICE_NULL }; MODULE_DEVICE_TABLE(pcmcia, com20020_ids); diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 487d780ebbdf..23ac9edbe91c 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - COM90xx chipset (IO-mapped buffers) - * + * * Written 1997 by David Woodhouse. * Written 1994-1999 by Avery Pennarun. * Written 1999-2000 by Martin Mares . @@ -60,23 +60,23 @@ static void com90io_copy_from_card(struct net_device *dev, int bufnum, int offse #define ARCNET_TOTAL_SIZE 16 /* COM 9026 controller chip --> ARCnet register addresses */ -#define _INTMASK (ioaddr+0) /* writable */ -#define _STATUS (ioaddr+0) /* readable */ -#define _COMMAND (ioaddr+1) /* writable, returns random vals on read (?) */ -#define _RESET (ioaddr+8) /* software reset (on read) */ -#define _MEMDATA (ioaddr+12) /* Data port for IO-mapped memory */ -#define _ADDR_HI (ioaddr+15) /* Control registers for said */ -#define _ADDR_LO (ioaddr+14) -#define _CONFIG (ioaddr+2) /* Configuration register */ +#define _INTMASK (ioaddr + 0) /* writable */ +#define _STATUS (ioaddr + 0) /* readable */ +#define _COMMAND (ioaddr + 1) /* writable, returns random vals on read (?) */ +#define _RESET (ioaddr + 8) /* software reset (on read) */ +#define _MEMDATA (ioaddr + 12) /* Data port for IO-mapped memory */ +#define _ADDR_HI (ioaddr + 15) /* Control registers for said */ +#define _ADDR_LO (ioaddr + 14) +#define _CONFIG (ioaddr + 2) /* Configuration register */ #undef ASTATUS #undef ACOMMAND #undef AINTMASK #define ASTATUS() inb(_STATUS) -#define ACOMMAND(cmd) outb((cmd),_COMMAND) -#define AINTMASK(msk) outb((msk),_INTMASK) -#define SETCONF() outb((lp->config),_CONFIG) +#define ACOMMAND(cmd) outb((cmd), _COMMAND) +#define AINTMASK(msk) outb((msk), _INTMASK) +#define SETCONF() outb((lp->config), _CONFIG) /**************************************************************************** diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index b80fbe40aa0e..a4c5d5909b13 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - COM90xx chipset (memory-mapped buffers) - * + * * Written 1994-1999 by Avery Pennarun. 
* Written 1999 by Martin Mares . * Derived from skeleton.c by Donald Becker. @@ -77,25 +77,25 @@ static int numcards; /* Amount of I/O memory used by the card */ #define BUFFER_SIZE (512) -#define MIRROR_SIZE (BUFFER_SIZE*4) +#define MIRROR_SIZE (BUFFER_SIZE * 4) /* COM 9026 controller chip --> ARCnet register addresses */ -#define _INTMASK (ioaddr+0) /* writable */ -#define _STATUS (ioaddr+0) /* readable */ -#define _COMMAND (ioaddr+1) /* writable, returns random vals on read (?) */ -#define _CONFIG (ioaddr+2) /* Configuration register */ -#define _RESET (ioaddr+8) /* software reset (on read) */ -#define _MEMDATA (ioaddr+12) /* Data port for IO-mapped memory */ -#define _ADDR_HI (ioaddr+15) /* Control registers for said */ -#define _ADDR_LO (ioaddr+14) +#define _INTMASK (ioaddr + 0) /* writable */ +#define _STATUS (ioaddr + 0) /* readable */ +#define _COMMAND (ioaddr + 1) /* writable, returns random vals on read (?) */ +#define _CONFIG (ioaddr + 2) /* Configuration register */ +#define _RESET (ioaddr + 8) /* software reset (on read) */ +#define _MEMDATA (ioaddr + 12) /* Data port for IO-mapped memory */ +#define _ADDR_HI (ioaddr + 15) /* Control registers for said */ +#define _ADDR_LO (ioaddr + 14) #undef ASTATUS #undef ACOMMAND #undef AINTMASK #define ASTATUS() inb(_STATUS) -#define ACOMMAND(cmd) outb((cmd),_COMMAND) -#define AINTMASK(msk) outb((msk),_INTMASK) +#define ACOMMAND(cmd) outb((cmd), _COMMAND) +#define AINTMASK(msk) outb((msk), _INTMASK) static int com90xx_skip_probe __initdata = 0; @@ -127,12 +127,12 @@ static void __init com90xx_probe(void) if (!io && !irq && !shmem && !*device && com90xx_skip_probe) return; - shmems = kzalloc(((0x100000-0xa0000) / 0x800) * sizeof(unsigned long), + shmems = kzalloc(((0x100000 - 0xa0000) / 0x800) * sizeof(unsigned long), GFP_KERNEL); if (!shmems) return; - iomem = kzalloc(((0x100000-0xa0000) / 0x800) * sizeof(void __iomem *), - GFP_KERNEL); + iomem = kzalloc(((0x100000 - 0xa0000) / 0x800) * sizeof(void __iomem *), + GFP_KERNEL); if (!iomem) { kfree(shmems); return; @@ -579,7 +579,7 @@ static void com90xx_setmask(struct net_device *dev, int mask) /* * Do a hardware reset on the card, and set up necessary registers. - * + * * This should be called as little as possible, because it disrupts the * token on the network (causes a RECON) and requires a significant delay. * diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index f81db4070a57..ae1ded286897 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - RFC1051 ("simple" standard) packet encapsulation - * + * * Written 1994-1999 by Avery Pennarun. * Derived from skeleton.c by Donald Becker. * @@ -84,12 +84,12 @@ MODULE_LICENSE("GPL"); /* * Determine a packet's protocol ID. - * + * * With ARCnet we have to convert everything to Ethernet-style stuff. 
*/ static __be16 type_trans(struct sk_buff *skb, struct net_device *dev) { - struct archdr *pkt = (struct archdr *) skb->data; + struct archdr *pkt = (struct archdr *)skb->data; struct arc_rfc1051 *soft = &pkt->soft.rfc1051; int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE; @@ -146,7 +146,7 @@ static void rx(struct net_device *dev, int bufnum, skb_put(skb, length + ARC_HDR_SIZE); skb->dev = dev; - pkt = (struct archdr *) skb->data; + pkt = (struct archdr *)skb->data; /* up to sizeof(pkt->soft) has already been copied from the card */ memcpy(pkt, pkthdr, sizeof(struct archdr)); @@ -169,7 +169,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, uint8_t daddr) { int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE; - struct archdr *pkt = (struct archdr *) skb_push(skb, hdr_size); + struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size); struct arc_rfc1051 *soft = &pkt->soft.rfc1051; /* set the protocol ID according to RFC1051 */ @@ -201,7 +201,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, /* see linux/net/ethernet/eth.c to see where I got the following */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - /* + /* * FIXME: fill in the last byte of the dest ipaddr here to better * comply with RFC1051 in "noarp" mode. */ diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index b71431aae084..bf2a8cb3f0e2 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -1,6 +1,6 @@ /* * Linux ARCnet driver - RFC1201 (standard) packet encapsulation - * + * * Written 1994-1999 by Avery Pennarun. * Derived from skeleton.c by Donald Becker. * @@ -86,12 +86,12 @@ module_exit(arcnet_rfc1201_exit); /* * Determine a packet's protocol ID. - * + * * With ARCnet we have to convert everything to Ethernet-style stuff. */ static __be16 type_trans(struct sk_buff *skb, struct net_device *dev) { - struct archdr *pkt = (struct archdr *) skb->data; + struct archdr *pkt = (struct archdr *)skb->data; struct arc_rfc1201 *soft = &pkt->soft.rfc1201; int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE; @@ -169,7 +169,7 @@ static void rx(struct net_device *dev, int bufnum, if (in->skb) { /* already assembling one! */ BUGMSG(D_EXTRA, "aborting assembly (seq=%d) for unsplit packet (splitflag=%d, seq=%d)\n", - in->sequence, soft->split_flag, soft->sequence); + in->sequence, soft->split_flag, soft->sequence); lp->rfc1201.aborted_seq = soft->sequence; dev_kfree_skb_irq(in->skb); dev->stats.rx_errors++; @@ -187,7 +187,7 @@ static void rx(struct net_device *dev, int bufnum, skb_put(skb, length + ARC_HDR_SIZE); skb->dev = dev; - pkt = (struct archdr *) skb->data; + pkt = (struct archdr *)skb->data; soft = &pkt->soft.rfc1201; /* up to sizeof(pkt->soft) has already been copied from the card */ @@ -203,11 +203,11 @@ static void rx(struct net_device *dev, int bufnum, * (which is impossible to fumble) and insert it ourselves. */ if (soft->proto == ARC_P_ARP) { - struct arphdr *arp = (struct arphdr *) soft->payload; + struct arphdr *arp = (struct arphdr *)soft->payload; /* make sure addresses are the right length */ if (arp->ar_hln == 1 && arp->ar_pln == 4) { - uint8_t *cptr = (uint8_t *) arp + sizeof(struct arphdr); + uint8_t *cptr = (uint8_t *)arp + sizeof(struct arphdr); if (!*cptr) { /* is saddr = 00? 
*/ BUGMSG(D_EXTRA, @@ -274,7 +274,7 @@ static void rx(struct net_device *dev, int bufnum, dev_kfree_skb_irq(in->skb); } in->sequence = soft->sequence; - in->numpackets = ((unsigned) soft->split_flag >> 1) + 2; + in->numpackets = ((unsigned)soft->split_flag >> 1) + 2; in->lastpacket = 1; if (in->numpackets > 16) { @@ -294,7 +294,7 @@ static void rx(struct net_device *dev, int bufnum, return; } skb->dev = dev; - pkt = (struct archdr *) skb->data; + pkt = (struct archdr *)skb->data; soft = &pkt->soft.rfc1201; memcpy(pkt, pkthdr, ARC_HDR_SIZE + RFC1201_HDR_SIZE); @@ -302,7 +302,7 @@ static void rx(struct net_device *dev, int bufnum, soft->split_flag = 0; /* end result won't be split */ } else { /* not first packet */ - int packetnum = ((unsigned) soft->split_flag >> 1) + 1; + int packetnum = ((unsigned)soft->split_flag >> 1) + 1; /* * if we're not assembling, there's no point trying to @@ -341,7 +341,7 @@ static void rx(struct net_device *dev, int bufnum, in->lastpacket = in->numpackets = 0; return; } - pkt = (struct archdr *) in->skb->data; + pkt = (struct archdr *)in->skb->data; soft = &pkt->soft.rfc1201; } @@ -357,10 +357,10 @@ static void rx(struct net_device *dev, int bufnum, in->skb = NULL; in->lastpacket = in->numpackets = 0; - BUGMSG(D_SKB_SIZE, "skb: received %d bytes from %02X (unsplit)\n", - skb->len, pkt->hard.source); - BUGMSG(D_SKB_SIZE, "skb: received %d bytes from %02X (split)\n", - skb->len, pkt->hard.source); + BUGMSG(D_SKB_SIZE, "skb: received %d bytes from %02X (unsplit)\n", + skb->len, pkt->hard.source); + BUGMSG(D_SKB_SIZE, "skb: received %d bytes from %02X (split)\n", + skb->len, pkt->hard.source); BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); skb->protocol = type_trans(skb, dev); @@ -376,7 +376,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, { struct arcnet_local *lp = netdev_priv(dev); int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE; - struct archdr *pkt = (struct archdr *) skb_push(skb, hdr_size); + struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size); struct arc_rfc1201 *soft = &pkt->soft.rfc1201; /* set the protocol ID according to RFC1201 */ @@ -424,7 +424,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, /* see linux/net/ethernet/eth.c to see where I got the following */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - /* + /* * FIXME: fill in the last byte of the dest ipaddr here to better * comply with RFC1051 in "noarp" mode. For now, always broadcasting * will probably at least get packets sent out :) @@ -517,7 +517,7 @@ static int continue_tx(struct net_device *dev, int bufnum) int seglen; BUGMSG(D_DURING, - "rfc1201 continue_tx: loading segment %d(+1) of %d (seq=%d)\n", + "rfc1201 continue_tx: loading segment %d(+1) of %d (seq=%d)\n", out->segnum, out->numsegs, soft->sequence); /* the "new" soft header comes right before the data chunk */ diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index df0356220730..ccfd1d2f984b 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -51,7 +51,7 @@ /* * Debugging bitflags: each option can be enabled individually. - * + * * Note: only debug flags included in the ARCNET_DEBUG_MAX define will * actually be available. 
GCC will (at least, GCC 2.7.0 will) notice * lines using a BUGLVL not in ARCNET_DEBUG_MAX and automatically optimize @@ -77,33 +77,33 @@ #endif #ifndef ARCNET_DEBUG -#define ARCNET_DEBUG (D_NORMAL|D_EXTRA) +#define ARCNET_DEBUG (D_NORMAL | D_EXTRA) #endif extern int arcnet_debug; /* macros to simplify debug checking */ -#define BUGLVL(x) if ((ARCNET_DEBUG_MAX)&arcnet_debug&(x)) -#define BUGMSG2(x,msg,args...) do { BUGLVL(x) printk(msg, ## args); } while (0) -#define BUGMSG(x,msg,args...) \ - BUGMSG2(x, "%s%6s: " msg, \ - x==D_NORMAL ? KERN_WARNING \ - : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name , ## args) +#define BUGLVL(x) if ((ARCNET_DEBUG_MAX) & arcnet_debug & (x)) +#define BUGMSG2(x, msg, args...) do { BUGLVL(x) printk(msg, ## args); } while (0) +#define BUGMSG(x, msg, args...) \ + BUGMSG2(x, "%s%6s: " msg, \ + x == D_NORMAL ? KERN_WARNING \ + : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ + dev->name, ## args) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ - unsigned long _x, _y; \ - _x = get_cycles(); \ - call; \ - _y = get_cycles(); \ - BUGMSG(D_TIMING, \ - "%s: %d bytes in %lu cycles == " \ - "%lu Kbytes/100Mcycle\n",\ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1));\ - } \ - else { \ - call;\ +#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ + unsigned long _x, _y; \ + _x = get_cycles(); \ + call; \ + _y = get_cycles(); \ + BUGMSG(D_TIMING, \ + "%s: %d bytes in %lu cycles == " \ + "%lu Kbytes/100Mcycle\n", \ + name, bytes, _y - _x, \ + 100000000 / 1024 * bytes / (_y - _x + 1)); \ + } \ + else { \ + call; \ } @@ -189,16 +189,16 @@ struct ArcProto { int mtu; /* largest possible packet */ int is_ip; /* This is a ip plugin - not a raw thing */ - void (*rx) (struct net_device * dev, int bufnum, - struct archdr * pkthdr, int length); - int (*build_header) (struct sk_buff * skb, struct net_device *dev, - unsigned short ethproto, uint8_t daddr); + void (*rx)(struct net_device *dev, int bufnum, + struct archdr *pkthdr, int length); + int (*build_header)(struct sk_buff *skb, struct net_device *dev, + unsigned short ethproto, uint8_t daddr); /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx) (struct net_device * dev, struct archdr * pkt, int length, - int bufnum); - int (*continue_tx) (struct net_device * dev, int bufnum); - int (*ack_tx) (struct net_device * dev, int acked); + int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, int length, + int bufnum); + int (*continue_tx)(struct net_device *dev, int bufnum); + int (*ack_tx)(struct net_device *dev, int acked); }; extern struct ArcProto *arc_proto_map[256], *arc_proto_default, @@ -263,13 +263,13 @@ struct arcnet_local { * situations in which we (for example) want to pre-load a transmit * buffer, or start receiving while we copy a received packet to * memory. - * + * * The rules: only the interrupt handler is allowed to _add_ buffers to * the queue; thus, this doesn't require a lock. Both the interrupt * handler and the transmit function will want to _remove_ buffers, so * we need to handle the situation where they try to do it at the same * time. - * + * * If next_buf == first_free_buf, the queue is empty. Since there are * only four possible buffers, the queue should never be full. 
*/ @@ -298,17 +298,17 @@ struct arcnet_local { /* hardware-specific functions */ struct { struct module *owner; - void (*command) (struct net_device * dev, int cmd); - int (*status) (struct net_device * dev); - void (*intmask) (struct net_device * dev, int mask); - int (*reset) (struct net_device * dev, int really_reset); - void (*open) (struct net_device * dev); - void (*close) (struct net_device * dev); - - void (*copy_to_card) (struct net_device * dev, int bufnum, int offset, - void *buf, int count); - void (*copy_from_card) (struct net_device * dev, int bufnum, int offset, - void *buf, int count); + void (*command)(struct net_device *dev, int cmd); + int (*status)(struct net_device *dev); + void (*intmask)(struct net_device *dev, int mask); + int (*reset)(struct net_device *dev, int really_reset); + void (*open)(struct net_device *dev); + void (*close)(struct net_device *dev); + + void (*copy_to_card)(struct net_device *dev, int bufnum, int offset, + void *buf, int count); + void (*copy_from_card)(struct net_device *dev, int bufnum, int offset, + void *buf, int count); } hw; void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ @@ -325,7 +325,7 @@ struct arcnet_local { #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -#define arcnet_dump_skb(dev,skb,desc) ; +#define arcnet_dump_skb(dev, skb, desc) ; #endif void arcnet_unregister_proto(struct ArcProto *proto); @@ -335,7 +335,7 @@ struct net_device *alloc_arcdev(const char *name); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); netdev_tx_t arcnet_send_packet(struct sk_buff *skb, - struct net_device *dev); + struct net_device *dev); void arcnet_timeout(struct net_device *dev); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 01a1d5ac4e1a5890fd6c0d0ae900e1b6e4f851d6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:48 -0700 Subject: arcnet: Add and remove blank lines Use a more current kernel line style. 
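The blank-line convention this patch converges on is the one checkpatch.pl encourages: a single blank line separating a function's local variable declarations from its first statement, and no runs of consecutive blank lines between definitions. A minimal sketch of the resulting shape, using a hypothetical helper modeled on this driver's setmask routine (the name and body are illustrative only, not part of the patch):

static void example_setmask(struct net_device *dev, int mask)
{
	u_int ioaddr = dev->base_addr;

	/* one blank line above separates the declarations from the code */
	AINTMASK(mask);
}

Conversely, the double blank lines this patch removes (after VERSION strings, between function definitions, and so on) collapse to at most one separating line.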
Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- drivers/net/arcnet/arc-rawmode.c | 5 ----- drivers/net/arcnet/arc-rimi.c | 8 +++----- drivers/net/arcnet/arcnet.c | 23 +++-------------------- drivers/net/arcnet/capmode.c | 3 --- drivers/net/arcnet/com20020-isa.c | 1 - drivers/net/arcnet/com20020-pci.c | 2 -- drivers/net/arcnet/com20020.c | 8 ++------ drivers/net/arcnet/com20020_cs.c | 4 +--- drivers/net/arcnet/com90io.c | 12 +----------- drivers/net/arcnet/com90xx.c | 13 +++---------- drivers/net/arcnet/rfc1051.c | 7 ------- drivers/net/arcnet/rfc1201.c | 8 -------- include/linux/arcdevice.h | 12 ------------ 13 files changed, 13 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index 49f5819a0367..251a98b0bf78 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -35,7 +35,6 @@ #define VERSION "arcnet: raw mode (`r') encapsulation support loaded.\n" - static void rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length); static int build_header(struct sk_buff *skb, struct net_device *dev, @@ -54,7 +53,6 @@ static struct ArcProto rawmode_proto = .ack_tx = NULL }; - static int __init arcnet_raw_init(void) { int count; @@ -83,7 +81,6 @@ module_exit(arcnet_raw_exit); MODULE_LICENSE("GPL"); - /* packet receiver */ static void rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length) @@ -127,7 +124,6 @@ static void rx(struct net_device *dev, int bufnum, netif_rx(skb); } - /* * Create the ARCnet hard/soft headers for raw mode. * There aren't any soft headers in raw mode - not even the protocol id. @@ -163,7 +159,6 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, return hdr_size; /* success */ } - static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum) { diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 4644d46b6a5f..e8c15016bb8e 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -36,10 +36,8 @@ #include #include - #define VERSION "arcnet: RIM I (entirely mem-mapped) support\n" - /* Internal function declarations */ static int arcrimi_probe(struct net_device *dev); @@ -78,7 +76,6 @@ static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offse #define AINTMASK(msk) writeb((msk), _INTMASK) #define SETCONF() writeb(lp->config, _CONFIG) - /* * We cannot probe for a RIM I card; one reason is I don't know how to reset * them. In fact, we can't even get their node ID automatically. So, we @@ -249,7 +246,6 @@ err_free_irq: return -EIO; } - /* * Do a hardware reset on the card, and set up necessary registers. 
* @@ -308,15 +304,16 @@ static void arcrimi_copy_to_card(struct net_device *dev, int bufnum, int offset, { struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + 0x800 + bufnum * 512 + offset; + TIME("memcpy_toio", count, memcpy_toio(memaddr, buf, count)); } - static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offset, void *buf, int count) { struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + 0x800 + bufnum * 512 + offset; + TIME("memcpy_fromio", count, memcpy_fromio(buf, memaddr, count)); } @@ -374,6 +371,7 @@ static void __exit arc_rimi_exit(void) static int __init arcrimi_setup(char *s) { int ints[8]; + s = get_options(s, 8, ints); if (!ints[0]) return 1; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 2a594d1c4b55..bb49753f8203 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -168,7 +168,6 @@ void arcnet_dump_skb(struct net_device *dev, EXPORT_SYMBOL(arcnet_dump_skb); #endif - /* * Dump the contents of an ARCnet buffer */ @@ -206,7 +205,6 @@ static void arcnet_dump_packet(struct net_device *dev, int bufnum, #endif - /* * Unregister a protocol driver from the arc_proto_map. Protocol drivers * are responsible for registering themselves, but the unregister routine @@ -229,7 +227,6 @@ void arcnet_unregister_proto(struct ArcProto *proto) } } - /* * Add a buffer to the queue. Only the interrupt handler is allowed to do * this, unless interrupts are disabled. @@ -254,7 +251,6 @@ static void release_arcbuf(struct net_device *dev, int bufnum) } } - /* * Get a buffer from the queue. If this returns -1, there are no buffers * available. @@ -281,7 +277,6 @@ static int get_arcbuf(struct net_device *dev) } } - BUGLVL(D_DURING) { BUGMSG(D_DURING, "get_arcbuf: got #%d; buffer queue is now: ", buf); for (i = lp->next_buf; i != lp->first_free_buf; i = (i + 1) % 5) @@ -293,7 +288,6 @@ static int get_arcbuf(struct net_device *dev) return buf; } - static int choose_mtu(void) { int count, mtu = 65535; @@ -348,6 +342,7 @@ struct net_device *alloc_arcdev(const char *name) arcdev_setup); if (dev) { struct arcnet_local *lp = netdev_priv(dev); + spin_lock_init(&lp->lock); } @@ -380,7 +375,6 @@ int arcnet_open(struct net_device *dev) BUGMSG2(D_PROTO, "\n"); } - BUGMSG(D_INIT, "arcnet_open: resetting card.\n"); /* try to put the card in a defined state - if it fails the first @@ -437,7 +431,6 @@ int arcnet_open(struct net_device *dev) ACOMMAND(CFLAGScmd | RESETclear); } - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); /* make sure we're ready to receive IRQ's. */ AINTMASK(0); @@ -459,7 +452,6 @@ int arcnet_open(struct net_device *dev) return error; } - /* The inverse routine to arcnet_open - shuts down the card. */ int arcnet_close(struct net_device *dev) { @@ -479,7 +471,6 @@ int arcnet_close(struct net_device *dev) return 0; } - static int arcnet_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len) @@ -498,7 +489,6 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, BUGMSG(D_NORMAL, "arcnet_header: Yikes! skb->len(%d) != len(%d)!\n", skb->len, len); - /* Type is host order - ? */ if (type == ETH_P_ARCNET) { proto = arc_raw_proto; @@ -622,7 +612,6 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, return retval; /* no need to try again */ } - /* * Actually start transmitting a packet that was loaded into a buffer * by prepare_tx. 
This should _only_ be called by the interrupt handler. @@ -654,7 +643,6 @@ static int go_tx(struct net_device *dev) return 1; } - /* Called by the kernel when transmit times out */ void arcnet_timeout(struct net_device *dev) { @@ -691,7 +679,6 @@ void arcnet_timeout(struct net_device *dev) netif_wake_queue(dev); } - /* * The typical workload of the driver: Handle the network interface * interrupts. Establish which device needs attention, and call the correct @@ -787,7 +774,6 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) didsomething++; } - /* a transmit finished, and we're interested in it. */ if ((status & lp->intmask & TXFREEflag) || lp->timed_out) { lp->intmask &= ~(TXFREEflag | EXCNAKflag); @@ -814,6 +800,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (lp->outgoing.proto && lp->outgoing.proto->ack_tx) { int ackstatus; + if (status & TXACKflag) ackstatus = 2; else if (lp->excnak_pending) @@ -838,6 +825,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) /* continue a split packet, if any */ if (lp->outgoing.proto && lp->outgoing.proto->continue_tx) { int txbuf = get_arcbuf(dev); + if (txbuf != -1) { if (lp->outgoing.proto->continue_tx(dev, txbuf)) { /* that was the last segment */ @@ -930,7 +918,6 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) ASTATUS(), boguscount); BUGMSG(D_DURING, "\n"); - AINTMASK(0); udelay(1); AINTMASK(lp->intmask); @@ -939,7 +926,6 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) return retval; } - /* * This is a generic packet receiver that calls arcnet??_rx depending on the * protocol ID found. @@ -1002,7 +988,6 @@ static void arcnet_rx(struct net_device *dev, int bufnum) arc_proto_map[soft->proto]->rx(dev, bufnum, &pkt, length); } - static void null_rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length) { @@ -1011,7 +996,6 @@ static void null_rx(struct net_device *dev, int bufnum, pkthdr->soft.rfc1201.proto, pkthdr->hard.source); } - static int null_build_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, uint8_t daddr) { @@ -1025,7 +1009,6 @@ static int null_build_header(struct sk_buff *skb, struct net_device *dev, return 0; } - /* the "do nothing" prepare_tx function warns that there's nothing to do. */ static int null_prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum) diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index 95a6cf07965c..02815ff891f2 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -90,7 +90,6 @@ static void rx(struct net_device *dev, int bufnum, netif_rx(skb); } - /* * Create the ARCnet hard/soft headers for cap mode. * There aren't any soft headers in cap mode - not even the protocol id. 
@@ -130,7 +129,6 @@ static int build_header(struct sk_buff *skb, return hdr_size; /* success */ } - static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum) { @@ -138,7 +136,6 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, struct arc_hardware *hard = &pkt->hard; int ofs; - /* hard header is not included in packet length */ length -= ARC_HDR_SIZE; /* And neither is the cookie field */ diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index d8746caf8e7a..c7d3377a45fe 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -43,7 +43,6 @@ #define VERSION "arcnet: COM20020 ISA support (by David Woodhouse et al.)\n" - /* * We cannot (yet) probe for an IO mapped card, although we can check that * it's where we were told it was, and even do autoirq. diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index e0f489a117f1..06621e95bd38 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -42,7 +42,6 @@ #include - #define VERSION "arcnet: COM20020 PCI support\n" /* Module parameters */ @@ -86,7 +85,6 @@ static int com20020pci_probe(struct pci_dev *pdev, const struct pci_device_id *i INIT_LIST_HEAD(&priv->list_dev); - for (i = 0; i < ci->devcount; i++) { struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i]; struct com20020_dev *card; diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index cd2a5ca56ce5..c749af6db94e 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -70,7 +70,6 @@ static void com20020_copy_from_card(struct net_device *dev, int bufnum, TIME("insb", count, insb(_MEMDATA, buf, count)); } - static void com20020_copy_to_card(struct net_device *dev, int bufnum, int offset, void *buf, int count) { @@ -84,7 +83,6 @@ static void com20020_copy_to_card(struct net_device *dev, int bufnum, TIME("outsb", count, outsb(_MEMDATA, buf, count)); } - /* Reset the card and check some basic stuff during the detection stage. */ int com20020_check(struct net_device *dev) { @@ -242,7 +240,6 @@ int com20020_found(struct net_device *dev, int shared) return 0; } - /* * Do a hardware reset on the card, and set up necessary registers. 
* @@ -295,22 +292,21 @@ static int com20020_reset(struct net_device *dev, int really_reset) return 0; } - static void com20020_setmask(struct net_device *dev, int mask) { u_int ioaddr = dev->base_addr; + BUGMSG(D_DURING, "Setting mask to %x at %x\n", mask, ioaddr); AINTMASK(mask); } - static void com20020_command(struct net_device *dev, int cmd) { u_int ioaddr = dev->base_addr; + ACOMMAND(cmd); } - static int com20020_status(struct net_device *dev) { u_int ioaddr = dev->base_addr; diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index f68752102379..2eafb298626b 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -49,7 +49,6 @@ #define VERSION "arcnet: COM20020 PCMCIA support loaded.\n" - static void regdump(struct net_device *dev) { #ifdef DEBUG @@ -83,8 +82,6 @@ static void regdump(struct net_device *dev) #endif } - - /*====================================================================*/ /* Parameters that can be set with 'insmod' */ @@ -304,6 +301,7 @@ static int com20020_resume(struct pcmcia_device *link) if (link->open) { int ioaddr = dev->base_addr; struct arcnet_local *lp = netdev_priv(dev); + ARCRESET; } diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 23ac9edbe91c..11d2d0b4ccea 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -37,10 +37,8 @@ #include #include - #define VERSION "arcnet: COM90xx IO-mapped mode support (by David Woodhouse et el.)\n" - /* Internal function declarations */ static int com90io_found(struct net_device *dev); @@ -53,7 +51,6 @@ static void com90io_copy_to_card(struct net_device *dev, int bufnum, int offset, static void com90io_copy_from_card(struct net_device *dev, int bufnum, int offset, void *buf, int count); - /* Handy defines for ARCnet specific stuff */ /* The number of low I/O ports used by the card. */ @@ -78,7 +75,6 @@ static void com90io_copy_from_card(struct net_device *dev, int bufnum, int offse #define AINTMASK(msk) outb((msk), _INTMASK) #define SETCONF() outb((lp->config), _CONFIG) - /**************************************************************************** * * * IO-mapped operation routines * @@ -111,7 +107,6 @@ static void put_buffer_byte(struct net_device *dev, unsigned offset, u_char datu #endif - static void get_whole_buffer(struct net_device *dev, unsigned offset, unsigned length, char *dest) { int ioaddr = dev->base_addr; @@ -227,7 +222,6 @@ err_out: return -ENODEV; } - /* Set up the struct net_device associated with this card. Called after * probing succeeds. */ @@ -279,7 +273,6 @@ static int __init com90io_found(struct net_device *dev) return 0; } - /* * Do a hardware reset on the card, and set up necessary registers. 
* @@ -319,7 +312,6 @@ static int com90io_reset(struct net_device *dev, int really_reset) return 0; } - static void com90io_command(struct net_device *dev, int cmd) { short ioaddr = dev->base_addr; @@ -327,7 +319,6 @@ static void com90io_command(struct net_device *dev, int cmd) ACOMMAND(cmd); } - static int com90io_status(struct net_device *dev) { short ioaddr = dev->base_addr; @@ -335,7 +326,6 @@ static int com90io_status(struct net_device *dev) return ASTATUS(); } - static void com90io_setmask(struct net_device *dev, int mask) { short ioaddr = dev->base_addr; @@ -349,7 +339,6 @@ static void com90io_copy_to_card(struct net_device *dev, int bufnum, int offset, TIME("put_whole_buffer", count, put_whole_buffer(dev, bufnum * 512 + offset, count, buf)); } - static void com90io_copy_from_card(struct net_device *dev, int bufnum, int offset, void *buf, int count) { @@ -369,6 +358,7 @@ MODULE_LICENSE("GPL"); static int __init com90io_setup(char *s) { int ints[4]; + s = get_options(s, 4, ints); if (!ints[0]) return 0; diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index a4c5d5909b13..f0f06a274045 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -35,10 +35,8 @@ #include #include - #define VERSION "arcnet: COM90xx chipset support\n" - /* Define this to speed up the autoprobe by assuming if only one io port and * shmem are left in the list at Stage 5, they must correspond to each * other. @@ -53,7 +51,6 @@ */ #undef FAST_PROBE - /* Internal function declarations */ static int com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem *); static void com90xx_command(struct net_device *dev, int command); @@ -97,7 +94,6 @@ static int numcards; #define ACOMMAND(cmd) outb((cmd), _COMMAND) #define AINTMASK(msk) outb((msk), _INTMASK) - static int com90xx_skip_probe __initdata = 0; /* Module parameters */ @@ -308,6 +304,7 @@ static void __init com90xx_probe(void) numprint = -1; for (port = &ports[0]; port < ports + numports; port++) { int found = 0; + numprint++; numprint %= 8; if (!numprint) { @@ -552,7 +549,6 @@ err_free_dev: return -EIO; } - static void com90xx_command(struct net_device *dev, int cmd) { short ioaddr = dev->base_addr; @@ -560,7 +556,6 @@ static void com90xx_command(struct net_device *dev, int cmd) ACOMMAND(cmd); } - static int com90xx_status(struct net_device *dev) { short ioaddr = dev->base_addr; @@ -568,7 +563,6 @@ static int com90xx_status(struct net_device *dev) return ASTATUS(); } - static void com90xx_setmask(struct net_device *dev, int mask) { short ioaddr = dev->base_addr; @@ -576,7 +570,6 @@ static void com90xx_setmask(struct net_device *dev, int mask) AINTMASK(mask); } - /* * Do a hardware reset on the card, and set up necessary registers. 
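The com90io_setup() hunk above shows the usual pattern for "io,irq"-style option strings: get_options() stores the number of integers parsed in ints[0] and the values from ints[1] onward, and returns a pointer past the text it consumed. A minimal sketch of the same pattern (the example_setup name and the io/irq meanings are illustrative assumptions, modeled on this driver's parameters):

	#include <linux/init.h>
	#include <linux/kernel.h>

	static int io;	/* I/O base address, e.g. 0x300 */
	static int irq;	/* IRQ line, e.g. 10 */

	static int __init example_setup(char *s)
	{
		int ints[4];

		s = get_options(s, ARRAY_SIZE(ints), ints);
		if (ints[0] < 1)	/* no integers found */
			return 0;
		io = ints[1];
		if (ints[0] >= 2)
			irq = ints[2];
		return 1;
	}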
* @@ -625,19 +618,19 @@ static void com90xx_copy_to_card(struct net_device *dev, int bufnum, int offset, { struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + bufnum * 512 + offset; + TIME("memcpy_toio", count, memcpy_toio(memaddr, buf, count)); } - static void com90xx_copy_from_card(struct net_device *dev, int bufnum, int offset, void *buf, int count) { struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + bufnum * 512 + offset; + TIME("memcpy_fromio", count, memcpy_fromio(buf, memaddr, count)); } - MODULE_LICENSE("GPL"); static int __init com90xx_init(void) diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index ae1ded286897..49d35c972bca 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -34,7 +34,6 @@ #define VERSION "arcnet: RFC1051 \"simple standard\" (`s') encapsulation support loaded.\n" - static __be16 type_trans(struct sk_buff *skb, struct net_device *dev); static void rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length); @@ -43,7 +42,6 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum); - static struct ArcProto rfc1051_proto = { .suffix = 's', @@ -56,7 +54,6 @@ static struct ArcProto rfc1051_proto = .ack_tx = NULL }; - static int __init arcnet_rfc1051_init(void) { printk(VERSION); @@ -120,7 +117,6 @@ static __be16 type_trans(struct sk_buff *skb, struct net_device *dev) return htons(ETH_P_IP); } - /* packet receiver */ static void rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length) @@ -161,7 +157,6 @@ static void rx(struct net_device *dev, int bufnum, netif_rx(skb); } - /* * Create the ARCnet hard/soft headers for RFC1051. */ @@ -188,7 +183,6 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, return 0; } - /* * Set the source hardware address. * @@ -214,7 +208,6 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, return hdr_size; /* success */ } - static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum) { diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index bf2a8cb3f0e2..15f103a8a9db 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -34,7 +34,6 @@ MODULE_LICENSE("GPL"); #define VERSION "arcnet: RFC1201 \"standard\" (`a') encapsulation support loaded.\n" - static __be16 type_trans(struct sk_buff *skb, struct net_device *dev); static void rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length); @@ -56,7 +55,6 @@ static struct ArcProto rfc1201_proto = .ack_tx = NULL }; - static int __init arcnet_rfc1201_init(void) { printk(VERSION); @@ -129,7 +127,6 @@ static __be16 type_trans(struct sk_buff *skb, struct net_device *dev) return htons(ETH_P_IP); } - /* packet receiver */ static void rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length) @@ -369,7 +366,6 @@ static void rx(struct net_device *dev, int bufnum, } } - /* Create the ARCnet hard/soft headers for RFC1201. 
*/ static int build_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, uint8_t daddr) @@ -437,7 +433,6 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, return hdr_size; } - static void load_pkt(struct net_device *dev, struct arc_hardware *hard, struct arc_rfc1201 *soft, int softlen, int bufnum) { @@ -470,7 +465,6 @@ static void load_pkt(struct net_device *dev, struct arc_hardware *hard, lp->lastload_dest = hard->dest; } - static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum) { @@ -478,7 +472,6 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, const int maxsegsize = XMTU - RFC1201_HDR_SIZE; struct Outgoing *out; - BUGMSG(D_DURING, "prepare_tx: txbufs=%d/%d/%d\n", lp->next_tx, lp->cur_tx, bufnum); @@ -506,7 +499,6 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, return 1; /* done */ } - static int continue_tx(struct net_device *dev, int bufnum) { struct arcnet_local *lp = netdev_priv(dev); diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index ccfd1d2f984b..78687885eb81 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -34,7 +34,6 @@ */ #define RECON_THRESHOLD 30 - /* * Define this to the minimum "timeout" value. If a transmit takes longer * than TX_TIMEOUT jiffies, Linux will abort the TX and retry. On a large @@ -44,11 +43,9 @@ */ #define TX_TIMEOUT (HZ * 200 / 1000) - /* Display warnings about the driver being an ALPHA version. */ #undef ALPHA_WARNING - /* * Debugging bitflags: each option can be enabled individually. * @@ -106,7 +103,6 @@ extern int arcnet_debug; call; \ } - /* * Time needed to reset the card - in ms (milliseconds). This works on my * SMC PC100. I can't find a reference that tells me just how long I @@ -182,7 +178,6 @@ extern int arcnet_debug; #define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, but default is 2.5MBit. */ - /* information needed to define an encapsulation driver */ struct ArcProto { char suffix; /* a for RFC1201, e for ether-encap, etc. */ @@ -204,7 +199,6 @@ struct ArcProto { extern struct ArcProto *arc_proto_map[256], *arc_proto_default, *arc_bcast_proto, *arc_raw_proto; - /* * "Incoming" is information needed for each address that could be sending * to us. Mostly for partially-received split packets. 
@@ -216,7 +210,6 @@ struct Incoming { numpackets; /* number of packets in split */ }; - /* only needed for RFC1201 */ struct Outgoing { struct ArcProto *proto; /* protocol driver that owns this: @@ -230,7 +223,6 @@ struct Outgoing { numsegs; /* number of segments */ }; - struct arcnet_local { uint8_t config, /* current value of CONFIG register */ timeout, /* Extended timeout for COM20020 */ @@ -251,7 +243,6 @@ struct arcnet_local { char *card_name; /* card ident string */ int card_flags; /* special card features */ - /* On preemtive and SMB a lock is needed */ spinlock_t lock; @@ -314,14 +305,11 @@ struct arcnet_local { void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ }; - #define ARCRESET(x) (lp->hw.reset(dev, (x))) #define ACOMMAND(x) (lp->hw.command(dev, (x))) #define ASTATUS() (lp->hw.status(dev)) #define AINTMASK(x) (lp->hw.intmask(dev, (x))) - - #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -- cgit v1.2.3 From d77510f3436e0db9b5e72fa8159ce26c3ac88d2d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:53 -0700 Subject: arcnet: Neaten BUGMSG macro defines These macros are actually printk and pr_cont uses with a flag. Add a new BUGLVL_TEST macro which is just the "should use" test and not an odd "if ()" macro to simplify uses in a new patch. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 78687885eb81..ad610208fbba 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -78,14 +78,24 @@ #endif extern int arcnet_debug; +#define BUGLVL_TEST(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) +#define BUGLVL(x) if (BUGLVL_TEST(x)) + /* macros to simplify debug checking */ -#define BUGLVL(x) if ((ARCNET_DEBUG_MAX) & arcnet_debug & (x)) -#define BUGMSG2(x, msg, args...) do { BUGLVL(x) printk(msg, ## args); } while (0) -#define BUGMSG(x, msg, args...) \ - BUGMSG2(x, "%s%6s: " msg, \ - x == D_NORMAL ? KERN_WARNING \ - : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name, ## args) +#define BUGMSG(x, fmt, ...) \ +do { \ + if (BUGLVL_TEST(x)) \ + printk("%s%6s: " fmt, \ + (x) == D_NORMAL ? KERN_WARNING : \ + (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ + dev->name, ##__VA_ARGS__); \ +} while (0) + +#define BUGMSG2(x, fmt, ...) \ +do { \ + if (BUGLVL_TEST(x)) \ + printk(fmt, ##__VA_ARGS__); \ +} while (0) /* see how long a function call takes to run, expressed in CPU cycles */ #define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ -- cgit v1.2.3 From 72aeea4841c037b9b3abf65859673cbd7b6664a9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:54 -0700 Subject: arcnet: Expand odd BUGLVL macro with if and uses Don't hide what should be obvious. Make the macro a simple test instead of using if and test. 
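At call sites, the effect (visible throughout the diff below) is that BUGLVL() no longer hides an "if": callers spell out the conditional themselves, so the flow of control reads as plain C. Schematically:

	/* before: the conditional is buried inside the macro */
	BUGLVL(D_NORMAL) printk(VERSION);

	/* after: BUGLVL() is an ordinary boolean test */
	if (BUGLVL(D_NORMAL))
		printk(VERSION);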
Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- drivers/net/arcnet/arc-rawmode.c | 3 ++- drivers/net/arcnet/arc-rimi.c | 20 +++++++++------- drivers/net/arcnet/arcnet.c | 29 ++++++++++++----------- drivers/net/arcnet/capmode.c | 9 ++++--- drivers/net/arcnet/com20020-isa.c | 3 ++- drivers/net/arcnet/com20020-pci.c | 3 ++- drivers/net/arcnet/com20020.c | 3 ++- drivers/net/arcnet/com90io.c | 6 +++-- drivers/net/arcnet/com90xx.c | 49 +++++++++++++++++++++++++-------------- drivers/net/arcnet/rfc1051.c | 3 ++- drivers/net/arcnet/rfc1201.c | 6 +++-- include/linux/arcdevice.h | 17 +++++++------- 12 files changed, 93 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index efdd8e95a2bf..998c8356e576 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -117,7 +117,8 @@ static void rx(struct net_device *dev, int bufnum, pkt->soft.raw + sizeof(pkt->soft), length - sizeof(pkt->soft)); - BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, skb, "rx"); skb->protocol = cpu_to_be16(ETH_P_ARCNET); netif_rx(skb); diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index c7ab23e8ec6a..cd47a1b3008c 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -82,18 +82,21 @@ static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offse */ static int __init arcrimi_probe(struct net_device *dev) { - BUGLVL(D_NORMAL) printk(VERSION); - BUGLVL(D_NORMAL) printk("E-mail me if you actually test the RIM I driver, please!\n"); - - BUGLVL(D_NORMAL) printk("Given: node %02Xh, shmem %lXh, irq %d\n", - dev->dev_addr[0], dev->mem_start, dev->irq); + if (BUGLVL(D_NORMAL)) { + printk(VERSION); + printk("E-mail me if you actually test the RIM I driver, please!\n"); + printk("Given: node %02Xh, shmem %lXh, irq %d\n", + dev->dev_addr[0], dev->mem_start, dev->irq); + } if (dev->mem_start <= 0 || dev->irq <= 0) { - BUGLVL(D_NORMAL) printk("No autoprobe for RIM I; you must specify the shmem and irq!\n"); + if (BUGLVL(D_NORMAL)) + printk("No autoprobe for RIM I; you must specify the shmem and irq!\n"); return -ENODEV; } if (dev->dev_addr[0] == 0) { - BUGLVL(D_NORMAL) printk("You need to specify your card's station ID!\n"); + if (BUGLVL(D_NORMAL)) + printk("You need to specify your card's station ID!\n"); return -ENODEV; } /* Grab the memory region at mem_start for MIRROR_SIZE bytes. @@ -102,7 +105,8 @@ static int __init arcrimi_probe(struct net_device *dev) * will be taken. 
*/ if (!request_mem_region(dev->mem_start, MIRROR_SIZE, "arcnet (90xx)")) { - BUGLVL(D_NORMAL) printk("Card memory already allocated\n"); + if (BUGLVL(D_NORMAL)) + printk("Card memory already allocated\n"); return -ENODEV; } return arcrimi_found(dev); diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 605b4a7abecc..7a90b8a45a37 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -117,7 +117,7 @@ static int __init arcnet_init(void) printk("arcnet loaded.\n"); #ifdef ALPHA_WARNING - BUGLVL(D_EXTRA) { + if (BUGLVL(D_EXTRA)) { printk("arcnet: ***\n" "arcnet: * Read arcnet.txt for important release notes!\n" "arcnet: *\n" @@ -132,11 +132,11 @@ static int __init arcnet_init(void) for (count = 0; count < 256; count++) arc_proto_map[count] = arc_proto_default; - BUGLVL(D_DURING) - printk("arcnet: struct sizes: %Zd %Zd %Zd %Zd %Zd\n", - sizeof(struct arc_hardware), sizeof(struct arc_rfc1201), - sizeof(struct arc_rfc1051), sizeof(struct arc_eth_encap), - sizeof(struct archdr)); + if (BUGLVL(D_DURING)) + printk("arcnet: struct sizes: %Zd %Zd %Zd %Zd %Zd\n", + sizeof(struct arc_hardware), sizeof(struct arc_rfc1201), + sizeof(struct arc_rfc1051), sizeof(struct arc_eth_encap), + sizeof(struct archdr)); return 0; } @@ -235,7 +235,7 @@ static void release_arcbuf(struct net_device *dev, int bufnum) lp->buf_queue[lp->first_free_buf++] = bufnum; lp->first_free_buf %= 5; - BUGLVL(D_DURING) { + if (BUGLVL(D_DURING)) { BUGMSG(D_DURING, "release_arcbuf: freed #%d; buffer queue is now: ", bufnum); for (i = lp->next_buf; i != lp->first_free_buf; i = (i + 1) % 5) @@ -268,7 +268,7 @@ static int get_arcbuf(struct net_device *dev) } } - BUGLVL(D_DURING) { + if (BUGLVL(D_DURING)) { BUGMSG(D_DURING, "get_arcbuf: got #%d; buffer queue is now: ", buf); for (i = lp->next_buf; i != lp->first_free_buf; i = (i + 1) % 5) BUGMSG2(D_DURING, "#%d ", lp->buf_queue[i]); @@ -356,7 +356,7 @@ int arcnet_open(struct net_device *dev) if (!try_module_get(lp->hw.owner)) return -ENODEV; - BUGLVL(D_PROTO) { + if (BUGLVL(D_PROTO)) { BUGMSG(D_PROTO, "protocol map (default is '%c'): ", arc_proto_default->suffix); for (count = 0; count < 256; count++) @@ -531,7 +531,8 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, BUGMSG(D_SKB_SIZE, "skb: transmitting %d bytes to %02X\n", skb->len, pkt->hard.dest); - BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "tx"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, skb, "tx"); /* fits in one packet? 
*/ if (skb->len - ARC_HDR_SIZE > XMTU && !proto->continue_tx) { @@ -609,7 +610,8 @@ static int go_tx(struct net_device *dev) if (lp->cur_tx != -1 || lp->next_tx == -1) return 0; - BUGLVL(D_TX) arcnet_dump_packet(dev, lp->next_tx, "go_tx", 0); + if (BUGLVL(D_TX)) + arcnet_dump_packet(dev, lp->next_tx, "go_tx", 0); lp->cur_tx = lp->next_tx; lp->next_tx = -1; @@ -822,7 +824,8 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) } /* now process the received packet, if any */ if (recbuf != -1) { - BUGLVL(D_RX) arcnet_dump_packet(dev, recbuf, "rx irq", 0); + if (BUGLVL(D_RX)) + arcnet_dump_packet(dev, recbuf, "rx irq", 0); arcnet_rx(dev, recbuf); release_arcbuf(dev, recbuf); @@ -938,7 +941,7 @@ static void arcnet_rx(struct net_device *dev, int bufnum) /* call the right receiver for the protocol */ if (arc_proto_map[soft->proto]->is_ip) { - BUGLVL(D_PROTO) { + if (BUGLVL(D_PROTO)) { struct ArcProto *oldp = arc_proto_map[lp->default_proto[pkt.hard.source]], *newp = arc_proto_map[soft->proto]; diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index 8391324f684b..e7ec907f4680 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -84,7 +84,8 @@ static void rx(struct net_device *dev, int bufnum, + sizeof(int), length - sizeof(pkt->soft)); - BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, skb, "rx"); skb->protocol = cpu_to_be16(ETH_P_ARCNET); netif_rx(skb); @@ -190,7 +191,8 @@ static int ack_tx(struct net_device *dev, int acked) BUGMSG(D_DURING, "capmode: ack_tx: protocol: %x: result: %d\n", lp->outgoing.skb->protocol, acked); - BUGLVL(D_SKB) arcnet_dump_skb(dev, lp->outgoing.skb, "ack_tx"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, lp->outgoing.skb, "ack_tx"); /* Now alloc a skb to send back up through the layers: */ ackskb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC); @@ -216,7 +218,8 @@ static int ack_tx(struct net_device *dev, int acked) ackskb->protocol = cpu_to_be16(ETH_P_ARCNET); - BUGLVL(D_SKB) arcnet_dump_skb(dev, ackskb, "ack_tx_recv"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, ackskb, "ack_tx_recv"); netif_rx(ackskb); free_outskb: diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index 6e38a19d7210..af87c7482478 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -53,7 +53,8 @@ static int __init com20020isa_probe(struct net_device *dev) struct arcnet_local *lp = netdev_priv(dev); int err; - BUGLVL(D_NORMAL) printk(VERSION); + if (BUGLVL(D_NORMAL)) + printk(VERSION); ioaddr = dev->base_addr; if (!ioaddr) { diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 72334b59aa26..c8184de4480d 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -402,7 +402,8 @@ static struct pci_driver com20020pci_driver = { static int __init com20020pci_init(void) { - BUGLVL(D_NORMAL) printk(VERSION); + if (BUGLVL(D_NORMAL)) + printk(VERSION); return pci_register_driver(&com20020pci_driver); } diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 63697272f146..3170e8ebb7fa 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -365,7 +365,8 @@ MODULE_LICENSE("GPL"); static int __init com20020_module_init(void) { - BUGLVL(D_NORMAL) printk(VERSION); + if (BUGLVL(D_NORMAL)) + printk(VERSION); return 0; } diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 6ec36f9975ba..33a8531e90c7 100644 --- 
a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -145,8 +145,10 @@ static int __init com90io_probe(struct net_device *dev) int ioaddr = dev->base_addr, status; unsigned long airqmask; - BUGLVL(D_NORMAL) printk(VERSION); - BUGLVL(D_NORMAL) printk("E-mail me if you actually test this driver, please!\n"); + if (BUGLVL(D_NORMAL)) { + printk(VERSION); + printk("E-mail me if you actually test this driver, please!\n"); + } if (!ioaddr) { BUGMSG(D_NORMAL, "No autoprobe for IO mapped cards; you must specify the base address!\n"); diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 4781cd696dd1..b5e1c1904f13 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -133,7 +133,8 @@ static void __init com90xx_probe(void) return; } - BUGLVL(D_NORMAL) printk(VERSION); + if (BUGLVL(D_NORMAL)) + printk(VERSION); /* set up the arrays where we'll store the possible probe addresses */ numports = numshmems = 0; @@ -166,14 +167,16 @@ static void __init com90xx_probe(void) if (!request_region(*port, ARCNET_TOTAL_SIZE, "arcnet (90xx)")) { BUGMSG2(D_INIT_REASONS, "(request_region)\n"); BUGMSG2(D_INIT_REASONS, "S1: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; *port-- = ports[--numports]; continue; } if (ASTATUS() == 0xFF) { BUGMSG2(D_INIT_REASONS, "(empty)\n"); BUGMSG2(D_INIT_REASONS, "S1: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); *port-- = ports[--numports]; continue; @@ -182,7 +185,8 @@ static void __init com90xx_probe(void) BUGMSG2(D_INIT_REASONS, "\n"); BUGMSG2(D_INIT_REASONS, "S1: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; } BUGMSG2(D_INIT, "\n"); @@ -227,21 +231,24 @@ static void __init com90xx_probe(void) if (!request_mem_region(*p, MIRROR_SIZE, "arcnet (90xx)")) { BUGMSG2(D_INIT_REASONS, "(request_mem_region)\n"); BUGMSG2(D_INIT_REASONS, "Stage 3: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; goto out; } base = ioremap(*p, MIRROR_SIZE); if (!base) { BUGMSG2(D_INIT_REASONS, "(ioremap)\n"); BUGMSG2(D_INIT_REASONS, "Stage 3: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; goto out1; } if (readb(base) != TESTvalue) { BUGMSG2(D_INIT_REASONS, "(%02Xh != %02Xh)\n", readb(base), TESTvalue); BUGMSG2(D_INIT_REASONS, "S3: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; goto out2; } /* By writing 0x42 to the TESTvalue location, we also make @@ -257,7 +264,8 @@ static void __init com90xx_probe(void) } BUGMSG2(D_INIT_REASONS, "\n"); BUGMSG2(D_INIT_REASONS, "S3: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; iomem[index] = base; continue; out2: @@ -319,7 +327,8 @@ static void __init com90xx_probe(void) != (NORXflag | RECONflag | TXFREEflag | RESETflag)) { BUGMSG2(D_INIT_REASONS, "(status=%Xh)\n", status); BUGMSG2(D_INIT_REASONS, "S5: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); *port-- = ports[--numports]; continue; @@ -330,7 +339,8 @@ static void __init com90xx_probe(void) BUGMSG2(D_INIT_REASONS, " (eternal reset, status=%Xh)\n", status); BUGMSG2(D_INIT_REASONS, "S5: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); *port-- = ports[--numports]; 
continue; @@ -352,7 +362,8 @@ static void __init com90xx_probe(void) if (airq <= 0) { BUGMSG2(D_INIT_REASONS, "(airq=%d)\n", airq); BUGMSG2(D_INIT_REASONS, "S5: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT_REASONS)) + numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); *port-- = ports[--numports]; continue; @@ -406,16 +417,20 @@ static void __init com90xx_probe(void) } if (openparen) { - BUGLVL(D_INIT) printk("no matching shmem)\n"); - BUGLVL(D_INIT_REASONS) printk("S5: "); - BUGLVL(D_INIT_REASONS) numprint = 0; + if (BUGLVL(D_INIT)) + printk("no matching shmem)\n"); + if (BUGLVL(D_INIT_REASONS)) { + printk("S5: "); + numprint = 0; + } } if (!found) release_region(*port, ARCNET_TOTAL_SIZE); *port-- = ports[--numports]; } - BUGLVL(D_INIT_REASONS) printk("\n"); + if (BUGLVL(D_INIT_REASONS)) + printk("\n"); /* Now put back TESTvalue on all leftover shmems. */ for (index = 0; index < numshmems; index++) { @@ -603,8 +618,8 @@ static int com90xx_reset(struct net_device *dev, int really_reset) ACOMMAND(CONFIGcmd | EXTconf); /* clean out all the memory to make debugging make more sense :) */ - BUGLVL(D_DURING) - memset_io(lp->mem_start, 0x42, 2048); + if (BUGLVL(D_DURING)) + memset_io(lp->mem_start, 0x42, 2048); /* done! return success. */ return 0; diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index e78b76535acc..824d71fcfd53 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -149,7 +149,8 @@ static void rx(struct net_device *dev, int bufnum, pkt->soft.raw + sizeof(pkt->soft), length - sizeof(pkt->soft)); - BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, skb, "rx"); skb->protocol = type_trans(skb, dev); netif_rx(skb); diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index 4ebfbcbe5de3..d052976ebdfa 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -221,7 +221,8 @@ static void rx(struct net_device *dev, int bufnum, dev->stats.rx_crc_errors++; } } - BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, skb, "rx"); skb->protocol = type_trans(skb, dev); netif_rx(skb); @@ -353,7 +354,8 @@ static void rx(struct net_device *dev, int bufnum, skb->len, pkt->hard.source); BUGMSG(D_SKB_SIZE, "skb: received %d bytes from %02X (split)\n", skb->len, pkt->hard.source); - BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); + if (BUGLVL(D_SKB)) + arcnet_dump_skb(dev, skb, "rx"); skb->protocol = type_trans(skb, dev); netif_rx(skb); diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index ad610208fbba..f07c66383b88 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -78,13 +78,12 @@ #endif extern int arcnet_debug; -#define BUGLVL_TEST(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) -#define BUGLVL(x) if (BUGLVL_TEST(x)) +#define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) /* macros to simplify debug checking */ #define BUGMSG(x, fmt, ...) \ do { \ - if (BUGLVL_TEST(x)) \ + if (BUGLVL(x)) \ printk("%s%6s: " fmt, \ (x) == D_NORMAL ? KERN_WARNING : \ (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ @@ -93,12 +92,14 @@ do { \ #define BUGMSG2(x, fmt, ...) 
\ do { \ - if (BUGLVL_TEST(x)) \ + if (BUGLVL(x)) \ printk(fmt, ##__VA_ARGS__); \ } while (0) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ +#define TIME(name, bytes, call) \ +do { \ + if (BUGLVL(D_TIMING)) { \ unsigned long _x, _y; \ _x = get_cycles(); \ call; \ _y = get_cycles(); \ @@ -108,10 +109,10 @@ do { \ "%lu Kbytes/100Mcycle\n", \ name, bytes, _y - _x, \ 100000000 / 1024 * bytes / (_y - _x + 1)); \ - } \ - else { \ + } else { \ call; \ - } + } \ +} while (0) /* * Time needed to reset the card - in ms (milliseconds). This works on my -- cgit v1.2.3 From a34c0932c3b2f28542825ffc5280d562c49ad42d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:55 -0700 Subject: arcnet: Convert BUGMSG and BUGMSG2 to arc_printk and arc_cont These macros don't actually represent BUG uses but are more commonly used as logging macros, so use a more kernel-style macro. Convert the BUGMSG from a netdev_-like use to actually use netdev_. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- drivers/net/arcnet/arc-rawmode.c | 16 +-- drivers/net/arcnet/arc-rimi.c | 24 +++-- drivers/net/arcnet/arcnet.c | 212 ++++++++++++++++++++------------------ drivers/net/arcnet/capmode.c | 35 ++++--- drivers/net/arcnet/com20020-isa.c | 15 +-- drivers/net/arcnet/com20020-pci.c | 2 +- drivers/net/arcnet/com20020.c | 64 ++++++------ drivers/net/arcnet/com90io.c | 43 ++++---- drivers/net/arcnet/com90xx.c | 119 ++++++++++----------- drivers/net/arcnet/rfc1051.c | 16 +-- drivers/net/arcnet/rfc1201.c | 104 ++++++++++--------- include/linux/arcdevice.h | 32 +++--- 12 files changed, 355 insertions(+), 327 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index 998c8356e576..034c8988f987 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -89,7 +89,7 @@ static void rx(struct net_device *dev, int bufnum, struct archdr *pkt = pkthdr; int ofs; - BUGMSG(D_DURING, "it's a raw packet (length=%d)\n", length); + arc_printk(D_DURING, dev, "it's a raw packet (length=%d)\n", length); if (length > MTU) ofs = 512 - length; @@ -98,7 +98,7 @@ static void rx(struct net_device *dev, int bufnum, skb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC); if (skb == NULL) { - BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n"); + arc_printk(D_NORMAL, dev, "Memory squeeze, dropping packet\n"); dev->stats.rx_dropped++; return; } @@ -163,15 +163,15 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, struct arc_hardware *hard = &pkt->hard; int ofs; - BUGMSG(D_DURING, "prepare_tx: txbufs=%d/%d/%d\n", - lp->next_tx, lp->cur_tx, bufnum); + arc_printk(D_DURING, dev, "prepare_tx: txbufs=%d/%d/%d\n", + lp->next_tx, lp->cur_tx, bufnum); length -= ARC_HDR_SIZE; /* hard header is not included in packet length */ if (length > XMTU) { /* should never happen! other people already check for this. */ - BUGMSG(D_NORMAL, "Bug! prepare_tx with size %d (> %d)\n", - length, XMTU); + arc_printk(D_NORMAL, dev, "Bug! 
prepare_tx with size %d (> %d)\n", + length, XMTU); length = XMTU; } if (length >= MinTU) { @@ -184,8 +184,8 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, hard->offset[0] = ofs = 256 - length; } - BUGMSG(D_DURING, "prepare_tx: length=%d ofs=%d\n", - length, ofs); + arc_printk(D_DURING, dev, "prepare_tx: length=%d ofs=%d\n", + length, ofs); lp->hw.copy_to_card(dev, bufnum, 0, hard, ARC_HDR_SIZE); lp->hw.copy_to_card(dev, bufnum, ofs, &pkt->soft, length); diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index cd47a1b3008c..25f84b7437f3 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -147,7 +147,7 @@ static int __init arcrimi_found(struct net_device *dev) p = ioremap(dev->mem_start, MIRROR_SIZE); if (!p) { release_mem_region(dev->mem_start, MIRROR_SIZE); - BUGMSG(D_NORMAL, "Can't ioremap\n"); + arc_printk(D_NORMAL, dev, "Can't ioremap\n"); return -ENODEV; } @@ -155,7 +155,7 @@ static int __init arcrimi_found(struct net_device *dev) if (request_irq(dev->irq, arcnet_interrupt, 0, "arcnet (RIM I)", dev)) { iounmap(p); release_mem_region(dev->mem_start, MIRROR_SIZE); - BUGMSG(D_NORMAL, "Can't get IRQ %d!\n", dev->irq); + arc_printk(D_NORMAL, dev, "Can't get IRQ %d!\n", dev->irq); return -ENODEV; } @@ -210,23 +210,24 @@ static int __init arcrimi_found(struct net_device *dev) if (!request_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1, "arcnet (90xx)")) { - BUGMSG(D_NORMAL, "Card memory already allocated\n"); + arc_printk(D_NORMAL, dev, "Card memory already allocated\n"); goto err_free_irq; } lp->mem_start = ioremap(dev->mem_start, dev->mem_end - dev->mem_start + 1); if (!lp->mem_start) { - BUGMSG(D_NORMAL, "Can't remap device memory!\n"); + arc_printk(D_NORMAL, dev, "Can't remap device memory!\n"); goto err_release_mem; } /* get and check the station ID from offset 1 in shmem */ dev->dev_addr[0] = readb(lp->mem_start + 1); - BUGMSG(D_NORMAL, "ARCnet RIM I: station %02Xh found at IRQ %d, ShMem %lXh (%ld*%d bytes)\n", - dev->dev_addr[0], - dev->irq, dev->mem_start, - (dev->mem_end - dev->mem_start + 1) / mirror_size, mirror_size); + arc_printk(D_NORMAL, dev, "ARCnet RIM I: station %02Xh found at IRQ %d, ShMem %lXh (%ld*%d bytes)\n", + dev->dev_addr[0], + dev->irq, dev->mem_start, + (dev->mem_end - dev->mem_start + 1) / mirror_size, + mirror_size); err = register_netdev(dev); if (err) @@ -255,7 +256,8 @@ static int arcrimi_reset(struct net_device *dev, int really_reset) struct arcnet_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->mem_start + 0x800; - BUGMSG(D_INIT, "Resetting %s (status=%02Xh)\n", dev->name, ASTATUS()); + arc_printk(D_INIT, dev, "Resetting %s (status=%02Xh)\n", + dev->name, ASTATUS()); if (really_reset) { writeb(TESTvalue, ioaddr - 0x800); /* fake reset */ @@ -301,7 +303,7 @@ static void arcrimi_copy_to_card(struct net_device *dev, int bufnum, int offset, struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + 0x800 + bufnum * 512 + offset; - TIME("memcpy_toio", count, memcpy_toio(memaddr, buf, count)); + TIME(dev, "memcpy_toio", count, memcpy_toio(memaddr, buf, count)); } static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offset, @@ -310,7 +312,7 @@ static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offse struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + 0x800 + bufnum * 512 + offset; - TIME("memcpy_fromio", count, memcpy_fromio(buf, memaddr, count)); + 
TIME(dev, "memcpy_fromio", count, memcpy_fromio(buf, memaddr, count)); } static int node; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 7a90b8a45a37..2aab7e2f7e4a 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -236,11 +236,11 @@ static void release_arcbuf(struct net_device *dev, int bufnum) lp->first_free_buf %= 5; if (BUGLVL(D_DURING)) { - BUGMSG(D_DURING, "release_arcbuf: freed #%d; buffer queue is now: ", - bufnum); + arc_printk(D_DURING, dev, "release_arcbuf: freed #%d; buffer queue is now: ", + bufnum); for (i = lp->next_buf; i != lp->first_free_buf; i = (i + 1) % 5) - BUGMSG2(D_DURING, "#%d ", lp->buf_queue[i]); - BUGMSG2(D_DURING, "\n"); + arc_cont(D_DURING, "#%d ", lp->buf_queue[i]); + arc_cont(D_DURING, "\n"); } } @@ -254,14 +254,14 @@ static int get_arcbuf(struct net_device *dev) if (!atomic_dec_and_test(&lp->buf_lock)) { /* already in this function */ - BUGMSG(D_NORMAL, "get_arcbuf: overlap (%d)!\n", - lp->buf_lock.counter); + arc_printk(D_NORMAL, dev, "get_arcbuf: overlap (%d)!\n", + lp->buf_lock.counter); } else { /* we can continue */ if (lp->next_buf >= 5) lp->next_buf -= 5; if (lp->next_buf == lp->first_free_buf) { - BUGMSG(D_NORMAL, "get_arcbuf: BUG: no buffers are available??\n"); + arc_printk(D_NORMAL, dev, "get_arcbuf: BUG: no buffers are available??\n"); } else { buf = lp->buf_queue[lp->next_buf++]; lp->next_buf %= 5; @@ -269,10 +269,11 @@ static int get_arcbuf(struct net_device *dev) } if (BUGLVL(D_DURING)) { - BUGMSG(D_DURING, "get_arcbuf: got #%d; buffer queue is now: ", buf); + arc_printk(D_DURING, dev, "get_arcbuf: got #%d; buffer queue is now: ", + buf); for (i = lp->next_buf; i != lp->first_free_buf; i = (i + 1) % 5) - BUGMSG2(D_DURING, "#%d ", lp->buf_queue[i]); - BUGMSG2(D_DURING, "\n"); + arc_cont(D_DURING, "#%d ", lp->buf_queue[i]); + arc_cont(D_DURING, "\n"); } atomic_inc(&lp->buf_lock); @@ -351,20 +352,20 @@ int arcnet_open(struct net_device *dev) struct arcnet_local *lp = netdev_priv(dev); int count, newmtu, error; - BUGMSG(D_INIT, "opened."); + arc_printk(D_INIT, dev, "opened."); if (!try_module_get(lp->hw.owner)) return -ENODEV; if (BUGLVL(D_PROTO)) { - BUGMSG(D_PROTO, "protocol map (default is '%c'): ", - arc_proto_default->suffix); + arc_printk(D_PROTO, dev, "protocol map (default is '%c'): ", + arc_proto_default->suffix); for (count = 0; count < 256; count++) - BUGMSG2(D_PROTO, "%c", arc_proto_map[count]->suffix); - BUGMSG2(D_PROTO, "\n"); + arc_cont(D_PROTO, "%c", arc_proto_map[count]->suffix); + arc_cont(D_PROTO, "\n"); } - BUGMSG(D_INIT, "arcnet_open: resetting card.\n"); + arc_printk(D_INIT, dev, "arcnet_open: resetting card.\n"); /* try to put the card in a defined state - if it fails the first * time, actually reset it. @@ -377,7 +378,7 @@ int arcnet_open(struct net_device *dev) if (newmtu < dev->mtu) dev->mtu = newmtu; - BUGMSG(D_INIT, "arcnet_open: mtu: %d.\n", dev->mtu); + arc_printk(D_INIT, dev, "arcnet_open: mtu: %d.\n", dev->mtu); /* autodetect the encapsulation for each host. */ memset(lp->default_proto, 0, sizeof(lp->default_proto)); @@ -408,27 +409,28 @@ int arcnet_open(struct net_device *dev) lp->hw.open(dev); if (dev->dev_addr[0] == 0) - BUGMSG(D_NORMAL, "WARNING! Station address 00 is reserved for broadcasts!\n"); + arc_printk(D_NORMAL, dev, "WARNING! Station address 00 is reserved for broadcasts!\n"); else if (dev->dev_addr[0] == 255) - BUGMSG(D_NORMAL, "WARNING! Station address FF may confuse DOS networking programs!\n"); + arc_printk(D_NORMAL, dev, "WARNING! 
Station address FF may confuse DOS networking programs!\n"); - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); if (ASTATUS() & RESETflag) { - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", + __FILE__, __LINE__, __func__); ACOMMAND(CFLAGScmd | RESETclear); } - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); /* make sure we're ready to receive IRQ's. */ AINTMASK(0); udelay(1); /* give it time to set the mask before * we reset it again. (may not even be * necessary) */ - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); lp->intmask = NORXflag | RECONflag; AINTMASK(lp->intmask); - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); netif_start_queue(dev); @@ -466,20 +468,21 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, uint8_t _daddr, proto_num; struct ArcProto *proto; - BUGMSG(D_DURING, - "create header from %d to %d; protocol %d (%Xh); size %u.\n", - saddr ? *(uint8_t *)saddr : -1, - daddr ? *(uint8_t *)daddr : -1, - type, type, len); + arc_printk(D_DURING, dev, + "create header from %d to %d; protocol %d (%Xh); size %u.\n", + saddr ? *(uint8_t *)saddr : -1, + daddr ? *(uint8_t *)daddr : -1, + type, type, len); if (skb->len != 0 && len != skb->len) - BUGMSG(D_NORMAL, "arcnet_header: Yikes! skb->len(%d) != len(%d)!\n", - skb->len, len); + arc_printk(D_NORMAL, dev, "arcnet_header: Yikes! skb->len(%d) != len(%d)!\n", + skb->len, len); /* Type is host order - ? */ if (type == ETH_P_ARCNET) { proto = arc_raw_proto; - BUGMSG(D_DEBUG, "arc_raw_proto used. proto='%c'\n", proto->suffix); + arc_printk(D_DEBUG, dev, "arc_raw_proto used. proto='%c'\n", + proto->suffix); _daddr = daddr ? *(uint8_t *)daddr : 0; } else if (!daddr) { /* if the dest addr isn't provided, we can't choose an @@ -490,19 +493,19 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, *(uint16_t *)skb_push(skb, 2) = type; /* XXX: Why not use skb->mac_len? */ if (skb->network_header - skb->mac_header != 2) - BUGMSG(D_NORMAL, "arcnet_header: Yikes! diff (%d) is not 2!\n", - (int)(skb->network_header - skb->mac_header)); + arc_printk(D_NORMAL, dev, "arcnet_header: Yikes! diff (%u) is not 2!\n", + skb->network_header - skb->mac_header); return -2; /* return error -- can't transmit yet! */ } else { /* otherwise, we can just add the header as usual. 
*/ _daddr = *(uint8_t *)daddr; proto_num = lp->default_proto[_daddr]; proto = arc_proto_map[proto_num]; - BUGMSG(D_DURING, "building header for %02Xh using protocol '%c'\n", - proto_num, proto->suffix); + arc_printk(D_DURING, dev, "building header for %02Xh using protocol '%c'\n", + proto_num, proto->suffix); if (proto == &arc_proto_null && arc_bcast_proto != proto) { - BUGMSG(D_DURING, "actually, let's use '%c' instead.\n", - arc_bcast_proto->suffix); + arc_printk(D_DURING, dev, "actually, let's use '%c' instead.\n", + arc_bcast_proto->suffix); proto = arc_bcast_proto; } } @@ -521,22 +524,22 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, unsigned long flags; int freeskb, retval; - BUGMSG(D_DURING, - "transmit requested (status=%Xh, txbufs=%d/%d, len=%d, protocol %x)\n", - ASTATUS(), lp->cur_tx, lp->next_tx, skb->len, skb->protocol); + arc_printk(D_DURING, dev, + "transmit requested (status=%Xh, txbufs=%d/%d, len=%d, protocol %x)\n", + ASTATUS(), lp->cur_tx, lp->next_tx, skb->len, skb->protocol); pkt = (struct archdr *)skb->data; soft = &pkt->soft.rfc1201; proto = arc_proto_map[soft->proto]; - BUGMSG(D_SKB_SIZE, "skb: transmitting %d bytes to %02X\n", - skb->len, pkt->hard.dest); + arc_printk(D_SKB_SIZE, dev, "skb: transmitting %d bytes to %02X\n", + skb->len, pkt->hard.dest); if (BUGLVL(D_SKB)) arcnet_dump_skb(dev, skb, "tx"); /* fits in one packet? */ if (skb->len - ARC_HDR_SIZE > XMTU && !proto->continue_tx) { - BUGMSG(D_NORMAL, "fixme: packet too large: compensating badly!\n"); + arc_printk(D_NORMAL, dev, "fixme: packet too large: compensating badly!\n"); dev_kfree_skb(skb); return NETDEV_TX_OK; /* don't try again */ } @@ -569,9 +572,9 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, if (proto->continue_tx && proto->continue_tx(dev, txbuf)) { - BUGMSG(D_NORMAL, - "bug! continue_tx finished the first time! (proto='%c')\n", - proto->suffix); + arc_printk(D_NORMAL, dev, + "bug! continue_tx finished the first time! 
(proto='%c')\n", + proto->suffix); } } retval = NETDEV_TX_OK; @@ -581,14 +584,16 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, freeskb = 0; } - BUGMSG(D_DEBUG, "%s: %d: %s, status: %x\n", __FILE__, __LINE__, __func__, ASTATUS()); + arc_printk(D_DEBUG, dev, "%s: %d: %s, status: %x\n", + __FILE__, __LINE__, __func__, ASTATUS()); /* make sure we didn't ignore a TX IRQ while we were in here */ AINTMASK(0); - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); lp->intmask |= TXFREEflag | EXCNAKflag; AINTMASK(lp->intmask); - BUGMSG(D_DEBUG, "%s: %d: %s, status: %x\n", __FILE__, __LINE__, __func__, ASTATUS()); + arc_printk(D_DEBUG, dev, "%s: %d: %s, status: %x\n", + __FILE__, __LINE__, __func__, ASTATUS()); spin_unlock_irqrestore(&lp->lock, flags); if (freeskb) @@ -604,8 +609,8 @@ static int go_tx(struct net_device *dev) { struct arcnet_local *lp = netdev_priv(dev); - BUGMSG(D_DURING, "go_tx: status=%Xh, intmask=%Xh, next_tx=%d, cur_tx=%d\n", - ASTATUS(), lp->intmask, lp->next_tx, lp->cur_tx); + arc_printk(D_DURING, dev, "go_tx: status=%Xh, intmask=%Xh, next_tx=%d, cur_tx=%d\n", + ASTATUS(), lp->intmask, lp->next_tx, lp->cur_tx); if (lp->cur_tx != -1 || lp->next_tx == -1) return 0; @@ -655,8 +660,8 @@ void arcnet_timeout(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); if (time_after(jiffies, lp->last_timeout + 10 * HZ)) { - BUGMSG(D_EXTRA, "tx timed out%s (status=%Xh, intmask=%Xh, dest=%02Xh)\n", - msg, status, lp->intmask, lp->lasttrans_dest); + arc_printk(D_EXTRA, dev, "tx timed out%s (status=%Xh, intmask=%Xh, dest=%02Xh)\n", + msg, status, lp->intmask, lp->lasttrans_dest); lp->last_timeout = jiffies; } @@ -675,9 +680,9 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) int recbuf, status, diagstatus, didsomething, boguscount; int retval = IRQ_NONE; - BUGMSG(D_DURING, "\n"); + arc_printk(D_DURING, dev, "\n"); - BUGMSG(D_DURING, "in arcnet_interrupt\n"); + arc_printk(D_DURING, dev, "in arcnet_interrupt\n"); lp = netdev_priv(dev); BUG_ON(!lp); @@ -695,16 +700,16 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) return retval; } - BUGMSG(D_DURING, "in arcnet_inthandler (status=%Xh, intmask=%Xh)\n", - ASTATUS(), lp->intmask); + arc_printk(D_DURING, dev, "in arcnet_inthandler (status=%Xh, intmask=%Xh)\n", + ASTATUS(), lp->intmask); boguscount = 5; do { status = ASTATUS(); diagstatus = (status >> 8) & 0xFF; - BUGMSG(D_DEBUG, "%s: %d: %s: status=%x\n", - __FILE__, __LINE__, __func__, status); + arc_printk(D_DEBUG, dev, "%s: %d: %s: status=%x\n", + __FILE__, __LINE__, __func__, status); didsomething = 0; /* RESET flag was enabled - card is resetting and if RX is @@ -714,7 +719,8 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) * Clear it out and start over. 
*/ if (status & RESETflag) { - BUGMSG(D_NORMAL, "spurious reset (status=%Xh)\n", status); + arc_printk(D_NORMAL, dev, "spurious reset (status=%Xh)\n", + status); arcnet_close(dev); arcnet_open(dev); @@ -732,21 +738,21 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) recbuf = -1; if (status & lp->intmask & NORXflag) { recbuf = lp->cur_rx; - BUGMSG(D_DURING, "Buffer #%d: receive irq (status=%Xh)\n", - recbuf, status); + arc_printk(D_DURING, dev, "Buffer #%d: receive irq (status=%Xh)\n", + recbuf, status); lp->cur_rx = get_arcbuf(dev); if (lp->cur_rx != -1) { - BUGMSG(D_DURING, "enabling receive to buffer #%d\n", - lp->cur_rx); + arc_printk(D_DURING, dev, "enabling receive to buffer #%d\n", + lp->cur_rx); ACOMMAND(RXcmd | (lp->cur_rx << 3) | RXbcasts); } didsomething++; } if ((diagstatus & EXCNAKflag)) { - BUGMSG(D_DURING, "EXCNAK IRQ (diagstat=%Xh)\n", - diagstatus); + arc_printk(D_DURING, dev, "EXCNAK IRQ (diagstat=%Xh)\n", + diagstatus); ACOMMAND(NOTXcmd); /* disable transmit */ lp->excnak_pending = 1; @@ -760,20 +766,22 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if ((status & lp->intmask & TXFREEflag) || lp->timed_out) { lp->intmask &= ~(TXFREEflag | EXCNAKflag); - BUGMSG(D_DURING, "TX IRQ (stat=%Xh)\n", status); + arc_printk(D_DURING, dev, "TX IRQ (stat=%Xh)\n", status); if (lp->cur_tx != -1 && !lp->timed_out) { if (!(status & TXACKflag)) { if (lp->lasttrans_dest != 0) { - BUGMSG(D_EXTRA, - "transmit was not acknowledged! (status=%Xh, dest=%02Xh)\n", - status, lp->lasttrans_dest); + arc_printk(D_EXTRA, dev, + "transmit was not acknowledged! (status=%Xh, dest=%02Xh)\n", + status, + lp->lasttrans_dest); dev->stats.tx_errors++; dev->stats.tx_carrier_errors++; } else { - BUGMSG(D_DURING, - "broadcast was not acknowledged; that's normal (status=%Xh, dest=%02Xh)\n", - status, lp->lasttrans_dest); + arc_printk(D_DURING, dev, + "broadcast was not acknowledged; that's normal (status=%Xh, dest=%02Xh)\n", + status, + lp->lasttrans_dest); } } @@ -836,29 +844,29 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) ACOMMAND(CFLAGScmd | CONFIGclear); dev->stats.tx_carrier_errors++; - BUGMSG(D_RECON, "Network reconfiguration detected (status=%Xh)\n", - status); + arc_printk(D_RECON, dev, "Network reconfiguration detected (status=%Xh)\n", + status); /* MYRECON bit is at bit 7 of diagstatus */ if (diagstatus & 0x80) - BUGMSG(D_RECON, "Put out that recon myself\n"); + arc_printk(D_RECON, dev, "Put out that recon myself\n"); /* is the RECON info empty or old? 
*/ if (!lp->first_recon || !lp->last_recon || time_after(jiffies, lp->last_recon + HZ * 10)) { if (lp->network_down) - BUGMSG(D_NORMAL, "reconfiguration detected: cabling restored?\n"); + arc_printk(D_NORMAL, dev, "reconfiguration detected: cabling restored?\n"); lp->first_recon = lp->last_recon = jiffies; lp->num_recons = lp->network_down = 0; - BUGMSG(D_DURING, "recon: clearing counters.\n"); + arc_printk(D_DURING, dev, "recon: clearing counters.\n"); } else { /* add to current RECON counter */ lp->last_recon = jiffies; lp->num_recons++; - BUGMSG(D_DURING, "recon: counter=%d, time=%lds, net=%d\n", - lp->num_recons, - (lp->last_recon - lp->first_recon) / HZ, - lp->network_down); + arc_printk(D_DURING, dev, "recon: counter=%d, time=%lds, net=%d\n", + lp->num_recons, + (lp->last_recon - lp->first_recon) / HZ, + lp->network_down); /* if network is marked up; * and first_recon and last_recon are 60+ apart; @@ -870,7 +878,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) (lp->last_recon - lp->first_recon) <= HZ * 60 && lp->num_recons >= RECON_THRESHOLD) { lp->network_down = 1; - BUGMSG(D_NORMAL, "many reconfigurations detected: cabling problem?\n"); + arc_printk(D_NORMAL, dev, "many reconfigurations detected: cabling problem?\n"); } else if (!lp->network_down && lp->last_recon - lp->first_recon > HZ * 60) { /* reset counters if we've gone for over a minute. */ @@ -881,20 +889,20 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) } else if (lp->network_down && time_after(jiffies, lp->last_recon + HZ * 10)) { if (lp->network_down) - BUGMSG(D_NORMAL, "cabling restored?\n"); + arc_printk(D_NORMAL, dev, "cabling restored?\n"); lp->first_recon = lp->last_recon = 0; lp->num_recons = lp->network_down = 0; - BUGMSG(D_DURING, "not recon: clearing counters anyway.\n"); + arc_printk(D_DURING, dev, "not recon: clearing counters anyway.\n"); } if (didsomething) retval |= IRQ_HANDLED; } while (--boguscount && didsomething); - BUGMSG(D_DURING, "arcnet_interrupt complete (status=%Xh, count=%d)\n", - ASTATUS(), boguscount); - BUGMSG(D_DURING, "\n"); + arc_printk(D_DURING, dev, "arcnet_interrupt complete (status=%Xh, count=%d)\n", + ASTATUS(), boguscount); + arc_printk(D_DURING, dev, "\n"); AINTMASK(0); udelay(1); @@ -933,8 +941,8 @@ static void arcnet_rx(struct net_device *dev, int bufnum) lp->hw.copy_from_card(dev, bufnum, ofs, soft, length); } - BUGMSG(D_DURING, "Buffer #%d: received packet from %02Xh to %02Xh (%d+4 bytes)\n", - bufnum, pkt.hard.source, pkt.hard.dest, length); + arc_printk(D_DURING, dev, "Buffer #%d: received packet from %02Xh to %02Xh (%d+4 bytes)\n", + bufnum, pkt.hard.source, pkt.hard.dest, length); dev->stats.rx_packets++; dev->stats.rx_bytes += length + ARC_HDR_SIZE; @@ -947,10 +955,10 @@ static void arcnet_rx(struct net_device *dev, int bufnum) *newp = arc_proto_map[soft->proto]; if (oldp != newp) { - BUGMSG(D_PROTO, - "got protocol %02Xh; encap for host %02Xh is now '%c' (was '%c')\n", - soft->proto, pkt.hard.source, - newp->suffix, oldp->suffix); + arc_printk(D_PROTO, dev, + "got protocol %02Xh; encap for host %02Xh is now '%c' (was '%c')\n", + soft->proto, pkt.hard.source, + newp->suffix, oldp->suffix); } } @@ -967,9 +975,9 @@ static void arcnet_rx(struct net_device *dev, int bufnum) static void null_rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length) { - BUGMSG(D_PROTO, - "rx: don't know how to deal with proto %02Xh from host %02Xh.\n", - pkthdr->soft.rfc1201.proto, pkthdr->hard.source); + arc_printk(D_PROTO, dev, + "rx: don't know how to deal with 
proto %02Xh from host %02Xh.\n", + pkthdr->soft.rfc1201.proto, pkthdr->hard.source); } static int null_build_header(struct sk_buff *skb, struct net_device *dev, @@ -977,9 +985,9 @@ static int null_build_header(struct sk_buff *skb, struct net_device *dev, { struct arcnet_local *lp = netdev_priv(dev); - BUGMSG(D_PROTO, - "tx: can't build header for encap %02Xh; load a protocol driver.\n", - lp->default_proto[daddr]); + arc_printk(D_PROTO, dev, + "tx: can't build header for encap %02Xh; load a protocol driver.\n", + lp->default_proto[daddr]); /* always fails */ return 0; @@ -992,7 +1000,7 @@ static int null_prepare_tx(struct net_device *dev, struct archdr *pkt, struct arcnet_local *lp = netdev_priv(dev); struct arc_hardware newpkt; - BUGMSG(D_PROTO, "tx: no encap for this host; load a protocol driver.\n"); + arc_printk(D_PROTO, dev, "tx: no encap for this host; load a protocol driver.\n"); /* send a packet to myself -- will never get received, of course */ newpkt.source = newpkt.dest = dev->dev_addr[0]; diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index e7ec907f4680..d62d1accf4b6 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -47,7 +47,8 @@ static void rx(struct net_device *dev, int bufnum, char *pktbuf, *pkthdrbuf; int ofs; - BUGMSG(D_DURING, "it's a raw(cap) packet (length=%d)\n", length); + arc_printk(D_DURING, dev, "it's a raw(cap) packet (length=%d)\n", + length); if (length >= MinTU) ofs = 512 - length; @@ -56,7 +57,7 @@ static void rx(struct net_device *dev, int bufnum, skb = alloc_skb(length + ARC_HDR_SIZE + sizeof(int), GFP_ATOMIC); if (skb == NULL) { - BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n"); + arc_printk(D_NORMAL, dev, "Memory squeeze, dropping packet\n"); dev->stats.rx_dropped++; return; } @@ -102,8 +103,8 @@ static int build_header(struct sk_buff *skb, int hdr_size = ARC_HDR_SIZE; struct archdr *pkt = (struct archdr *)skb_push(skb, hdr_size); - BUGMSG(D_PROTO, "Preparing header for cap packet %x.\n", - *((int *)&pkt->soft.cap.cookie[0])); + arc_printk(D_PROTO, dev, "Preparing header for cap packet %x.\n", + *((int *)&pkt->soft.cap.cookie[0])); /* Set the source hardware address. * @@ -140,16 +141,16 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, /* And neither is the cookie field */ length -= sizeof(int); - BUGMSG(D_DURING, "prepare_tx: txbufs=%d/%d/%d\n", - lp->next_tx, lp->cur_tx, bufnum); + arc_printk(D_DURING, dev, "prepare_tx: txbufs=%d/%d/%d\n", + lp->next_tx, lp->cur_tx, bufnum); - BUGMSG(D_PROTO, "Sending for cap packet %x.\n", - *((int *)&pkt->soft.cap.cookie[0])); + arc_printk(D_PROTO, dev, "Sending for cap packet %x.\n", + *((int *)&pkt->soft.cap.cookie[0])); if (length > XMTU) { /* should never happen! other people already check for this. */ - BUGMSG(D_NORMAL, "Bug! prepare_tx with size %d (> %d)\n", - length, XMTU); + arc_printk(D_NORMAL, dev, "Bug! 
prepare_tx with size %d (> %d)\n", + length, XMTU); length = XMTU; } if (length > MinTU) { @@ -162,8 +163,8 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, hard->offset[0] = ofs = 256 - length; } - BUGMSG(D_DURING, "prepare_tx: length=%d ofs=%d\n", - length, ofs); + arc_printk(D_DURING, dev, "prepare_tx: length=%d ofs=%d\n", + length, ofs); /* Copy the arcnet-header + the protocol byte down: */ lp->hw.copy_to_card(dev, bufnum, 0, hard, ARC_HDR_SIZE); @@ -188,8 +189,8 @@ static int ack_tx(struct net_device *dev, int acked) struct archdr *ackpkt; int length = sizeof(struct arc_cap); - BUGMSG(D_DURING, "capmode: ack_tx: protocol: %x: result: %d\n", - lp->outgoing.skb->protocol, acked); + arc_printk(D_DURING, dev, "capmode: ack_tx: protocol: %x: result: %d\n", + lp->outgoing.skb->protocol, acked); if (BUGLVL(D_SKB)) arcnet_dump_skb(dev, lp->outgoing.skb, "ack_tx"); @@ -197,7 +198,7 @@ static int ack_tx(struct net_device *dev, int acked) /* Now alloc a skb to send back up through the layers: */ ackskb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC); if (ackskb == NULL) { - BUGMSG(D_NORMAL, "Memory squeeze, can't acknowledge.\n"); + arc_printk(D_NORMAL, dev, "Memory squeeze, can't acknowledge\n"); goto free_outskb; } @@ -213,8 +214,8 @@ static int ack_tx(struct net_device *dev, int acked) ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */ ackpkt->soft.cap.mes.ack = acked; - BUGMSG(D_PROTO, "Ackknowledge for cap packet %x.\n", - *((int *)&ackpkt->soft.cap.cookie[0])); + arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n", + *((int *)&ackpkt->soft.cap.cookie[0])); ackskb->protocol = cpu_to_be16(ETH_P_ARCNET); diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index af87c7482478..c3748f7c5081 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -58,16 +58,16 @@ static int __init com20020isa_probe(struct net_device *dev) ioaddr = dev->base_addr; if (!ioaddr) { - BUGMSG(D_NORMAL, "No autoprobe (yet) for IO mapped cards; you must specify the base address!\n"); + arc_printk(D_NORMAL, dev, "No autoprobe (yet) for IO mapped cards; you must specify the base address!\n"); return -ENODEV; } if (!request_region(ioaddr, ARCNET_TOTAL_SIZE, "arcnet (COM20020)")) { - BUGMSG(D_NORMAL, "IO region %xh-%xh already allocated.\n", - ioaddr, ioaddr + ARCNET_TOTAL_SIZE - 1); + arc_printk(D_NORMAL, dev, "IO region %xh-%xh already allocated.\n", + ioaddr, ioaddr + ARCNET_TOTAL_SIZE - 1); return -ENXIO; } if (ASTATUS() == 0xFF) { - BUGMSG(D_NORMAL, "IO address %x empty\n", ioaddr); + arc_printk(D_NORMAL, dev, "IO address %x empty\n", ioaddr); err = -ENODEV; goto out; } @@ -81,7 +81,8 @@ static int __init com20020isa_probe(struct net_device *dev) * card has just reset and the NORXflag is on until * we tell it to start receiving. 
*/ - BUGMSG(D_INIT_REASONS, "intmask was %02Xh\n", inb(_INTMASK)); + arc_printk(D_INIT_REASONS, dev, "intmask was %02Xh\n", + inb(_INTMASK)); outb(0, _INTMASK); airqmask = probe_irq_on(); outb(NORXflag, _INTMASK); @@ -90,14 +91,14 @@ static int __init com20020isa_probe(struct net_device *dev) dev->irq = probe_irq_off(airqmask); if ((int)dev->irq <= 0) { - BUGMSG(D_INIT_REASONS, "Autoprobe IRQ failed first time\n"); + arc_printk(D_INIT_REASONS, dev, "Autoprobe IRQ failed first time\n"); airqmask = probe_irq_on(); outb(NORXflag, _INTMASK); udelay(5); outb(0, _INTMASK); dev->irq = probe_irq_off(airqmask); if ((int)dev->irq <= 0) { - BUGMSG(D_NORMAL, "Autoprobe IRQ failed.\n"); + arc_printk(D_NORMAL, dev, "Autoprobe IRQ failed.\n"); err = -ENODEV; goto out; } diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index c8184de4480d..0d0cc6a9c618 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -99,7 +99,7 @@ static int com20020pci_probe(struct pci_dev *pdev, const struct pci_device_id *i lp = netdev_priv(dev); - BUGMSG(D_NORMAL, "%s Controls\n", ci->name); + arc_printk(D_NORMAL, dev, "%s Controls\n", ci->name); ioaddr = pci_resource_start(pdev, cm->bar) + cm->offset; r = devm_request_region(&pdev->dev, ioaddr, cm->size, diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 3170e8ebb7fa..b0b8a0be08e5 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -68,7 +68,7 @@ static void com20020_copy_from_card(struct net_device *dev, int bufnum, outb(ofs & 0xff, _ADDR_LO); /* copy the data */ - TIME("insb", count, insb(_MEMDATA, buf, count)); + TIME(dev, "insb", count, insb(_MEMDATA, buf, count)); } static void com20020_copy_to_card(struct net_device *dev, int bufnum, @@ -81,7 +81,7 @@ static void com20020_copy_to_card(struct net_device *dev, int bufnum, outb(ofs & 0xff, _ADDR_LO); /* copy the data */ - TIME("outsb", count, outsb(_MEMDATA, buf, count)); + TIME(dev, "outsb", count, outsb(_MEMDATA, buf, count)); } /* Reset the card and check some basic stuff during the detection stage. 
*/ @@ -120,10 +120,10 @@ int com20020_check(struct net_device *dev) status = ASTATUS(); if ((status & 0x99) != (NORXflag | TXFREEflag | RESETflag)) { - BUGMSG(D_NORMAL, "status invalid (%Xh).\n", status); + arc_printk(D_NORMAL, dev, "status invalid (%Xh).\n", status); return -ENODEV; } - BUGMSG(D_INIT_REASONS, "status after reset: %X\n", status); + arc_printk(D_INIT_REASONS, dev, "status after reset: %X\n", status); /* Enable TX */ outb(0x39, _CONFIG); @@ -132,16 +132,16 @@ int com20020_check(struct net_device *dev) ACOMMAND(CFLAGScmd | RESETclear | CONFIGclear); status = ASTATUS(); - BUGMSG(D_INIT_REASONS, "status after reset acknowledged: %X\n", - status); + arc_printk(D_INIT_REASONS, dev, "status after reset acknowledged: %X\n", + status); /* Read first location of memory */ outb(0 | RDDATAflag | AUTOINCflag, _ADDR_HI); outb(0, _ADDR_LO); if ((status = inb(_MEMDATA)) != TESTvalue) { - BUGMSG(D_NORMAL, "Signature byte not found (%02Xh != D1h).\n", - status); + arc_printk(D_NORMAL, dev, "Signature byte not found (%02Xh != D1h).\n", + status); return -ENODEV; } return 0; @@ -213,24 +213,25 @@ int com20020_found(struct net_device *dev, int shared) /* reserve the irq */ if (request_irq(dev->irq, arcnet_interrupt, shared, "arcnet (COM20020)", dev)) { - BUGMSG(D_NORMAL, "Can't get IRQ %d!\n", dev->irq); + arc_printk(D_NORMAL, dev, "Can't get IRQ %d!\n", dev->irq); return -ENODEV; } dev->base_addr = ioaddr; - BUGMSG(D_NORMAL, "%s: station %02Xh found at %03lXh, IRQ %d.\n", - lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq); + arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n", + lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq); if (lp->backplane) - BUGMSG(D_NORMAL, "Using backplane mode.\n"); + arc_printk(D_NORMAL, dev, "Using backplane mode.\n"); if (lp->timeout != 3) - BUGMSG(D_NORMAL, "Using extended timeout value of %d.\n", lp->timeout); + arc_printk(D_NORMAL, dev, "Using extended timeout value of %d\n", + lp->timeout); - BUGMSG(D_NORMAL, "Using CKP %d - data rate %s.\n", - lp->setup >> 1, - clockrates[3 - ((lp->setup2 & 0xF0) >> 4) + ((lp->setup & 0x0F) >> 1)]); + arc_printk(D_NORMAL, dev, "Using CKP %d - data rate %s\n", + lp->setup >> 1, + clockrates[3 - ((lp->setup2 & 0xF0) >> 4) + ((lp->setup & 0x0F) >> 1)]); if (register_netdev(dev)) { free_irq(dev->irq, dev); @@ -252,16 +253,16 @@ static int com20020_reset(struct net_device *dev, int really_reset) u_int ioaddr = dev->base_addr; u_char inbyte; - BUGMSG(D_DEBUG, "%s: %d: %s: dev: %p, lp: %p, dev->name: %s\n", - __FILE__, __LINE__, __func__, dev, lp, dev->name); - BUGMSG(D_INIT, "Resetting %s (status=%02Xh)\n", - dev->name, ASTATUS()); + arc_printk(D_DEBUG, dev, "%s: %d: %s: dev: %p, lp: %p, dev->name: %s\n", + __FILE__, __LINE__, __func__, dev, lp, dev->name); + arc_printk(D_INIT, dev, "Resetting %s (status=%02Xh)\n", + dev->name, ASTATUS()); - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); lp->config = TXENcfg | (lp->timeout << 3) | (lp->backplane << 2); /* power-up defaults */ SETCONF; - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); if (really_reset) { /* reset the card */ @@ -269,22 +270,23 @@ static int com20020_reset(struct net_device *dev, int really_reset) mdelay(RESETtime * 2); /* COM20020 seems to be slower sometimes */ } /* clear flags & end reset */ - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, 
__func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); ACOMMAND(CFLAGScmd | RESETclear | CONFIGclear); /* verify that the ARCnet signature byte is present */ - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); com20020_copy_from_card(dev, 0, 0, &inbyte, 1); - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); if (inbyte != TESTvalue) { - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); - BUGMSG(D_NORMAL, "reset failed: TESTvalue not present.\n"); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", + __FILE__, __LINE__, __func__); + arc_printk(D_NORMAL, dev, "reset failed: TESTvalue not present.\n"); return 1; } /* enable extended (512-byte) packets */ ACOMMAND(CONFIGcmd | EXTconf); - BUGMSG(D_DEBUG, "%s: %d: %s\n", __FILE__, __LINE__, __func__); + arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); /* done! return success. */ return 0; @@ -294,7 +296,7 @@ static void com20020_setmask(struct net_device *dev, int mask) { u_int ioaddr = dev->base_addr; - BUGMSG(D_DURING, "Setting mask to %x at %x\n", mask, ioaddr); + arc_printk(D_DURING, dev, "Setting mask to %x at %x\n", mask, ioaddr); AINTMASK(mask); } @@ -336,7 +338,7 @@ static void com20020_set_mc_list(struct net_device *dev) if ((dev->flags & IFF_PROMISC) && (dev->flags & IFF_UP)) { /* Enable promiscuous mode */ if (!(lp->setup & PROMISCset)) - BUGMSG(D_NORMAL, "Setting promiscuous flag...\n"); + arc_printk(D_NORMAL, dev, "Setting promiscuous flag...\n"); SET_SUBADR(SUB_SETUP1); lp->setup |= PROMISCset; outb(lp->setup, _XREG); @@ -344,7 +346,7 @@ static void com20020_set_mc_list(struct net_device *dev) /* Disable promiscuous mode, use normal mode */ { if ((lp->setup & PROMISCset)) - BUGMSG(D_NORMAL, "Resetting promiscuous flag...\n"); + arc_printk(D_NORMAL, dev, "Resetting promiscuous flag...\n"); SET_SUBADR(SUB_SETUP1); lp->setup &= ~PROMISCset; outb(lp->setup, _XREG); diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 33a8531e90c7..ec2db8527862 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -151,16 +151,17 @@ static int __init com90io_probe(struct net_device *dev) } if (!ioaddr) { - BUGMSG(D_NORMAL, "No autoprobe for IO mapped cards; you must specify the base address!\n"); + arc_printk(D_NORMAL, dev, "No autoprobe for IO mapped cards; you must specify the base address!\n"); return -ENODEV; } if (!request_region(ioaddr, ARCNET_TOTAL_SIZE, "com90io probe")) { - BUGMSG(D_INIT_REASONS, "IO request_region %x-%x failed.\n", - ioaddr, ioaddr + ARCNET_TOTAL_SIZE - 1); + arc_printk(D_INIT_REASONS, dev, "IO request_region %x-%x failed\n", + ioaddr, ioaddr + ARCNET_TOTAL_SIZE - 1); return -ENXIO; } if (ASTATUS() == 0xFF) { - BUGMSG(D_INIT_REASONS, "IO address %x empty\n", ioaddr); + arc_printk(D_INIT_REASONS, dev, "IO address %x empty\n", + ioaddr); goto err_out; } inb(_RESET); @@ -169,19 +170,22 @@ static int __init com90io_probe(struct net_device *dev) status = ASTATUS(); if ((status & 0x9D) != (NORXflag | RECONflag | TXFREEflag | RESETflag)) { - BUGMSG(D_INIT_REASONS, "Status invalid (%Xh).\n", status); + arc_printk(D_INIT_REASONS, dev, "Status invalid (%Xh)\n", + status); goto err_out; } - BUGMSG(D_INIT_REASONS, "Status after reset: %X\n", status); + arc_printk(D_INIT_REASONS, dev, "Status after reset: %X\n", status); ACOMMAND(CFLAGScmd | RESETclear | 
CONFIGclear); - BUGMSG(D_INIT_REASONS, "Status after reset acknowledged: %X\n", status); + arc_printk(D_INIT_REASONS, dev, "Status after reset acknowledged: %X\n", + status); status = ASTATUS(); if (status & RESETflag) { - BUGMSG(D_INIT_REASONS, "Eternal reset (status=%Xh)\n", status); + arc_printk(D_INIT_REASONS, dev, "Eternal reset (status=%Xh)\n", + status); goto err_out; } outb((0x16 | IOMAPflag) & ~ENABLE16flag, _CONFIG); @@ -192,8 +196,8 @@ static int __init com90io_probe(struct net_device *dev) outb(0, _ADDR_LO); if ((status = inb(_MEMDATA)) != 0xd1) { - BUGMSG(D_INIT_REASONS, "Signature byte not found (%Xh instead).\n", - status); + arc_printk(D_INIT_REASONS, dev, "Signature byte not found (%Xh instead).\n", + status); goto err_out; } if (!dev->irq) { @@ -209,7 +213,7 @@ static int __init com90io_probe(struct net_device *dev) dev->irq = probe_irq_off(airqmask); if ((int)dev->irq <= 0) { - BUGMSG(D_INIT_REASONS, "Autoprobe IRQ failed\n"); + arc_printk(D_INIT_REASONS, dev, "Autoprobe IRQ failed\n"); goto err_out; } } @@ -232,7 +236,7 @@ static int __init com90io_found(struct net_device *dev) /* Reserve the irq */ if (request_irq(dev->irq, arcnet_interrupt, 0, "arcnet (COM90xx-IO)", dev)) { - BUGMSG(D_NORMAL, "Can't get IRQ %d!\n", dev->irq); + arc_printk(D_NORMAL, dev, "Can't get IRQ %d!\n", dev->irq); return -ENODEV; } /* Reserve the I/O region */ @@ -266,8 +270,8 @@ static int __init com90io_found(struct net_device *dev) return err; } - BUGMSG(D_NORMAL, "COM90IO: station %02Xh found at %03lXh, IRQ %d.\n", - dev->dev_addr[0], dev->base_addr, dev->irq); + arc_printk(D_NORMAL, dev, "COM90IO: station %02Xh found at %03lXh, IRQ %d.\n", + dev->dev_addr[0], dev->base_addr, dev->irq); return 0; } @@ -284,7 +288,8 @@ static int com90io_reset(struct net_device *dev, int really_reset) struct arcnet_local *lp = netdev_priv(dev); short ioaddr = dev->base_addr; - BUGMSG(D_INIT, "Resetting %s (status=%02Xh)\n", dev->name, ASTATUS()); + arc_printk(D_INIT, dev, "Resetting %s (status=%02Xh)\n", + dev->name, ASTATUS()); if (really_reset) { /* reset the card */ @@ -300,7 +305,7 @@ static int com90io_reset(struct net_device *dev, int really_reset) /* verify that the ARCnet signature byte is present */ if (get_buffer_byte(dev, 0) != TESTvalue) { - BUGMSG(D_NORMAL, "reset failed: TESTvalue not present.\n"); + arc_printk(D_NORMAL, dev, "reset failed: TESTvalue not present.\n"); return 1; } /* enable extended (512-byte) packets */ @@ -334,13 +339,15 @@ static void com90io_setmask(struct net_device *dev, int mask) static void com90io_copy_to_card(struct net_device *dev, int bufnum, int offset, void *buf, int count) { - TIME("put_whole_buffer", count, put_whole_buffer(dev, bufnum * 512 + offset, count, buf)); + TIME(dev, "put_whole_buffer", count, + put_whole_buffer(dev, bufnum * 512 + offset, count, buf)); } static void com90io_copy_from_card(struct net_device *dev, int bufnum, int offset, void *buf, int count) { - TIME("get_whole_buffer", count, get_whole_buffer(dev, bufnum * 512 + offset, count, buf)); + TIME(dev, "get_whole_buffer", count, + get_whole_buffer(dev, bufnum * 512 + offset, count, buf)); } static int io; /* use the insmod io= irq= shmem= options */ diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index b5e1c1904f13..12534a3cb4ce 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -157,24 +157,24 @@ static void __init com90xx_probe(void) numprint++; numprint %= 8; if (!numprint) { - BUGMSG2(D_INIT, "\n"); - BUGMSG2(D_INIT, "S1: "); + 
arc_cont(D_INIT, "\n"); + arc_cont(D_INIT, "S1: "); } - BUGMSG2(D_INIT, "%Xh ", *port); + arc_cont(D_INIT, "%Xh ", *port); ioaddr = *port; if (!request_region(*port, ARCNET_TOTAL_SIZE, "arcnet (90xx)")) { - BUGMSG2(D_INIT_REASONS, "(request_region)\n"); - BUGMSG2(D_INIT_REASONS, "S1: "); + arc_cont(D_INIT_REASONS, "(request_region)\n"); + arc_cont(D_INIT_REASONS, "S1: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; *port-- = ports[--numports]; continue; } if (ASTATUS() == 0xFF) { - BUGMSG2(D_INIT_REASONS, "(empty)\n"); - BUGMSG2(D_INIT_REASONS, "S1: "); + arc_cont(D_INIT_REASONS, "(empty)\n"); + arc_cont(D_INIT_REASONS, "S1: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); @@ -183,15 +183,15 @@ static void __init com90xx_probe(void) } inb(_RESET); /* begin resetting card */ - BUGMSG2(D_INIT_REASONS, "\n"); - BUGMSG2(D_INIT_REASONS, "S1: "); + arc_cont(D_INIT_REASONS, "\n"); + arc_cont(D_INIT_REASONS, "S1: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; } - BUGMSG2(D_INIT, "\n"); + arc_cont(D_INIT, "\n"); if (!numports) { - BUGMSG2(D_NORMAL, "S1: No ARCnet cards found.\n"); + arc_cont(D_NORMAL, "S1: No ARCnet cards found.\n"); kfree(shmems); kfree(iomem); return; @@ -205,12 +205,12 @@ static void __init com90xx_probe(void) numprint++; numprint %= 8; if (!numprint) { - BUGMSG2(D_INIT, "\n"); - BUGMSG2(D_INIT, "S2: "); + arc_cont(D_INIT, "\n"); + arc_cont(D_INIT, "S2: "); } - BUGMSG2(D_INIT, "%Xh ", *port); + arc_cont(D_INIT, "%Xh ", *port); } - BUGMSG2(D_INIT, "\n"); + arc_cont(D_INIT, "\n"); mdelay(RESETtime); /* Stage 3: abandon any shmem addresses that don't have the signature @@ -223,30 +223,30 @@ static void __init com90xx_probe(void) numprint++; numprint %= 8; if (!numprint) { - BUGMSG2(D_INIT, "\n"); - BUGMSG2(D_INIT, "S3: "); + arc_cont(D_INIT, "\n"); + arc_cont(D_INIT, "S3: "); } - BUGMSG2(D_INIT, "%lXh ", *p); + arc_cont(D_INIT, "%lXh ", *p); if (!request_mem_region(*p, MIRROR_SIZE, "arcnet (90xx)")) { - BUGMSG2(D_INIT_REASONS, "(request_mem_region)\n"); - BUGMSG2(D_INIT_REASONS, "Stage 3: "); + arc_cont(D_INIT_REASONS, "(request_mem_region)\n"); + arc_cont(D_INIT_REASONS, "Stage 3: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; goto out; } base = ioremap(*p, MIRROR_SIZE); if (!base) { - BUGMSG2(D_INIT_REASONS, "(ioremap)\n"); - BUGMSG2(D_INIT_REASONS, "Stage 3: "); + arc_cont(D_INIT_REASONS, "(ioremap)\n"); + arc_cont(D_INIT_REASONS, "Stage 3: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; goto out1; } if (readb(base) != TESTvalue) { - BUGMSG2(D_INIT_REASONS, "(%02Xh != %02Xh)\n", - readb(base), TESTvalue); - BUGMSG2(D_INIT_REASONS, "S3: "); + arc_cont(D_INIT_REASONS, "(%02Xh != %02Xh)\n", + readb(base), TESTvalue); + arc_cont(D_INIT_REASONS, "S3: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; goto out2; @@ -258,12 +258,12 @@ static void __init com90xx_probe(void) */ writeb(0x42, base); if (readb(base) != 0x42) { - BUGMSG2(D_INIT_REASONS, "(read only)\n"); - BUGMSG2(D_INIT_REASONS, "S3: "); + arc_cont(D_INIT_REASONS, "(read only)\n"); + arc_cont(D_INIT_REASONS, "S3: "); goto out2; } - BUGMSG2(D_INIT_REASONS, "\n"); - BUGMSG2(D_INIT_REASONS, "S3: "); + arc_cont(D_INIT_REASONS, "\n"); + arc_cont(D_INIT_REASONS, "S3: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; iomem[index] = base; @@ -276,10 +276,10 @@ static void __init com90xx_probe(void) *p-- = shmems[--numshmems]; index--; } - BUGMSG2(D_INIT, "\n"); + arc_cont(D_INIT, "\n"); if (!numshmems) { - BUGMSG2(D_NORMAL, "S3: No ARCnet cards found.\n"); + arc_cont(D_NORMAL, "S3: No ARCnet cards 
found.\n"); for (port = &ports[0]; port < ports + numports; port++) release_region(*port, ARCNET_TOTAL_SIZE); kfree(shmems); @@ -294,12 +294,12 @@ static void __init com90xx_probe(void) numprint++; numprint %= 8; if (!numprint) { - BUGMSG2(D_INIT, "\n"); - BUGMSG2(D_INIT, "S4: "); + arc_cont(D_INIT, "\n"); + arc_cont(D_INIT, "S4: "); } - BUGMSG2(D_INIT, "%lXh ", *p); + arc_cont(D_INIT, "%lXh ", *p); } - BUGMSG2(D_INIT, "\n"); + arc_cont(D_INIT, "\n"); /* Stage 5: for any ports that have the correct status, can disable * the RESET flag, and (if no irq is given) generate an autoirq, @@ -315,18 +315,18 @@ static void __init com90xx_probe(void) numprint++; numprint %= 8; if (!numprint) { - BUGMSG2(D_INIT, "\n"); - BUGMSG2(D_INIT, "S5: "); + arc_cont(D_INIT, "\n"); + arc_cont(D_INIT, "S5: "); } - BUGMSG2(D_INIT, "%Xh ", *port); + arc_cont(D_INIT, "%Xh ", *port); ioaddr = *port; status = ASTATUS(); if ((status & 0x9D) != (NORXflag | RECONflag | TXFREEflag | RESETflag)) { - BUGMSG2(D_INIT_REASONS, "(status=%Xh)\n", status); - BUGMSG2(D_INIT_REASONS, "S5: "); + arc_cont(D_INIT_REASONS, "(status=%Xh)\n", status); + arc_cont(D_INIT_REASONS, "S5: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); @@ -336,9 +336,9 @@ static void __init com90xx_probe(void) ACOMMAND(CFLAGScmd | RESETclear | CONFIGclear); status = ASTATUS(); if (status & RESETflag) { - BUGMSG2(D_INIT_REASONS, " (eternal reset, status=%Xh)\n", - status); - BUGMSG2(D_INIT_REASONS, "S5: "); + arc_cont(D_INIT_REASONS, " (eternal reset, status=%Xh)\n", + status); + arc_cont(D_INIT_REASONS, "S5: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); @@ -360,8 +360,8 @@ static void __init com90xx_probe(void) airq = probe_irq_off(airqmask); if (airq <= 0) { - BUGMSG2(D_INIT_REASONS, "(airq=%d)\n", airq); - BUGMSG2(D_INIT_REASONS, "S5: "); + arc_cont(D_INIT_REASONS, "(airq=%d)\n", airq); + arc_cont(D_INIT_REASONS, "S5: "); if (BUGLVL(D_INIT_REASONS)) numprint = 0; release_region(*port, ARCNET_TOTAL_SIZE); @@ -372,7 +372,7 @@ static void __init com90xx_probe(void) airq = irq; } - BUGMSG2(D_INIT, "(%d,", airq); + arc_cont(D_INIT, "(%d,", airq); openparen = 1; /* Everything seems okay. 
But which shmem, if any, puts @@ -399,7 +399,7 @@ static void __init com90xx_probe(void) void __iomem *base = iomem[index]; if (readb(base) == TESTvalue) { /* found one */ - BUGMSG2(D_INIT, "%lXh)\n", *p); + arc_cont(D_INIT, "%lXh)\n", *p); openparen = 0; /* register the card */ @@ -412,7 +412,7 @@ static void __init com90xx_probe(void) iomem[index] = iomem[numshmems]; break; /* go to the next I/O port */ } else { - BUGMSG2(D_INIT_REASONS, "%Xh-", readb(base)); + arc_cont(D_INIT_REASONS, "%Xh-", readb(base)); } } @@ -476,7 +476,7 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem /* allocate struct net_device */ dev = alloc_arcdev(device); if (!dev) { - BUGMSG2(D_NORMAL, "com90xx: Can't allocate device!\n"); + arc_cont(D_NORMAL, "com90xx: Can't allocate device!\n"); iounmap(p); release_mem_region(shmem, MIRROR_SIZE); return -ENOMEM; @@ -515,7 +515,7 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem /* reserve the irq */ if (request_irq(airq, arcnet_interrupt, 0, "arcnet (90xx)", dev)) { - BUGMSG(D_NORMAL, "Can't get IRQ %d!\n", airq); + arc_printk(D_NORMAL, dev, "Can't get IRQ %d!\n", airq); goto err_release_mem; } dev->irq = airq; @@ -531,7 +531,7 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem lp->hw.copy_from_card = com90xx_copy_from_card; lp->mem_start = ioremap(dev->mem_start, dev->mem_end - dev->mem_start + 1); if (!lp->mem_start) { - BUGMSG(D_NORMAL, "Can't remap device memory!\n"); + arc_printk(D_NORMAL, dev, "Can't remap device memory!\n"); goto err_free_irq; } @@ -540,10 +540,11 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem dev->base_addr = ioaddr; - BUGMSG(D_NORMAL, "COM90xx station %02Xh found at %03lXh, IRQ %d, ShMem %lXh (%ld*%xh).\n", - dev->dev_addr[0], - dev->base_addr, dev->irq, dev->mem_start, - (dev->mem_end - dev->mem_start + 1) / mirror_size, mirror_size); + arc_printk(D_NORMAL, dev, "COM90xx station %02Xh found at %03lXh, IRQ %d, ShMem %lXh (%ld*%xh).\n", + dev->dev_addr[0], + dev->base_addr, dev->irq, dev->mem_start, + (dev->mem_end - dev->mem_start + 1) / mirror_size, + mirror_size); if (register_netdev(dev)) goto err_unmap; @@ -595,7 +596,7 @@ static int com90xx_reset(struct net_device *dev, int really_reset) struct arcnet_local *lp = netdev_priv(dev); short ioaddr = dev->base_addr; - BUGMSG(D_INIT, "Resetting (status=%02Xh)\n", ASTATUS()); + arc_printk(D_INIT, dev, "Resetting (status=%02Xh)\n", ASTATUS()); if (really_reset) { /* reset the card */ @@ -611,7 +612,7 @@ static int com90xx_reset(struct net_device *dev, int really_reset) /* verify that the ARCnet signature byte is present */ if (readb(lp->mem_start) != TESTvalue) { if (really_reset) - BUGMSG(D_NORMAL, "reset failed: TESTvalue not present.\n"); + arc_printk(D_NORMAL, dev, "reset failed: TESTvalue not present.\n"); return 1; } /* enable extended (512-byte) packets */ @@ -631,7 +632,7 @@ static void com90xx_copy_to_card(struct net_device *dev, int bufnum, int offset, struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + bufnum * 512 + offset; - TIME("memcpy_toio", count, memcpy_toio(memaddr, buf, count)); + TIME(dev, "memcpy_toio", count, memcpy_toio(memaddr, buf, count)); } static void com90xx_copy_from_card(struct net_device *dev, int bufnum, int offset, @@ -640,7 +641,7 @@ static void com90xx_copy_from_card(struct net_device *dev, int bufnum, int offse struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + 
bufnum * 512 + offset; - TIME("memcpy_fromio", count, memcpy_fromio(buf, memaddr, count)); + TIME(dev, "memcpy_fromio", count, memcpy_fromio(buf, memaddr, count)); } MODULE_LICENSE("GPL"); diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index 824d71fcfd53..47c7d1a4ef4c 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -124,7 +124,7 @@ static void rx(struct net_device *dev, int bufnum, struct archdr *pkt = pkthdr; int ofs; - BUGMSG(D_DURING, "it's a raw packet (length=%d)\n", length); + arc_printk(D_DURING, dev, "it's a raw packet (length=%d)\n", length); if (length >= MinTU) ofs = 512 - length; @@ -133,7 +133,7 @@ static void rx(struct net_device *dev, int bufnum, skb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC); if (skb == NULL) { - BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n"); + arc_printk(D_NORMAL, dev, "Memory squeeze, dropping packet\n"); dev->stats.rx_dropped++; return; } @@ -173,8 +173,8 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, soft->proto = ARC_P_ARP_RFC1051; break; default: - BUGMSG(D_NORMAL, "RFC1051: I don't understand protocol %d (%Xh)\n", - type, type); + arc_printk(D_NORMAL, dev, "RFC1051: I don't understand protocol %d (%Xh)\n", + type, type); dev->stats.tx_errors++; dev->stats.tx_aborted_errors++; return 0; @@ -210,15 +210,15 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, struct arc_hardware *hard = &pkt->hard; int ofs; - BUGMSG(D_DURING, "prepare_tx: txbufs=%d/%d/%d\n", - lp->next_tx, lp->cur_tx, bufnum); + arc_printk(D_DURING, dev, "prepare_tx: txbufs=%d/%d/%d\n", + lp->next_tx, lp->cur_tx, bufnum); length -= ARC_HDR_SIZE; /* hard header is not included in packet length */ if (length > XMTU) { /* should never happen! other people already check for this. */ - BUGMSG(D_NORMAL, "Bug! prepare_tx with size %d (> %d)\n", - length, XMTU); + arc_printk(D_NORMAL, dev, "Bug! prepare_tx with size %d (> %d)\n", + length, XMTU); length = XMTU; } if (length > MinTU) { diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index d052976ebdfa..97af7d92c020 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -136,7 +136,8 @@ static void rx(struct net_device *dev, int bufnum, int saddr = pkt->hard.source, ofs; struct Incoming *in = &lp->rfc1201.incoming[saddr]; - BUGMSG(D_DURING, "it's an RFC1201 packet (length=%d)\n", length); + arc_printk(D_DURING, dev, "it's an RFC1201 packet (length=%d)\n", + length); if (length >= MinTU) ofs = 512 - length; @@ -145,10 +146,10 @@ static void rx(struct net_device *dev, int bufnum, if (soft->split_flag == 0xFF) { /* Exception Packet */ if (length >= 4 + RFC1201_HDR_SIZE) { - BUGMSG(D_DURING, "compensating for exception packet\n"); + arc_printk(D_DURING, dev, "compensating for exception packet\n"); } else { - BUGMSG(D_EXTRA, "short RFC1201 exception packet from %02Xh", - saddr); + arc_printk(D_EXTRA, dev, "short RFC1201 exception packet from %02Xh", + saddr); return; } @@ -159,12 +160,13 @@ static void rx(struct net_device *dev, int bufnum, soft, sizeof(pkt->soft)); } if (!soft->split_flag) { /* not split */ - BUGMSG(D_RX, "incoming is not split (splitflag=%d)\n", - soft->split_flag); + arc_printk(D_RX, dev, "incoming is not split (splitflag=%d)\n", + soft->split_flag); if (in->skb) { /* already assembling one! 
*/ - BUGMSG(D_EXTRA, "aborting assembly (seq=%d) for unsplit packet (splitflag=%d, seq=%d)\n", - in->sequence, soft->split_flag, soft->sequence); + arc_printk(D_EXTRA, dev, "aborting assembly (seq=%d) for unsplit packet (splitflag=%d, seq=%d)\n", + in->sequence, soft->split_flag, + soft->sequence); lp->rfc1201.aborted_seq = soft->sequence; dev_kfree_skb_irq(in->skb); dev->stats.rx_errors++; @@ -175,7 +177,7 @@ static void rx(struct net_device *dev, int bufnum, skb = alloc_skb(length + ARC_HDR_SIZE, GFP_ATOMIC); if (skb == NULL) { - BUGMSG(D_NORMAL, "Memory squeeze, dropping packet.\n"); + arc_printk(D_NORMAL, dev, "Memory squeeze, dropping packet\n"); dev->stats.rx_dropped++; return; } @@ -205,18 +207,18 @@ static void rx(struct net_device *dev, int bufnum, uint8_t *cptr = (uint8_t *)arp + sizeof(struct arphdr); if (!*cptr) { /* is saddr = 00? */ - BUGMSG(D_EXTRA, - "ARP source address was 00h, set to %02Xh.\n", - saddr); + arc_printk(D_EXTRA, dev, + "ARP source address was 00h, set to %02Xh\n", + saddr); dev->stats.rx_crc_errors++; *cptr = saddr; } else { - BUGMSG(D_DURING, "ARP source address (%Xh) is fine.\n", - *cptr); + arc_printk(D_DURING, dev, "ARP source address (%Xh) is fine.\n", + *cptr); } } else { - BUGMSG(D_NORMAL, "funny-shaped ARP packet. (%Xh, %Xh)\n", - arp->ar_hln, arp->ar_pln); + arc_printk(D_NORMAL, dev, "funny-shaped ARP packet. (%Xh, %Xh)\n", + arp->ar_hln, arp->ar_pln); dev->stats.rx_errors++; dev->stats.rx_crc_errors++; } @@ -245,13 +247,13 @@ static void rx(struct net_device *dev, int bufnum, * other way to be reliable. */ - BUGMSG(D_RX, "packet is split (splitflag=%d, seq=%d)\n", - soft->split_flag, in->sequence); + arc_printk(D_RX, dev, "packet is split (splitflag=%d, seq=%d)\n", + soft->split_flag, in->sequence); if (in->skb && in->sequence != soft->sequence) { - BUGMSG(D_EXTRA, "wrong seq number (saddr=%d, expected=%d, seq=%d, splitflag=%d)\n", - saddr, in->sequence, soft->sequence, - soft->split_flag); + arc_printk(D_EXTRA, dev, "wrong seq number (saddr=%d, expected=%d, seq=%d, splitflag=%d)\n", + saddr, in->sequence, soft->sequence, + soft->split_flag); dev_kfree_skb_irq(in->skb); in->skb = NULL; dev->stats.rx_errors++; @@ -259,12 +261,12 @@ static void rx(struct net_device *dev, int bufnum, in->lastpacket = in->numpackets = 0; } if (soft->split_flag & 1) { /* first packet in split */ - BUGMSG(D_RX, "brand new splitpacket (splitflag=%d)\n", - soft->split_flag); + arc_printk(D_RX, dev, "brand new splitpacket (splitflag=%d)\n", + soft->split_flag); if (in->skb) { /* already assembling one! */ - BUGMSG(D_EXTRA, "aborting previous (seq=%d) assembly (splitflag=%d, seq=%d)\n", - in->sequence, soft->split_flag, - soft->sequence); + arc_printk(D_EXTRA, dev, "aborting previous (seq=%d) assembly (splitflag=%d, seq=%d)\n", + in->sequence, soft->split_flag, + soft->sequence); dev->stats.rx_errors++; dev->stats.rx_missed_errors++; dev_kfree_skb_irq(in->skb); @@ -274,8 +276,8 @@ static void rx(struct net_device *dev, int bufnum, in->lastpacket = 1; if (in->numpackets > 16) { - BUGMSG(D_EXTRA, "incoming packet more than 16 segments; dropping. (splitflag=%d)\n", - soft->split_flag); + arc_printk(D_EXTRA, dev, "incoming packet more than 16 segments; dropping. 
(splitflag=%d)\n", + soft->split_flag); lp->rfc1201.aborted_seq = soft->sequence; dev->stats.rx_errors++; dev->stats.rx_length_errors++; @@ -284,7 +286,7 @@ static void rx(struct net_device *dev, int bufnum, in->skb = skb = alloc_skb(508 * in->numpackets + ARC_HDR_SIZE, GFP_ATOMIC); if (skb == NULL) { - BUGMSG(D_NORMAL, "(split) memory squeeze, dropping packet.\n"); + arc_printk(D_NORMAL, dev, "(split) memory squeeze, dropping packet.\n"); lp->rfc1201.aborted_seq = soft->sequence; dev->stats.rx_dropped++; return; @@ -305,9 +307,10 @@ static void rx(struct net_device *dev, int bufnum, */ if (!in->skb) { if (lp->rfc1201.aborted_seq != soft->sequence) { - BUGMSG(D_EXTRA, "can't continue split without starting first! (splitflag=%d, seq=%d, aborted=%d)\n", - soft->split_flag, soft->sequence, - lp->rfc1201.aborted_seq); + arc_printk(D_EXTRA, dev, "can't continue split without starting first! (splitflag=%d, seq=%d, aborted=%d)\n", + soft->split_flag, + soft->sequence, + lp->rfc1201.aborted_seq); dev->stats.rx_errors++; dev->stats.rx_missed_errors++; } @@ -317,15 +320,16 @@ static void rx(struct net_device *dev, int bufnum, if (packetnum != in->lastpacket) { /* not the right flag! */ /* harmless duplicate? ignore. */ if (packetnum <= in->lastpacket - 1) { - BUGMSG(D_EXTRA, "duplicate splitpacket ignored! (splitflag=%d)\n", - soft->split_flag); + arc_printk(D_EXTRA, dev, "duplicate splitpacket ignored! (splitflag=%d)\n", + soft->split_flag); dev->stats.rx_errors++; dev->stats.rx_frame_errors++; return; } /* "bad" duplicate, kill reassembly */ - BUGMSG(D_EXTRA, "out-of-order splitpacket, reassembly (seq=%d) aborted (splitflag=%d, seq=%d)\n", - in->sequence, soft->split_flag, soft->sequence); + arc_printk(D_EXTRA, dev, "out-of-order splitpacket, reassembly (seq=%d) aborted (splitflag=%d, seq=%d)\n", + in->sequence, soft->split_flag, + soft->sequence); lp->rfc1201.aborted_seq = soft->sequence; dev_kfree_skb_irq(in->skb); in->skb = NULL; @@ -350,10 +354,10 @@ static void rx(struct net_device *dev, int bufnum, in->skb = NULL; in->lastpacket = in->numpackets = 0; - BUGMSG(D_SKB_SIZE, "skb: received %d bytes from %02X (unsplit)\n", - skb->len, pkt->hard.source); - BUGMSG(D_SKB_SIZE, "skb: received %d bytes from %02X (split)\n", - skb->len, pkt->hard.source); + arc_printk(D_SKB_SIZE, dev, "skb: received %d bytes from %02X (unsplit)\n", + skb->len, pkt->hard.source); + arc_printk(D_SKB_SIZE, dev, "skb: received %d bytes from %02X (split)\n", + skb->len, pkt->hard.source); if (BUGLVL(D_SKB)) arcnet_dump_skb(dev, skb, "rx"); @@ -395,8 +399,8 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, soft->proto = ARC_P_ATALK; break; default: - BUGMSG(D_NORMAL, "RFC1201: I don't understand protocol %d (%Xh)\n", - type, type); + arc_printk(D_NORMAL, dev, "RFC1201: I don't understand protocol %d (%Xh)\n", + type, type); dev->stats.tx_errors++; dev->stats.tx_aborted_errors++; return 0; @@ -469,8 +473,8 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, const int maxsegsize = XMTU - RFC1201_HDR_SIZE; struct Outgoing *out; - BUGMSG(D_DURING, "prepare_tx: txbufs=%d/%d/%d\n", - lp->next_tx, lp->cur_tx, bufnum); + arc_printk(D_DURING, dev, "prepare_tx: txbufs=%d/%d/%d\n", + lp->next_tx, lp->cur_tx, bufnum); length -= ARC_HDR_SIZE; /* hard header is not included in packet length */ pkt->soft.rfc1201.split_flag = 0; @@ -484,9 +488,9 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, out->numsegs = (out->dataleft + maxsegsize - 1) / maxsegsize; 
out->segnum = 0; - BUGMSG(D_DURING, "rfc1201 prep_tx: ready for %d-segment split (%d bytes, seq=%d)\n", - out->numsegs, out->length, - pkt->soft.rfc1201.sequence); + arc_printk(D_DURING, dev, "rfc1201 prep_tx: ready for %d-segment split (%d bytes, seq=%d)\n", + out->numsegs, out->length, + pkt->soft.rfc1201.sequence); return 0; /* not done */ } @@ -505,9 +509,9 @@ static int continue_tx(struct net_device *dev, int bufnum) int maxsegsize = XMTU - RFC1201_HDR_SIZE; int seglen; - BUGMSG(D_DURING, - "rfc1201 continue_tx: loading segment %d(+1) of %d (seq=%d)\n", - out->segnum, out->numsegs, soft->sequence); + arc_printk(D_DURING, dev, + "rfc1201 continue_tx: loading segment %d(+1) of %d (seq=%d)\n", + out->segnum, out->numsegs, soft->sequence); /* the "new" soft header comes right before the data chunk */ newsoft = (struct arc_rfc1201 *) diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index f07c66383b88..a678027ff6c2 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -81,34 +81,36 @@ extern int arcnet_debug; #define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) /* macros to simplify debug checking */ -#define BUGMSG(x, fmt, ...) \ +#define arc_printk(x, dev, fmt, ...) \ do { \ - if (BUGLVL(x)) \ - printk("%s%6s: " fmt, \ - (x) == D_NORMAL ? KERN_WARNING : \ - (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name, ##__VA_ARGS__); \ + if (BUGLVL(x)) { \ + if ((x) == D_NORMAL) \ + netdev_warn(dev, fmt, ##__VA_ARGS__); \ + else if ((x) < D_DURING) \ + netdev_info(dev, fmt, ##__VA_ARGS__); \ + else \ + netdev_dbg(dev, fmt, ##__VA_ARGS__); \ + } \ } while (0) -#define BUGMSG2(x, fmt, ...) \ +#define arc_cont(x, fmt, ...) \ do { \ - if (BUGLVL(x)) \ - printk(fmt, ##__VA_ARGS__); \ + if (BUGLVL(x)) \ + pr_cont(fmt, ##__VA_ARGS__); \ } while (0) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) \ +#define TIME(dev, name, bytes, call) \ do { \ if (BUGLVL(D_TIMING)) { \ unsigned long _x, _y; \ _x = get_cycles(); \ call; \ _y = get_cycles(); \ - BUGMSG(D_TIMING, \ - "%s: %d bytes in %lu cycles == " \ - "%lu Kbytes/100Mcycle\n", \ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1)); \ + arc_printk(D_TIMING, dev, \ + "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \ + name, bytes, _y - _x, \ + 100000000 / 1024 * bytes / (_y - _x + 1)); \ } else { \ call; \ } \ -- cgit v1.2.3 From 83df99b50f901cb7c72cf132a83f43bbaeb01362 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:01 -0700 Subject: arcnet: Convert arcnet_dump_skb macro to static inline Make sure the arguments are tested appropriately when not using this function. 
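What this conversion buys is easy to miss: with an empty macro stub, the preprocessor discards the arguments before the compiler ever sees them, so call sites in builds without D_SKB support are never parsed or type-checked and can rot silently; an empty static inline keeps every call site compiling in every configuration while the optimizer still removes the call. A minimal stand-alone sketch of the pattern (hypothetical names; the driver's real guard is ARCNET_DEBUG_MAX & D_SKB, as in the hunk below):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

#if DEBUG_SKB	/* hypothetical stand-in for ARCNET_DEBUG_MAX & D_SKB */
void dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc);
#else
/* old style: the arguments vanish in preprocessing, so even
 * dump_skb(bogus_ptr, 42, "rx"); would have compiled
 */
/* #define dump_skb(dev, skb, desc) ; */

/* new style: a no-op the compiler deletes, but dev, skb and desc
 * are still evaluated and type-checked at every call site
 */
static inline void dump_skb(struct net_device *dev,
			    struct sk_buff *skb, char *desc)
{
}
#endif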
Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index a678027ff6c2..1d8e36e13616 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -326,7 +326,10 @@ struct arcnet_local { #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -#define arcnet_dump_skb(dev, skb, desc) ; +static inline +void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) +{ +} #endif void arcnet_unregister_proto(struct ArcProto *proto); -- cgit v1.2.3 From d6d7d3ed56e3bfe7fd34108dbe23f0610e3d8621 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:02 -0700 Subject: arcnet: Wrap some long lines Just neatening. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- drivers/net/arcnet/arc-rawmode.c | 3 ++- drivers/net/arcnet/arc-rimi.c | 11 ++++++----- drivers/net/arcnet/arcnet.c | 10 +++++++--- drivers/net/arcnet/capmode.c | 3 ++- drivers/net/arcnet/com20020-pci.c | 3 ++- drivers/net/arcnet/com20020.c | 14 ++++++++------ drivers/net/arcnet/com90io.c | 31 +++++++++++++++++++------------ drivers/net/arcnet/com90xx.c | 28 +++++++++++++++++----------- drivers/net/arcnet/rfc1051.c | 3 ++- drivers/net/arcnet/rfc1201.c | 15 ++++++++++----- include/linux/arcdevice.h | 12 ++++++------ 11 files changed, 81 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index 40035ee8cb40..59b3083c19f9 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -165,7 +165,8 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, arc_printk(D_DURING, dev, "prepare_tx: txbufs=%d/%d/%d\n", lp->next_tx, lp->cur_tx, bufnum); - length -= ARC_HDR_SIZE; /* hard header is not included in packet length */ + /* hard header is not included in packet length */ + length -= ARC_HDR_SIZE; if (length > XMTU) { /* should never happen! other people already check for this. 
*/ diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 7360214e3d3f..8fa5eb43f308 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -49,8 +49,8 @@ static void arcrimi_setmask(struct net_device *dev, int mask); static int arcrimi_reset(struct net_device *dev, int really_reset); static void arcrimi_copy_to_card(struct net_device *dev, int bufnum, int offset, void *buf, int count); -static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count); +static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count); /* Handy defines for ARCnet specific stuff */ @@ -215,7 +215,8 @@ static int __init arcrimi_found(struct net_device *dev) goto err_free_irq; } - lp->mem_start = ioremap(dev->mem_start, dev->mem_end - dev->mem_start + 1); + lp->mem_start = ioremap(dev->mem_start, + dev->mem_end - dev->mem_start + 1); if (!lp->mem_start) { arc_printk(D_NORMAL, dev, "Can't remap device memory!\n"); goto err_release_mem; @@ -307,8 +308,8 @@ static void arcrimi_copy_to_card(struct net_device *dev, int bufnum, int offset, TIME(dev, "memcpy_toio", count, memcpy_toio(memaddr, buf, count)); } -static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count) +static void arcrimi_copy_from_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count) { struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + 0x800 + bufnum * 512 + offset; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index a9535ec9c236..00074e781353 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -759,7 +759,8 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if ((status & lp->intmask & TXFREEflag) || lp->timed_out) { lp->intmask &= ~(TXFREEflag | EXCNAKflag); - arc_printk(D_DURING, dev, "TX IRQ (stat=%Xh)\n", status); + arc_printk(D_DURING, dev, "TX IRQ (stat=%Xh)\n", + status); if (lp->cur_tx != -1 && !lp->timed_out) { if (!(status & TXACKflag)) { @@ -804,7 +805,8 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) go_tx(dev); /* continue a split packet, if any */ - if (lp->outgoing.proto && lp->outgoing.proto->continue_tx) { + if (lp->outgoing.proto && + lp->outgoing.proto->continue_tx) { int txbuf = get_arcbuf(dev); if (txbuf != -1) { @@ -874,7 +876,9 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) arc_printk(D_NORMAL, dev, "many reconfigurations detected: cabling problem?\n"); } else if (!lp->network_down && lp->last_recon - lp->first_recon > HZ * 60) { - /* reset counters if we've gone for over a minute. */ + /* reset counters if we've gone for + * over a minute. 
+ */ lp->first_recon = lp->last_recon; lp->num_recons = 1; } diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index a898647ab05d..46a76dcc21aa 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -222,7 +222,8 @@ static int ack_tx(struct net_device *dev, int acked) free_outskb: dev_kfree_skb_irq(lp->outgoing.skb); - lp->outgoing.proto = NULL; /* We are always finished when in this protocol */ + lp->outgoing.proto = NULL; + /* We are always finished when in this protocol */ return 0; } diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index fc6809b43f8f..a28887dfcc9e 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -64,7 +64,8 @@ MODULE_LICENSE("GPL"); static void com20020pci_remove(struct pci_dev *pdev); -static int com20020pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int com20020pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) { struct com20020_pci_card_info *ci; struct net_device *dev; diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index bea386222149..00f910cf7b81 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -192,8 +192,9 @@ int com20020_found(struct net_device *dev, int shared) lp->hw.copy_from_card = com20020_copy_from_card; lp->hw.close = com20020_close; + /* FIXME: do this some other way! */ if (!dev->dev_addr[0]) - dev->dev_addr[0] = inb(ioaddr + BUS_ALIGN * 8); /* FIXME: do this some other way! */ + dev->dev_addr[0] = inb(ioaddr + BUS_ALIGN * 8); SET_SUBADR(SUB_SETUP1); outb(lp->setup, _XREG); @@ -269,7 +270,8 @@ static int com20020_reset(struct net_device *dev, int really_reset) if (really_reset) { /* reset the card */ ARCRESET; - mdelay(RESETtime * 2); /* COM20020 seems to be slower sometimes */ + mdelay(RESETtime * 2); + /* COM20020 seems to be slower sometimes */ } /* clear flags & end reset */ arc_printk(D_DEBUG, dev, "%s: %d: %s\n", __FILE__, __LINE__, __func__); @@ -338,15 +340,15 @@ static void com20020_set_mc_list(struct net_device *dev) struct arcnet_local *lp = netdev_priv(dev); int ioaddr = dev->base_addr; - if ((dev->flags & IFF_PROMISC) && (dev->flags & IFF_UP)) { /* Enable promiscuous mode */ + if ((dev->flags & IFF_PROMISC) && (dev->flags & IFF_UP)) { + /* Enable promiscuous mode */ if (!(lp->setup & PROMISCset)) arc_printk(D_NORMAL, dev, "Setting promiscuous flag...\n"); SET_SUBADR(SUB_SETUP1); lp->setup |= PROMISCset; outb(lp->setup, _XREG); - } else + } else { /* Disable promiscuous mode, use normal mode */ - { if ((lp->setup & PROMISCset)) arc_printk(D_NORMAL, dev, "Resetting promiscuous flag...\n"); SET_SUBADR(SUB_SETUP1); @@ -370,7 +372,7 @@ MODULE_LICENSE("GPL"); static int __init com20020_module_init(void) { if (BUGLVL(D_NORMAL)) - pr_info("%s\n", "COM20020 chipset support (by David Woodhouse et al.)\n"); + pr_info("%s\n", "COM20020 chipset support (by David Woodhouse et al.)"); return 0; } diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 11bc589aa8d1..9e779a53035d 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -49,8 +49,8 @@ static void com90io_setmask(struct net_device *dev, int mask); static int com90io_reset(struct net_device *dev, int really_reset); static void com90io_copy_to_card(struct net_device *dev, int bufnum, int offset, void *buf, int count); -static void com90io_copy_from_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count); 
+static void com90io_copy_from_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count); /* Handy defines for ARCnet specific stuff */ @@ -96,7 +96,8 @@ static u_char get_buffer_byte(struct net_device *dev, unsigned offset) } #ifdef ONE_AT_A_TIME_TX -static void put_buffer_byte(struct net_device *dev, unsigned offset, u_char datum) +static void put_buffer_byte(struct net_device *dev, unsigned offset, + u_char datum) { int ioaddr = dev->base_addr; @@ -108,7 +109,8 @@ static void put_buffer_byte(struct net_device *dev, unsigned offset, u_char datu #endif -static void get_whole_buffer(struct net_device *dev, unsigned offset, unsigned length, char *dest) +static void get_whole_buffer(struct net_device *dev, unsigned offset, + unsigned length, char *dest) { int ioaddr = dev->base_addr; @@ -123,7 +125,8 @@ static void get_whole_buffer(struct net_device *dev, unsigned offset, unsigned l #endif } -static void put_whole_buffer(struct net_device *dev, unsigned offset, unsigned length, char *dest) +static void put_whole_buffer(struct net_device *dev, unsigned offset, + unsigned length, char *dest) { int ioaddr = dev->base_addr; @@ -237,12 +240,14 @@ static int __init com90io_found(struct net_device *dev) int err; /* Reserve the irq */ - if (request_irq(dev->irq, arcnet_interrupt, 0, "arcnet (COM90xx-IO)", dev)) { + if (request_irq(dev->irq, arcnet_interrupt, 0, + "arcnet (COM90xx-IO)", dev)) { arc_printk(D_NORMAL, dev, "Can't get IRQ %d!\n", dev->irq); return -ENODEV; } /* Reserve the I/O region */ - if (!request_region(dev->base_addr, ARCNET_TOTAL_SIZE, "arcnet (COM90xx-IO)")) { + if (!request_region(dev->base_addr, ARCNET_TOTAL_SIZE, + "arcnet (COM90xx-IO)")) { free_irq(dev->irq, dev); return -EBUSY; } @@ -338,15 +343,15 @@ static void com90io_setmask(struct net_device *dev, int mask) AINTMASK(mask); } -static void com90io_copy_to_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count) +static void com90io_copy_to_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count) { TIME(dev, "put_whole_buffer", count, put_whole_buffer(dev, bufnum * 512 + offset, count, buf)); } -static void com90io_copy_from_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count) +static void com90io_copy_from_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count) { TIME(dev, "get_whole_buffer", count, get_whole_buffer(dev, bufnum * 512 + offset, count, buf)); @@ -418,7 +423,9 @@ static void __exit com90io_exit(void) unregister_netdev(dev); - /* Set the thing back to MMAP mode, in case the old driver is loaded later */ + /* In case the old driver is loaded later, + * set the thing back to MMAP mode + */ outb((inb(_CONFIG) & ~IOMAPflag), _CONFIG); free_irq(dev->irq, dev); diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 50c346b5d907..62100acfb6a7 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -60,8 +60,8 @@ static void com90xx_setmask(struct net_device *dev, int mask); static int com90xx_reset(struct net_device *dev, int really_reset); static void com90xx_copy_to_card(struct net_device *dev, int bufnum, int offset, void *buf, int count); -static void com90xx_copy_from_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count); +static void com90xx_copy_from_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count); /* Known ARCnet cards */ @@ -165,7 +165,8 @@ static void __init com90xx_probe(void) ioaddr = *port; - if 
(!request_region(*port, ARCNET_TOTAL_SIZE, "arcnet (90xx)")) { + if (!request_region(*port, ARCNET_TOTAL_SIZE, + "arcnet (90xx)")) { arc_cont(D_INIT_REASONS, "(request_region)\n"); arc_cont(D_INIT_REASONS, "S1: "); if (BUGLVL(D_INIT_REASONS)) @@ -467,7 +468,8 @@ static int check_mirror(unsigned long addr, size_t size) /* Set up the struct net_device associated with this card. Called after * probing succeeds. */ -static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem *p) +static int __init com90xx_found(int ioaddr, int airq, u_long shmem, + void __iomem *p) { struct net_device *dev = NULL; struct arcnet_local *lp; @@ -511,7 +513,9 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem iounmap(p); release_mem_region(shmem, MIRROR_SIZE); - if (!request_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1, "arcnet (90xx)")) + if (!request_mem_region(dev->mem_start, + dev->mem_end - dev->mem_start + 1, + "arcnet (90xx)")) goto err_free_dev; /* reserve the irq */ @@ -530,7 +534,8 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem lp->hw.owner = THIS_MODULE; lp->hw.copy_to_card = com90xx_copy_to_card; lp->hw.copy_from_card = com90xx_copy_from_card; - lp->mem_start = ioremap(dev->mem_start, dev->mem_end - dev->mem_start + 1); + lp->mem_start = ioremap(dev->mem_start, + dev->mem_end - dev->mem_start + 1); if (!lp->mem_start) { arc_printk(D_NORMAL, dev, "Can't remap device memory!\n"); goto err_free_irq; @@ -627,8 +632,8 @@ static int com90xx_reset(struct net_device *dev, int really_reset) return 0; } -static void com90xx_copy_to_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count) +static void com90xx_copy_to_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count) { struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + bufnum * 512 + offset; @@ -636,8 +641,8 @@ static void com90xx_copy_to_card(struct net_device *dev, int bufnum, int offset, TIME(dev, "memcpy_toio", count, memcpy_toio(memaddr, buf, count)); } -static void com90xx_copy_from_card(struct net_device *dev, int bufnum, int offset, - void *buf, int count) +static void com90xx_copy_from_card(struct net_device *dev, int bufnum, + int offset, void *buf, int count) { struct arcnet_local *lp = netdev_priv(dev); void __iomem *memaddr = lp->mem_start + bufnum * 512 + offset; @@ -671,7 +676,8 @@ static void __exit com90xx_exit(void) free_irq(dev->irq, dev); iounmap(lp->mem_start); release_region(dev->base_addr, ARCNET_TOTAL_SIZE); - release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); + release_mem_region(dev->mem_start, + dev->mem_end - dev->mem_start + 1); free_netdev(dev); } } diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index b76e458b19e0..691bee5bcd42 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -213,7 +213,8 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, arc_printk(D_DURING, dev, "prepare_tx: txbufs=%d/%d/%d\n", lp->next_tx, lp->cur_tx, bufnum); - length -= ARC_HDR_SIZE; /* hard header is not included in packet length */ + /* hard header is not included in packet length */ + length -= ARC_HDR_SIZE; if (length > XMTU) { /* should never happen! other people already check for this. 
*/ diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index 1d777c1e8383..aaa1966593aa 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -188,11 +188,14 @@ static void rx(struct net_device *dev, int bufnum, pkt = (struct archdr *)skb->data; soft = &pkt->soft.rfc1201; - /* up to sizeof(pkt->soft) has already been copied from the card */ + /* up to sizeof(pkt->soft) has already + * been copied from the card + */ memcpy(pkt, pkthdr, sizeof(struct archdr)); if (length > sizeof(pkt->soft)) - lp->hw.copy_from_card(dev, bufnum, ofs + sizeof(pkt->soft), - pkt->soft.raw + sizeof(pkt->soft), + lp->hw.copy_from_card(dev, bufnum, + ofs + sizeof(pkt->soft), + pkt->soft.raw + sizeof(pkt->soft), length - sizeof(pkt->soft)); /* ARP packets have problems when sent from some DOS systems: @@ -318,7 +321,8 @@ static void rx(struct net_device *dev, int bufnum, return; } in->lastpacket++; - if (packetnum != in->lastpacket) { /* not the right flag! */ + /* if not the right flag */ + if (packetnum != in->lastpacket) { /* harmless duplicate? ignore. */ if (packetnum <= in->lastpacket - 1) { arc_printk(D_EXTRA, dev, "duplicate splitpacket ignored! (splitflag=%d)\n", @@ -477,7 +481,8 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, arc_printk(D_DURING, dev, "prepare_tx: txbufs=%d/%d/%d\n", lp->next_tx, lp->cur_tx, bufnum); - length -= ARC_HDR_SIZE; /* hard header is not included in packet length */ + /* hard header is not included in packet length */ + length -= ARC_HDR_SIZE; pkt->soft.rfc1201.split_flag = 0; /* need to do a split packet? */ diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 1d8e36e13616..9ca135d0f114 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -203,8 +203,8 @@ struct ArcProto { unsigned short ethproto, uint8_t daddr); /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, int length, - int bufnum); + int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, + int length, int bufnum); int (*continue_tx)(struct net_device *dev, int bufnum); int (*ack_tx)(struct net_device *dev, int acked); }; @@ -309,10 +309,10 @@ struct arcnet_local { void (*open)(struct net_device *dev); void (*close)(struct net_device *dev); - void (*copy_to_card)(struct net_device *dev, int bufnum, int offset, - void *buf, int count); - void (*copy_from_card)(struct net_device *dev, int bufnum, int offset, - void *buf, int count); + void (*copy_to_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); + void (*copy_from_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); } hw; void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ -- cgit v1.2.3 From 26c6d281688e8bb8154fa78c60e551d024f5d0b8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:03 -0700 Subject: arcnet: Move files out of include/linux These #include files don't need to be in the include/linux directory as they can be local to drivers/net/arcnet/ Move them and update the #include statements. Update the MAINTAINERS file pattern by deleting arcdevice from the NETWORKING block as arcnet is currently unmaintained. 
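Mechanically the change is the same in every driver source file: drop the public header from include/linux/ and pull in the driver-local copy instead, switching from angle brackets to quotes so the preprocessor searches the directory of the including file first, with no new -I flags needed. A representative two-line substitution, shown here for orientation before the real per-file hunks below:

-#include <linux/arcdevice.h>
+
+#include "arcdevice.h"

The same substitution repeats for com20020.h in the files that use it.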
Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- MAINTAINERS | 1 - drivers/net/arcnet/arc-rawmode.c | 3 +- drivers/net/arcnet/arc-rimi.c | 3 +- drivers/net/arcnet/arcdevice.h | 346 ++++++++++++++++++++++++++++++++++++++ drivers/net/arcnet/arcnet.c | 3 +- drivers/net/arcnet/capmode.c | 3 +- drivers/net/arcnet/com20020-isa.c | 6 +- drivers/net/arcnet/com20020-pci.c | 6 +- drivers/net/arcnet/com20020.c | 6 +- drivers/net/arcnet/com20020.h | 145 ++++++++++++++++ drivers/net/arcnet/com20020_cs.c | 7 +- drivers/net/arcnet/com90io.c | 3 +- drivers/net/arcnet/com90xx.c | 3 +- drivers/net/arcnet/rfc1051.c | 3 +- drivers/net/arcnet/rfc1201.c | 3 +- include/linux/arcdevice.h | 346 -------------------------------------- include/linux/com20020.h | 145 ---------------- 17 files changed, 519 insertions(+), 513 deletions(-) create mode 100644 drivers/net/arcnet/arcdevice.h create mode 100644 drivers/net/arcnet/com20020.h delete mode 100644 include/linux/arcdevice.h delete mode 100644 include/linux/com20020.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 310da4295c70..c978a257f4aa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7292,7 +7292,6 @@ S: Odd Fixes F: drivers/net/ F: include/linux/if_* F: include/linux/netdevice.h -F: include/linux/arcdevice.h F: include/linux/etherdevice.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index 59b3083c19f9..35a747a7329e 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -33,7 +33,8 @@ #include #include #include -#include <linux/arcdevice.h> + +#include "arcdevice.h" static void rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length); diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 8fa5eb43f308..37406fffc001 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -37,7 +37,8 @@ #include #include #include -#include <linux/arcdevice.h> + +#include "arcdevice.h" /* Internal function declarations */ diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h new file mode 100644 index 000000000000..9ca135d0f114 --- /dev/null +++ b/drivers/net/arcnet/arcdevice.h @@ -0,0 +1,346 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions used by the ARCnet driver. + * + * Authors: Avery Pennarun and David Woodhouse + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#ifndef _LINUX_ARCDEVICE_H +#define _LINUX_ARCDEVICE_H + +#include +#include + +#ifdef __KERNEL__ +#include + +/* + * RECON_THRESHOLD is the maximum number of RECON messages to receive + * within one minute before printing a "cabling problem" warning. The + * default value should be fine. + * + * After that, a "cabling restored" message will be printed on the next IRQ + * if no RECON messages have been received for 10 seconds. + * + * Do not define RECON_THRESHOLD at all if you want to disable this feature. + */ +#define RECON_THRESHOLD 30 + +/* + * Define this to the minimum "timeout" value. If a transmit takes longer + * than TX_TIMEOUT jiffies, Linux will abort the TX and retry.
On a large + * network, or one with heavy network traffic, this timeout may need to be + * increased. The larger it is, though, the longer it will be between + * necessary transmits - don't set this too high. + */ +#define TX_TIMEOUT (HZ * 200 / 1000) + +/* Display warnings about the driver being an ALPHA version. */ +#undef ALPHA_WARNING + +/* + * Debugging bitflags: each option can be enabled individually. + * + * Note: only debug flags included in the ARCNET_DEBUG_MAX define will + * actually be available. GCC will (at least, GCC 2.7.0 will) notice + * lines using a BUGLVL not in ARCNET_DEBUG_MAX and automatically optimize + * them out. + */ +#define D_NORMAL 1 /* important operational info */ +#define D_EXTRA 2 /* useful, but non-vital information */ +#define D_INIT 4 /* show init/probe messages */ +#define D_INIT_REASONS 8 /* show reasons for discarding probes */ +#define D_RECON 32 /* print a message whenever token is lost */ +#define D_PROTO 64 /* debug auto-protocol support */ +/* debug levels below give LOTS of output during normal operation! */ +#define D_DURING 128 /* trace operations (including irq's) */ +#define D_TX 256 /* show tx packets */ +#define D_RX 512 /* show rx packets */ +#define D_SKB 1024 /* show skb's */ +#define D_SKB_SIZE 2048 /* show skb sizes */ +#define D_TIMING 4096 /* show time needed to copy buffers to card */ +#define D_DEBUG 8192 /* Very detailed debug line for line */ + +#ifndef ARCNET_DEBUG_MAX +#define ARCNET_DEBUG_MAX (127) /* change to ~0 if you want detailed debugging */ +#endif + +#ifndef ARCNET_DEBUG +#define ARCNET_DEBUG (D_NORMAL | D_EXTRA) +#endif +extern int arcnet_debug; + +#define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) + +/* macros to simplify debug checking */ +#define arc_printk(x, dev, fmt, ...) \ +do { \ + if (BUGLVL(x)) { \ + if ((x) == D_NORMAL) \ + netdev_warn(dev, fmt, ##__VA_ARGS__); \ + else if ((x) < D_DURING) \ + netdev_info(dev, fmt, ##__VA_ARGS__); \ + else \ + netdev_dbg(dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define arc_cont(x, fmt, ...) \ +do { \ + if (BUGLVL(x)) \ + pr_cont(fmt, ##__VA_ARGS__); \ +} while (0) + +/* see how long a function call takes to run, expressed in CPU cycles */ +#define TIME(dev, name, bytes, call) \ +do { \ + if (BUGLVL(D_TIMING)) { \ + unsigned long _x, _y; \ + _x = get_cycles(); \ + call; \ + _y = get_cycles(); \ + arc_printk(D_TIMING, dev, \ + "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \ + name, bytes, _y - _x, \ + 100000000 / 1024 * bytes / (_y - _x + 1)); \ + } else { \ + call; \ + } \ +} while (0) + +/* + * Time needed to reset the card - in ms (milliseconds). This works on my + * SMC PC100. I can't find a reference that tells me just how long I + * should wait. + */ +#define RESETtime (300) + +/* + * These are the max/min lengths of packet payload, not including the + * arc_hardware header, but definitely including the soft header. + * + * Note: packet sizes 254, 255, 256 are impossible because of the way + * ARCnet registers work That's why RFC1201 defines "exception" packets. + * In non-RFC1201 protocols, we have to just tack some extra bytes on the + * end. + */ +#define MTU 253 /* normal packet max size */ +#define MinTU 257 /* extended packet min size */ +#define XMTU 508 /* extended packet max size */ + +/* status/interrupt mask bit fields */ +#define TXFREEflag 0x01 /* transmitter available */ +#define TXACKflag 0x02 /* transmitted msg. 
ackd */ +#define RECONflag 0x04 /* network reconfigured */ +#define TESTflag 0x08 /* test flag */ +#define EXCNAKflag 0x08 /* excesive nak flag */ +#define RESETflag 0x10 /* power-on-reset */ +#define RES1flag 0x20 /* reserved - usually set by jumper */ +#define RES2flag 0x40 /* reserved - usually set by jumper */ +#define NORXflag 0x80 /* receiver inhibited */ + +/* Flags used for IO-mapped memory operations */ +#define AUTOINCflag 0x40 /* Increase location with each access */ +#define IOMAPflag 0x02 /* (for 90xx) Use IO mapped memory, not mmap */ +#define ENABLE16flag 0x80 /* (for 90xx) Enable 16-bit mode */ + +/* in the command register, the following bits have these meanings: + * 0-2 command + * 3-4 page number (for enable rcv/xmt command) + * 7 receive broadcasts + */ +#define NOTXcmd 0x01 /* disable transmitter */ +#define NORXcmd 0x02 /* disable receiver */ +#define TXcmd 0x03 /* enable transmitter */ +#define RXcmd 0x04 /* enable receiver */ +#define CONFIGcmd 0x05 /* define configuration */ +#define CFLAGScmd 0x06 /* clear flags */ +#define TESTcmd 0x07 /* load test flags */ + +/* flags for "clear flags" command */ +#define RESETclear 0x08 /* power-on-reset */ +#define CONFIGclear 0x10 /* system reconfigured */ + +#define EXCNAKclear 0x0E /* Clear and acknowledge the excive nak bit */ + +/* flags for "load test flags" command */ +#define TESTload 0x08 /* test flag (diagnostic) */ + +/* byte deposited into first address of buffers on reset */ +#define TESTvalue 0321 /* that's octal for 0xD1 :) */ + +/* for "enable receiver" command */ +#define RXbcasts 0x80 /* receive broadcasts */ + +/* flags for "define configuration" command */ +#define NORMALconf 0x00 /* 1-249 byte packets */ +#define EXTconf 0x08 /* 250-504 byte packets */ + +/* card feature flags, set during auto-detection. + * (currently only used by com20020pci) + */ +#define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ +#define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, + but default is 2.5MBit. */ + +/* information needed to define an encapsulation driver */ +struct ArcProto { + char suffix; /* a for RFC1201, e for ether-encap, etc. */ + int mtu; /* largest possible packet */ + int is_ip; /* This is a ip plugin - not a raw thing */ + + void (*rx)(struct net_device *dev, int bufnum, + struct archdr *pkthdr, int length); + int (*build_header)(struct sk_buff *skb, struct net_device *dev, + unsigned short ethproto, uint8_t daddr); + + /* these functions return '1' if the skb can now be freed */ + int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, + int length, int bufnum); + int (*continue_tx)(struct net_device *dev, int bufnum); + int (*ack_tx)(struct net_device *dev, int acked); +}; + +extern struct ArcProto *arc_proto_map[256], *arc_proto_default, + *arc_bcast_proto, *arc_raw_proto; + +/* + * "Incoming" is information needed for each address that could be sending + * to us. Mostly for partially-received split packets. + */ +struct Incoming { + struct sk_buff *skb; /* packet data buffer */ + __be16 sequence; /* sequence number of assembly */ + uint8_t lastpacket, /* number of last packet (from 1) */ + numpackets; /* number of packets in split */ +}; + +/* only needed for RFC1201 */ +struct Outgoing { + struct ArcProto *proto; /* protocol driver that owns this: + * if NULL, no packet is pending. 
+ */ + struct sk_buff *skb; /* buffer from upper levels */ + struct archdr *pkt; /* a pointer into the skb */ + uint16_t length, /* bytes total */ + dataleft, /* bytes left */ + segnum, /* segment being sent */ + numsegs; /* number of segments */ +}; + +struct arcnet_local { + uint8_t config, /* current value of CONFIG register */ + timeout, /* Extended timeout for COM20020 */ + backplane, /* Backplane flag for COM20020 */ + clockp, /* COM20020 clock divider */ + clockm, /* COM20020 clock multiplier flag */ + setup, /* Contents of setup1 register */ + setup2, /* Contents of setup2 register */ + intmask; /* current value of INTMASK register */ + uint8_t default_proto[256]; /* default encap to use for each host */ + int cur_tx, /* buffer used by current transmit, or -1 */ + next_tx, /* buffer where a packet is ready to send */ + cur_rx; /* current receive buffer */ + int lastload_dest, /* can last loaded packet be acked? */ + lasttrans_dest; /* can last TX'd packet be acked? */ + int timed_out; /* need to process TX timeout and drop packet */ + unsigned long last_timeout; /* time of last reported timeout */ + char *card_name; /* card ident string */ + int card_flags; /* special card features */ + + /* On preemtive and SMB a lock is needed */ + spinlock_t lock; + + /* + * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of + * which can be used for either sending or receiving. The new dynamic + * buffer management routines use a simple circular queue of available + * buffers, and take them as they're needed. This way, we simplify + * situations in which we (for example) want to pre-load a transmit + * buffer, or start receiving while we copy a received packet to + * memory. + * + * The rules: only the interrupt handler is allowed to _add_ buffers to + * the queue; thus, this doesn't require a lock. Both the interrupt + * handler and the transmit function will want to _remove_ buffers, so + * we need to handle the situation where they try to do it at the same + * time. + * + * If next_buf == first_free_buf, the queue is empty. Since there are + * only four possible buffers, the queue should never be full. + */ + atomic_t buf_lock; + int buf_queue[5]; + int next_buf, first_free_buf; + + /* network "reconfiguration" handling */ + unsigned long first_recon; /* time of "first" RECON message to count */ + unsigned long last_recon; /* time of most recent RECON */ + int num_recons; /* number of RECONs between first and last. */ + int network_down; /* do we think the network is down? 
*/ + + int excnak_pending; /* We just got an excesive nak interrupt */ + + struct { + uint16_t sequence; /* sequence number (incs with each packet) */ + __be16 aborted_seq; + + struct Incoming incoming[256]; /* one from each address */ + } rfc1201; + + /* really only used by rfc1201, but we'll pretend it's not */ + struct Outgoing outgoing; /* packet currently being sent */ + + /* hardware-specific functions */ + struct { + struct module *owner; + void (*command)(struct net_device *dev, int cmd); + int (*status)(struct net_device *dev); + void (*intmask)(struct net_device *dev, int mask); + int (*reset)(struct net_device *dev, int really_reset); + void (*open)(struct net_device *dev); + void (*close)(struct net_device *dev); + + void (*copy_to_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); + void (*copy_from_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); + } hw; + + void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ +}; + +#define ARCRESET(x) (lp->hw.reset(dev, (x))) +#define ACOMMAND(x) (lp->hw.command(dev, (x))) +#define ASTATUS() (lp->hw.status(dev)) +#define AINTMASK(x) (lp->hw.intmask(dev, (x))) + +#if ARCNET_DEBUG_MAX & D_SKB +void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); +#else +static inline +void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) +{ +} +#endif + +void arcnet_unregister_proto(struct ArcProto *proto); +irqreturn_t arcnet_interrupt(int irq, void *dev_id); +struct net_device *alloc_arcdev(const char *name); + +int arcnet_open(struct net_device *dev); +int arcnet_close(struct net_device *dev); +netdev_tx_t arcnet_send_packet(struct sk_buff *skb, + struct net_device *dev); +void arcnet_timeout(struct net_device *dev); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_ARCDEVICE_H */ diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 00074e781353..c8423ee24e71 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -50,9 +50,10 @@ #include #include #include -#include #include +#include "arcdevice.h" + /* "do nothing" functions for protocol drivers */ static void null_rx(struct net_device *dev, int bufnum, struct archdr *pkthdr, int length); diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index 46a76dcc21aa..2f2d0d94a80d 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -35,7 +35,8 @@ #include #include #include -#include + +#include "arcdevice.h" /* packet receiver */ static void rx(struct net_device *dev, int bufnum, diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index 352500b8d875..ab5202b506a6 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -39,11 +39,11 @@ #include #include #include -#include -#include - #include +#include "arcdevice.h" +#include "com20020.h" + /* We cannot (yet) probe for an IO mapped card, although we can check that * it's where we were told it was, and even do autoirq. 
*/ diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index a28887dfcc9e..1a75a6982031 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -39,12 +39,12 @@ #include #include #include -#include -#include #include - #include +#include "arcdevice.h" +#include "com20020.h" + /* Module parameters */ static int node; diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 00f910cf7b81..f46e22093c54 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -37,11 +37,11 @@ #include #include #include -#include -#include - #include +#include "arcdevice.h" +#include "com20020.h" + static char *clockrates[] = { "XXXXXXX", "XXXXXXXX", "XXXXXX", "2.5 Mb/s", "1.25Mb/s", "625 Kb/s", "312.5 Kb/s", diff --git a/drivers/net/arcnet/com20020.h b/drivers/net/arcnet/com20020.h new file mode 100644 index 000000000000..85898995b234 --- /dev/null +++ b/drivers/net/arcnet/com20020.h @@ -0,0 +1,145 @@ +/* + * Linux ARCnet driver - COM20020 chipset support - function declarations + * + * Written 1997 by David Woodhouse. + * Written 1994-1999 by Avery Pennarun. + * Derived from skeleton.c by Donald Becker. + * + * Special thanks to Contemporary Controls, Inc. (www.ccontrols.com) + * for sponsoring the further development of this driver. + * + * ********************** + * + * The original copyright of skeleton.c was as follows: + * + * skeleton.c Written 1993 by Donald Becker. + * Copyright 1993 United States Government as represented by the + * Director, National Security Agency. This software may only be used + * and distributed according to the terms of the GNU General Public License as + * modified by SRC, incorporated herein by reference. + * + * ********************** + * + * For more details, see drivers/net/arcnet.c + * + * ********************** + */ +#ifndef __COM20020_H +#define __COM20020_H + +int com20020_check(struct net_device *dev); +int com20020_found(struct net_device *dev, int shared); +extern const struct net_device_ops com20020_netdev_ops; + +/* The number of low I/O ports used by the card. */ +#define ARCNET_TOTAL_SIZE 8 + +/* various register addresses */ +#ifdef CONFIG_SA1100_CT6001 +#define BUS_ALIGN 2 /* 8 bit device on a 16 bit bus - needs padding */ +#else +#define BUS_ALIGN 1 +#endif + +#define PLX_PCI_MAX_CARDS 2 + +struct com20020_pci_channel_map { + u32 bar; + u32 offset; + u32 size; /* 0x00 - auto, e.g. 
length of entire bar */ +}; + +struct com20020_pci_card_info { + const char *name; + int devcount; + + struct com20020_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CARDS]; + + unsigned int flags; +}; + +struct com20020_priv { + struct com20020_pci_card_info *ci; + struct list_head list_dev; +}; + +struct com20020_dev { + struct list_head list; + struct net_device *dev; + + struct com20020_priv *pci_priv; + int index; +}; + +#define _INTMASK (ioaddr+BUS_ALIGN*0) /* writable */ +#define _STATUS (ioaddr+BUS_ALIGN*0) /* readable */ +#define _COMMAND (ioaddr+BUS_ALIGN*1) /* standard arcnet commands */ +#define _DIAGSTAT (ioaddr+BUS_ALIGN*1) /* diagnostic status register */ +#define _ADDR_HI (ioaddr+BUS_ALIGN*2) /* control registers for IO-mapped memory */ +#define _ADDR_LO (ioaddr+BUS_ALIGN*3) +#define _MEMDATA (ioaddr+BUS_ALIGN*4) /* data port for IO-mapped memory */ +#define _SUBADR (ioaddr+BUS_ALIGN*5) /* the extended port _XREG refers to */ +#define _CONFIG (ioaddr+BUS_ALIGN*6) /* configuration register */ +#define _XREG (ioaddr+BUS_ALIGN*7) /* extra registers (indexed by _CONFIG + or _SUBADR) */ + +/* in the ADDR_HI register */ +#define RDDATAflag 0x80 /* next access is a read (not a write) */ + +/* in the DIAGSTAT register */ +#define NEWNXTIDflag 0x02 /* ID to which token is passed has changed */ + +/* in the CONFIG register */ +#define RESETcfg 0x80 /* put card in reset state */ +#define TXENcfg 0x20 /* enable TX */ + +/* in SETUP register */ +#define PROMISCset 0x10 /* enable RCV_ALL */ +#define P1MODE 0x80 /* enable P1-MODE for Backplane */ +#define SLOWARB 0x01 /* enable Slow Arbitration for >=5Mbps */ + +/* COM2002x */ +#define SUB_TENTATIVE 0 /* tentative node ID */ +#define SUB_NODE 1 /* node ID */ +#define SUB_SETUP1 2 /* various options */ +#define SUB_TEST 3 /* test/diag register */ + +/* COM20022 only */ +#define SUB_SETUP2 4 /* sundry options */ +#define SUB_BUSCTL 5 /* bus control options */ +#define SUB_DMACOUNT 6 /* DMA count options */ + +#define SET_SUBADR(x) do { \ + if ((x) < 4) \ + { \ + lp->config = (lp->config & ~0x03) | (x); \ + SETCONF; \ + } \ + else \ + { \ + outb(x, _SUBADR); \ + } \ +} while (0) + +#undef ARCRESET +#undef ASTATUS +#undef ACOMMAND +#undef AINTMASK + +#define ARCRESET { outb(lp->config | 0x80, _CONFIG); \ + udelay(5); \ + outb(lp->config , _CONFIG); \ + } +#define ARCRESET0 { outb(0x18 | 0x80, _CONFIG); \ + udelay(5); \ + outb(0x18 , _CONFIG); \ + } + +#define ASTATUS() inb(_STATUS) +#define ADIAGSTATUS() inb(_DIAGSTAT) +#define ACOMMAND(cmd) outb((cmd),_COMMAND) +#define AINTMASK(msk) outb((msk),_INTMASK) + +#define SETCONF outb(lp->config, _CONFIG) + +#endif /* __COM20020_H */ diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 854169489b68..5fdde6946427 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -42,13 +42,12 @@ #include #include #include -#include -#include - +#include #include #include -#include +#include "arcdevice.h" +#include "com20020.h" static void regdump(struct net_device *dev) { diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 9e779a53035d..ce15d1911048 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -38,7 +38,8 @@ #include #include #include -#include + +#include "arcdevice.h" /* Internal function declarations */ diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 62100acfb6a7..b672795acaa7 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ 
-36,7 +36,8 @@ #include #include #include -#include + +#include "arcdevice.h" /* Define this to speed up the autoprobe by assuming if only one io port and * shmem are left in the list at Stage 5, they must correspond to each diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index 691bee5bcd42..4b1a75469cb1 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -33,7 +33,8 @@ #include #include #include -#include + +#include "arcdevice.h" static __be16 type_trans(struct sk_buff *skb, struct net_device *dev); static void rx(struct net_device *dev, int bufnum, diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index aaa1966593aa..566da5ecdc9d 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -32,7 +32,8 @@ #include #include #include -#include + +#include "arcdevice.h" MODULE_LICENSE("GPL"); diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h deleted file mode 100644 index 9ca135d0f114..000000000000 --- a/include/linux/arcdevice.h +++ /dev/null @@ -1,346 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. NET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions used by the ARCnet driver. - * - * Authors: Avery Pennarun and David Woodhouse - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ -#ifndef _LINUX_ARCDEVICE_H -#define _LINUX_ARCDEVICE_H - -#include -#include - -#ifdef __KERNEL__ -#include - -/* - * RECON_THRESHOLD is the maximum number of RECON messages to receive - * within one minute before printing a "cabling problem" warning. The - * default value should be fine. - * - * After that, a "cabling restored" message will be printed on the next IRQ - * if no RECON messages have been received for 10 seconds. - * - * Do not define RECON_THRESHOLD at all if you want to disable this feature. - */ -#define RECON_THRESHOLD 30 - -/* - * Define this to the minimum "timeout" value. If a transmit takes longer - * than TX_TIMEOUT jiffies, Linux will abort the TX and retry. On a large - * network, or one with heavy network traffic, this timeout may need to be - * increased. The larger it is, though, the longer it will be between - * necessary transmits - don't set this too high. - */ -#define TX_TIMEOUT (HZ * 200 / 1000) - -/* Display warnings about the driver being an ALPHA version. */ -#undef ALPHA_WARNING - -/* - * Debugging bitflags: each option can be enabled individually. - * - * Note: only debug flags included in the ARCNET_DEBUG_MAX define will - * actually be available. GCC will (at least, GCC 2.7.0 will) notice - * lines using a BUGLVL not in ARCNET_DEBUG_MAX and automatically optimize - * them out. - */ -#define D_NORMAL 1 /* important operational info */ -#define D_EXTRA 2 /* useful, but non-vital information */ -#define D_INIT 4 /* show init/probe messages */ -#define D_INIT_REASONS 8 /* show reasons for discarding probes */ -#define D_RECON 32 /* print a message whenever token is lost */ -#define D_PROTO 64 /* debug auto-protocol support */ -/* debug levels below give LOTS of output during normal operation! 
*/ -#define D_DURING 128 /* trace operations (including irq's) */ -#define D_TX 256 /* show tx packets */ -#define D_RX 512 /* show rx packets */ -#define D_SKB 1024 /* show skb's */ -#define D_SKB_SIZE 2048 /* show skb sizes */ -#define D_TIMING 4096 /* show time needed to copy buffers to card */ -#define D_DEBUG 8192 /* Very detailed debug line for line */ - -#ifndef ARCNET_DEBUG_MAX -#define ARCNET_DEBUG_MAX (127) /* change to ~0 if you want detailed debugging */ -#endif - -#ifndef ARCNET_DEBUG -#define ARCNET_DEBUG (D_NORMAL | D_EXTRA) -#endif -extern int arcnet_debug; - -#define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) - -/* macros to simplify debug checking */ -#define arc_printk(x, dev, fmt, ...) \ -do { \ - if (BUGLVL(x)) { \ - if ((x) == D_NORMAL) \ - netdev_warn(dev, fmt, ##__VA_ARGS__); \ - else if ((x) < D_DURING) \ - netdev_info(dev, fmt, ##__VA_ARGS__); \ - else \ - netdev_dbg(dev, fmt, ##__VA_ARGS__); \ - } \ -} while (0) - -#define arc_cont(x, fmt, ...) \ -do { \ - if (BUGLVL(x)) \ - pr_cont(fmt, ##__VA_ARGS__); \ -} while (0) - -/* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(dev, name, bytes, call) \ -do { \ - if (BUGLVL(D_TIMING)) { \ - unsigned long _x, _y; \ - _x = get_cycles(); \ - call; \ - _y = get_cycles(); \ - arc_printk(D_TIMING, dev, \ - "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1)); \ - } else { \ - call; \ - } \ -} while (0) - -/* - * Time needed to reset the card - in ms (milliseconds). This works on my - * SMC PC100. I can't find a reference that tells me just how long I - * should wait. - */ -#define RESETtime (300) - -/* - * These are the max/min lengths of packet payload, not including the - * arc_hardware header, but definitely including the soft header. - * - * Note: packet sizes 254, 255, 256 are impossible because of the way - * ARCnet registers work That's why RFC1201 defines "exception" packets. - * In non-RFC1201 protocols, we have to just tack some extra bytes on the - * end. - */ -#define MTU 253 /* normal packet max size */ -#define MinTU 257 /* extended packet min size */ -#define XMTU 508 /* extended packet max size */ - -/* status/interrupt mask bit fields */ -#define TXFREEflag 0x01 /* transmitter available */ -#define TXACKflag 0x02 /* transmitted msg. 
ackd */ -#define RECONflag 0x04 /* network reconfigured */ -#define TESTflag 0x08 /* test flag */ -#define EXCNAKflag 0x08 /* excesive nak flag */ -#define RESETflag 0x10 /* power-on-reset */ -#define RES1flag 0x20 /* reserved - usually set by jumper */ -#define RES2flag 0x40 /* reserved - usually set by jumper */ -#define NORXflag 0x80 /* receiver inhibited */ - -/* Flags used for IO-mapped memory operations */ -#define AUTOINCflag 0x40 /* Increase location with each access */ -#define IOMAPflag 0x02 /* (for 90xx) Use IO mapped memory, not mmap */ -#define ENABLE16flag 0x80 /* (for 90xx) Enable 16-bit mode */ - -/* in the command register, the following bits have these meanings: - * 0-2 command - * 3-4 page number (for enable rcv/xmt command) - * 7 receive broadcasts - */ -#define NOTXcmd 0x01 /* disable transmitter */ -#define NORXcmd 0x02 /* disable receiver */ -#define TXcmd 0x03 /* enable transmitter */ -#define RXcmd 0x04 /* enable receiver */ -#define CONFIGcmd 0x05 /* define configuration */ -#define CFLAGScmd 0x06 /* clear flags */ -#define TESTcmd 0x07 /* load test flags */ - -/* flags for "clear flags" command */ -#define RESETclear 0x08 /* power-on-reset */ -#define CONFIGclear 0x10 /* system reconfigured */ - -#define EXCNAKclear 0x0E /* Clear and acknowledge the excive nak bit */ - -/* flags for "load test flags" command */ -#define TESTload 0x08 /* test flag (diagnostic) */ - -/* byte deposited into first address of buffers on reset */ -#define TESTvalue 0321 /* that's octal for 0xD1 :) */ - -/* for "enable receiver" command */ -#define RXbcasts 0x80 /* receive broadcasts */ - -/* flags for "define configuration" command */ -#define NORMALconf 0x00 /* 1-249 byte packets */ -#define EXTconf 0x08 /* 250-504 byte packets */ - -/* card feature flags, set during auto-detection. - * (currently only used by com20020pci) - */ -#define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ -#define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, - but default is 2.5MBit. */ - -/* information needed to define an encapsulation driver */ -struct ArcProto { - char suffix; /* a for RFC1201, e for ether-encap, etc. */ - int mtu; /* largest possible packet */ - int is_ip; /* This is a ip plugin - not a raw thing */ - - void (*rx)(struct net_device *dev, int bufnum, - struct archdr *pkthdr, int length); - int (*build_header)(struct sk_buff *skb, struct net_device *dev, - unsigned short ethproto, uint8_t daddr); - - /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, - int length, int bufnum); - int (*continue_tx)(struct net_device *dev, int bufnum); - int (*ack_tx)(struct net_device *dev, int acked); -}; - -extern struct ArcProto *arc_proto_map[256], *arc_proto_default, - *arc_bcast_proto, *arc_raw_proto; - -/* - * "Incoming" is information needed for each address that could be sending - * to us. Mostly for partially-received split packets. - */ -struct Incoming { - struct sk_buff *skb; /* packet data buffer */ - __be16 sequence; /* sequence number of assembly */ - uint8_t lastpacket, /* number of last packet (from 1) */ - numpackets; /* number of packets in split */ -}; - -/* only needed for RFC1201 */ -struct Outgoing { - struct ArcProto *proto; /* protocol driver that owns this: - * if NULL, no packet is pending. 
- */ - struct sk_buff *skb; /* buffer from upper levels */ - struct archdr *pkt; /* a pointer into the skb */ - uint16_t length, /* bytes total */ - dataleft, /* bytes left */ - segnum, /* segment being sent */ - numsegs; /* number of segments */ -}; - -struct arcnet_local { - uint8_t config, /* current value of CONFIG register */ - timeout, /* Extended timeout for COM20020 */ - backplane, /* Backplane flag for COM20020 */ - clockp, /* COM20020 clock divider */ - clockm, /* COM20020 clock multiplier flag */ - setup, /* Contents of setup1 register */ - setup2, /* Contents of setup2 register */ - intmask; /* current value of INTMASK register */ - uint8_t default_proto[256]; /* default encap to use for each host */ - int cur_tx, /* buffer used by current transmit, or -1 */ - next_tx, /* buffer where a packet is ready to send */ - cur_rx; /* current receive buffer */ - int lastload_dest, /* can last loaded packet be acked? */ - lasttrans_dest; /* can last TX'd packet be acked? */ - int timed_out; /* need to process TX timeout and drop packet */ - unsigned long last_timeout; /* time of last reported timeout */ - char *card_name; /* card ident string */ - int card_flags; /* special card features */ - - /* On preemtive and SMB a lock is needed */ - spinlock_t lock; - - /* - * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of - * which can be used for either sending or receiving. The new dynamic - * buffer management routines use a simple circular queue of available - * buffers, and take them as they're needed. This way, we simplify - * situations in which we (for example) want to pre-load a transmit - * buffer, or start receiving while we copy a received packet to - * memory. - * - * The rules: only the interrupt handler is allowed to _add_ buffers to - * the queue; thus, this doesn't require a lock. Both the interrupt - * handler and the transmit function will want to _remove_ buffers, so - * we need to handle the situation where they try to do it at the same - * time. - * - * If next_buf == first_free_buf, the queue is empty. Since there are - * only four possible buffers, the queue should never be full. - */ - atomic_t buf_lock; - int buf_queue[5]; - int next_buf, first_free_buf; - - /* network "reconfiguration" handling */ - unsigned long first_recon; /* time of "first" RECON message to count */ - unsigned long last_recon; /* time of most recent RECON */ - int num_recons; /* number of RECONs between first and last. */ - int network_down; /* do we think the network is down? 
*/ - - int excnak_pending; /* We just got an excesive nak interrupt */ - - struct { - uint16_t sequence; /* sequence number (incs with each packet) */ - __be16 aborted_seq; - - struct Incoming incoming[256]; /* one from each address */ - } rfc1201; - - /* really only used by rfc1201, but we'll pretend it's not */ - struct Outgoing outgoing; /* packet currently being sent */ - - /* hardware-specific functions */ - struct { - struct module *owner; - void (*command)(struct net_device *dev, int cmd); - int (*status)(struct net_device *dev); - void (*intmask)(struct net_device *dev, int mask); - int (*reset)(struct net_device *dev, int really_reset); - void (*open)(struct net_device *dev); - void (*close)(struct net_device *dev); - - void (*copy_to_card)(struct net_device *dev, int bufnum, - int offset, void *buf, int count); - void (*copy_from_card)(struct net_device *dev, int bufnum, - int offset, void *buf, int count); - } hw; - - void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ -}; - -#define ARCRESET(x) (lp->hw.reset(dev, (x))) -#define ACOMMAND(x) (lp->hw.command(dev, (x))) -#define ASTATUS() (lp->hw.status(dev)) -#define AINTMASK(x) (lp->hw.intmask(dev, (x))) - -#if ARCNET_DEBUG_MAX & D_SKB -void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); -#else -static inline -void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) -{ -} -#endif - -void arcnet_unregister_proto(struct ArcProto *proto); -irqreturn_t arcnet_interrupt(int irq, void *dev_id); -struct net_device *alloc_arcdev(const char *name); - -int arcnet_open(struct net_device *dev); -int arcnet_close(struct net_device *dev); -netdev_tx_t arcnet_send_packet(struct sk_buff *skb, - struct net_device *dev); -void arcnet_timeout(struct net_device *dev); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_ARCDEVICE_H */ diff --git a/include/linux/com20020.h b/include/linux/com20020.h deleted file mode 100644 index 85898995b234..000000000000 --- a/include/linux/com20020.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Linux ARCnet driver - COM20020 chipset support - function declarations - * - * Written 1997 by David Woodhouse. - * Written 1994-1999 by Avery Pennarun. - * Derived from skeleton.c by Donald Becker. - * - * Special thanks to Contemporary Controls, Inc. (www.ccontrols.com) - * for sponsoring the further development of this driver. - * - * ********************** - * - * The original copyright of skeleton.c was as follows: - * - * skeleton.c Written 1993 by Donald Becker. - * Copyright 1993 United States Government as represented by the - * Director, National Security Agency. This software may only be used - * and distributed according to the terms of the GNU General Public License as - * modified by SRC, incorporated herein by reference. - * - * ********************** - * - * For more details, see drivers/net/arcnet.c - * - * ********************** - */ -#ifndef __COM20020_H -#define __COM20020_H - -int com20020_check(struct net_device *dev); -int com20020_found(struct net_device *dev, int shared); -extern const struct net_device_ops com20020_netdev_ops; - -/* The number of low I/O ports used by the card. */ -#define ARCNET_TOTAL_SIZE 8 - -/* various register addresses */ -#ifdef CONFIG_SA1100_CT6001 -#define BUS_ALIGN 2 /* 8 bit device on a 16 bit bus - needs padding */ -#else -#define BUS_ALIGN 1 -#endif - -#define PLX_PCI_MAX_CARDS 2 - -struct com20020_pci_channel_map { - u32 bar; - u32 offset; - u32 size; /* 0x00 - auto, e.g. 
length of entire bar */ -}; - -struct com20020_pci_card_info { - const char *name; - int devcount; - - struct com20020_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CARDS]; - - unsigned int flags; -}; - -struct com20020_priv { - struct com20020_pci_card_info *ci; - struct list_head list_dev; -}; - -struct com20020_dev { - struct list_head list; - struct net_device *dev; - - struct com20020_priv *pci_priv; - int index; -}; - -#define _INTMASK (ioaddr+BUS_ALIGN*0) /* writable */ -#define _STATUS (ioaddr+BUS_ALIGN*0) /* readable */ -#define _COMMAND (ioaddr+BUS_ALIGN*1) /* standard arcnet commands */ -#define _DIAGSTAT (ioaddr+BUS_ALIGN*1) /* diagnostic status register */ -#define _ADDR_HI (ioaddr+BUS_ALIGN*2) /* control registers for IO-mapped memory */ -#define _ADDR_LO (ioaddr+BUS_ALIGN*3) -#define _MEMDATA (ioaddr+BUS_ALIGN*4) /* data port for IO-mapped memory */ -#define _SUBADR (ioaddr+BUS_ALIGN*5) /* the extended port _XREG refers to */ -#define _CONFIG (ioaddr+BUS_ALIGN*6) /* configuration register */ -#define _XREG (ioaddr+BUS_ALIGN*7) /* extra registers (indexed by _CONFIG - or _SUBADR) */ - -/* in the ADDR_HI register */ -#define RDDATAflag 0x80 /* next access is a read (not a write) */ - -/* in the DIAGSTAT register */ -#define NEWNXTIDflag 0x02 /* ID to which token is passed has changed */ - -/* in the CONFIG register */ -#define RESETcfg 0x80 /* put card in reset state */ -#define TXENcfg 0x20 /* enable TX */ - -/* in SETUP register */ -#define PROMISCset 0x10 /* enable RCV_ALL */ -#define P1MODE 0x80 /* enable P1-MODE for Backplane */ -#define SLOWARB 0x01 /* enable Slow Arbitration for >=5Mbps */ - -/* COM2002x */ -#define SUB_TENTATIVE 0 /* tentative node ID */ -#define SUB_NODE 1 /* node ID */ -#define SUB_SETUP1 2 /* various options */ -#define SUB_TEST 3 /* test/diag register */ - -/* COM20022 only */ -#define SUB_SETUP2 4 /* sundry options */ -#define SUB_BUSCTL 5 /* bus control options */ -#define SUB_DMACOUNT 6 /* DMA count options */ - -#define SET_SUBADR(x) do { \ - if ((x) < 4) \ - { \ - lp->config = (lp->config & ~0x03) | (x); \ - SETCONF; \ - } \ - else \ - { \ - outb(x, _SUBADR); \ - } \ -} while (0) - -#undef ARCRESET -#undef ASTATUS -#undef ACOMMAND -#undef AINTMASK - -#define ARCRESET { outb(lp->config | 0x80, _CONFIG); \ - udelay(5); \ - outb(lp->config , _CONFIG); \ - } -#define ARCRESET0 { outb(0x18 | 0x80, _CONFIG); \ - udelay(5); \ - outb(0x18 , _CONFIG); \ - } - -#define ASTATUS() inb(_STATUS) -#define ADIAGSTATUS() inb(_DIAGSTAT) -#define ACOMMAND(cmd) outb((cmd),_COMMAND) -#define AINTMASK(msk) outb((msk),_INTMASK) - -#define SETCONF outb(lp->config, _CONFIG) - -#endif /* __COM20020_H */ -- cgit v1.2.3 From 2726d6ce389788c7fe724961a6e1bfe569560088 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 2 Sep 2015 11:01:34 +0100 Subject: sched/deadline: Unify dl_time_before() usage Move dl_time_before() static definition in include/linux/sched/deadline.h so that it can be used by different parties without being re-defined. 
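The definition being hoisted is the classic wraparound-safe time comparison: casting the unsigned difference to a signed type orders two u64 deadlines correctly even when one of them has wrapped past the maximum value. A minimal userspace sketch, with stdint types standing in for the kernel's u64/s64:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static inline bool dl_time_before(uint64_t a, uint64_t b)
    {
        /* a is "before" b when the signed difference a - b
         * is negative, even across a wraparound */
        return (int64_t)(a - b) < 0;
    }

    int main(void)
    {
        uint64_t near_wrap = UINT64_MAX - 5; /* deadline about to wrap */
        uint64_t wrapped   = 10;             /* deadline after the wrap */

        /* a plain 'near_wrap < wrapped' would be false; the signed
         * difference still orders the two deadlines correctly */
        printf("%d\n", dl_time_before(near_wrap, wrapped)); /* prints 1 */
        return 0;
    }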
Reported-by: Luca Abeni Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441188096-23021-3-git-send-email-juri.lelli@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 5 +++++ kernel/sched/cpudeadline.c | 5 ----- kernel/sched/cpudeadline.h | 1 + kernel/sched/sched.h | 5 ----- 4 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 9d303b8847df..9089a2ae913d 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -21,4 +21,9 @@ static inline int dl_task(struct task_struct *p) return dl_prio(p->prio); } +static inline bool dl_time_before(u64 a, u64 b) +{ + return (s64)(a - b) < 0; +} + #endif /* _SCHED_DEADLINE_H */ diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index c6acb07466bb..5a75b08cfd85 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -31,11 +31,6 @@ static inline int right_child(int i) return (i << 1) + 2; } -static inline int dl_time_before(u64 a, u64 b) -{ - return (s64)(a - b) < 0; -} - static void cpudl_exchange(struct cpudl *cp, int a, int b) { int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu; diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index 1a0a6ef2fbe1..fcbdf83fed7e 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h @@ -2,6 +2,7 @@ #define _LINUX_CPUDL_H #include +#include #define IDX_INVALID -1 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3845a711c65e..af6f252e7e34 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -118,11 +118,6 @@ static inline int task_has_dl_policy(struct task_struct *p) return dl_policy(p->policy); } -static inline bool dl_time_before(u64 a, u64 b) -{ - return (s64)(a - b) < 0; -} - /* * Tells if entity @a should preempt entity @b. */ -- cgit v1.2.3 From 90fe65148ea76988d8d5acbf3e578aa74129a490 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Sep 2015 15:04:59 +0200 Subject: atomic: Add atomic_long_t bitops When adding the atomic bitops, I seem to have forgotten about atomic_long_t, fix this. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 24 ++++++++++++++---------- include/linux/atomic.h | 3 ++- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index a94cbebbc33d..c7a5c1ad2ed3 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -112,19 +112,23 @@ static inline void atomic_long_dec(atomic_long_t *l) ATOMIC_LONG_PFX(_dec)(v); } -static inline void atomic_long_add(long i, atomic_long_t *l) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - - ATOMIC_LONG_PFX(_add)(i, v); +#define ATOMIC_LONG_OP(op) \ +static inline void \ +atomic_long_##op(long i, atomic_long_t *l) \ +{ \ + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ + \ + ATOMIC_LONG_PFX(_##op)(i, v); \ } -static inline void atomic_long_sub(long i, atomic_long_t *l) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; +ATOMIC_LONG_OP(add) +ATOMIC_LONG_OP(sub) +ATOMIC_LONG_OP(and) +ATOMIC_LONG_OP(or) +ATOMIC_LONG_OP(xor) +ATOMIC_LONG_OP(andnot) - ATOMIC_LONG_PFX(_sub)(i, v); -} +#undef ATOMIC_LONG_OP static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) { diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 00a5763e850e..29dafa184aeb 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -451,7 +451,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif -#include #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif @@ -463,4 +462,6 @@ static inline void atomic64_andnot(long long i, atomic64_t *v) } #endif +#include + #endif /* _LINUX_ATOMIC_H */ -- cgit v1.2.3 From e3e72ab80a3fac0b88e07d358a2c75724ccd66b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Sep 2015 13:22:52 +0200 Subject: atomic: Implement atomic_read_ctrl() Provide atomic_read_ctrl() to mirror READ_ONCE_CTRL(), such that we can more conveniently use atomics in control dependencies. Since we can assume atomic_read() implies a READ_ONCE(), we must only emit an extra smp_read_barrier_depends() in order to upgrade to READ_ONCE_CTRL() semantics. Requested-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: oleg@redhat.com Link: http://lkml.kernel.org/r/20150918115637.GM3604@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- Documentation/memory-barriers.txt | 17 +++++++++-------- include/asm-generic/atomic-long.h | 3 ++- include/linux/atomic.h | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 2ba8461b0631..41ffd7e9cdcf 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -637,7 +637,8 @@ as follows: b = p; /* BUG: Compiler and CPU can both reorder!!! */ Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends() -that DEC Alpha needs in order to respect control depedencies. +that DEC Alpha needs in order to respect control depedencies. Alternatively +use one of atomic{,64}_read_ctrl(). So don't leave out the READ_ONCE_CTRL(). @@ -796,9 +797,9 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html. In summary: - (*) Control dependencies must be headed by READ_ONCE_CTRL(). 
- Or, as a much less preferable alternative, interpose - smp_read_barrier_depends() between a READ_ONCE() and the + (*) Control dependencies must be headed by READ_ONCE_CTRL(), + atomic{,64}_read_ctrl(). Or, as a much less preferable alternative, + interpose smp_read_barrier_depends() between a READ_ONCE() and the control-dependent write. (*) Control dependencies can order prior loads against later stores. @@ -820,10 +821,10 @@ In summary: and WRITE_ONCE() can help to preserve the needed conditional. (*) Control dependencies require that the compiler avoid reordering the - dependency into nonexistence. Careful use of READ_ONCE_CTRL() - or smp_read_barrier_depends() can help to preserve your control - dependency. Please see the Compiler Barrier section for more - information. + dependency into nonexistence. Careful use of READ_ONCE_CTRL(), + atomic{,64}_read_ctrl() or smp_read_barrier_depends() can help to + preserve your control dependency. Please see the Compiler Barrier + section for more information. (*) Control dependencies pair normally with other types of barriers. diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index c7a5c1ad2ed3..8942cdc676a7 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -35,7 +35,7 @@ typedef atomic_t atomic_long_t; #endif #define ATOMIC_LONG_READ_OP(mo) \ -static inline long atomic_long_read##mo(atomic_long_t *l) \ +static inline long atomic_long_read##mo(const atomic_long_t *l) \ { \ ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ \ @@ -43,6 +43,7 @@ static inline long atomic_long_read##mo(atomic_long_t *l) \ } ATOMIC_LONG_READ_OP() ATOMIC_LONG_READ_OP(_acquire) +ATOMIC_LONG_READ_OP(_ctrl) #undef ATOMIC_LONG_READ_OP diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 29dafa184aeb..e326469bb9d6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -4,6 +4,15 @@ #include #include +#ifndef atomic_read_ctrl +static inline int atomic_read_ctrl(const atomic_t *v) +{ + int val = atomic_read(v); + smp_read_barrier_depends(); /* Enforce control dependency. */ + return val; +} +#endif + /* * Relaxed variants of xchg, cmpxchg and some atomic operations. * @@ -455,6 +464,15 @@ static inline int atomic_dec_if_positive(atomic_t *v) #include #endif +#ifndef atomic64_read_ctrl +static inline long long atomic64_read_ctrl(const atomic64_t *v) +{ + long long val = atomic64_read(v); + smp_read_barrier_depends(); /* Enforce control dependency. */ + return val; +} +#endif + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit v1.2.3 From c6e1e7b5b7f031910850ddaf7bfa65ba3b4843ea Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 22 Sep 2015 12:48:59 +0200 Subject: sched/core: Make 'sched_domain_topology' declaration static The 'sched_domain_topology' variable is only used within kernel/sched/core.c. Make it static. 
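The shape of the change, in miniature: the table loses its extern declaration and becomes file-local, while set_sched_topology() remains the only exported way to override it. A hedged sketch with simplified, hypothetical stand-ins for the kernel structures:

    /* sketch only: simplified stand-in for sched_domain_topology_level */
    struct sd_topology_level { const char *name; };

    /* file-local now; no header declares it */
    static struct sd_topology_level default_topology[] = {
        { "SMT" }, { "MC" }, { "DIE" }, { NULL },
    };

    static struct sd_topology_level *sched_domain_topology = default_topology;

    /* the setter stays public, so callers can still install
     * their own table without touching the pointer directly */
    void set_sched_topology(struct sd_topology_level *tl)
    {
        sched_domain_topology = tl;
    }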
Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1442918939-9907-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- kernel/sched/core.c | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bd38b3ee9e83..699228bb0035 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1127,8 +1127,6 @@ struct sched_domain_topology_level { #endif }; -extern struct sched_domain_topology_level *sched_domain_topology; - extern void set_sched_topology(struct sched_domain_topology_level *tl); extern void wake_up_if_idle(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1b30b5b24a4a..a91df6171f48 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6445,7 +6445,8 @@ static struct sched_domain_topology_level default_topology[] = { { NULL, }, }; -struct sched_domain_topology_level *sched_domain_topology = default_topology; +static struct sched_domain_topology_level *sched_domain_topology = + default_topology; #define for_each_sd_topology(tl) \ for (tl = sched_domain_topology; tl->mask; tl++) -- cgit v1.2.3 From 081bab217db769526c1202c87099ff69737126ae Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 22 Sep 2015 14:35:06 -0500 Subject: power: bq27x00_battery: Renaming for consistency Rename functions that are used by multiple devices. New devices have been added and the function names and driver name are no longer general enough for the functionality they provide. Signed-off-by: Andrew F. Davis Acked-by: Tony Lindgren Acked-by: GUAN Xuetao Signed-off-by: Sebastian Reichel --- MAINTAINERS | 4 +- arch/arm/configs/omap2plus_defconfig | 2 +- arch/unicore32/Kconfig | 2 +- drivers/power/Kconfig | 22 +- drivers/power/Makefile | 2 +- drivers/power/bq27x00_battery.c | 1125 -------------------------------- drivers/power/bq27xxx_battery.c | 1126 +++++++++++++++++++++++++++++++++ drivers/w1/slaves/w1_bq27000.c | 4 +- include/linux/power/bq27x00_battery.h | 19 - include/linux/power/bq27xxx_battery.h | 19 + 10 files changed, 1163 insertions(+), 1162 deletions(-) delete mode 100644 drivers/power/bq27x00_battery.c create mode 100644 drivers/power/bq27xxx_battery.c delete mode 100644 include/linux/power/bq27x00_battery.h create mode 100644 include/linux/power/bq27xxx_battery.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 274f85405584..33a12019652b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7393,10 +7393,10 @@ NOKIA N900 POWER SUPPLY DRIVERS M: Pali Rohár S: Maintained F: include/linux/power/bq2415x_charger.h -F: include/linux/power/bq27x00_battery.h +F: include/linux/power/bq27xxx_battery.h F: include/linux/power/isp1704_charger.h F: drivers/power/bq2415x_charger.c -F: drivers/power/bq27x00_battery.c +F: drivers/power/bq27xxx_battery.c F: drivers/power/isp1704_charger.c F: drivers/power/rx51_battery.c diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 50c84e1876fc..6a9ec3e62625 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -245,7 +245,7 @@ CONFIG_GPIO_TWL4030=y CONFIG_GPIO_PALMAS=y CONFIG_W1=m CONFIG_HDQ_MASTER_OMAP=m -CONFIG_BATTERY_BQ27x00=m +CONFIG_BATTERY_BQ27XXX=m CONFIG_CHARGER_ISP1704=m CONFIG_CHARGER_TWL4030=m CONFIG_CHARGER_BQ2415X=m diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 
928237a7b9ca..c9faddc61100 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -222,7 +222,7 @@ config I2C_BATTERY_BQ27200 tristate "I2C Battery BQ27200 Support" select I2C_PUV3 select POWER_SUPPLY - select BATTERY_BQ27x00 + select BATTERY_BQ27XXX config I2C_EEPROM_AT24 tristate "I2C EEPROMs AT24 support" diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 2c0520db5025..d3cd2ea54440 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -157,26 +157,26 @@ config BATTERY_SBS Say Y to include support for SBS battery driver for SBS-compliant gas gauges. -config BATTERY_BQ27x00 - tristate "BQ27x00 battery driver" +config BATTERY_BQ27XXX + tristate "BQ27xxx battery driver" depends on I2C || I2C=n help - Say Y here to enable support for batteries with BQ27x00 (I2C/HDQ) chips. + Say Y here to enable support for batteries with BQ27xxx (I2C/HDQ) chips. -config BATTERY_BQ27X00_I2C - bool "BQ27200/BQ27500 support" - depends on BATTERY_BQ27x00 +config BATTERY_BQ27XXX_I2C + bool "BQ27xxx I2C support" + depends on BATTERY_BQ27XXX depends on I2C default y help - Say Y here to enable support for batteries with BQ27x00 (I2C) chips. + Say Y here to enable support for batteries with BQ27xxx (I2C) chips. -config BATTERY_BQ27X00_PLATFORM - bool "BQ27000 support" - depends on BATTERY_BQ27x00 +config BATTERY_BQ27XXX_PLATFORM + bool "BQ27xxx HDQ support" + depends on BATTERY_BQ27XXX default y help - Say Y here to enable support for batteries with BQ27000 (HDQ) chips. + Say Y here to enable support for batteries with BQ27xxx (HDQ) chips. config BATTERY_DA9030 tristate "DA9030 battery driver" diff --git a/drivers/power/Makefile b/drivers/power/Makefile index 81109baff598..8eb30a515d5c 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_BATTERY_COLLIE) += collie_battery.o obj-$(CONFIG_BATTERY_IPAQ_MICRO) += ipaq_micro_battery.o obj-$(CONFIG_BATTERY_WM97XX) += wm97xx_battery.o obj-$(CONFIG_BATTERY_SBS) += sbs-battery.o -obj-$(CONFIG_BATTERY_BQ27x00) += bq27x00_battery.o +obj-$(CONFIG_BATTERY_BQ27XXX) += bq27xxx_battery.o obj-$(CONFIG_BATTERY_DA9030) += da9030_battery.o obj-$(CONFIG_BATTERY_DA9052) += da9052-battery.o obj-$(CONFIG_CHARGER_DA9150) += da9150-charger.o diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c deleted file mode 100644 index d0b2f3b47b8f..000000000000 --- a/drivers/power/bq27x00_battery.c +++ /dev/null @@ -1,1125 +0,0 @@ -/* - * BQ27x00 battery driver - * - * Copyright (C) 2008 Rodolfo Giometti - * Copyright (C) 2008 Eurotech S.p.A. - * Copyright (C) 2010-2011 Lars-Peter Clausen - * Copyright (C) 2011 Pali Rohár - * - * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc. - * - * This package is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- * - * Datasheets: - * http://focus.ti.com/docs/prod/folders/print/bq27000.html - * http://focus.ti.com/docs/prod/folders/print/bq27500.html - * http://www.ti.com/product/bq27425-g1 - * http://www.ti.com/product/BQ27742-G1 - * http://www.ti.com/product/BQ27510-G3 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define DRIVER_VERSION "1.2.0" - -#define BQ27XXX_MANUFACTURER "Texas Instruments" - -#define BQ27x00_REG_TEMP 0x06 -#define BQ27x00_REG_VOLT 0x08 -#define BQ27x00_REG_AI 0x14 -#define BQ27x00_REG_FLAGS 0x0A -#define BQ27x00_REG_TTE 0x16 -#define BQ27x00_REG_TTF 0x18 -#define BQ27x00_REG_TTECP 0x26 -#define BQ27x00_REG_NAC 0x0C /* Nominal available capacity */ -#define BQ27x00_REG_LMD 0x12 /* Last measured discharge */ -#define BQ27x00_REG_CYCT 0x2A /* Cycle count total */ -#define BQ27x00_REG_AE 0x22 /* Available energy */ -#define BQ27x00_POWER_AVG 0x24 - -#define BQ27000_REG_RSOC 0x0B /* Relative State-of-Charge */ -#define BQ27000_REG_ILMD 0x76 /* Initial last measured discharge */ -#define BQ27000_FLAG_EDVF BIT(0) /* Final End-of-Discharge-Voltage flag */ -#define BQ27000_FLAG_EDV1 BIT(1) /* First End-of-Discharge-Voltage flag */ -#define BQ27000_FLAG_CI BIT(4) /* Capacity Inaccurate flag */ -#define BQ27000_FLAG_FC BIT(5) -#define BQ27000_FLAG_CHGS BIT(7) /* Charge state flag */ - -#define BQ27500_REG_SOC 0x2C -#define BQ27500_REG_DCAP 0x3C /* Design capacity */ -#define BQ27500_FLAG_DSC BIT(0) -#define BQ27500_FLAG_SOCF BIT(1) /* State-of-Charge threshold final */ -#define BQ27500_FLAG_SOC1 BIT(2) /* State-of-Charge threshold 1 */ -#define BQ27500_FLAG_FC BIT(9) -#define BQ27500_FLAG_OTC BIT(15) - -#define BQ27742_POWER_AVG 0x76 - -#define BQ27510_REG_SOC 0x20 -#define BQ27510_REG_DCAP 0x2E /* Design capacity */ -#define BQ27510_REG_CYCT 0x1E /* Cycle count total */ - -/* bq27425 register addresses are same as bq27x00 addresses minus 4 */ -#define BQ27425_REG_OFFSET 0x04 -#define BQ27425_REG_SOC (0x1C + BQ27425_REG_OFFSET) -#define BQ27425_REG_DCAP (0x3C + BQ27425_REG_OFFSET) - -#define BQ27000_RS 20 /* Resistor sense */ -#define BQ27x00_POWER_CONSTANT (256 * 29200 / 1000) - -struct bq27x00_device_info; -struct bq27x00_access_methods { - int (*read)(struct bq27x00_device_info *di, u8 reg, bool single); -}; - -enum bq27x00_chip { BQ27000, BQ27500, BQ27425, BQ27742, BQ27510}; - -struct bq27x00_reg_cache { - int temperature; - int time_to_empty; - int time_to_empty_avg; - int time_to_full; - int charge_full; - int cycle_count; - int capacity; - int energy; - int flags; - int power_avg; - int health; -}; - -struct bq27x00_device_info { - struct device *dev; - int id; - enum bq27x00_chip chip; - - struct bq27x00_reg_cache cache; - int charge_design_full; - - unsigned long last_update; - struct delayed_work work; - - struct power_supply *bat; - - struct bq27x00_access_methods bus; - - struct mutex lock; -}; - -static enum power_supply_property bq27x00_battery_props[] = { - POWER_SUPPLY_PROP_STATUS, - POWER_SUPPLY_PROP_PRESENT, - POWER_SUPPLY_PROP_VOLTAGE_NOW, - POWER_SUPPLY_PROP_CURRENT_NOW, - POWER_SUPPLY_PROP_CAPACITY, - POWER_SUPPLY_PROP_CAPACITY_LEVEL, - POWER_SUPPLY_PROP_TEMP, - POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, - POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, - POWER_SUPPLY_PROP_TIME_TO_FULL_NOW, - POWER_SUPPLY_PROP_TECHNOLOGY, - POWER_SUPPLY_PROP_CHARGE_FULL, - POWER_SUPPLY_PROP_CHARGE_NOW, - POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, - POWER_SUPPLY_PROP_CYCLE_COUNT, - 
POWER_SUPPLY_PROP_ENERGY_NOW, - POWER_SUPPLY_PROP_POWER_AVG, - POWER_SUPPLY_PROP_HEALTH, - POWER_SUPPLY_PROP_MANUFACTURER, -}; - -static enum power_supply_property bq27425_battery_props[] = { - POWER_SUPPLY_PROP_STATUS, - POWER_SUPPLY_PROP_PRESENT, - POWER_SUPPLY_PROP_VOLTAGE_NOW, - POWER_SUPPLY_PROP_CURRENT_NOW, - POWER_SUPPLY_PROP_CAPACITY, - POWER_SUPPLY_PROP_CAPACITY_LEVEL, - POWER_SUPPLY_PROP_TEMP, - POWER_SUPPLY_PROP_TECHNOLOGY, - POWER_SUPPLY_PROP_CHARGE_FULL, - POWER_SUPPLY_PROP_CHARGE_NOW, - POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, - POWER_SUPPLY_PROP_MANUFACTURER, -}; - -static enum power_supply_property bq27742_battery_props[] = { - POWER_SUPPLY_PROP_STATUS, - POWER_SUPPLY_PROP_PRESENT, - POWER_SUPPLY_PROP_VOLTAGE_NOW, - POWER_SUPPLY_PROP_CURRENT_NOW, - POWER_SUPPLY_PROP_CAPACITY, - POWER_SUPPLY_PROP_CAPACITY_LEVEL, - POWER_SUPPLY_PROP_TEMP, - POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, - POWER_SUPPLY_PROP_TECHNOLOGY, - POWER_SUPPLY_PROP_CHARGE_FULL, - POWER_SUPPLY_PROP_CHARGE_NOW, - POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, - POWER_SUPPLY_PROP_CYCLE_COUNT, - POWER_SUPPLY_PROP_POWER_AVG, - POWER_SUPPLY_PROP_HEALTH, - POWER_SUPPLY_PROP_MANUFACTURER, -}; - -static enum power_supply_property bq27510_battery_props[] = { - POWER_SUPPLY_PROP_STATUS, - POWER_SUPPLY_PROP_PRESENT, - POWER_SUPPLY_PROP_VOLTAGE_NOW, - POWER_SUPPLY_PROP_CURRENT_NOW, - POWER_SUPPLY_PROP_CAPACITY, - POWER_SUPPLY_PROP_CAPACITY_LEVEL, - POWER_SUPPLY_PROP_TEMP, - POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, - POWER_SUPPLY_PROP_TECHNOLOGY, - POWER_SUPPLY_PROP_CHARGE_FULL, - POWER_SUPPLY_PROP_CHARGE_NOW, - POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, - POWER_SUPPLY_PROP_CYCLE_COUNT, - POWER_SUPPLY_PROP_POWER_AVG, - POWER_SUPPLY_PROP_HEALTH, - POWER_SUPPLY_PROP_MANUFACTURER, -}; - -static unsigned int poll_interval = 360; -module_param(poll_interval, uint, 0644); -MODULE_PARM_DESC(poll_interval, - "battery poll interval in seconds - 0 disables polling"); - -/* - * Common code for BQ27x00 devices - */ - -static inline int bq27x00_read(struct bq27x00_device_info *di, u8 reg, - bool single) -{ - if (di->chip == BQ27425) - return di->bus.read(di, reg - BQ27425_REG_OFFSET, single); - return di->bus.read(di, reg, single); -} - -/* - * Higher versions of the chip like BQ27425 and BQ27500 - * differ from BQ27000 and BQ27200 in calculation of certain - * parameters. Hence we need to check for the chip type. - */ -static bool bq27xxx_is_chip_version_higher(struct bq27x00_device_info *di) -{ - if (di->chip == BQ27425 || di->chip == BQ27500 || di->chip == BQ27742 - || di->chip == BQ27510) - return true; - return false; -} - -/* - * Return the battery Relative State-of-Charge - * Or < 0 if something fails. - */ -static int bq27x00_battery_read_rsoc(struct bq27x00_device_info *di) -{ - int rsoc; - - if (di->chip == BQ27500 || di->chip == BQ27742) - rsoc = bq27x00_read(di, BQ27500_REG_SOC, false); - else if (di->chip == BQ27510) - rsoc = bq27x00_read(di, BQ27510_REG_SOC, false); - else if (di->chip == BQ27425) - rsoc = bq27x00_read(di, BQ27425_REG_SOC, false); - else - rsoc = bq27x00_read(di, BQ27000_REG_RSOC, true); - - if (rsoc < 0) - dev_dbg(di->dev, "error reading relative State-of-Charge\n"); - - return rsoc; -} - -/* - * Return a battery charge value in µAh - * Or < 0 if something fails. 
- */ -static int bq27x00_battery_read_charge(struct bq27x00_device_info *di, u8 reg) -{ - int charge; - - charge = bq27x00_read(di, reg, false); - if (charge < 0) { - dev_dbg(di->dev, "error reading charge register %02x: %d\n", - reg, charge); - return charge; - } - - if (bq27xxx_is_chip_version_higher(di)) - charge *= 1000; - else - charge = charge * 3570 / BQ27000_RS; - - return charge; -} - -/* - * Return the battery Nominal available capacity in µAh - * Or < 0 if something fails. - */ -static inline int bq27x00_battery_read_nac(struct bq27x00_device_info *di) -{ - int flags; - bool is_bq27500 = di->chip == BQ27500; - bool is_bq27742 = di->chip == BQ27742; - bool is_higher = bq27xxx_is_chip_version_higher(di); - bool flags_1b = !(is_bq27500 || is_bq27742); - - flags = bq27x00_read(di, BQ27x00_REG_FLAGS, flags_1b); - if (flags >= 0 && !is_higher && (flags & BQ27000_FLAG_CI)) - return -ENODATA; - - return bq27x00_battery_read_charge(di, BQ27x00_REG_NAC); -} - -/* - * Return the battery Last measured discharge in µAh - * Or < 0 if something fails. - */ -static inline int bq27x00_battery_read_lmd(struct bq27x00_device_info *di) -{ - return bq27x00_battery_read_charge(di, BQ27x00_REG_LMD); -} - -/* - * Return the battery Initial last measured discharge in µAh - * Or < 0 if something fails. - */ -static int bq27x00_battery_read_ilmd(struct bq27x00_device_info *di) -{ - int ilmd; - - if (bq27xxx_is_chip_version_higher(di)) { - if (di->chip == BQ27425) - ilmd = bq27x00_read(di, BQ27425_REG_DCAP, false); - else if (di->chip == BQ27510) - ilmd = bq27x00_read(di, BQ27510_REG_DCAP, false); - else - ilmd = bq27x00_read(di, BQ27500_REG_DCAP, false); - } else { - ilmd = bq27x00_read(di, BQ27000_REG_ILMD, true); - } - - if (ilmd < 0) { - dev_dbg(di->dev, "error reading initial last measured discharge\n"); - return ilmd; - } - - if (bq27xxx_is_chip_version_higher(di)) - ilmd *= 1000; - else - ilmd = ilmd * 256 * 3570 / BQ27000_RS; - - return ilmd; -} - -/* - * Return the battery Available energy in µWh - * Or < 0 if something fails. - */ -static int bq27x00_battery_read_energy(struct bq27x00_device_info *di) -{ - int ae; - - ae = bq27x00_read(di, BQ27x00_REG_AE, false); - if (ae < 0) { - dev_dbg(di->dev, "error reading available energy\n"); - return ae; - } - - if (di->chip == BQ27500) - ae *= 1000; - else - ae = ae * 29200 / BQ27000_RS; - - return ae; -} - -/* - * Return the battery temperature in tenths of degree Kelvin - * Or < 0 if something fails. - */ -static int bq27x00_battery_read_temperature(struct bq27x00_device_info *di) -{ - int temp; - - temp = bq27x00_read(di, BQ27x00_REG_TEMP, false); - if (temp < 0) { - dev_err(di->dev, "error reading temperature\n"); - return temp; - } - - if (!bq27xxx_is_chip_version_higher(di)) - temp = 5 * temp / 2; - - return temp; -} - -/* - * Return the battery Cycle count total - * Or < 0 if something fails. - */ -static int bq27x00_battery_read_cyct(struct bq27x00_device_info *di) -{ - int cyct; - - if (di->chip == BQ27510) - cyct = bq27x00_read(di, BQ27510_REG_CYCT, false); - else - cyct = bq27x00_read(di, BQ27x00_REG_CYCT, false); - if (cyct < 0) - dev_err(di->dev, "error reading cycle count total\n"); - - return cyct; -} - -/* - * Read a time register. - * Return < 0 if something fails. 
- */ -static int bq27x00_battery_read_time(struct bq27x00_device_info *di, u8 reg) -{ - int tval; - - tval = bq27x00_read(di, reg, false); - if (tval < 0) { - dev_dbg(di->dev, "error reading time register %02x: %d\n", - reg, tval); - return tval; - } - - if (tval == 65535) - return -ENODATA; - - return tval * 60; -} - -/* - * Read a power avg register. - * Return < 0 if something fails. - */ -static int bq27x00_battery_read_pwr_avg(struct bq27x00_device_info *di, u8 reg) -{ - int tval; - - tval = bq27x00_read(di, reg, false); - if (tval < 0) { - dev_err(di->dev, "error reading power avg register %02x: %d\n", - reg, tval); - return tval; - } - - if (di->chip == BQ27500) - return tval; - else - return (tval * BQ27x00_POWER_CONSTANT) / BQ27000_RS; -} - -/* - * Read flag register. - * Return < 0 if something fails. - */ -static int bq27x00_battery_read_health(struct bq27x00_device_info *di) -{ - int tval; - - tval = bq27x00_read(di, BQ27x00_REG_FLAGS, false); - if (tval < 0) { - dev_err(di->dev, "error reading flag register:%d\n", tval); - return tval; - } - - if (di->chip == BQ27500) { - if (tval & BQ27500_FLAG_SOCF) - tval = POWER_SUPPLY_HEALTH_DEAD; - else if (tval & BQ27500_FLAG_OTC) - tval = POWER_SUPPLY_HEALTH_OVERHEAT; - else - tval = POWER_SUPPLY_HEALTH_GOOD; - return tval; - } else if (di->chip == BQ27510) { - if (tval & BQ27500_FLAG_OTC) - return POWER_SUPPLY_HEALTH_OVERHEAT; - return POWER_SUPPLY_HEALTH_GOOD; - } else { - if (tval & BQ27000_FLAG_EDV1) - tval = POWER_SUPPLY_HEALTH_DEAD; - else - tval = POWER_SUPPLY_HEALTH_GOOD; - return tval; - } - - return -1; -} - -static void bq27x00_update(struct bq27x00_device_info *di) -{ - struct bq27x00_reg_cache cache = {0, }; - bool is_bq27500 = di->chip == BQ27500; - bool is_bq27510 = di->chip == BQ27510; - bool is_bq27425 = di->chip == BQ27425; - bool is_bq27742 = di->chip == BQ27742; - bool flags_1b = !(is_bq27500 || is_bq27742); - - cache.flags = bq27x00_read(di, BQ27x00_REG_FLAGS, flags_1b); - if ((cache.flags & 0xff) == 0xff) - /* read error */ - cache.flags = -1; - if (cache.flags >= 0) { - if (!is_bq27500 && !is_bq27425 && !is_bq27742 && !is_bq27510 - && (cache.flags & BQ27000_FLAG_CI)) { - dev_info(di->dev, "battery is not calibrated! 
ignoring capacity values\n"); - cache.capacity = -ENODATA; - cache.energy = -ENODATA; - cache.time_to_empty = -ENODATA; - cache.time_to_empty_avg = -ENODATA; - cache.time_to_full = -ENODATA; - cache.charge_full = -ENODATA; - cache.health = -ENODATA; - } else { - cache.capacity = bq27x00_battery_read_rsoc(di); - if (is_bq27742 || is_bq27510) - cache.time_to_empty = - bq27x00_battery_read_time(di, - BQ27x00_REG_TTE); - else if (!is_bq27425) { - cache.energy = bq27x00_battery_read_energy(di); - cache.time_to_empty = - bq27x00_battery_read_time(di, - BQ27x00_REG_TTE); - cache.time_to_empty_avg = - bq27x00_battery_read_time(di, - BQ27x00_REG_TTECP); - cache.time_to_full = - bq27x00_battery_read_time(di, - BQ27x00_REG_TTF); - } - cache.charge_full = bq27x00_battery_read_lmd(di); - cache.health = bq27x00_battery_read_health(di); - } - cache.temperature = bq27x00_battery_read_temperature(di); - if (!is_bq27425) - cache.cycle_count = bq27x00_battery_read_cyct(di); - if (is_bq27742) - cache.power_avg = - bq27x00_battery_read_pwr_avg(di, - BQ27742_POWER_AVG); - else - cache.power_avg = - bq27x00_battery_read_pwr_avg(di, - BQ27x00_POWER_AVG); - - /* We only have to read charge design full once */ - if (di->charge_design_full <= 0) - di->charge_design_full = bq27x00_battery_read_ilmd(di); - } - - if (di->cache.capacity != cache.capacity) - power_supply_changed(di->bat); - - if (memcmp(&di->cache, &cache, sizeof(cache)) != 0) - di->cache = cache; - - di->last_update = jiffies; -} - -static void bq27x00_battery_poll(struct work_struct *work) -{ - struct bq27x00_device_info *di = - container_of(work, struct bq27x00_device_info, work.work); - - bq27x00_update(di); - - if (poll_interval > 0) { - /* The timer does not have to be accurate. */ - set_timer_slack(&di->work.timer, poll_interval * HZ / 4); - schedule_delayed_work(&di->work, poll_interval * HZ); - } -} - -/* - * Return the battery average current in µA - * Note that current can be negative signed as well - * Or 0 if something fails. 
- */ -static int bq27x00_battery_current(struct bq27x00_device_info *di, - union power_supply_propval *val) -{ - int curr; - int flags; - - curr = bq27x00_read(di, BQ27x00_REG_AI, false); - if (curr < 0) { - dev_err(di->dev, "error reading current\n"); - return curr; - } - - if (bq27xxx_is_chip_version_higher(di)) { - /* bq27500 returns signed value */ - val->intval = (int)((s16)curr) * 1000; - } else { - flags = bq27x00_read(di, BQ27x00_REG_FLAGS, false); - if (flags & BQ27000_FLAG_CHGS) { - dev_dbg(di->dev, "negative current!\n"); - curr = -curr; - } - - val->intval = curr * 3570 / BQ27000_RS; - } - - return 0; -} - -static int bq27x00_battery_status(struct bq27x00_device_info *di, - union power_supply_propval *val) -{ - int status; - - if (bq27xxx_is_chip_version_higher(di)) { - if (di->cache.flags & BQ27500_FLAG_FC) - status = POWER_SUPPLY_STATUS_FULL; - else if (di->cache.flags & BQ27500_FLAG_DSC) - status = POWER_SUPPLY_STATUS_DISCHARGING; - else - status = POWER_SUPPLY_STATUS_CHARGING; - } else { - if (di->cache.flags & BQ27000_FLAG_FC) - status = POWER_SUPPLY_STATUS_FULL; - else if (di->cache.flags & BQ27000_FLAG_CHGS) - status = POWER_SUPPLY_STATUS_CHARGING; - else if (power_supply_am_i_supplied(di->bat)) - status = POWER_SUPPLY_STATUS_NOT_CHARGING; - else - status = POWER_SUPPLY_STATUS_DISCHARGING; - } - - val->intval = status; - - return 0; -} - -static int bq27x00_battery_capacity_level(struct bq27x00_device_info *di, - union power_supply_propval *val) -{ - int level; - - if (bq27xxx_is_chip_version_higher(di)) { - if (di->cache.flags & BQ27500_FLAG_FC) - level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; - else if (di->cache.flags & BQ27500_FLAG_SOC1) - level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; - else if (di->cache.flags & BQ27500_FLAG_SOCF) - level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; - else - level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; - } else { - if (di->cache.flags & BQ27000_FLAG_FC) - level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; - else if (di->cache.flags & BQ27000_FLAG_EDV1) - level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; - else if (di->cache.flags & BQ27000_FLAG_EDVF) - level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; - else - level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; - } - - val->intval = level; - - return 0; -} - -/* - * Return the battery Voltage in millivolts - * Or < 0 if something fails. 
- */ -static int bq27x00_battery_voltage(struct bq27x00_device_info *di, - union power_supply_propval *val) -{ - int volt; - - volt = bq27x00_read(di, BQ27x00_REG_VOLT, false); - if (volt < 0) { - dev_err(di->dev, "error reading voltage\n"); - return volt; - } - - val->intval = volt * 1000; - - return 0; -} - -static int bq27x00_simple_value(int value, - union power_supply_propval *val) -{ - if (value < 0) - return value; - - val->intval = value; - - return 0; -} - -static int bq27x00_battery_get_property(struct power_supply *psy, - enum power_supply_property psp, - union power_supply_propval *val) -{ - int ret = 0; - struct bq27x00_device_info *di = power_supply_get_drvdata(psy); - - mutex_lock(&di->lock); - if (time_is_before_jiffies(di->last_update + 5 * HZ)) { - cancel_delayed_work_sync(&di->work); - bq27x00_battery_poll(&di->work.work); - } - mutex_unlock(&di->lock); - - if (psp != POWER_SUPPLY_PROP_PRESENT && di->cache.flags < 0) - return -ENODEV; - - switch (psp) { - case POWER_SUPPLY_PROP_STATUS: - ret = bq27x00_battery_status(di, val); - break; - case POWER_SUPPLY_PROP_VOLTAGE_NOW: - ret = bq27x00_battery_voltage(di, val); - break; - case POWER_SUPPLY_PROP_PRESENT: - val->intval = di->cache.flags < 0 ? 0 : 1; - break; - case POWER_SUPPLY_PROP_CURRENT_NOW: - ret = bq27x00_battery_current(di, val); - break; - case POWER_SUPPLY_PROP_CAPACITY: - ret = bq27x00_simple_value(di->cache.capacity, val); - break; - case POWER_SUPPLY_PROP_CAPACITY_LEVEL: - ret = bq27x00_battery_capacity_level(di, val); - break; - case POWER_SUPPLY_PROP_TEMP: - ret = bq27x00_simple_value(di->cache.temperature, val); - if (ret == 0) - val->intval -= 2731; - break; - case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW: - ret = bq27x00_simple_value(di->cache.time_to_empty, val); - break; - case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG: - ret = bq27x00_simple_value(di->cache.time_to_empty_avg, val); - break; - case POWER_SUPPLY_PROP_TIME_TO_FULL_NOW: - ret = bq27x00_simple_value(di->cache.time_to_full, val); - break; - case POWER_SUPPLY_PROP_TECHNOLOGY: - val->intval = POWER_SUPPLY_TECHNOLOGY_LION; - break; - case POWER_SUPPLY_PROP_CHARGE_NOW: - ret = bq27x00_simple_value(bq27x00_battery_read_nac(di), val); - break; - case POWER_SUPPLY_PROP_CHARGE_FULL: - ret = bq27x00_simple_value(di->cache.charge_full, val); - break; - case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: - ret = bq27x00_simple_value(di->charge_design_full, val); - break; - case POWER_SUPPLY_PROP_CYCLE_COUNT: - ret = bq27x00_simple_value(di->cache.cycle_count, val); - break; - case POWER_SUPPLY_PROP_ENERGY_NOW: - ret = bq27x00_simple_value(di->cache.energy, val); - break; - case POWER_SUPPLY_PROP_POWER_AVG: - ret = bq27x00_simple_value(di->cache.power_avg, val); - break; - case POWER_SUPPLY_PROP_HEALTH: - ret = bq27x00_simple_value(di->cache.health, val); - break; - case POWER_SUPPLY_PROP_MANUFACTURER: - val->strval = BQ27XXX_MANUFACTURER; - break; - default: - return -EINVAL; - } - - return ret; -} - -static void bq27x00_external_power_changed(struct power_supply *psy) -{ - struct bq27x00_device_info *di = power_supply_get_drvdata(psy); - - cancel_delayed_work_sync(&di->work); - schedule_delayed_work(&di->work, 0); -} - -static int bq27x00_powersupply_init(struct bq27x00_device_info *di, - const char *name) -{ - int ret; - struct power_supply_desc *psy_desc; - struct power_supply_config psy_cfg = { .drv_data = di, }; - - psy_desc = devm_kzalloc(di->dev, sizeof(*psy_desc), GFP_KERNEL); - if (!psy_desc) - return -ENOMEM; - - psy_desc->name = name; - psy_desc->type = 
POWER_SUPPLY_TYPE_BATTERY; - if (di->chip == BQ27425) { - psy_desc->properties = bq27425_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27425_battery_props); - } else if (di->chip == BQ27742) { - psy_desc->properties = bq27742_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27742_battery_props); - } else if (di->chip == BQ27510) { - psy_desc->properties = bq27510_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27510_battery_props); - } else { - psy_desc->properties = bq27x00_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27x00_battery_props); - } - psy_desc->get_property = bq27x00_battery_get_property; - psy_desc->external_power_changed = bq27x00_external_power_changed; - - INIT_DELAYED_WORK(&di->work, bq27x00_battery_poll); - mutex_init(&di->lock); - - di->bat = power_supply_register_no_ws(di->dev, psy_desc, &psy_cfg); - if (IS_ERR(di->bat)) { - ret = PTR_ERR(di->bat); - dev_err(di->dev, "failed to register battery: %d\n", ret); - return ret; - } - - dev_info(di->dev, "support ver. %s enabled\n", DRIVER_VERSION); - - bq27x00_update(di); - - return 0; -} - -static void bq27x00_powersupply_unregister(struct bq27x00_device_info *di) -{ - /* - * power_supply_unregister() calls bq27x00_battery_get_property(), - * which calls bq27x00_battery_poll(). - * Make sure that bq27x00_battery_poll() will not call - * schedule_delayed_work() again after unregister (which would cause - * an oops). - */ - poll_interval = 0; - - cancel_delayed_work_sync(&di->work); - - power_supply_unregister(di->bat); - - mutex_destroy(&di->lock); -} - -/* i2c specific code */ -#ifdef CONFIG_BATTERY_BQ27X00_I2C - -/* If the system has several batteries we need a different name for each - * of them... - */ -static DEFINE_IDR(battery_id); -static DEFINE_MUTEX(battery_mutex); - -static int bq27x00_read_i2c(struct bq27x00_device_info *di, u8 reg, bool single) -{ - struct i2c_client *client = to_i2c_client(di->dev); - struct i2c_msg msg[2]; - unsigned char data[2]; - int ret; - - if (!client->adapter) - return -ENODEV; - - msg[0].addr = client->addr; - msg[0].flags = 0; - msg[0].buf = &reg; - msg[0].len = sizeof(reg); - msg[1].addr = client->addr; - msg[1].flags = I2C_M_RD; - msg[1].buf = data; - if (single) - msg[1].len = 1; - else - msg[1].len = 2; - - ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg)); - if (ret < 0) - return ret; - - if (!single) - ret = get_unaligned_le16(data); - else - ret = data[0]; - - return ret; -} - -static int bq27x00_battery_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - char *name; - struct bq27x00_device_info *di; - int num; - int retval = 0; - - /* Get new ID for the new battery device */ - mutex_lock(&battery_mutex); - num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL); - mutex_unlock(&battery_mutex); - if (num < 0) - return num; - - name = devm_kasprintf(&client->dev, GFP_KERNEL, "%s-%d", id->name, num); - if (!name) { - retval = -ENOMEM; - goto batt_failed; - } - - di = devm_kzalloc(&client->dev, sizeof(*di), GFP_KERNEL); - if (!di) { - retval = -ENOMEM; - goto batt_failed; - } - - di->id = num; - di->dev = &client->dev; - di->chip = id->driver_data; - di->bus.read = &bq27x00_read_i2c; - - retval = bq27x00_powersupply_init(di, name); - if (retval) - goto batt_failed; - - i2c_set_clientdata(client, di); - - return 0; - -batt_failed: - mutex_lock(&battery_mutex); - idr_remove(&battery_id, num); - mutex_unlock(&battery_mutex); - - return retval; -} - -static int bq27x00_battery_remove(struct i2c_client *client) -{ - struct bq27x00_device_info *di = 
i2c_get_clientdata(client); - - bq27x00_powersupply_unregister(di); - - mutex_lock(&battery_mutex); - idr_remove(&battery_id, di->id); - mutex_unlock(&battery_mutex); - - return 0; -} - -static const struct i2c_device_id bq27x00_id[] = { - { "bq27200", BQ27000 }, /* bq27200 is same as bq27000, but with i2c */ - { "bq27500", BQ27500 }, - { "bq27425", BQ27425 }, - { "bq27742", BQ27742 }, - { "bq27510", BQ27510 }, - {}, -}; -MODULE_DEVICE_TABLE(i2c, bq27x00_id); - -static struct i2c_driver bq27x00_battery_driver = { - .driver = { - .name = "bq27x00-battery", - }, - .probe = bq27x00_battery_probe, - .remove = bq27x00_battery_remove, - .id_table = bq27x00_id, -}; - -static inline int bq27x00_battery_i2c_init(void) -{ - int ret = i2c_add_driver(&bq27x00_battery_driver); - - if (ret) - pr_err("Unable to register BQ27x00 i2c driver\n"); - - return ret; -} - -static inline void bq27x00_battery_i2c_exit(void) -{ - i2c_del_driver(&bq27x00_battery_driver); -} - -#else - -static inline int bq27x00_battery_i2c_init(void) { return 0; } -static inline void bq27x00_battery_i2c_exit(void) {}; - -#endif - -/* platform specific code */ -#ifdef CONFIG_BATTERY_BQ27X00_PLATFORM - -static int bq27000_read_platform(struct bq27x00_device_info *di, u8 reg, - bool single) -{ - struct device *dev = di->dev; - struct bq27000_platform_data *pdata = dev->platform_data; - unsigned int timeout = 3; - int upper, lower; - int temp; - - if (!single) { - /* Make sure the value has not changed in between reading the - * lower and the upper part */ - upper = pdata->read(dev, reg + 1); - do { - temp = upper; - if (upper < 0) - return upper; - - lower = pdata->read(dev, reg); - if (lower < 0) - return lower; - - upper = pdata->read(dev, reg + 1); - } while (temp != upper && --timeout); - - if (timeout == 0) - return -EIO; - - return (upper << 8) | lower; - } - - return pdata->read(dev, reg); -} - -static int bq27000_battery_probe(struct platform_device *pdev) -{ - struct bq27x00_device_info *di; - struct bq27000_platform_data *pdata = pdev->dev.platform_data; - const char *name; - - if (!pdata) { - dev_err(&pdev->dev, "no platform_data supplied\n"); - return -EINVAL; - } - - if (!pdata->read) { - dev_err(&pdev->dev, "no hdq read callback supplied\n"); - return -EINVAL; - } - - di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL); - if (!di) - return -ENOMEM; - - platform_set_drvdata(pdev, di); - - di->dev = &pdev->dev; - di->chip = BQ27000; - - name = pdata->name ?: dev_name(&pdev->dev); - di->bus.read = &bq27000_read_platform; - - return bq27x00_powersupply_init(di, name); -} - -static int bq27000_battery_remove(struct platform_device *pdev) -{ - struct bq27x00_device_info *di = platform_get_drvdata(pdev); - - bq27x00_powersupply_unregister(di); - - return 0; -} - -static struct platform_driver bq27000_battery_driver = { - .probe = bq27000_battery_probe, - .remove = bq27000_battery_remove, - .driver = { - .name = "bq27000-battery", - }, -}; - -static inline int bq27x00_battery_platform_init(void) -{ - int ret = platform_driver_register(&bq27000_battery_driver); - - if (ret) - pr_err("Unable to register BQ27000 platform driver\n"); - - return ret; -} - -static inline void bq27x00_battery_platform_exit(void) -{ - platform_driver_unregister(&bq27000_battery_driver); -} - -#else - -static inline int bq27x00_battery_platform_init(void) { return 0; } -static inline void bq27x00_battery_platform_exit(void) {}; - -#endif - -/* - * Module stuff - */ - -static int __init bq27x00_battery_init(void) -{ - int ret; - - ret = 
bq27x00_battery_i2c_init(); - if (ret) - return ret; - - ret = bq27x00_battery_platform_init(); - if (ret) - bq27x00_battery_i2c_exit(); - - return ret; -} -module_init(bq27x00_battery_init); - -static void __exit bq27x00_battery_exit(void) -{ - bq27x00_battery_platform_exit(); - bq27x00_battery_i2c_exit(); -} -module_exit(bq27x00_battery_exit); - -#ifdef CONFIG_BATTERY_BQ27X00_PLATFORM -MODULE_ALIAS("platform:bq27000-battery"); -#endif - -MODULE_AUTHOR("Rodolfo Giometti "); -MODULE_DESCRIPTION("BQ27x00 battery monitor driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/power/bq27xxx_battery.c b/drivers/power/bq27xxx_battery.c new file mode 100644 index 000000000000..e72055ce7b3c --- /dev/null +++ b/drivers/power/bq27xxx_battery.c @@ -0,0 +1,1126 @@ +/* + * BQ27xxx battery driver + * + * Copyright (C) 2008 Rodolfo Giometti + * Copyright (C) 2008 Eurotech S.p.A. + * Copyright (C) 2010-2011 Lars-Peter Clausen + * Copyright (C) 2011 Pali Rohár + * + * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * Datasheets: + * http://focus.ti.com/docs/prod/folders/print/bq27000.html + * http://focus.ti.com/docs/prod/folders/print/bq27500.html + * http://www.ti.com/product/bq27425-g1 + * http://www.ti.com/product/BQ27742-G1 + * http://www.ti.com/product/BQ27510-G3 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DRIVER_VERSION "1.2.0" + +#define BQ27XXX_MANUFACTURER "Texas Instruments" + +#define BQ27x00_REG_TEMP 0x06 +#define BQ27x00_REG_VOLT 0x08 +#define BQ27x00_REG_AI 0x14 +#define BQ27x00_REG_FLAGS 0x0A +#define BQ27x00_REG_TTE 0x16 +#define BQ27x00_REG_TTF 0x18 +#define BQ27x00_REG_TTECP 0x26 +#define BQ27x00_REG_NAC 0x0C /* Nominal available capacity */ +#define BQ27x00_REG_LMD 0x12 /* Last measured discharge */ +#define BQ27x00_REG_CYCT 0x2A /* Cycle count total */ +#define BQ27x00_REG_AE 0x22 /* Available energy */ +#define BQ27x00_POWER_AVG 0x24 + +#define BQ27000_REG_RSOC 0x0B /* Relative State-of-Charge */ +#define BQ27000_REG_ILMD 0x76 /* Initial last measured discharge */ +#define BQ27000_FLAG_EDVF BIT(0) /* Final End-of-Discharge-Voltage flag */ +#define BQ27000_FLAG_EDV1 BIT(1) /* First End-of-Discharge-Voltage flag */ +#define BQ27000_FLAG_CI BIT(4) /* Capacity Inaccurate flag */ +#define BQ27000_FLAG_FC BIT(5) +#define BQ27000_FLAG_CHGS BIT(7) /* Charge state flag */ + +#define BQ27500_REG_SOC 0x2C +#define BQ27500_REG_DCAP 0x3C /* Design capacity */ +#define BQ27500_FLAG_DSC BIT(0) +#define BQ27500_FLAG_SOCF BIT(1) /* State-of-Charge threshold final */ +#define BQ27500_FLAG_SOC1 BIT(2) /* State-of-Charge threshold 1 */ +#define BQ27500_FLAG_FC BIT(9) +#define BQ27500_FLAG_OTC BIT(15) + +#define BQ27742_POWER_AVG 0x76 + +#define BQ27510_REG_SOC 0x20 +#define BQ27510_REG_DCAP 0x2E /* Design capacity */ +#define BQ27510_REG_CYCT 0x1E /* Cycle count total */ + +/* bq27425 register addresses are same as bq27x00 addresses minus 4 */ +#define BQ27425_REG_OFFSET 0x04 +#define BQ27425_REG_SOC (0x1C + BQ27425_REG_OFFSET) +#define BQ27425_REG_DCAP (0x3C + 
BQ27425_REG_OFFSET) + +#define BQ27XXX_RS 20 /* Resistor sense */ +#define BQ27XXX_POWER_CONSTANT (256 * 29200 / 1000) + +struct bq27xxx_device_info; +struct bq27xxx_access_methods { + int (*read)(struct bq27xxx_device_info *di, u8 reg, bool single); +}; + +enum bq27xxx_chip { BQ27000, BQ27500, BQ27425, BQ27742, BQ27510}; + +struct bq27xxx_reg_cache { + int temperature; + int time_to_empty; + int time_to_empty_avg; + int time_to_full; + int charge_full; + int cycle_count; + int capacity; + int energy; + int flags; + int power_avg; + int health; +}; + +struct bq27xxx_device_info { + struct device *dev; + int id; + enum bq27xxx_chip chip; + + struct bq27xxx_reg_cache cache; + int charge_design_full; + + unsigned long last_update; + struct delayed_work work; + + struct power_supply *bat; + + struct bq27xxx_access_methods bus; + + struct mutex lock; +}; + +static enum power_supply_property bq27x00_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, + POWER_SUPPLY_PROP_TIME_TO_FULL_NOW, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_ENERGY_NOW, + POWER_SUPPLY_PROP_POWER_AVG, + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + +static enum power_supply_property bq27425_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + +static enum power_supply_property bq27742_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_POWER_AVG, + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + +static enum power_supply_property bq27510_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_POWER_AVG, + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + +static unsigned int poll_interval = 360; +module_param(poll_interval, uint, 0644); +MODULE_PARM_DESC(poll_interval, + "battery poll interval in seconds - 0 disables polling"); + +/* + * Common code for BQ27xxx devices + */ + +static inline int bq27xxx_read(struct bq27xxx_device_info *di, u8 reg, + bool single) +{ + if (di->chip == BQ27425) + return di->bus.read(di, 
reg - BQ27425_REG_OFFSET, single); + return di->bus.read(di, reg, single); +} + +/* + * Higher versions of the chip like BQ27425 and BQ27500 + * differ from BQ27000 and BQ27200 in calculation of certain + * parameters. Hence we need to check for the chip type. + */ +static bool bq27xxx_is_chip_version_higher(struct bq27xxx_device_info *di) +{ + if (di->chip == BQ27425 || di->chip == BQ27500 || di->chip == BQ27742 + || di->chip == BQ27510) + return true; + return false; +} + +/* + * Return the battery Relative State-of-Charge + * Or < 0 if something fails. + */ +static int bq27xxx_battery_read_rsoc(struct bq27xxx_device_info *di) +{ + int rsoc; + + if (di->chip == BQ27500 || di->chip == BQ27742) + rsoc = bq27xxx_read(di, BQ27500_REG_SOC, false); + else if (di->chip == BQ27510) + rsoc = bq27xxx_read(di, BQ27510_REG_SOC, false); + else if (di->chip == BQ27425) + rsoc = bq27xxx_read(di, BQ27425_REG_SOC, false); + else + rsoc = bq27xxx_read(di, BQ27000_REG_RSOC, true); + + if (rsoc < 0) + dev_dbg(di->dev, "error reading relative State-of-Charge\n"); + + return rsoc; +} + +/* + * Return a battery charge value in µAh + * Or < 0 if something fails. + */ +static int bq27xxx_battery_read_charge(struct bq27xxx_device_info *di, u8 reg) +{ + int charge; + + charge = bq27xxx_read(di, reg, false); + if (charge < 0) { + dev_dbg(di->dev, "error reading charge register %02x: %d\n", + reg, charge); + return charge; + } + + if (bq27xxx_is_chip_version_higher(di)) + charge *= 1000; + else + charge = charge * 3570 / BQ27XXX_RS; + + return charge; +} + +/* + * Return the battery Nominal available capacity in µAh + * Or < 0 if something fails. + */ +static inline int bq27xxx_battery_read_nac(struct bq27xxx_device_info *di) +{ + int flags; + bool is_bq27500 = di->chip == BQ27500; + bool is_bq27742 = di->chip == BQ27742; + bool is_higher = bq27xxx_is_chip_version_higher(di); + bool flags_1b = !(is_bq27500 || is_bq27742); + + flags = bq27xxx_read(di, BQ27x00_REG_FLAGS, flags_1b); + if (flags >= 0 && !is_higher && (flags & BQ27000_FLAG_CI)) + return -ENODATA; + + return bq27xxx_battery_read_charge(di, BQ27x00_REG_NAC); +} + +/* + * Return the battery Last measured discharge in µAh + * Or < 0 if something fails. + */ +static inline int bq27xxx_battery_read_lmd(struct bq27xxx_device_info *di) +{ + return bq27xxx_battery_read_charge(di, BQ27x00_REG_LMD); +} + +/* + * Return the battery Initial last measured discharge in µAh + * Or < 0 if something fails. + */ +static int bq27xxx_battery_read_ilmd(struct bq27xxx_device_info *di) +{ + int ilmd; + + if (bq27xxx_is_chip_version_higher(di)) { + if (di->chip == BQ27425) + ilmd = bq27xxx_read(di, BQ27425_REG_DCAP, false); + else if (di->chip == BQ27510) + ilmd = bq27xxx_read(di, BQ27510_REG_DCAP, false); + else + ilmd = bq27xxx_read(di, BQ27500_REG_DCAP, false); + } else { + ilmd = bq27xxx_read(di, BQ27000_REG_ILMD, true); + } + + if (ilmd < 0) { + dev_dbg(di->dev, "error reading initial last measured discharge\n"); + return ilmd; + } + + if (bq27xxx_is_chip_version_higher(di)) + ilmd *= 1000; + else + ilmd = ilmd * 256 * 3570 / BQ27XXX_RS; + + return ilmd; +}
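On the bq27000/bq27200 branch of the charge helpers above, the 3570 / BQ27XXX_RS factor converts raw coulomb-counter counts into µAh (the later table-based patch documents 3570 as "3.57 µV * 1000", sensed across a 20 mOhm resistor), while the newer gauges already report mAh and only need the *= 1000 step. A worked sketch of that scaling, for illustration only; the helper name is made up:

	/* Illustrative only: bq27000-style raw-count scaling as used in
	 * bq27xxx_battery_read_charge() above.
	 * e.g. 1024 counts -> 1024 * 3570 / 20 = 182784 µAh (~182.8 mAh) */
	static inline int bq27000_counts_to_uah(int raw)
	{
		return raw * 3570 / 20;	/* 3570 = 3.57 * 1000, 20 = RS in mOhm */
	}
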
 + +/* + * Return the battery Available energy in µWh + * Or < 0 if something fails. + */ +static int bq27xxx_battery_read_energy(struct bq27xxx_device_info *di) +{ + int ae; + + ae = bq27xxx_read(di, BQ27x00_REG_AE, false); + if (ae < 0) { + dev_dbg(di->dev, "error reading available energy\n"); + return ae; + } + + if (di->chip == BQ27500) + ae *= 1000; + else + ae = ae * 29200 / BQ27XXX_RS; + + return ae; +} + +/* + * Return the battery temperature in tenths of degree Kelvin + * Or < 0 if something fails. + */ +static int bq27xxx_battery_read_temperature(struct bq27xxx_device_info *di) +{ + int temp; + + temp = bq27xxx_read(di, BQ27x00_REG_TEMP, false); + if (temp < 0) { + dev_err(di->dev, "error reading temperature\n"); + return temp; + } + + if (!bq27xxx_is_chip_version_higher(di)) + temp = 5 * temp / 2; + + return temp; +} + +/* + * Return the battery Cycle count total + * Or < 0 if something fails. + */ +static int bq27xxx_battery_read_cyct(struct bq27xxx_device_info *di) +{ + int cyct; + + if (di->chip == BQ27510) + cyct = bq27xxx_read(di, BQ27510_REG_CYCT, false); + else + cyct = bq27xxx_read(di, BQ27x00_REG_CYCT, false); + if (cyct < 0) + dev_err(di->dev, "error reading cycle count total\n"); + + return cyct; +} + +/* + * Read a time register. + * Return < 0 if something fails. + */ +static int bq27xxx_battery_read_time(struct bq27xxx_device_info *di, u8 reg) +{ + int tval; + + tval = bq27xxx_read(di, reg, false); + if (tval < 0) { + dev_dbg(di->dev, "error reading time register %02x: %d\n", + reg, tval); + return tval; + } + + if (tval == 65535) + return -ENODATA; + + return tval * 60; +} + +/* + * Read a power avg register. + * Return < 0 if something fails. + */ +static int bq27xxx_battery_read_pwr_avg(struct bq27xxx_device_info *di, u8 reg) +{ + int tval; + + tval = bq27xxx_read(di, reg, false); + if (tval < 0) { + dev_err(di->dev, "error reading power avg register %02x: %d\n", + reg, tval); + return tval; + } + + if (di->chip == BQ27500) + return tval; + else + return (tval * BQ27XXX_POWER_CONSTANT) / BQ27XXX_RS; +} + +/* + * Read flag register. + * Return < 0 if something fails. + */ +static int bq27xxx_battery_read_health(struct bq27xxx_device_info *di) +{ + int tval; + + tval = bq27xxx_read(di, BQ27x00_REG_FLAGS, false); + if (tval < 0) { + dev_err(di->dev, "error reading flag register:%d\n", tval); + return tval; + } + + if (di->chip == BQ27500) { + if (tval & BQ27500_FLAG_SOCF) + tval = POWER_SUPPLY_HEALTH_DEAD; + else if (tval & BQ27500_FLAG_OTC) + tval = POWER_SUPPLY_HEALTH_OVERHEAT; + else + tval = POWER_SUPPLY_HEALTH_GOOD; + return tval; + } else if (di->chip == BQ27510) { + if (tval & BQ27500_FLAG_OTC) + return POWER_SUPPLY_HEALTH_OVERHEAT; + return POWER_SUPPLY_HEALTH_GOOD; + } else { + if (tval & BQ27000_FLAG_EDV1) + tval = POWER_SUPPLY_HEALTH_DEAD; + else + tval = POWER_SUPPLY_HEALTH_GOOD; + return tval; + } + + return -1; +} + +static void bq27xxx_battery_update(struct bq27xxx_device_info *di) +{ + struct bq27xxx_reg_cache cache = {0, }; + bool is_bq27500 = di->chip == BQ27500; + bool is_bq27510 = di->chip == BQ27510; + bool is_bq27425 = di->chip == BQ27425; + bool is_bq27742 = di->chip == BQ27742; + bool flags_1b = !(is_bq27500 || is_bq27742); + + cache.flags = bq27xxx_read(di, BQ27x00_REG_FLAGS, flags_1b); + if ((cache.flags & 0xff) == 0xff) + /* read error */ + cache.flags = -1; + if (cache.flags >= 0) { + if (!is_bq27500 && !is_bq27425 && !is_bq27742 && !is_bq27510 + && (cache.flags & BQ27000_FLAG_CI)) { + dev_info(di->dev, "battery is not calibrated! 
ignoring capacity values\n"); + cache.capacity = -ENODATA; + cache.energy = -ENODATA; + cache.time_to_empty = -ENODATA; + cache.time_to_empty_avg = -ENODATA; + cache.time_to_full = -ENODATA; + cache.charge_full = -ENODATA; + cache.health = -ENODATA; + } else { + cache.capacity = bq27xxx_battery_read_rsoc(di); + if (is_bq27742 || is_bq27510) + cache.time_to_empty = + bq27xxx_battery_read_time(di, + BQ27x00_REG_TTE); + else if (!is_bq27425) { + cache.energy = bq27xxx_battery_read_energy(di); + cache.time_to_empty = + bq27xxx_battery_read_time(di, + BQ27x00_REG_TTE); + cache.time_to_empty_avg = + bq27xxx_battery_read_time(di, + BQ27x00_REG_TTECP); + cache.time_to_full = + bq27xxx_battery_read_time(di, + BQ27x00_REG_TTF); + } + cache.charge_full = bq27xxx_battery_read_lmd(di); + cache.health = bq27xxx_battery_read_health(di); + } + cache.temperature = bq27xxx_battery_read_temperature(di); + if (!is_bq27425) + cache.cycle_count = bq27xxx_battery_read_cyct(di); + if (is_bq27742) + cache.power_avg = + bq27xxx_battery_read_pwr_avg(di, + BQ27742_POWER_AVG); + else + cache.power_avg = + bq27xxx_battery_read_pwr_avg(di, + BQ27x00_POWER_AVG); + + /* We only have to read charge design full once */ + if (di->charge_design_full <= 0) + di->charge_design_full = bq27xxx_battery_read_ilmd(di); + } + + if (di->cache.capacity != cache.capacity) + power_supply_changed(di->bat); + + if (memcmp(&di->cache, &cache, sizeof(cache)) != 0) + di->cache = cache; + + di->last_update = jiffies; +} + +static void bq27xxx_battery_poll(struct work_struct *work) +{ + struct bq27xxx_device_info *di = + container_of(work, struct bq27xxx_device_info, work.work); + + bq27xxx_battery_update(di); + + if (poll_interval > 0) { + /* The timer does not have to be accurate. */ + set_timer_slack(&di->work.timer, poll_interval * HZ / 4); + schedule_delayed_work(&di->work, poll_interval * HZ); + } +} + +/* + * Return the battery average current in µA + * Note that current can be negative signed as well + * Or 0 if something fails. 
+ */ +static int bq27xxx_battery_current(struct bq27xxx_device_info *di, + union power_supply_propval *val) +{ + int curr; + int flags; + + curr = bq27xxx_read(di, BQ27x00_REG_AI, false); + if (curr < 0) { + dev_err(di->dev, "error reading current\n"); + return curr; + } + + if (bq27xxx_is_chip_version_higher(di)) { + /* bq27500 returns signed value */ + val->intval = (int)((s16)curr) * 1000; + } else { + flags = bq27xxx_read(di, BQ27x00_REG_FLAGS, false); + if (flags & BQ27000_FLAG_CHGS) { + dev_dbg(di->dev, "negative current!\n"); + curr = -curr; + } + + val->intval = curr * 3570 / BQ27XXX_RS; + } + + return 0; +} + +static int bq27xxx_battery_status(struct bq27xxx_device_info *di, + union power_supply_propval *val) +{ + int status; + + if (bq27xxx_is_chip_version_higher(di)) { + if (di->cache.flags & BQ27500_FLAG_FC) + status = POWER_SUPPLY_STATUS_FULL; + else if (di->cache.flags & BQ27500_FLAG_DSC) + status = POWER_SUPPLY_STATUS_DISCHARGING; + else + status = POWER_SUPPLY_STATUS_CHARGING; + } else { + if (di->cache.flags & BQ27000_FLAG_FC) + status = POWER_SUPPLY_STATUS_FULL; + else if (di->cache.flags & BQ27000_FLAG_CHGS) + status = POWER_SUPPLY_STATUS_CHARGING; + else if (power_supply_am_i_supplied(di->bat)) + status = POWER_SUPPLY_STATUS_NOT_CHARGING; + else + status = POWER_SUPPLY_STATUS_DISCHARGING; + } + + val->intval = status; + + return 0; +} + +static int bq27xxx_battery_capacity_level(struct bq27xxx_device_info *di, + union power_supply_propval *val) +{ + int level; + + if (bq27xxx_is_chip_version_higher(di)) { + if (di->cache.flags & BQ27500_FLAG_FC) + level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; + else if (di->cache.flags & BQ27500_FLAG_SOC1) + level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; + else if (di->cache.flags & BQ27500_FLAG_SOCF) + level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; + else + level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; + } else { + if (di->cache.flags & BQ27000_FLAG_FC) + level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; + else if (di->cache.flags & BQ27000_FLAG_EDV1) + level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; + else if (di->cache.flags & BQ27000_FLAG_EDVF) + level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; + else + level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; + } + + val->intval = level; + + return 0; +} + +/* + * Return the battery Voltage in millivolts + * Or < 0 if something fails. 
+ */ +static int bq27xxx_battery_voltage(struct bq27xxx_device_info *di, + union power_supply_propval *val) +{ + int volt; + + volt = bq27xxx_read(di, BQ27x00_REG_VOLT, false); + if (volt < 0) { + dev_err(di->dev, "error reading voltage\n"); + return volt; + } + + val->intval = volt * 1000; + + return 0; +} + +static int bq27xxx_simple_value(int value, + union power_supply_propval *val) +{ + if (value < 0) + return value; + + val->intval = value; + + return 0; +} + +static int bq27xxx_battery_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + int ret = 0; + struct bq27xxx_device_info *di = power_supply_get_drvdata(psy); + + mutex_lock(&di->lock); + if (time_is_before_jiffies(di->last_update + 5 * HZ)) { + cancel_delayed_work_sync(&di->work); + bq27xxx_battery_poll(&di->work.work); + } + mutex_unlock(&di->lock); + + if (psp != POWER_SUPPLY_PROP_PRESENT && di->cache.flags < 0) + return -ENODEV; + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + ret = bq27xxx_battery_status(di, val); + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + ret = bq27xxx_battery_voltage(di, val); + break; + case POWER_SUPPLY_PROP_PRESENT: + val->intval = di->cache.flags < 0 ? 0 : 1; + break; + case POWER_SUPPLY_PROP_CURRENT_NOW: + ret = bq27xxx_battery_current(di, val); + break; + case POWER_SUPPLY_PROP_CAPACITY: + ret = bq27xxx_simple_value(di->cache.capacity, val); + break; + case POWER_SUPPLY_PROP_CAPACITY_LEVEL: + ret = bq27xxx_battery_capacity_level(di, val); + break; + case POWER_SUPPLY_PROP_TEMP: + ret = bq27xxx_simple_value(di->cache.temperature, val); + if (ret == 0) + val->intval -= 2731; + break; + case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW: + ret = bq27xxx_simple_value(di->cache.time_to_empty, val); + break; + case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG: + ret = bq27xxx_simple_value(di->cache.time_to_empty_avg, val); + break; + case POWER_SUPPLY_PROP_TIME_TO_FULL_NOW: + ret = bq27xxx_simple_value(di->cache.time_to_full, val); + break; + case POWER_SUPPLY_PROP_TECHNOLOGY: + val->intval = POWER_SUPPLY_TECHNOLOGY_LION; + break; + case POWER_SUPPLY_PROP_CHARGE_NOW: + ret = bq27xxx_simple_value(bq27xxx_battery_read_nac(di), val); + break; + case POWER_SUPPLY_PROP_CHARGE_FULL: + ret = bq27xxx_simple_value(di->cache.charge_full, val); + break; + case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: + ret = bq27xxx_simple_value(di->charge_design_full, val); + break; + case POWER_SUPPLY_PROP_CYCLE_COUNT: + ret = bq27xxx_simple_value(di->cache.cycle_count, val); + break; + case POWER_SUPPLY_PROP_ENERGY_NOW: + ret = bq27xxx_simple_value(di->cache.energy, val); + break; + case POWER_SUPPLY_PROP_POWER_AVG: + ret = bq27xxx_simple_value(di->cache.power_avg, val); + break; + case POWER_SUPPLY_PROP_HEALTH: + ret = bq27xxx_simple_value(di->cache.health, val); + break; + case POWER_SUPPLY_PROP_MANUFACTURER: + val->strval = BQ27XXX_MANUFACTURER; + break; + default: + return -EINVAL; + } + + return ret; +} + +static void bq27xxx_external_power_changed(struct power_supply *psy) +{ + struct bq27xxx_device_info *di = power_supply_get_drvdata(psy); + + cancel_delayed_work_sync(&di->work); + schedule_delayed_work(&di->work, 0); +} + +static int bq27xxx_powersupply_init(struct bq27xxx_device_info *di, + const char *name) +{ + int ret; + struct power_supply_desc *psy_desc; + struct power_supply_config psy_cfg = { .drv_data = di, }; + + psy_desc = devm_kzalloc(di->dev, sizeof(*psy_desc), GFP_KERNEL); + if (!psy_desc) + return -ENOMEM; + + psy_desc->name = name; + psy_desc->type = 
POWER_SUPPLY_TYPE_BATTERY; + if (di->chip == BQ27425) { + psy_desc->properties = bq27425_battery_props; + psy_desc->num_properties = ARRAY_SIZE(bq27425_battery_props); + } else if (di->chip == BQ27742) { + psy_desc->properties = bq27742_battery_props; + psy_desc->num_properties = ARRAY_SIZE(bq27742_battery_props); + } else if (di->chip == BQ27510) { + psy_desc->properties = bq27510_battery_props; + psy_desc->num_properties = ARRAY_SIZE(bq27510_battery_props); + } else { + psy_desc->properties = bq27x00_battery_props; + psy_desc->num_properties = ARRAY_SIZE(bq27x00_battery_props); + } + psy_desc->get_property = bq27xxx_battery_get_property; + psy_desc->external_power_changed = bq27xxx_external_power_changed; + + INIT_DELAYED_WORK(&di->work, bq27xxx_battery_poll); + mutex_init(&di->lock); + + di->bat = power_supply_register_no_ws(di->dev, psy_desc, &psy_cfg); + if (IS_ERR(di->bat)) { + ret = PTR_ERR(di->bat); + dev_err(di->dev, "failed to register battery: %d\n", ret); + return ret; + } + + dev_info(di->dev, "support ver. %s enabled\n", DRIVER_VERSION); + + bq27xxx_battery_update(di); + + return 0; +} + +static void bq27xxx_powersupply_unregister(struct bq27xxx_device_info *di) +{ + /* + * power_supply_unregister() calls bq27xxx_battery_get_property(), + * which calls bq27xxx_battery_poll(). + * Make sure that bq27xxx_battery_poll() will not call + * schedule_delayed_work() again after unregister (which would cause + * an oops). + */ + poll_interval = 0; + + cancel_delayed_work_sync(&di->work); + + power_supply_unregister(di->bat); + + mutex_destroy(&di->lock); +} + +/* i2c specific code */ +#ifdef CONFIG_BATTERY_BQ27XXX_I2C + +/* If the system has several batteries we need a different name for each + * of them... + */ +static DEFINE_IDR(battery_id); +static DEFINE_MUTEX(battery_mutex); + +static int bq27xxx_battery_i2c_read(struct bq27xxx_device_info *di, u8 reg, + bool single) +{ + struct i2c_client *client = to_i2c_client(di->dev); + struct i2c_msg msg[2]; + unsigned char data[2]; + int ret; + + if (!client->adapter) + return -ENODEV; + + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].buf = &reg; + msg[0].len = sizeof(reg); + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].buf = data; + if (single) + msg[1].len = 1; + else + msg[1].len = 2; + + ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg)); + if (ret < 0) + return ret; + + if (!single) + ret = get_unaligned_le16(data); + else + ret = data[0]; + + return ret; +} + +static int bq27xxx_battery_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + char *name; + struct bq27xxx_device_info *di; + int num; + int retval = 0; + + /* Get new ID for the new battery device */ + mutex_lock(&battery_mutex); + num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL); + mutex_unlock(&battery_mutex); + if (num < 0) + return num; + + name = devm_kasprintf(&client->dev, GFP_KERNEL, "%s-%d", id->name, num); + if (!name) { + retval = -ENOMEM; + goto batt_failed; + } + + di = devm_kzalloc(&client->dev, sizeof(*di), GFP_KERNEL); + if (!di) { + retval = -ENOMEM; + goto batt_failed; + } + + di->id = num; + di->dev = &client->dev; + di->chip = id->driver_data; + di->bus.read = &bq27xxx_battery_i2c_read; + + retval = bq27xxx_powersupply_init(di, name); + if (retval) + goto batt_failed; + + i2c_set_clientdata(client, di); + + return 0; + +batt_failed: + mutex_lock(&battery_mutex); + idr_remove(&battery_id, num); + mutex_unlock(&battery_mutex); + + return retval; +}
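bq27xxx_battery_i2c_read() above issues a register-address write followed by a one- or two-byte read in a single combined transfer, with 16-bit values arriving little-endian. For comparison only, on an adapter that natively supports SMBus transactions the same reads could be expressed with the kernel's SMBus helpers; a minimal sketch, not taken from the patch:

	static int bq27xxx_smbus_read(struct i2c_client *client, u8 reg, bool single)
	{
		/* SMBus word data is little-endian, matching get_unaligned_le16() */
		s32 ret = single ? i2c_smbus_read_byte_data(client, reg) :
				   i2c_smbus_read_word_data(client, reg);

		return ret < 0 ? ret : (int)(u16)ret;
	}

The raw i2c_transfer() form used by the driver works on any master, including ones without SMBus emulation, which is presumably why it is preferred here.
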
 + +static int bq27xxx_battery_i2c_remove(struct i2c_client *client) +{ + struct bq27xxx_device_info *di = i2c_get_clientdata(client); + + bq27xxx_powersupply_unregister(di); + + mutex_lock(&battery_mutex); + idr_remove(&battery_id, di->id); + mutex_unlock(&battery_mutex); + + return 0; +} + +static const struct i2c_device_id bq27xxx_id[] = { + { "bq27200", BQ27000 }, /* bq27200 is same as bq27000, but with i2c */ + { "bq27500", BQ27500 }, + { "bq27425", BQ27425 }, + { "bq27742", BQ27742 }, + { "bq27510", BQ27510 }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, bq27xxx_id); + +static struct i2c_driver bq27xxx_battery_i2c_driver = { + .driver = { + .name = "bq27xxx-battery", + }, + .probe = bq27xxx_battery_i2c_probe, + .remove = bq27xxx_battery_i2c_remove, + .id_table = bq27xxx_id, +}; + +static inline int bq27xxx_battery_i2c_init(void) +{ + int ret = i2c_add_driver(&bq27xxx_battery_i2c_driver); + + if (ret) + pr_err("Unable to register BQ27xxx i2c driver\n"); + + return ret; +} + +static inline void bq27xxx_battery_i2c_exit(void) +{ + i2c_del_driver(&bq27xxx_battery_i2c_driver); +} + +#else + +static inline int bq27xxx_battery_i2c_init(void) { return 0; } +static inline void bq27xxx_battery_i2c_exit(void) {}; + +#endif + +/* platform specific code */ +#ifdef CONFIG_BATTERY_BQ27XXX_PLATFORM + +static int bq27xxx_battery_platform_read(struct bq27xxx_device_info *di, u8 reg, + bool single) +{ + struct device *dev = di->dev; + struct bq27xxx_platform_data *pdata = dev->platform_data; + unsigned int timeout = 3; + int upper, lower; + int temp; + + if (!single) { + /* Make sure the value has not changed in between reading the + * lower and the upper part */ + upper = pdata->read(dev, reg + 1); + do { + temp = upper; + if (upper < 0) + return upper; + + lower = pdata->read(dev, reg); + if (lower < 0) + return lower; + + upper = pdata->read(dev, reg + 1); + } while (temp != upper && --timeout); + + if (timeout == 0) + return -EIO; + + return (upper << 8) | lower; + } + + return pdata->read(dev, reg); +} + +static int bq27xxx_battery_platform_probe(struct platform_device *pdev) +{ + struct bq27xxx_device_info *di; + struct bq27xxx_platform_data *pdata = pdev->dev.platform_data; + const char *name; + + if (!pdata) { + dev_err(&pdev->dev, "no platform_data supplied\n"); + return -EINVAL; + } + + if (!pdata->read) { + dev_err(&pdev->dev, "no hdq read callback supplied\n"); + return -EINVAL; + } + + di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL); + if (!di) + return -ENOMEM; + + platform_set_drvdata(pdev, di); + + di->dev = &pdev->dev; + di->chip = BQ27000; + + name = pdata->name ?: dev_name(&pdev->dev); + di->bus.read = &bq27xxx_battery_platform_read; + + return bq27xxx_powersupply_init(di, name); +} + +static int bq27xxx_battery_platform_remove(struct platform_device *pdev) +{ + struct bq27xxx_device_info *di = platform_get_drvdata(pdev); + + bq27xxx_powersupply_unregister(di); + + return 0; +} + +static struct platform_driver bq27xxx_battery_platform_driver = { + .probe = bq27xxx_battery_platform_probe, + .remove = bq27xxx_battery_platform_remove, + .driver = { + .name = "bq27000-battery", + }, +}; + +static inline int bq27xxx_battery_platform_init(void) +{ + int ret = platform_driver_register(&bq27xxx_battery_platform_driver); + + if (ret) + pr_err("Unable to register BQ27xxx platform driver\n"); + + return ret; +} + +static inline void bq27xxx_battery_platform_exit(void) +{ + platform_driver_unregister(&bq27xxx_battery_platform_driver); +}
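The 16-bit branch of bq27xxx_battery_platform_read() above re-reads the upper byte until two consecutive reads agree, so a counter that rolls over between the two single-byte HDQ reads (say from 0x00ff to 0x0100) is never assembled from mismatched halves. The same pattern in isolation, as a sketch built around a hypothetical read_byte() accessor rather than code from the patch:

	/* Sketch only: read_byte() stands in for any single-byte register
	 * accessor returning the byte value or a negative errno. */
	static int read_consistent_word(int (*read_byte)(unsigned int reg),
					unsigned int reg)
	{
		unsigned int tries = 3;
		int upper, lower, prev;

		upper = read_byte(reg + 1);
		do {
			prev = upper;
			if (upper < 0)
				return upper;

			lower = read_byte(reg);
			if (lower < 0)
				return lower;

			/* if the upper byte changed, the pair may be torn */
			upper = read_byte(reg + 1);
		} while (prev != upper && --tries);

		return tries ? (upper << 8) | lower : -EIO;
	}
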
 + +#else + +static inline int bq27xxx_battery_platform_init(void) { return 0; } +static inline void bq27xxx_battery_platform_exit(void) {}; + +#endif + +/* + * Module stuff + */ + +static int __init bq27xxx_battery_init(void) +{ + int ret; + + ret = bq27xxx_battery_i2c_init(); + if (ret) + return ret; + + ret = bq27xxx_battery_platform_init(); + if (ret) + bq27xxx_battery_i2c_exit(); + + return ret; +} +module_init(bq27xxx_battery_init); + +static void __exit bq27xxx_battery_exit(void) +{ + bq27xxx_battery_platform_exit(); + bq27xxx_battery_i2c_exit(); +} +module_exit(bq27xxx_battery_exit); + +#ifdef CONFIG_BATTERY_BQ27XXX_PLATFORM +MODULE_ALIAS("platform:bq27000-battery"); +#endif + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("BQ27xxx battery monitor driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/w1/slaves/w1_bq27000.c b/drivers/w1/slaves/w1_bq27000.c index caafb1722783..84805317a5ce 100644 --- a/drivers/w1/slaves/w1_bq27000.c +++ b/drivers/w1/slaves/w1_bq27000.c @@ -15,7 +15,7 @@ #include #include #include -#include <linux/power/bq27x00_battery.h> +#include <linux/power/bq27xxx_battery.h> #include "../w1.h" #include "../w1_int.h" @@ -39,7 +39,7 @@ static int w1_bq27000_read(struct device *dev, unsigned int reg) return val; } -static struct bq27000_platform_data bq27000_battery_info = { +static struct bq27xxx_platform_data bq27000_battery_info = { .read = w1_bq27000_read, .name = "bq27000-battery", }; diff --git a/include/linux/power/bq27x00_battery.h b/include/linux/power/bq27x00_battery.h deleted file mode 100644 index a857f719bf40..000000000000 --- a/include/linux/power/bq27x00_battery.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __LINUX_BQ27X00_BATTERY_H__ -#define __LINUX_BQ27X00_BATTERY_H__ - -/** - * struct bq27000_platform_data - Platform data for bq27000 devices - * @name: Name of the battery. If NULL the driver will fall back to "bq27000". - * @read: HDQ read callback. - * This function should provide access to the HDQ bus the battery is - * connected to. - * The first parameter is a pointer to the battery device, the second the - * register to be read. The return value should either be the content of - * the passed register or an error value. - */ -struct bq27000_platform_data { - const char *name; - int (*read)(struct device *dev, unsigned int); -}; - -#endif diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h new file mode 100644 index 000000000000..e70a93a6799f --- /dev/null +++ b/include/linux/power/bq27xxx_battery.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_BQ27X00_BATTERY_H__ +#define __LINUX_BQ27X00_BATTERY_H__ + +/** + * struct bq27xxx_platform_data - Platform data for bq27xxx devices + * @name: Name of the battery. If NULL the driver will fall back to "bq27000". + * @read: HDQ read callback. + * This function should provide access to the HDQ bus the battery is + * connected to. + * The first parameter is a pointer to the battery device, the second the + * register to be read. The return value should either be the content of + * the passed register or an error value. + */ +struct bq27xxx_platform_data { + const char *name; + int (*read)(struct device *dev, unsigned int); +}; + +#endif -- cgit v1.2.3
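With the rename in place, a board file wires a battery up through the new header by providing a name and an HDQ read callback; the chip type is still implicitly bq27000 at this point (the follow-up commit below changes that). A minimal sketch, in which board_hdq_read_byte() and the "main-battery" name are made-up stand-ins for the board's real accessor and naming:

	#include <linux/platform_device.h>
	#include <linux/power/bq27xxx_battery.h>

	/* hypothetical helper; must return register contents or a negative errno */
	static int board_battery_read(struct device *dev, unsigned int reg)
	{
		return board_hdq_read_byte(reg);
	}

	static struct bq27xxx_platform_data board_battery_pdata = {
		.name = "main-battery",	/* NULL falls back to the device name */
		.read = board_battery_read,
	};

	static struct platform_device board_battery_dev = {
		.name = "bq27000-battery",	/* matches the platform driver */
		.id = -1,
		.dev = {
			.platform_data = &board_battery_pdata,
		},
	};

The device would then be registered from the board's init code with platform_device_register(&board_battery_dev).
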
From 424cfde49acaf1426e90837961e8aead0238b11b Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 22 Sep 2015 14:35:07 -0500 Subject: power: bq27xxx_battery: Platform initialization must declare a device When initialized as a platform device, the initializer must now specify the attached device (chip type) in its platform data. An empty device specification is no longer valid. Signed-off-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- drivers/power/bq27xxx_battery.c | 9 ++++++--- drivers/w1/slaves/w1_bq27000.c | 1 + include/linux/power/bq27xxx_battery.h | 6 +++++- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/bq27xxx_battery.c b/drivers/power/bq27xxx_battery.c index e72055ce7b3c..073441383df1 100644 --- a/drivers/power/bq27xxx_battery.c +++ b/drivers/power/bq27xxx_battery.c @@ -91,8 +91,6 @@ struct bq27xxx_access_methods { int (*read)(struct bq27xxx_device_info *di, u8 reg, bool single); }; -enum bq27xxx_chip { BQ27000, BQ27500, BQ27425, BQ27742, BQ27510}; - struct bq27xxx_reg_cache { int temperature; int time_to_empty; @@ -1036,6 +1034,11 @@ static int bq27xxx_battery_platform_probe(struct platform_device *pdev) return -EINVAL; } + if (!pdata->chip) { + dev_err(&pdev->dev, "no device supplied\n"); + return -EINVAL; + } + di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL); if (!di) return -ENOMEM; @@ -1043,7 +1046,7 @@ static int bq27xxx_battery_platform_probe(struct platform_device *pdev) platform_set_drvdata(pdev, di); di->dev = &pdev->dev; - di->chip = BQ27000; + di->chip = pdata->chip; name = pdata->name ?: dev_name(&pdev->dev); di->bus.read = &bq27xxx_battery_platform_read; diff --git a/drivers/w1/slaves/w1_bq27000.c b/drivers/w1/slaves/w1_bq27000.c index 84805317a5ce..9f4a86b754ba 100644 --- a/drivers/w1/slaves/w1_bq27000.c +++ b/drivers/w1/slaves/w1_bq27000.c @@ -42,6 +42,7 @@ static int w1_bq27000_read(struct device *dev, unsigned int reg) static struct bq27xxx_platform_data bq27000_battery_info = { .read = w1_bq27000_read, .name = "bq27000-battery", + .chip = BQ27000, }; static int w1_bq27000_add_slave(struct w1_slave *sl) diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index e70a93a6799f..a4efb10a1bab 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -3,7 +3,8 @@ /** * struct bq27xxx_platform_data - Platform data for bq27xxx devices - * @name: Name of the battery. If NULL the driver will fall back to "bq27000". + * @name: Name of the battery. + * @chip: Chip class number of this device. * @read: HDQ read callback. * This function should provide access to the HDQ bus the battery is * connected to. @@ -11,8 +12,11 @@ * register to be read. The return value should either be the content of * the passed register or an error value. */ +enum bq27xxx_chip { BQ27000 = 1, BQ27500, BQ27425, BQ27742, BQ27510 }; + struct bq27xxx_platform_data { const char *name; + enum bq27xxx_chip chip; int (*read)(struct device *dev, unsigned int); }; -- cgit v1.2.3 From d74534c27775857cb09abd0f92ed9539dc8d0a93 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 22 Sep 2015 14:35:09 -0500 Subject: power: bq27xxx_battery: Add support for additional bq27xxx family devices Add support for additional devices and register equivalent family devices including the bq27010, bq27210, bq27500, bq27510, bq27520, bq27530, bq27531, bq27541, bq27542, bq27546, bq27545, bq27441, bq27421, and the bq27641. To facilitate this process, the register mappings have been moved to tables and other small cleanups have been made. Signed-off-by: Andrew F. 
Davis Signed-off-by: Sebastian Reichel --- drivers/power/bq27xxx_battery.c | 622 ++++++++++++++++++++++------------ include/linux/power/bq27xxx_battery.h | 10 +- 2 files changed, 421 insertions(+), 211 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/bq27xxx_battery.c b/drivers/power/bq27xxx_battery.c index 1ff88ade97fd..236c5888aa9d 100644 --- a/drivers/power/bq27xxx_battery.c +++ b/drivers/power/bq27xxx_battery.c @@ -17,11 +17,24 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Datasheets: - * http://focus.ti.com/docs/prod/folders/print/bq27000.html - * http://focus.ti.com/docs/prod/folders/print/bq27500.html + * http://www.ti.com/product/bq27000 + * http://www.ti.com/product/bq27200 + * http://www.ti.com/product/bq27010 + * http://www.ti.com/product/bq27210 + * http://www.ti.com/product/bq27500 + * http://www.ti.com/product/bq27510-g3 + * http://www.ti.com/product/bq27520-g4 + * http://www.ti.com/product/bq27530-g1 + * http://www.ti.com/product/bq27531-g1 + * http://www.ti.com/product/bq27541-g1 + * http://www.ti.com/product/bq27542-g1 + * http://www.ti.com/product/bq27546-g1 + * http://www.ti.com/product/bq27742-g1 + * http://www.ti.com/product/bq27545-g1 + * http://www.ti.com/product/bq27421-g1 * http://www.ti.com/product/bq27425-g1 - * http://www.ti.com/product/BQ27742-G1 - * http://www.ti.com/product/BQ27510-G3 + * http://www.ti.com/product/bq27411-g1 + * http://www.ti.com/product/bq27621-g1 */ #include @@ -43,54 +56,57 @@ #define BQ27XXX_MANUFACTURER "Texas Instruments" -#define BQ27x00_REG_TEMP 0x06 -#define BQ27x00_REG_VOLT 0x08 -#define BQ27x00_REG_AI 0x14 -#define BQ27x00_REG_FLAGS 0x0A -#define BQ27x00_REG_TTE 0x16 -#define BQ27x00_REG_TTF 0x18 -#define BQ27x00_REG_TTECP 0x26 -#define BQ27x00_REG_NAC 0x0C /* Nominal available capacity */ -#define BQ27x00_REG_LMD 0x12 /* Last measured discharge */ -#define BQ27x00_REG_CYCT 0x2A /* Cycle count total */ -#define BQ27x00_REG_AE 0x22 /* Available energy */ -#define BQ27x00_POWER_AVG 0x24 - -#define BQ27000_REG_RSOC 0x0B /* Relative State-of-Charge */ -#define BQ27000_REG_ILMD 0x76 /* Initial last measured discharge */ +/* BQ27XXX Flags */ +#define BQ27XXX_FLAG_DSC BIT(0) +#define BQ27XXX_FLAG_SOCF BIT(1) /* State-of-Charge threshold final */ +#define BQ27XXX_FLAG_SOC1 BIT(2) /* State-of-Charge threshold 1 */ +#define BQ27XXX_FLAG_FC BIT(9) +#define BQ27XXX_FLAG_OTD BIT(14) +#define BQ27XXX_FLAG_OTC BIT(15) + +/* BQ27000 has different layout for Flags register */ #define BQ27000_FLAG_EDVF BIT(0) /* Final End-of-Discharge-Voltage flag */ #define BQ27000_FLAG_EDV1 BIT(1) /* First End-of-Discharge-Voltage flag */ #define BQ27000_FLAG_CI BIT(4) /* Capacity Inaccurate flag */ #define BQ27000_FLAG_FC BIT(5) #define BQ27000_FLAG_CHGS BIT(7) /* Charge state flag */ -#define BQ27500_REG_SOC 0x2C -#define BQ27500_REG_DCAP 0x3C /* Design capacity */ -#define BQ27500_FLAG_DSC BIT(0) -#define BQ27500_FLAG_SOCF BIT(1) /* State-of-Charge threshold final */ -#define BQ27500_FLAG_SOC1 BIT(2) /* State-of-Charge threshold 1 */ -#define BQ27500_FLAG_FC BIT(9) -#define BQ27500_FLAG_OTC BIT(15) - -#define BQ27742_POWER_AVG 0x76 - -#define BQ27510_REG_SOC 0x20 -#define BQ27510_REG_DCAP 0x2E /* Design capacity */ -#define BQ27510_REG_CYCT 0x1E /* Cycle count total */ - -/* bq27425 register addresses are same as bq27x00 addresses minus 4 */ -#define BQ27425_REG_OFFSET 0x04 -#define BQ27425_REG_SOC (0x1C + BQ27425_REG_OFFSET) -#define BQ27425_REG_DCAP (0x3C + BQ27425_REG_OFFSET) - -#define BQ27XXX_RS 20 /* 
Resistor sense */ -#define BQ27XXX_POWER_CONSTANT (256 * 29200 / 1000) +#define BQ27XXX_RS (20) /* Resistor sense mOhm */ +#define BQ27XXX_POWER_CONSTANT (29200) /* 29.2 µV^2 * 1000 */ +#define BQ27XXX_CURRENT_CONSTANT (3570) /* 3.57 µV * 1000 */ struct bq27xxx_device_info; struct bq27xxx_access_methods { int (*read)(struct bq27xxx_device_info *di, u8 reg, bool single); }; +#define INVALID_REG_ADDR 0xff + +/* + * bq27xxx_reg_index - Register names + * + * These are indexes into a device's register mapping array. + */ +enum bq27xxx_reg_index { + BQ27XXX_REG_CTRL = 0, /* Control */ + BQ27XXX_REG_TEMP, /* Temperature */ + BQ27XXX_REG_INT_TEMP, /* Internal Temperature */ + BQ27XXX_REG_VOLT, /* Voltage */ + BQ27XXX_REG_AI, /* Average Current */ + BQ27XXX_REG_FLAGS, /* Flags */ + BQ27XXX_REG_TTE, /* Time-to-Empty */ + BQ27XXX_REG_TTF, /* Time-to-Full */ + BQ27XXX_REG_TTES, /* Time-to-Empty Standby */ + BQ27XXX_REG_TTECP, /* Time-to-Empty at Constant Power */ + BQ27XXX_REG_NAC, /* Nominal Available Capacity */ + BQ27XXX_REG_FCC, /* Full Charge Capacity */ + BQ27XXX_REG_CYCT, /* Cycle Count */ + BQ27XXX_REG_AE, /* Available Energy */ + BQ27XXX_REG_SOC, /* State-of-Charge */ + BQ27XXX_REG_DCAP, /* Design Capacity */ + BQ27XXX_REG_AP, /* Average Power */ +}; + struct bq27xxx_reg_cache { int temperature; int time_to_empty; @@ -121,9 +137,162 @@ struct bq27xxx_device_info { struct bq27xxx_access_methods bus; struct mutex lock; + + u8 *regs; +}; + +/* Register mappings */ +static u8 bq27000_regs[] = { + 0x00, /* CONTROL */ + 0x06, /* TEMP */ + INVALID_REG_ADDR, /* INT TEMP - NA*/ + 0x08, /* VOLT */ + 0x14, /* AVG CURR */ + 0x0a, /* FLAGS */ + 0x16, /* TTE */ + 0x18, /* TTF */ + 0x1c, /* TTES */ + 0x26, /* TTECP */ + 0x0c, /* NAC */ + 0x12, /* LMD(FCC) */ + 0x2a, /* CYCT */ + 0x22, /* AE */ + 0x0b, /* SOC(RSOC) */ + 0x76, /* DCAP(ILMD) */ + 0x24, /* AP */ +}; + +static u8 bq27010_regs[] = { + 0x00, /* CONTROL */ + 0x06, /* TEMP */ + INVALID_REG_ADDR, /* INT TEMP - NA*/ + 0x08, /* VOLT */ + 0x14, /* AVG CURR */ + 0x0a, /* FLAGS */ + 0x16, /* TTE */ + 0x18, /* TTF */ + 0x1c, /* TTES */ + 0x26, /* TTECP */ + 0x0c, /* NAC */ + 0x12, /* LMD(FCC) */ + 0x2a, /* CYCT */ + INVALID_REG_ADDR, /* AE - NA */ + 0x0b, /* SOC(RSOC) */ + 0x76, /* DCAP(ILMD) */ + INVALID_REG_ADDR, /* AP - NA */ +}; + +static u8 bq27500_regs[] = { + 0x00, /* CONTROL */ + 0x06, /* TEMP */ + 0x28, /* INT TEMP */ + 0x08, /* VOLT */ + 0x14, /* AVG CURR */ + 0x0a, /* FLAGS */ + 0x16, /* TTE */ + INVALID_REG_ADDR, /* TTF - NA */ + 0x1a, /* TTES */ + INVALID_REG_ADDR, /* TTECP - NA */ + 0x0c, /* NAC */ + 0x12, /* LMD(FCC) */ + 0x1e, /* CYCT */ + INVALID_REG_ADDR, /* AE - NA */ + 0x20, /* SOC(RSOC) */ + 0x2e, /* DCAP(ILMD) */ + INVALID_REG_ADDR, /* AP - NA */ }; -static enum power_supply_property bq27x00_battery_props[] = { +static u8 bq27530_regs[] = { + 0x00, /* CONTROL */ + 0x06, /* TEMP */ + 0x32, /* INT TEMP */ + 0x08, /* VOLT */ + 0x14, /* AVG CURR */ + 0x0a, /* FLAGS */ + 0x16, /* TTE */ + INVALID_REG_ADDR, /* TTF - NA */ + INVALID_REG_ADDR, /* TTES - NA */ + INVALID_REG_ADDR, /* TTECP - NA */ + 0x0c, /* NAC */ + 0x12, /* LMD(FCC) */ + 0x2a, /* CYCT */ + INVALID_REG_ADDR, /* AE - NA */ + 0x2c, /* SOC(RSOC) */ + INVALID_REG_ADDR, /* DCAP - NA */ + 0x24, /* AP */ +}; + +static u8 bq27541_regs[] = { + 0x00, /* CONTROL */ + 0x06, /* TEMP */ + 0x28, /* INT TEMP */ + 0x08, /* VOLT */ + 0x14, /* AVG CURR */ + 0x0a, /* FLAGS */ + 0x16, /* TTE */ + INVALID_REG_ADDR, /* TTF - NA */ + INVALID_REG_ADDR, /* TTES - NA */ + INVALID_REG_ADDR, /* TTECP - 
NA */ + 0x0c, /* NAC */ + 0x12, /* LMD(FCC) */ + 0x2a, /* CYCT */ + INVALID_REG_ADDR, /* AE - NA */ + 0x2c, /* SOC(RSOC) */ + 0x3c, /* DCAP */ + 0x76, /* AP */ +}; + +static u8 bq27545_regs[] = { + 0x00, /* CONTROL */ + 0x06, /* TEMP */ + 0x28, /* INT TEMP */ + 0x08, /* VOLT */ + 0x14, /* AVG CURR */ + 0x0a, /* FLAGS */ + 0x16, /* TTE */ + INVALID_REG_ADDR, /* TTF - NA */ + INVALID_REG_ADDR, /* TTES - NA */ + INVALID_REG_ADDR, /* TTECP - NA */ + 0x0c, /* NAC */ + 0x12, /* LMD(FCC) */ + 0x2a, /* CYCT */ + INVALID_REG_ADDR, /* AE - NA */ + 0x2c, /* SOC(RSOC) */ + INVALID_REG_ADDR, /* DCAP - NA */ + 0x24, /* AP */ +}; + +static u8 bq27421_regs[] = { + 0x00, /* CONTROL */ + 0x02, /* TEMP */ + 0x1e, /* INT TEMP */ + 0x04, /* VOLT */ + 0x10, /* AVG CURR */ + 0x06, /* FLAGS */ + INVALID_REG_ADDR, /* TTE - NA */ + INVALID_REG_ADDR, /* TTF - NA */ + INVALID_REG_ADDR, /* TTES - NA */ + INVALID_REG_ADDR, /* TTECP - NA */ + 0x08, /* NAC */ + 0x0e, /* FCC */ + INVALID_REG_ADDR, /* CYCT - NA */ + INVALID_REG_ADDR, /* AE - NA */ + 0x1c, /* SOC */ + 0x3c, /* DCAP */ + 0x18, /* AP */ +}; + +static u8 *bq27xxx_regs[] = { + [BQ27000] = bq27000_regs, + [BQ27010] = bq27010_regs, + [BQ27500] = bq27500_regs, + [BQ27530] = bq27530_regs, + [BQ27541] = bq27541_regs, + [BQ27545] = bq27545_regs, + [BQ27421] = bq27421_regs, +}; + +static enum power_supply_property bq27000_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_VOLTAGE_NOW, @@ -145,7 +314,7 @@ static enum power_supply_property bq27x00_battery_props[] = { POWER_SUPPLY_PROP_MANUFACTURER, }; -static enum power_supply_property bq27425_battery_props[] = { +static enum power_supply_property bq27010_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_VOLTAGE_NOW, @@ -153,14 +322,19 @@ static enum power_supply_property bq27425_battery_props[] = { POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, + POWER_SUPPLY_PROP_TIME_TO_FULL_NOW, POWER_SUPPLY_PROP_TECHNOLOGY, POWER_SUPPLY_PROP_CHARGE_FULL, POWER_SUPPLY_PROP_CHARGE_NOW, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_MANUFACTURER, }; -static enum power_supply_property bq27742_battery_props[] = { +static enum power_supply_property bq27500_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_VOLTAGE_NOW, @@ -174,12 +348,29 @@ static enum power_supply_property bq27742_battery_props[] = { POWER_SUPPLY_PROP_CHARGE_NOW, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + +static enum power_supply_property bq27530_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, POWER_SUPPLY_PROP_POWER_AVG, POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_CYCLE_COUNT, POWER_SUPPLY_PROP_MANUFACTURER, }; -static enum power_supply_property bq27510_battery_props[] = { +static enum power_supply_property bq27541_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_VOLTAGE_NOW, @@ -198,6 +389,58 @@ static enum 
power_supply_property bq27510_battery_props[] = { POWER_SUPPLY_PROP_MANUFACTURER, }; +static enum power_supply_property bq27545_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_POWER_AVG, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + +static enum power_supply_property bq27421_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + +#define BQ27XXX_PROP(_id, _prop) \ + [_id] = { \ + .props = _prop, \ + .size = ARRAY_SIZE(_prop), \ + } + +static struct { + enum power_supply_property *props; + size_t size; +} bq27xxx_battery_props[] = { + BQ27XXX_PROP(BQ27000, bq27000_battery_props), + BQ27XXX_PROP(BQ27010, bq27010_battery_props), + BQ27XXX_PROP(BQ27500, bq27500_battery_props), + BQ27XXX_PROP(BQ27530, bq27530_battery_props), + BQ27XXX_PROP(BQ27541, bq27541_battery_props), + BQ27XXX_PROP(BQ27545, bq27545_battery_props), + BQ27XXX_PROP(BQ27421, bq27421_battery_props), +}; + static unsigned int poll_interval = 360; module_param(poll_interval, uint, 0644); MODULE_PARM_DESC(poll_interval, @@ -207,25 +450,14 @@ MODULE_PARM_DESC(poll_interval, * Common code for BQ27xxx devices */ -static inline int bq27xxx_read(struct bq27xxx_device_info *di, u8 reg, +static inline int bq27xxx_read(struct bq27xxx_device_info *di, int reg_index, bool single) { - if (di->chip == BQ27425) - return di->bus.read(di, reg - BQ27425_REG_OFFSET, single); - return di->bus.read(di, reg, single); -} + /* Reports EINVAL for invalid/missing registers */ + if (!di || di->regs[reg_index] == INVALID_REG_ADDR) + return -EINVAL; -/* - * Higher versions of the chip like BQ27425 and BQ27500 - * differ from BQ27000 and BQ27200 in calculation of certain - * parameters. Hence we need to check for the chip type. 
- */ -static bool bq27xxx_is_chip_version_higher(struct bq27xxx_device_info *di) -{ - if (di->chip == BQ27425 || di->chip == BQ27500 || di->chip == BQ27742 - || di->chip == BQ27510) - return true; - return false; + return di->bus.read(di, di->regs[reg_index], single); } /* @@ -236,14 +468,7 @@ static int bq27xxx_battery_read_soc(struct bq27xxx_device_info *di) { int soc; - if (di->chip == BQ27500 || di->chip == BQ27742) - soc = bq27xxx_read(di, BQ27500_REG_SOC, false); - else if (di->chip == BQ27510) - soc = bq27xxx_read(di, BQ27510_REG_SOC, false); - else if (di->chip == BQ27425) - soc = bq27xxx_read(di, BQ27425_REG_SOC, false); - else /* for the bq27000 we read the "relative" SoC register */ - soc = bq27xxx_read(di, BQ27000_REG_RSOC, true); + soc = bq27xxx_read(di, BQ27XXX_REG_SOC, false); if (soc < 0) dev_dbg(di->dev, "error reading State-of-Charge\n"); @@ -266,10 +491,10 @@ static int bq27xxx_battery_read_charge(struct bq27xxx_device_info *di, u8 reg) return charge; } - if (bq27xxx_is_chip_version_higher(di)) - charge *= 1000; + if (di->chip == BQ27000 || di->chip == BQ27010) + charge *= BQ27XXX_CURRENT_CONSTANT / BQ27XXX_RS; else - charge = charge * 3570 / BQ27XXX_RS; + charge *= 1000; return charge; } @@ -281,57 +506,46 @@ static int bq27xxx_battery_read_charge(struct bq27xxx_device_info *di, u8 reg) static inline int bq27xxx_battery_read_nac(struct bq27xxx_device_info *di) { int flags; - bool is_bq27500 = di->chip == BQ27500; - bool is_bq27742 = di->chip == BQ27742; - bool is_higher = bq27xxx_is_chip_version_higher(di); - bool flags_1b = !(is_bq27500 || is_bq27742); - flags = bq27xxx_read(di, BQ27x00_REG_FLAGS, flags_1b); - if (flags >= 0 && !is_higher && (flags & BQ27000_FLAG_CI)) - return -ENODATA; + if (di->chip == BQ27000 || di->chip == BQ27010) { + flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, true); + if (flags >= 0 && (flags & BQ27000_FLAG_CI)) + return -ENODATA; + } - return bq27xxx_battery_read_charge(di, BQ27x00_REG_NAC); + return bq27xxx_battery_read_charge(di, BQ27XXX_REG_NAC); } /* - * Return the battery Last measured discharge in µAh + * Return the battery Full Charge Capacity in µAh * Or < 0 if something fails. */ -static inline int bq27xxx_battery_read_lmd(struct bq27xxx_device_info *di) +static inline int bq27xxx_battery_read_fcc(struct bq27xxx_device_info *di) { - return bq27xxx_battery_read_charge(di, BQ27x00_REG_LMD); + return bq27xxx_battery_read_charge(di, BQ27XXX_REG_FCC); } /* - * Return the battery Initial last measured discharge in µAh + * Return the Design Capacity in µAh * Or < 0 if something fails. 
*/ -static int bq27xxx_battery_read_ilmd(struct bq27xxx_device_info *di) +static int bq27xxx_battery_read_dcap(struct bq27xxx_device_info *di) { - int ilmd; + int dcap; - if (bq27xxx_is_chip_version_higher(di)) { - if (di->chip == BQ27425) - ilmd = bq27xxx_read(di, BQ27425_REG_DCAP, false); - else if (di->chip == BQ27510) - ilmd = bq27xxx_read(di, BQ27510_REG_DCAP, false); - else - ilmd = bq27xxx_read(di, BQ27500_REG_DCAP, false); - } else { - ilmd = bq27xxx_read(di, BQ27000_REG_ILMD, true); - } + dcap = bq27xxx_read(di, BQ27XXX_REG_DCAP, false); - if (ilmd < 0) { + if (dcap < 0) { dev_dbg(di->dev, "error reading initial last measured discharge\n"); - return ilmd; + return dcap; } - if (bq27xxx_is_chip_version_higher(di)) - ilmd *= 1000; + if (di->chip == BQ27000 || di->chip == BQ27010) + dcap *= BQ27XXX_CURRENT_CONSTANT / BQ27XXX_RS; else - ilmd = ilmd * 256 * 3570 / BQ27XXX_RS; + dcap *= 1000; - return ilmd; + return dcap; } /* @@ -342,16 +556,16 @@ static int bq27xxx_battery_read_energy(struct bq27xxx_device_info *di) { int ae; - ae = bq27xxx_read(di, BQ27x00_REG_AE, false); + ae = bq27xxx_read(di, BQ27XXX_REG_AE, false); if (ae < 0) { dev_dbg(di->dev, "error reading available energy\n"); return ae; } - if (di->chip == BQ27500) - ae *= 1000; + if (di->chip == BQ27000 || di->chip == BQ27010) + ae *= BQ27XXX_POWER_CONSTANT / BQ27XXX_RS; else - ae = ae * 29200 / BQ27XXX_RS; + ae *= 1000; return ae; } @@ -364,13 +578,13 @@ static int bq27xxx_battery_read_temperature(struct bq27xxx_device_info *di) { int temp; - temp = bq27xxx_read(di, BQ27x00_REG_TEMP, false); + temp = bq27xxx_read(di, BQ27XXX_REG_TEMP, false); if (temp < 0) { dev_err(di->dev, "error reading temperature\n"); return temp; } - if (!bq27xxx_is_chip_version_higher(di)) + if (di->chip == BQ27000 || di->chip == BQ27010) temp = 5 * temp / 2; return temp; @@ -384,10 +598,7 @@ static int bq27xxx_battery_read_cyct(struct bq27xxx_device_info *di) { int cyct; - if (di->chip == BQ27510) - cyct = bq27xxx_read(di, BQ27510_REG_CYCT, false); - else - cyct = bq27xxx_read(di, BQ27x00_REG_CYCT, false); + cyct = bq27xxx_read(di, BQ27XXX_REG_CYCT, false); if (cyct < 0) dev_err(di->dev, "error reading cycle count total\n"); @@ -419,21 +630,32 @@ static int bq27xxx_battery_read_time(struct bq27xxx_device_info *di, u8 reg) * Read an average power register. * Return < 0 if something fails. 
*/ -static int bq27xxx_battery_read_pwr_avg(struct bq27xxx_device_info *di, u8 reg) +static int bq27xxx_battery_read_pwr_avg(struct bq27xxx_device_info *di) { int tval; - tval = bq27xxx_read(di, reg, false); + tval = bq27xxx_read(di, BQ27XXX_REG_AP, false); if (tval < 0) { - dev_err(di->dev, "error reading power avg rgister %02x: %d\n", - reg, tval); + dev_err(di->dev, "error reading average power register %02x: %d\n", + BQ27XXX_REG_AP, tval); return tval; } - if (di->chip == BQ27500) + if (di->chip == BQ27000 || di->chip == BQ27010) + return (tval * BQ27XXX_POWER_CONSTANT) / BQ27XXX_RS; + else return tval; +} + +/* + * Returns true if a battery over temperature condition is detected + */ +static int bq27xxx_battery_overtemp(struct bq27xxx_device_info *di, u16 flags) +{ + if (di->chip == BQ27500 || di->chip == BQ27541) + return flags & (BQ27XXX_FLAG_OTC | BQ27XXX_FLAG_OTD); else - return (tval * BQ27XXX_POWER_CONSTANT) / BQ27XXX_RS; + return flags & BQ27XXX_FLAG_OTC; } /* @@ -442,53 +664,43 @@ static int bq27xxx_battery_read_pwr_avg(struct bq27xxx_device_info *di, u8 reg) */ static int bq27xxx_battery_read_health(struct bq27xxx_device_info *di) { - int tval; + u16 tval; - tval = bq27xxx_read(di, BQ27x00_REG_FLAGS, false); + tval = bq27xxx_read(di, BQ27XXX_REG_FLAGS, false); if (tval < 0) { dev_err(di->dev, "error reading flag register:%d\n", tval); return tval; } - if (di->chip == BQ27500) { - if (tval & BQ27500_FLAG_SOCF) + if (di->chip == BQ27000 || di->chip == BQ27010) { + if (tval & BQ27000_FLAG_EDV1) tval = POWER_SUPPLY_HEALTH_DEAD; - else if (tval & BQ27500_FLAG_OTC) - tval = POWER_SUPPLY_HEALTH_OVERHEAT; else tval = POWER_SUPPLY_HEALTH_GOOD; - return tval; - } else if (di->chip == BQ27510) { - if (tval & BQ27500_FLAG_OTC) - return POWER_SUPPLY_HEALTH_OVERHEAT; - return POWER_SUPPLY_HEALTH_GOOD; } else { - if (tval & BQ27000_FLAG_EDV1) + if (tval & BQ27XXX_FLAG_SOCF) tval = POWER_SUPPLY_HEALTH_DEAD; + else if (bq27xxx_battery_overtemp(di, tval)) + tval = POWER_SUPPLY_HEALTH_OVERHEAT; else tval = POWER_SUPPLY_HEALTH_GOOD; - return tval; } - return -1; + return tval; } static void bq27xxx_battery_update(struct bq27xxx_device_info *di) { struct bq27xxx_reg_cache cache = {0, }; - bool is_bq27500 = di->chip == BQ27500; - bool is_bq27510 = di->chip == BQ27510; - bool is_bq27425 = di->chip == BQ27425; - bool is_bq27742 = di->chip == BQ27742; - bool flags_1b = !(is_bq27500 || is_bq27742); + bool has_ci_flag = di->chip == BQ27000 || di->chip == BQ27010; + bool has_singe_flag = di->chip == BQ27000 || di->chip == BQ27010; - cache.flags = bq27xxx_read(di, BQ27x00_REG_FLAGS, flags_1b); + cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag); if ((cache.flags & 0xff) == 0xff) - /* read error */ - cache.flags = -1; + cache.flags = -1; /* read error */ if (cache.flags >= 0) { - if (!is_bq27500 && !is_bq27425 && !is_bq27742 && !is_bq27510 - && (cache.flags & BQ27000_FLAG_CI)) { + cache.temperature = bq27xxx_battery_read_temperature(di); + if (has_ci_flag && (cache.flags & BQ27000_FLAG_CI)) { dev_info(di->dev, "battery is not calibrated! 
ignoring capacity values\n"); cache.capacity = -ENODATA; cache.energy = -ENODATA; @@ -498,41 +710,26 @@ static void bq27xxx_battery_update(struct bq27xxx_device_info *di) cache.charge_full = -ENODATA; cache.health = -ENODATA; } else { + if (di->regs[BQ27XXX_REG_TTE] != INVALID_REG_ADDR) + cache.time_to_empty = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTE); + if (di->regs[BQ27XXX_REG_TTECP] != INVALID_REG_ADDR) + cache.time_to_empty_avg = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTECP); + if (di->regs[BQ27XXX_REG_TTF] != INVALID_REG_ADDR) + cache.time_to_full = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTF); + cache.charge_full = bq27xxx_battery_read_fcc(di); cache.capacity = bq27xxx_battery_read_soc(di); - if (is_bq27742 || is_bq27510) - cache.time_to_empty = - bq27xxx_battery_read_time(di, - BQ27x00_REG_TTE); - else if (!is_bq27425) { + if (di->regs[BQ27XXX_REG_AE] != INVALID_REG_ADDR) cache.energy = bq27xxx_battery_read_energy(di); - cache.time_to_empty = - bq27xxx_battery_read_time(di, - BQ27x00_REG_TTE); - cache.time_to_empty_avg = - bq27xxx_battery_read_time(di, - BQ27x00_REG_TTECP); - cache.time_to_full = - bq27xxx_battery_read_time(di, - BQ27x00_REG_TTF); - } - cache.charge_full = bq27xxx_battery_read_lmd(di); cache.health = bq27xxx_battery_read_health(di); } - cache.temperature = bq27xxx_battery_read_temperature(di); - if (!is_bq27425) + if (di->regs[BQ27XXX_REG_CYCT] != INVALID_REG_ADDR) cache.cycle_count = bq27xxx_battery_read_cyct(di); - if (is_bq27742) - cache.power_avg = - bq27xxx_battery_read_pwr_avg(di, - BQ27742_POWER_AVG); - else - cache.power_avg = - bq27xxx_battery_read_pwr_avg(di, - BQ27x00_POWER_AVG); + if (di->regs[BQ27XXX_REG_AP] != INVALID_REG_ADDR) + cache.power_avg = bq27xxx_battery_read_pwr_avg(di); /* We only have to read charge design full once */ if (di->charge_design_full <= 0) - di->charge_design_full = bq27xxx_battery_read_ilmd(di); + di->charge_design_full = bq27xxx_battery_read_dcap(di); } if (di->cache.capacity != cache.capacity) @@ -547,7 +744,8 @@ static void bq27xxx_battery_update(struct bq27xxx_device_info *di) static void bq27xxx_battery_poll(struct work_struct *work) { struct bq27xxx_device_info *di = - container_of(work, struct bq27xxx_device_info, work.work); + container_of(work, struct bq27xxx_device_info, + work.work); bq27xxx_battery_update(di); @@ -569,23 +767,23 @@ static int bq27xxx_battery_current(struct bq27xxx_device_info *di, int curr; int flags; - curr = bq27xxx_read(di, BQ27x00_REG_AI, false); + curr = bq27xxx_read(di, BQ27XXX_REG_AI, false); if (curr < 0) { dev_err(di->dev, "error reading current\n"); return curr; } - if (bq27xxx_is_chip_version_higher(di)) { - /* bq27500 returns signed value */ - val->intval = (int)((s16)curr) * 1000; - } else { - flags = bq27xxx_read(di, BQ27x00_REG_FLAGS, false); + if (di->chip == BQ27000 || di->chip == BQ27010) { + flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, false); if (flags & BQ27000_FLAG_CHGS) { dev_dbg(di->dev, "negative current!\n"); curr = -curr; } - val->intval = curr * 3570 / BQ27XXX_RS; + val->intval = curr * BQ27XXX_CURRENT_CONSTANT / BQ27XXX_RS; + } else { + /* Other gauges return signed value */ + val->intval = (int)((s16)curr) * 1000; } return 0; @@ -596,14 +794,7 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, { int status; - if (bq27xxx_is_chip_version_higher(di)) { - if (di->cache.flags & BQ27500_FLAG_FC) - status = POWER_SUPPLY_STATUS_FULL; - else if (di->cache.flags & BQ27500_FLAG_DSC) - status = POWER_SUPPLY_STATUS_DISCHARGING; - else - status = 
POWER_SUPPLY_STATUS_CHARGING; - } else { + if (di->chip == BQ27000 || di->chip == BQ27010) { if (di->cache.flags & BQ27000_FLAG_FC) status = POWER_SUPPLY_STATUS_FULL; else if (di->cache.flags & BQ27000_FLAG_CHGS) @@ -612,6 +803,13 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, status = POWER_SUPPLY_STATUS_NOT_CHARGING; else status = POWER_SUPPLY_STATUS_DISCHARGING; + } else { + if (di->cache.flags & BQ27XXX_FLAG_FC) + status = POWER_SUPPLY_STATUS_FULL; + else if (di->cache.flags & BQ27XXX_FLAG_DSC) + status = POWER_SUPPLY_STATUS_DISCHARGING; + else + status = POWER_SUPPLY_STATUS_CHARGING; } val->intval = status; @@ -624,21 +822,21 @@ static int bq27xxx_battery_capacity_level(struct bq27xxx_device_info *di, { int level; - if (bq27xxx_is_chip_version_higher(di)) { - if (di->cache.flags & BQ27500_FLAG_FC) + if (di->chip == BQ27000 || di->chip == BQ27010) { + if (di->cache.flags & BQ27000_FLAG_FC) level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; - else if (di->cache.flags & BQ27500_FLAG_SOC1) + else if (di->cache.flags & BQ27000_FLAG_EDV1) level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; - else if (di->cache.flags & BQ27500_FLAG_SOCF) + else if (di->cache.flags & BQ27000_FLAG_EDVF) level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; else level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; } else { - if (di->cache.flags & BQ27000_FLAG_FC) + if (di->cache.flags & BQ27XXX_FLAG_FC) level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; - else if (di->cache.flags & BQ27000_FLAG_EDV1) + else if (di->cache.flags & BQ27XXX_FLAG_SOC1) level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; - else if (di->cache.flags & BQ27000_FLAG_EDVF) + else if (di->cache.flags & BQ27XXX_FLAG_SOCF) level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; else level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; @@ -658,7 +856,7 @@ static int bq27xxx_battery_voltage(struct bq27xxx_device_info *di, { int volt; - volt = bq27xxx_read(di, BQ27x00_REG_VOLT, false); + volt = bq27xxx_read(di, BQ27XXX_REG_VOLT, false); if (volt < 0) { dev_err(di->dev, "error reading voltage\n"); return volt; @@ -719,7 +917,7 @@ static int bq27xxx_battery_get_property(struct power_supply *psy, case POWER_SUPPLY_PROP_TEMP: ret = bq27xxx_simple_value(di->cache.temperature, val); if (ret == 0) - val->intval -= 2731; + val->intval -= 2731; /* convert decidegree k to c */ break; case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW: ret = bq27xxx_simple_value(di->cache.time_to_empty, val); @@ -785,19 +983,8 @@ static int bq27xxx_powersupply_init(struct bq27xxx_device_info *di, psy_desc->name = name; psy_desc->type = POWER_SUPPLY_TYPE_BATTERY; - if (di->chip == BQ27425) { - psy_desc->properties = bq27425_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27425_battery_props); - } else if (di->chip == BQ27742) { - psy_desc->properties = bq27742_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27742_battery_props); - } else if (di->chip == BQ27510) { - psy_desc->properties = bq27510_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27510_battery_props); - } else { - psy_desc->properties = bq27x00_battery_props; - psy_desc->num_properties = ARRAY_SIZE(bq27x00_battery_props); - } + psy_desc->properties = bq27xxx_battery_props[di->chip].props; + psy_desc->num_properties = bq27xxx_battery_props[di->chip].size; psy_desc->get_property = bq27xxx_battery_get_property; psy_desc->external_power_changed = bq27xxx_external_power_changed; @@ -910,11 +1097,15 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client, di->dev = &client->dev; di->chip = id->driver_data; di->bus.read = 
&bq27xxx_battery_i2c_read; + di->regs = bq27xxx_regs[di->chip]; retval = bq27xxx_powersupply_init(di, name); if (retval) goto batt_failed; + /* Schedule a polling after about 1 min */ + schedule_delayed_work(&di->work, 60 * HZ); + i2c_set_clientdata(client, di); return 0; @@ -941,11 +1132,22 @@ static int bq27xxx_battery_i2c_remove(struct i2c_client *client) } static const struct i2c_device_id bq27xxx_id[] = { - { "bq27200", BQ27000 }, /* bq27200 is same as bq27000, but with i2c */ + { "bq27200", BQ27000 }, + { "bq27210", BQ27010 }, { "bq27500", BQ27500 }, - { "bq27425", BQ27425 }, - { "bq27742", BQ27742 }, - { "bq27510", BQ27510 }, + { "bq27510", BQ27500 }, + { "bq27520", BQ27500 }, + { "bq27530", BQ27530 }, + { "bq27531", BQ27530 }, + { "bq27541", BQ27541 }, + { "bq27542", BQ27541 }, + { "bq27546", BQ27541 }, + { "bq27742", BQ27541 }, + { "bq27545", BQ27545 }, + { "bq27421", BQ27421 }, + { "bq27425", BQ27421 }, + { "bq27441", BQ27421 }, + { "bq27621", BQ27421 }, {}, }; MODULE_DEVICE_TABLE(i2c, bq27xxx_id); diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index a4efb10a1bab..45f6a7b5b3cb 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -12,7 +12,15 @@ * register to be read. The return value should either be the content of * the passed register or an error value. */ -enum bq27xxx_chip { BQ27000 = 1, BQ27500, BQ27425, BQ27742, BQ27510 }; +enum bq27xxx_chip { + BQ27000 = 1, /* bq27000, bq27200 */ + BQ27010, /* bq27010, bq27210 */ + BQ27500, /* bq27500, bq27510, bq27520 */ + BQ27530, /* bq27530, bq27531 */ + BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ + BQ27545, /* bq27545 */ + BQ27421, /* bq27421, bq27425, bq27441, bq27621 */ +}; struct bq27xxx_platform_data { const char *name; -- cgit v1.2.3 From a79e88d9fbbe2e3ecb9d883fb59dca7468d42d79 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 23 Sep 2015 08:39:16 -0700 Subject: bridge: define some min/max/default ageing time constants Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 6 ++++++ net/bridge/br_device.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dad8b00beed2..a338a688ee4a 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,6 +46,12 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +/* values as per ieee8021QBridgeFdbAgingTime */ +#define BR_MIN_AGEING_TIME (10 * HZ) +#define BR_MAX_AGEING_TIME (1000000 * HZ) + +#define BR_DEFAULT_AGEING_TIME (300 * HZ) + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6ed2feb51e3c..2f81624a8257 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->ageing_time = 300 * HZ; + br->ageing_time = BR_DEFAULT_AGEING_TIME; br_netfilter_rtable_init(br); br_stp_timer_init(br); -- cgit v1.2.3 From b4fe8ba7a310da6a2b99e3abe67c7815198cde49 Mon Sep 17 00:00:00 2001 From: Qipeng Zha Date: Tue, 15 Sep 2015 00:39:17 +0800 Subject: regmap: Add generic macro to define regmap_irq Add REGMAP_IRQ_REG macro in regmap.h to define regmap_irq structure easily for other driver module. Signed-off-by: Qipeng Zha Acked-by: Mark Brown Signed-off-by: Lee Jones --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..f6226976e158 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -791,6 +791,9 @@ struct regmap_irq { unsigned int mask; }; +#define REGMAP_IRQ_REG(_irq, _off, _mask) \ + [_irq] = { .reg_offset = (_off), .mask = (_mask) } + /** * Description of a generic regmap irq_chip. This is not intended to * handle every possible interrupt controller, but it should handle a -- cgit v1.2.3 From c0017ed71966a19ec40c7bc900d4338ddfbc4105 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 14 Aug 2015 16:10:59 +0200 Subject: gpio: Introduce gpio descriptor 'name' The latest gpio hogging mechanism assigns each gpio a 'line-name' in the devicetree. The 'name' field is different from the 'label' field. 'label' is only used for requested GPIOs to describe its current use by driver or userspace. The 'name' field describes the GPIO itself, not the use. This is most likely identical to the label in the schematic on the GPIO line and should help to find this particular GPIO. This is equivalent to the gpiochip->names array. However names should be stored in the GPIO descriptor. We will use gpiochip->names in the future only as initializer for the GPIO descriptors for drivers that assign GPIO names hardcoded. All other GPIO names will be parsed from DT and directly assigned to the GPIO descriptor. This patch adds a helper function to find gpio descriptors by name instead of gpio number. 
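As a minimal usage sketch of the helper this commit describes (illustrative only, not part of the patch: the line name "power-en" and the consumer function are hypothetical), a driver could resolve a line by its schematic name and map it back to a legacy GPIO number with the existing desc_to_gpio() helper. Note that the follow-up commit later in this series makes gpio_name_to_desc() internal to gpiolib again, so consumer-side use like this applies only to the intermediate state of the series:

	#include <linux/gpio/consumer.h>
	#include <linux/printk.h>

	static int example_find_power_gpio(void)
	{
		/* Look up the descriptor by the GPIO's own name */
		struct gpio_desc *desc = gpio_name_to_desc("power-en");

		if (!desc)
			return -ENODEV;	/* no line carries that name */

		pr_info("power-en is legacy GPIO %d\n", desc_to_gpio(desc));
		return 0;
	}
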
Signed-off-by: Markus Pargmann Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 32 ++++++++++++++++++++++++++++++++ drivers/gpio/gpiolib.h | 3 +++ include/linux/gpio/consumer.h | 7 +++++++ 3 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5db3445552b1..697c40983246 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -89,6 +89,38 @@ struct gpio_desc *gpio_to_desc(unsigned gpio) } EXPORT_SYMBOL_GPL(gpio_to_desc); +/** + * Convert a GPIO name to its descriptor + */ +struct gpio_desc *gpio_name_to_desc(const char * const name) +{ + struct gpio_chip *chip; + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); + + list_for_each_entry(chip, &gpio_chips, list) { + int i; + + for (i = 0; i != chip->ngpio; ++i) { + struct gpio_desc *gpio = &chip->desc[i]; + + if (!gpio->name) + continue; + + if (!strcmp(gpio->name, name)) { + spin_unlock_irqrestore(&gpio_lock, flags); + return gpio; + } + } + } + + spin_unlock_irqrestore(&gpio_lock, flags); + + return NULL; +} +EXPORT_SYMBOL_GPL(gpio_name_to_desc); + /** * Get the GPIO descriptor corresponding to the given hw number for this chip. */ diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index bf343004b008..78e634d1c719 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -89,7 +89,10 @@ struct gpio_desc { #define FLAG_USED_AS_IRQ 9 /* GPIO is connected to an IRQ */ #define FLAG_IS_HOGGED 11 /* GPIO is hogged */ + /* Connection label */ const char *label; + /* Name of the GPIO */ + const char *name; }; int gpiod_request(struct gpio_desc *desc, const char *label); diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 14cac67c2012..366a3fdbdbea 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -130,6 +130,7 @@ int gpiod_to_irq(const struct gpio_desc *desc); /* Convert between the old gpio_ and new gpiod_ interfaces */ struct gpio_desc *gpio_to_desc(unsigned gpio); int desc_to_gpio(const struct gpio_desc *desc); +struct gpio_desc *gpio_name_to_desc(const char *name); /* Child properties interface */ struct fwnode_handle; @@ -400,6 +401,12 @@ static inline struct gpio_desc *gpio_to_desc(unsigned gpio) { return ERR_PTR(-EINVAL); } + +static inline struct gpio_desc *gpio_name_to_desc(const char *name) +{ + return ERR_PTR(-EINVAL); +} + static inline int desc_to_gpio(const struct gpio_desc *desc) { /* GPIO can never have been requested */ -- cgit v1.2.3 From f881bab038c9667deab19a85d8666029cbfa6f2c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 23 Sep 2015 16:20:43 -0700 Subject: gpio: keep the GPIO line names internal This refactors the changes to the GPIO line naming mechanism to not have so widespread effects, instead we conclude the patch series by having created a name attribute in the GPIO descriptor, that need not be globally unique, and it will be initialized from the old .names array in struct gpio_chip if it exists, then used in the legacy sysfs code like the array was used previously. The associated changes to name lines from the device tree are controversial and need to stand alone from this. Resulting changes: 1. Remove the export and the header for the gpio_name_to_desc() as so far the only use is inside gpiolib.c. Staticize gpio_name_to_desc() and move it above the only function using it. 2. Only print a warning if there are two GPIO lines with the same name. 
The reason is to preserve current behaviour: before the previous changes to the naming mechanism this would not reject probing the driver, instead the error would occur when trying to export the line in sysfs, so restore this behaviour, but print a friendly warning if names collide. Cc: Johan Hovold Cc: Markus Pargmann Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 72 +++++++++++++++++++++---------------------- include/linux/gpio/consumer.h | 6 ---- 2 files changed, 35 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 09b7316a9834..7c7c39c46eb7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -89,38 +89,6 @@ struct gpio_desc *gpio_to_desc(unsigned gpio) } EXPORT_SYMBOL_GPL(gpio_to_desc); -/** - * Convert a GPIO name to its descriptor - */ -struct gpio_desc *gpio_name_to_desc(const char * const name) -{ - struct gpio_chip *chip; - unsigned long flags; - - spin_lock_irqsave(&gpio_lock, flags); - - list_for_each_entry(chip, &gpio_chips, list) { - int i; - - for (i = 0; i != chip->ngpio; ++i) { - struct gpio_desc *gpio = &chip->desc[i]; - - if (!gpio->name) - continue; - - if (!strcmp(gpio->name, name)) { - spin_unlock_irqrestore(&gpio_lock, flags); - return gpio; - } - } - } - - spin_unlock_irqrestore(&gpio_lock, flags); - - return NULL; -} -EXPORT_SYMBOL_GPL(gpio_name_to_desc); - /** * Get the GPIO descriptor corresponding to the given hw number for this chip. */ @@ -250,6 +218,37 @@ static int gpiochip_add_to_list(struct gpio_chip *chip) return err; } +/** + * Convert a GPIO name to its descriptor + */ +static struct gpio_desc *gpio_name_to_desc(const char * const name) +{ + struct gpio_chip *chip; + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); + + list_for_each_entry(chip, &gpio_chips, list) { + int i; + + for (i = 0; i != chip->ngpio; ++i) { + struct gpio_desc *gpio = &chip->desc[i]; + + if (!gpio->name) + continue; + + if (!strcmp(gpio->name, name)) { + spin_unlock_irqrestore(&gpio_lock, flags); + return gpio; + } + } + } + + spin_unlock_irqrestore(&gpio_lock, flags); + + return NULL; +} + /* * Takes the names from gc->names and checks if they are all unique. If they * are, they are assigned to their gpio descriptors. 
@@ -268,11 +267,10 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) struct gpio_desc *gpio; gpio = gpio_name_to_desc(gc->names[i]); - if (gpio) { - dev_err(gc->dev, "Detected name collision for GPIO name '%s'\n", - gc->names[i]); - return -EEXIST; - } + if (gpio) + dev_warn(gc->dev, "Detected name collision for " + "GPIO name '%s'\n", + gc->names[i]); } /* Then add all names to the GPIO descriptors */ diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 366a3fdbdbea..fb0fde686cb1 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -130,7 +130,6 @@ int gpiod_to_irq(const struct gpio_desc *desc); /* Convert between the old gpio_ and new gpiod_ interfaces */ struct gpio_desc *gpio_to_desc(unsigned gpio); int desc_to_gpio(const struct gpio_desc *desc); -struct gpio_desc *gpio_name_to_desc(const char *name); /* Child properties interface */ struct fwnode_handle; @@ -402,11 +401,6 @@ static inline struct gpio_desc *gpio_to_desc(unsigned gpio) return ERR_PTR(-EINVAL); } -static inline struct gpio_desc *gpio_name_to_desc(const char *name) -{ - return ERR_PTR(-EINVAL); -} - static inline int desc_to_gpio(const struct gpio_desc *desc) { /* GPIO can never have been requested */ -- cgit v1.2.3 From f856f21dbcd162a53e30987a91d75d5ab54a7f80 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 24 Sep 2015 09:37:10 +0200 Subject: ieee802154: remove unnecessary includes This patch removes some unnecessary includes from ieee802154 header, which was introduced by commit b609fb54adfa ("ieee802154: add helpers for frame control checks"). Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index aca228b81464..d3e415674dac 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -25,9 +25,6 @@ #include #include -#include -#include -#include #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 -- cgit v1.2.3 From 3f5c8d3187852b1cbed8546169e6293d6d421751 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 14 Sep 2015 17:37:35 +0300 Subject: device property: Add fwnode_property_match_string() Sometimes it is useful to be able to extract an index of certain string value from an array of strings. A typical use case is to give a name to a DMA channel, PWM, clock and so on. Provide an implementation using unified device property accessors that follows of_property_match_string() but works for all supported fwnodes. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/property.h | 4 +++ 2 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/property.c b/drivers/base/property.c index 660ecec60bdf..de40623bbd8a 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -287,6 +287,28 @@ int device_property_read_string(struct device *dev, const char *propname, } EXPORT_SYMBOL_GPL(device_property_read_string); +/** + * device_property_match_string - find a string in an array and return index + * @dev: Device to get the property of + * @propname: Name of the property holding the array + * @string: String to look for + * + * Find a given string in a string array and if it is found return the + * index back. 
+ * + * Return: %0 if the property was found (success), + * %-EINVAL if given arguments are not valid, + * %-ENODATA if the property does not have a value, + * %-EPROTO if the property is not an array of strings, + * %-ENXIO if no suitable firmware interface is present. + */ +int device_property_match_string(struct device *dev, const char *propname, + const char *string) +{ + return fwnode_property_match_string(dev_fwnode(dev), propname, string); +} +EXPORT_SYMBOL_GPL(device_property_match_string); + #define OF_DEV_PROP_READ_ARRAY(node, propname, type, val, nval) \ (val) ? of_property_read_##type##_array((node), (propname), (val), (nval)) \ : of_property_count_elems_of_size((node), (propname), sizeof(type)) @@ -478,6 +500,52 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode, } EXPORT_SYMBOL_GPL(fwnode_property_read_string); +/** + * fwnode_property_match_string - find a string in an array and return index + * @fwnode: Firmware node to get the property of + * @propname: Name of the property holding the array + * @string: String to look for + * + * Find a given string in a string array and if it is found return the + * index back. + * + * Return: %0 if the property was found (success), + * %-EINVAL if given arguments are not valid, + * %-ENODATA if the property does not have a value, + * %-EPROTO if the property is not an array of strings, + * %-ENXIO if no suitable firmware interface is present. + */ +int fwnode_property_match_string(struct fwnode_handle *fwnode, + const char *propname, const char *string) +{ + const char **values; + int nval, ret, i; + + nval = fwnode_property_read_string_array(fwnode, propname, NULL, 0); + if (nval < 0) + return nval; + + values = kcalloc(nval, sizeof(*values), GFP_KERNEL); + if (!values) + return -ENOMEM; + + ret = fwnode_property_read_string_array(fwnode, propname, values, nval); + if (ret < 0) + goto out; + + ret = -ENODATA; + for (i = 0; i < nval; i++) { + if (!strcmp(values[i], string)) { + ret = i; + break; + } + } +out: + kfree(values); + return ret; +} +EXPORT_SYMBOL_GPL(fwnode_property_match_string); + /** * device_get_next_child_node - Return the next child node handle for a device * @dev: Device to find the next child node for. 
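Despite the "Return: %0 if the property was found" wording in the kernel-doc above, the implementation returns the index of the matching string (ret = i) or a negative error code, which is the point of the helper. A hypothetical use, following the DMA-channel naming example from the commit message (the property name "dma-names" and its values are assumptions, not from the patch):

	#include <linux/property.h>

	/*
	 * With a firmware node carrying dma-names = "rx", "tx", this
	 * returns 1 (the index of "tx"), or a negative error code.
	 */
	static int example_tx_channel_index(struct device *dev)
	{
		return device_property_match_string(dev, "dma-names", "tx");
	}
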
diff --git a/include/linux/property.h b/include/linux/property.h index a59c6ee566c2..463de52fe891 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -40,6 +40,8 @@ int device_property_read_string_array(struct device *dev, const char *propname, const char **val, size_t nval); int device_property_read_string(struct device *dev, const char *propname, const char **val); +int device_property_match_string(struct device *dev, + const char *propname, const char *string); bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname); int fwnode_property_read_u8_array(struct fwnode_handle *fwnode, @@ -59,6 +61,8 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode, size_t nval); int fwnode_property_read_string(struct fwnode_handle *fwnode, const char *propname, const char **val); +int fwnode_property_match_string(struct fwnode_handle *fwnode, + const char *propname, const char *string); struct fwnode_handle *device_get_next_child_node(struct device *dev, struct fwnode_handle *child); -- cgit v1.2.3 From c6790aa9f4fdc26b1246ba36da2fd749663beb65 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 24 Sep 2015 10:34:23 +0300 Subject: IB/mlx5: Remove support for IB_DEVICE_LOCAL_DMA_LKEY Commit 96249d70dd70 ("IB/core: Guarantee that a local_dma_lkey is available") allows ULPs that make use of the local dma key to keep working as before by allocating a DMA MR with local permissions, and converted these consumers to use the MR associated with the PD rather than device->local_dma_lkey. ConnectIB has some known issues with memory registration using the local_dma_lkey (SEND, RDMA, and RECV seem to work OK). Thus don't expose support for it (remove device->local_dma_lkey setting), and take advantage of the above commit such that no regression is introduced to working systems. The local_dma_lkey support will be restored in CX4 depending on FW capability query. 
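The ULP-side pattern this commit relies on can be sketched as follows (an illustration, not from this patch; it assumes the local_dma_lkey field that commit 96249d70dd70 added to struct ib_pd): a consumer fills its scatter/gather entry from the PD's lkey instead of the no-longer-advertised device->local_dma_lkey.

	#include <rdma/ib_verbs.h>

	static void example_fill_sge(struct ib_pd *pd, struct ib_sge *sge,
				     u64 dma_addr, u32 len)
	{
		sge->addr   = dma_addr;
		sge->length = len;
		/* PD-local DMA lkey instead of device->local_dma_lkey */
		sge->lkey   = pd->local_dma_lkey;
	}
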
Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 10 +--------- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 22 ---------------------- include/linux/mlx5/device.h | 11 ----------- include/linux/mlx5/driver.h | 1 - 4 files changed, 1 insertion(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 41d6911e244e..0ab9625911a1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -245,7 +245,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; @@ -1245,18 +1244,10 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; - u32 rsvd_lkey; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); - ret = mlx5_core_query_special_context(dev->mdev, &rsvd_lkey); - if (ret) { - pr_err("Failed to query special context %d\n", ret); - return ret; - } - dev->ib_dev.local_dma_lkey = rsvd_lkey; - devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->p0)) { ret = PTR_ERR(devr->p0); @@ -1418,6 +1409,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index aa0d5ffe92d8..9335e5ae18cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -200,25 +200,3 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) return err; } - -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey) -{ - struct mlx5_cmd_query_special_contexts_mbox_in in; - struct mlx5_cmd_query_special_contexts_mbox_out out; - int err; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - *rsvd_lkey = be32_to_cpu(out.resd_lkey); - - return err; -} -EXPORT_SYMBOL(mlx5_core_query_special_context); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..250b1ff8b48d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -402,17 +402,6 @@ struct mlx5_cmd_teardown_hca_mbox_out { u8 rsvd[8]; }; -struct mlx5_cmd_query_special_contexts_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_special_contexts_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 resd_lkey; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27b53f9a24ad..8b6d6f2154a4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ 
-845,7 +845,6 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey); struct mlx5_profile { u64 mask; -- cgit v1.2.3 From 41907416bc24412f839559ea10a2b9e4eeb21aa7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 25 Sep 2015 15:06:00 -0400 Subject: tracing: Remove unused function trace_current_buffer_lock_reserve() trace_current_buffer_lock_reserve() is not used by anything. Might as well get rid of it. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 3 --- kernel/trace/trace.c | 8 -------- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index ed27917cabc9..71c1191b9954 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -168,9 +168,6 @@ struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); void trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 50820887dce9..a499ec95fc61 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1741,14 +1741,6 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, } EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); -void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) -{ - __trace_buffer_unlock_commit(buffer, event, flags, pc); -} -EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); - void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, -- cgit v1.2.3 From b7f0c959edfb4448f94bd33c39fda08e10ce6ede Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 25 Sep 2015 17:38:44 -0400 Subject: tracing: Pass trace_array into trace_buffer_unlock_commit() In preparation for having trace options be per instance, the trace_array needs to be passed to the trace_buffer_unlock_commit(). The trace_event_buffer_lock_reserve() already passes in the trace_event_file where the trace_array can be derived from. Also added a "__init" to the boot up test event plus function tracing function function_test_events_call(). 
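The caller-side shape of the new API can be sketched as follows (illustrative only; the wrapper name is made up and the tracing-internal types come from <linux/trace_events.h>). Where a trace_event_file is at hand, the trace_array is simply derived from it, exactly as the event_trigger_unlock_commit() hunk below does:

	#include <linux/trace_events.h>

	static void example_event_commit(struct trace_event_file *file,
					 struct ring_buffer *buffer,
					 struct ring_buffer_event *event,
					 unsigned long irq_flags, int pc)
	{
		/* The trace_array now rides along with every commit so
		 * per-instance options can be consulted at commit time. */
		trace_buffer_unlock_commit(file->tr, buffer, event,
					   irq_flags, pc);
	}
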
Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 10 ++++++---- kernel/trace/blktrace.c | 4 ++-- kernel/trace/trace.c | 18 ++++++------------ kernel/trace/trace_events.c | 9 +++++++-- kernel/trace/trace_mmiotrace.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 4 ++-- 6 files changed, 25 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 71c1191b9954..f85693bbcdc3 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -168,10 +168,12 @@ struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct ring_buffer *buffer, +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, +void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); @@ -505,7 +507,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(buffer, event, irq_flags, pc); + trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) event_triggers_post_call(file, tt); @@ -537,7 +539,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file, enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit_regs(buffer, event, + trace_buffer_unlock_commit_regs(file->tr, buffer, event, irq_flags, pc, regs); if (tt) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 90e72a0c3047..973d41d81aa5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -103,7 +103,7 @@ record_it: memcpy((void *) t + sizeof(*t), data, len); if (blk_tracer) - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); } } @@ -278,7 +278,7 @@ record_it: memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); return; } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a499ec95fc61..3329c8efb34f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1683,23 +1683,16 @@ __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *eve ring_buffer_unlock_commit(buffer, event); } -static inline void -__trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); ftrace_trace_userstack(buffer, flags, pc); } - -void trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) -{ - __trace_buffer_unlock_commit(buffer, event, flags, pc); -} EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); static struct ring_buffer *temp_buffer; @@ -1741,7 +1734,8 @@ trace_current_buffer_lock_reserve(struct ring_buffer 
**current_rb, } EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); -void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, +void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7ca09cdc20c2..b2e3d8d80df8 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2891,7 +2891,9 @@ static __init void event_trace_self_tests(void) static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); -static void +static struct trace_array *event_tr; + +static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { @@ -2922,7 +2924,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - trace_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(event_tr, buffer, event, flags, pc); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); @@ -2944,6 +2946,9 @@ static __init void event_trace_self_test_with_function(void) return; } pr_info("Running tests again, along with the function tracer\n"); + event_tr = top_trace_array(); + if (WARN_ON(!event_tr)) + return; event_trace_self_tests(); unregister_ftrace_function(&trace_ops); } diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 638e110c5bfd..2be8c4f2403d 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -314,7 +314,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry->rw = *rw; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, 0, pc); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -344,7 +344,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry->map = *map; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, 0, pc); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 12cbe77b4136..c29d49e0102b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -388,7 +388,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, flags, pc); } static void @@ -416,7 +416,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, flags, pc); } static void notrace -- cgit v1.2.3 From 5ebc76035303016ec41bb752bec156ea9fde7c34 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 17 Sep 2015 14:02:45 +0800 Subject: ACPI, PCI, irq: Do not share PCI IRQ with ISA IRQ Avoid IRQs occupied by ISA IRQs when allocating IRQs for PCI link devices, otherwise it may cause interrupt storm due to incompatible pin attributes. This issue was triggered on a KVM virtual machine, which 1) uses IRQ9 for SCI in high level mode. 
2) defines an PCI interrupt link device (LNKS) with IRQ9 as the only possible irq. 3) has an PCI device referring to link device LNKS. So it causes interrupt storm when enabling the PCI device because PCI IRQ works in low level mode. Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_irq.c | 1 + drivers/acpi/pci_link.c | 13 +++++++++++++ include/linux/acpi.h | 1 + 3 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 6da0f9beab19..c9336751e5e3 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -372,6 +372,7 @@ static int acpi_isa_register_gsi(struct pci_dev *dev) /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF) && + acpi_isa_irq_available(dev->irq) && (acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) { dev_warn(&dev->dev, "PCI INT %c: no GSI - using ISA IRQ %d\n", pin_name(dev->pin), dev->irq); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 3b4ea98e3ea0..246e50d22120 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -553,6 +553,13 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) irq = link->irq.possible[i]; } } + if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) { + printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. " + "Try pci=noacpi or acpi=off\n", + acpi_device_name(link->device), + acpi_device_bid(link->device)); + return -ENODEV; + } /* Attempt to enable the link device at this IRQ. */ if (acpi_pci_link_set(link, irq)) { @@ -821,6 +828,12 @@ void acpi_penalize_isa_irq(int irq, int active) } } +bool acpi_isa_irq_available(int irq) +{ + return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) || + acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS); +} + /* * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be use for diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..43856d19cf4d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -217,6 +217,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); +bool acpi_isa_irq_available(int irq); void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); -- cgit v1.2.3 From c6f7b48e7e21989f4cfc996837d55c595d5dbf87 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 10 Sep 2015 16:00:50 +0200 Subject: PM / Domains: Remove name based API for genpd As all users of the named based APIs now have converted to the non-named based APIs, the time has come to remove them. Signed-off-by: Ulf Hansson Acked-by: Geert Uytterhoeven Acked-by: Kevin Hilman Signed-off-by: Rafael J. 
--- drivers/base/power/domain.c | 90 --------------------------------------------- include/linux/pm_domain.h | 38 ------------------- 2 files changed, 128 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 16550c63d611..a46427e72034 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -53,24 +53,6 @@ static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); -static struct generic_pm_domain *pm_genpd_lookup_name(const char *domain_name) -{ - struct generic_pm_domain *genpd = NULL, *gpd; - - if (IS_ERR_OR_NULL(domain_name)) - return NULL; - - mutex_lock(&gpd_list_lock); - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { - if (!strcmp(gpd->name, domain_name)) { - genpd = gpd; - break; - } - } - mutex_unlock(&gpd_list_lock); - return genpd; -} - /* * Get the generic PM domain for a particular struct device. * This validates the struct device pointer, the PM domain pointer, @@ -295,18 +277,6 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) return ret; } -/** - * pm_genpd_name_poweron - Restore power to a given PM domain and its masters. - * @domain_name: Name of the PM domain to power up. - */ -int pm_genpd_name_poweron(const char *domain_name) -{ - struct generic_pm_domain *genpd; - - genpd = pm_genpd_lookup_name(domain_name); - return genpd ? pm_genpd_poweron(genpd) : -EINVAL; -} - static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) { return GENPD_DEV_TIMED_CALLBACK(genpd, int, save_state, dev, @@ -1316,18 +1286,6 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, return ret; } -/** - * __pm_genpd_name_add_device - Find I/O PM domain and add a device to it. - * @domain_name: Name of the PM domain to add the device to. - * @dev: Device to be added. - * @td: Set of PM QoS timing parameters to attach to the device. - */ -int __pm_genpd_name_add_device(const char *domain_name, struct device *dev, - struct gpd_timing_data *td) -{ - return __pm_genpd_add_device(pm_genpd_lookup_name(domain_name), dev, td); -} - /** * pm_genpd_remove_device - Remove a device from an I/O PM domain. * @genpd: PM domain to remove the device from. @@ -1428,35 +1386,6 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, return ret; } -/** - * pm_genpd_add_subdomain_names - Add a subdomain to an I/O PM domain. - * @master_name: Name of the master PM domain to add the subdomain to. - * @subdomain_name: Name of the subdomain to be added. - */ -int pm_genpd_add_subdomain_names(const char *master_name, - const char *subdomain_name) -{ - struct generic_pm_domain *master = NULL, *subdomain = NULL, *gpd; - - if (IS_ERR_OR_NULL(master_name) || IS_ERR_OR_NULL(subdomain_name)) - return -EINVAL; - - mutex_lock(&gpd_list_lock); - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { - if (!master && !strcmp(gpd->name, master_name)) - master = gpd; - - if (!subdomain && !strcmp(gpd->name, subdomain_name)) - subdomain = gpd; - - if (master && subdomain) - break; - } - mutex_unlock(&gpd_list_lock); - - return pm_genpd_add_subdomain(master, subdomain); -} - /** * pm_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain. * @genpd: Master PM domain to remove the subdomain from. @@ -1565,16 +1494,6 @@ int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state) goto out; } -/** - * pm_genpd_name_attach_cpuidle - Find PM domain and connect cpuidle to it. - * @name: Name of the domain to connect to cpuidle.
- * @state: cpuidle state this domain can manipulate. - */ -int pm_genpd_name_attach_cpuidle(const char *name, int state) -{ - return pm_genpd_attach_cpuidle(pm_genpd_lookup_name(name), state); -} - /** * pm_genpd_detach_cpuidle - Remove the cpuidle connection from a PM domain. * @genpd: PM domain to remove the cpuidle connection from. @@ -1613,15 +1532,6 @@ int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) return ret; } -/** - * pm_genpd_name_detach_cpuidle - Find PM domain and disconnect cpuidle from it. - * @name: Name of the domain to disconnect cpuidle from. - */ -int pm_genpd_name_detach_cpuidle(const char *name) -{ - return pm_genpd_detach_cpuidle(pm_genpd_lookup_name(name)); -} - /* Default device callbacks for generic PM domains. */ /** diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b1cf7e797892..443fc196dc01 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -125,27 +125,18 @@ extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td); -extern int __pm_genpd_name_add_device(const char *domain_name, - struct device *dev, - struct gpd_timing_data *td); - extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); -extern int pm_genpd_add_subdomain_names(const char *master_name, - const char *subdomain_name); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state); -extern int pm_genpd_name_attach_cpuidle(const char *name, int state); extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd); -extern int pm_genpd_name_detach_cpuidle(const char *name); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); -extern int pm_genpd_name_poweron(const char *domain_name); extern void pm_genpd_poweroff_unused(void); extern struct dev_power_governor simple_qos_governor; @@ -166,12 +157,6 @@ static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int __pm_genpd_name_add_device(const char *domain_name, - struct device *dev, - struct gpd_timing_data *td) -{ - return -ENOSYS; -} static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { @@ -182,11 +167,6 @@ static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int pm_genpd_add_subdomain_names(const char *master_name, - const char *subdomain_name) -{ - return -ENOSYS; -} static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target) { @@ -196,18 +176,10 @@ static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int s { return -ENOSYS; } -static inline int pm_genpd_name_attach_cpuidle(const char *name, int state) -{ - return -ENOSYS; -} static inline int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline int pm_genpd_name_detach_cpuidle(const char *name) -{ - return -ENOSYS; -} static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { @@ -216,10 +188,6 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } -static 
inline int pm_genpd_name_poweron(const char *domain_name) -{ - return -ENOSYS; -} static inline void pm_genpd_poweroff_unused(void) {} #endif @@ -229,12 +197,6 @@ static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, return __pm_genpd_add_device(genpd, dev, NULL); } -static inline int pm_genpd_name_add_device(const char *domain_name, - struct device *dev) -{ - return __pm_genpd_name_add_device(domain_name, dev, NULL); -} - #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP extern void pm_genpd_syscore_poweroff(struct device *dev); extern void pm_genpd_syscore_poweron(struct device *dev); -- cgit v1.2.3 From cea3ad93d9a5e054d916f1ad71da02cb306e4828 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 1 Sep 2015 20:37:49 +0200 Subject: PM / Domains: Remove cpuidle attach The power domains code allows tying a cpuidle state to a power domain. Preventing the cpuidle framework from entering a specific idle state, by disabling that state from the power domain framework, is a good idea. Unfortunately, the current implementation has some gaps on an SMP system with a complex cpuidle implementation. Enabling a power domain wakes up all the cpus even if a cpu does not belong to the power domain. There is still work to do to build a logical representation of the hardware dependencies out of power domains (e.g. a cpu belongs to a power domain, these power domains belong to a higher power domain for a cluster, etc.). New code relying on the genpd hierarchy to disable the idle states would make more sense. As the unique user of this code has been removed, let's wipe out this code to prevent new users and to have a clean place to put a new implementation. Signed-off-by: Daniel Lezcano Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 135 -------------------------------------------- include/linux/pm_domain.h | 17 ------ 2 files changed, 152 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index a46427e72034..a544887d03c7 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -122,19 +122,6 @@ static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) smp_mb__after_atomic(); } -static void genpd_recalc_cpu_exit_latency(struct generic_pm_domain *genpd) -{ - s64 usecs64; - - if (!genpd->cpuidle_data) - return; - - usecs64 = genpd->power_on_latency_ns; - do_div(usecs64, NSEC_PER_USEC); - usecs64 += genpd->cpuidle_data->saved_exit_latency; - genpd->cpuidle_data->idle_state->exit_latency = usecs64; -} - static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) { ktime_t time_start; @@ -158,7 +145,6 @@ static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) genpd->power_on_latency_ns = elapsed_ns; genpd->max_off_time_changed = true; - genpd_recalc_cpu_exit_latency(genpd); pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", genpd->name, "on", elapsed_ns); @@ -222,13 +208,6 @@ static int __pm_genpd_poweron(struct generic_pm_domain *genpd) || (genpd->prepared_count > 0 && genpd->suspend_power_off)) return 0; - if (genpd->cpuidle_data) { - cpuidle_pause_and_lock(); - genpd->cpuidle_data->idle_state->disabled = true; - cpuidle_resume_and_unlock(); - goto out; - } - /* * The list is guaranteed not to change while the loop below is being * executed, unless one of the masters' .power_on() callbacks fiddles @@ -381,21 +360,6 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) return -EAGAIN; } - if (genpd->cpuidle_data) { - /* - * If cpuidle_data is
set, cpuidle should turn the domain off - * when the CPU in it is idle. In that case we don't decrement - * the subdomain counts of the master domains, so that power is - * not removed from the current domain prematurely as a result - * of cutting off the masters' power. - */ - genpd->status = GPD_STATE_POWER_OFF; - cpuidle_pause_and_lock(); - genpd->cpuidle_data->idle_state->disabled = false; - cpuidle_resume_and_unlock(); - return 0; - } - if (genpd->power_off) { int ret; @@ -1433,105 +1397,6 @@ out: return ret; } -/** - * pm_genpd_attach_cpuidle - Connect the given PM domain with cpuidle. - * @genpd: PM domain to be connected with cpuidle. - * @state: cpuidle state this domain can disable/enable. - * - * Make a PM domain behave as though it contained a CPU core, that is, instead - * of calling its power down routine it will enable the given cpuidle state so - * that the cpuidle subsystem can power it down (if possible and desirable). - */ -int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state) -{ - struct cpuidle_driver *cpuidle_drv; - struct gpd_cpuidle_data *cpuidle_data; - struct cpuidle_state *idle_state; - int ret = 0; - - if (IS_ERR_OR_NULL(genpd) || state < 0) - return -EINVAL; - - mutex_lock(&genpd->lock); - - if (genpd->cpuidle_data) { - ret = -EEXIST; - goto out; - } - cpuidle_data = kzalloc(sizeof(*cpuidle_data), GFP_KERNEL); - if (!cpuidle_data) { - ret = -ENOMEM; - goto out; - } - cpuidle_drv = cpuidle_driver_ref(); - if (!cpuidle_drv) { - ret = -ENODEV; - goto err_drv; - } - if (cpuidle_drv->state_count <= state) { - ret = -EINVAL; - goto err; - } - idle_state = &cpuidle_drv->states[state]; - if (!idle_state->disabled) { - ret = -EAGAIN; - goto err; - } - cpuidle_data->idle_state = idle_state; - cpuidle_data->saved_exit_latency = idle_state->exit_latency; - genpd->cpuidle_data = cpuidle_data; - genpd_recalc_cpu_exit_latency(genpd); - - out: - mutex_unlock(&genpd->lock); - return ret; - - err: - cpuidle_driver_unref(); - - err_drv: - kfree(cpuidle_data); - goto out; -} - -/** - * pm_genpd_detach_cpuidle - Remove the cpuidle connection from a PM domain. - * @genpd: PM domain to remove the cpuidle connection from. - * - * Remove the cpuidle connection set up by pm_genpd_attach_cpuidle() from the - * given PM domain. - */ -int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) -{ - struct gpd_cpuidle_data *cpuidle_data; - struct cpuidle_state *idle_state; - int ret = 0; - - if (IS_ERR_OR_NULL(genpd)) - return -EINVAL; - - mutex_lock(&genpd->lock); - - cpuidle_data = genpd->cpuidle_data; - if (!cpuidle_data) { - ret = -ENODEV; - goto out; - } - idle_state = cpuidle_data->idle_state; - if (!idle_state->disabled) { - ret = -EAGAIN; - goto out; - } - idle_state->exit_latency = cpuidle_data->saved_exit_latency; - cpuidle_driver_unref(); - genpd->cpuidle_data = NULL; - kfree(cpuidle_data); - - out: - mutex_unlock(&genpd->lock); - return ret; -} - /* Default device callbacks for generic PM domains. 
*/ /** diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 443fc196dc01..384b1d193707 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -15,7 +15,6 @@ #include <linux/err.h> #include <linux/of.h> #include <linux/notifier.h> -#include <linux/cpuidle.h> /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ @@ -38,11 +37,6 @@ struct gpd_dev_ops { bool (*active_wakeup)(struct device *dev); }; -struct gpd_cpuidle_data { - unsigned int saved_exit_latency; - struct cpuidle_state *idle_state; -}; - struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ @@ -68,7 +62,6 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; bool cached_power_down_ok; - struct gpd_cpuidle_data *cpuidle_data; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -131,8 +124,6 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state); -extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); @@ -172,14 +163,6 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int st) -{ - return -ENOSYS; -} -static inline int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) -{ - return -ENOSYS; -} static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { -- cgit v1.2.3 From 9b7537642cb6ad400ee4a95114582ba758b3009c Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 9 Sep 2015 13:17:23 -0500 Subject: usb: musb: set the controller speed based on the config setting Set the Power register HSENAB bit based on musb->config->maximum_speed, so that the glue layer can configure MUSB to work in high-speed or full-speed mode. Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_core.c | 16 ++++++++++------ include/linux/usb/musb.h | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 4a518ff12310..e0e10dd424e0 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1028,18 +1028,22 @@ void musb_start(struct musb *musb) { void __iomem *regs = musb->mregs; u8 devctl = musb_readb(regs, MUSB_DEVCTL); + u8 power; dev_dbg(musb->controller, "<== devctl %02x\n", devctl); musb_enable_interrupts(musb); musb_writeb(regs, MUSB_TESTMODE, 0); - /* put into basic highspeed mode and start session */ - musb_writeb(regs, MUSB_POWER, MUSB_POWER_ISOUPDATE - | MUSB_POWER_HSENAB /* ENSUSPEND wedges tusb */ - /* | MUSB_POWER_ENSUSPEND */ - ); + power = MUSB_POWER_ISOUPDATE; + /* + * treating UNKNOWN as unspecified maximum speed, in which case + * we will default to high-speed.
+ */ + if (musb->config->maximum_speed == USB_SPEED_HIGH || + musb->config->maximum_speed == USB_SPEED_UNKNOWN) + power |= MUSB_POWER_HSENAB; + musb_writeb(regs, MUSB_POWER, power); musb->is_active = 0; devctl = musb_readb(regs, MUSB_DEVCTL); diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index a4ee1b582183..fa6dc132bd1b 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -95,7 +95,7 @@ struct musb_hdrc_config { /* musb CLKIN in Blackfin in MHZ */ unsigned char clkin; #endif - + u32 maximum_speed; }; struct musb_hdrc_platform_data { -- cgit v1.2.3 From 1f91b4cc03556ba0d43ac80621dac8263cda3880 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 6 Aug 2015 18:11:54 -0500 Subject: usb: dwc2: rename all s3c_* to dwc2_* This driver long ago became dwc2.ko, with both peripheral and host roles; there is no point in keeping the old function names. Acked-by: John Youn Tested-by: John Youn Signed-off-by: Felipe Balbi --- arch/arm/mach-s3c64xx/mach-crag6410.c | 4 +- arch/arm/mach-s3c64xx/mach-smartq.c | 4 +- arch/arm/mach-s3c64xx/mach-smdk6410.c | 4 +- arch/arm/plat-samsung/devs.c | 6 +- drivers/usb/dwc2/core.h | 52 +-- drivers/usb/dwc2/core_intr.c | 4 +- drivers/usb/dwc2/debugfs.c | 20 +- drivers/usb/dwc2/gadget.c | 696 ++++++++++++++++---------------- drivers/usb/dwc2/hcd.c | 4 +- drivers/usb/dwc2/platform.c | 8 +- include/linux/platform_data/s3c-hsotg.h | 10 +- 11 files changed, 406 insertions(+), 406 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index 65c426bc45f7..14bd9ae3f476 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -809,7 +809,7 @@ static const struct gpio_led_platform_data gpio_leds_pdata = { .num_leds = ARRAY_SIZE(gpio_leds), }; -static struct s3c_hsotg_plat crag6410_hsotg_pdata; +static struct dwc2_hsotg_plat crag6410_hsotg_pdata; static void __init crag6410_machine_init(void) { @@ -835,7 +835,7 @@ static void __init crag6410_machine_init(void) s3c_i2c0_set_platdata(&i2c0_pdata); s3c_i2c1_set_platdata(&i2c1_pdata); s3c_fb_set_platdata(&crag6410_lcd_pdata); - s3c_hsotg_set_platdata(&crag6410_hsotg_pdata); + dwc2_hsotg_set_platdata(&crag6410_hsotg_pdata); i2c_register_board_info(0, i2c_devs0, ARRAY_SIZE(i2c_devs0)); i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1)); diff --git a/arch/arm/mach-s3c64xx/mach-smartq.c b/arch/arm/mach-s3c64xx/mach-smartq.c index b3d13537a7f0..719843dca510 100644 --- a/arch/arm/mach-s3c64xx/mach-smartq.c +++ b/arch/arm/mach-s3c64xx/mach-smartq.c @@ -189,7 +189,7 @@ static struct s3c_hwmon_pdata smartq_hwmon_pdata __initdata = { }, }; -static struct s3c_hsotg_plat smartq_hsotg_pdata; +static struct dwc2_hsotg_plat smartq_hsotg_pdata; static int __init smartq_lcd_setup_gpio(void) { @@ -382,7 +382,7 @@ void __init smartq_map_io(void) void __init smartq_machine_init(void) { s3c_i2c0_set_platdata(NULL); - s3c_hsotg_set_platdata(&smartq_hsotg_pdata); + dwc2_hsotg_set_platdata(&smartq_hsotg_pdata); s3c_hwmon_set_platdata(&smartq_hwmon_pdata); s3c_sdhci1_set_platdata(&smartq_internal_hsmmc_pdata); s3c_sdhci2_set_platdata(&smartq_internal_hsmmc_pdata); diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c index d590b88bd8a8..286c9bd676e1 100644 --- a/arch/arm/mach-s3c64xx/mach-smdk6410.c +++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c @@ -628,7 +628,7 @@ static struct platform_pwm_backlight_data smdk6410_bl_data = { .enable_gpio = -1, }; -static
struct s3c_hsotg_plat smdk6410_hsotg_pdata; +static struct dwc2_hsotg_plat smdk6410_hsotg_pdata; static void __init smdk6410_map_io(void) { @@ -659,7 +659,7 @@ static void __init smdk6410_machine_init(void) s3c_i2c0_set_platdata(NULL); s3c_i2c1_set_platdata(NULL); s3c_fb_set_platdata(&smdk6410_lcd_pdata); - s3c_hsotg_set_platdata(&smdk6410_hsotg_pdata); + dwc2_hsotg_set_platdata(&smdk6410_hsotg_pdata); samsung_keypad_set_platdata(&smdk6410_keypad_data); diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 83c7d154bde0..82074625de5c 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -1042,11 +1042,11 @@ struct platform_device s3c_device_usb_hsotg = { }, }; -void __init s3c_hsotg_set_platdata(struct s3c_hsotg_plat *pd) +void __init dwc2_hsotg_set_platdata(struct dwc2_hsotg_plat *pd) { - struct s3c_hsotg_plat *npd; + struct dwc2_hsotg_plat *npd; - npd = s3c_set_platdata(pd, sizeof(struct s3c_hsotg_plat), + npd = s3c_set_platdata(pd, sizeof(struct dwc2_hsotg_plat), &s3c_device_usb_hsotg); if (!npd->phy_init) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 0ed87620941b..9655b1ec4f34 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -58,8 +58,8 @@ static inline void do_write(u32 value, void *addr) /* Maximum number of Endpoints/HostChannels */ #define MAX_EPS_CHANNELS 16 -/* s3c-hsotg declarations */ -static const char * const s3c_hsotg_supply_names[] = { +/* dwc2-hsotg declarations */ +static const char * const dwc2_hsotg_supply_names[] = { "vusb_d", /* digital USB supply, 1.2V */ "vusb_a", /* analog USB supply, 1.1V */ }; @@ -85,10 +85,10 @@ static const char * const s3c_hsotg_supply_names[] = { #define EP0_MPS_LIMIT 64 struct dwc2_hsotg; -struct s3c_hsotg_req; +struct dwc2_hsotg_req; /** - * struct s3c_hsotg_ep - driver endpoint definition. + * struct dwc2_hsotg_ep - driver endpoint definition. * @ep: The gadget layer representation of the endpoint. * @name: The driver generated name for the endpoint. * @queue: Queue of requests for this endpoint. @@ -127,11 +127,11 @@ struct s3c_hsotg_req; * as in shared-fifo mode periodic in acts like a single-frame packet * buffer than a fifo) */ -struct s3c_hsotg_ep { +struct dwc2_hsotg_ep { struct usb_ep ep; struct list_head queue; struct dwc2_hsotg *parent; - struct s3c_hsotg_req *req; + struct dwc2_hsotg_req *req; struct dentry *debugfs; unsigned long total_data; @@ -155,12 +155,12 @@ struct s3c_hsotg_ep { }; /** - * struct s3c_hsotg_req - data transfer request + * struct dwc2_hsotg_req - data transfer request * @req: The USB gadget request * @queue: The list of requests for the endpoint this is queued for. * @saved_req_buf: variable to save req.buf when bounce buffers are used. 
*/ -struct s3c_hsotg_req { +struct dwc2_hsotg_req { struct usb_request req; struct list_head queue; void *saved_req_buf; @@ -693,7 +693,7 @@ struct dwc2_hsotg { struct phy *phy; struct usb_phy *uphy; - struct regulator_bulk_data supplies[ARRAY_SIZE(s3c_hsotg_supply_names)]; + struct regulator_bulk_data supplies[ARRAY_SIZE(dwc2_hsotg_supply_names)]; spinlock_t lock; struct mutex init_mutex; @@ -796,7 +796,7 @@ struct dwc2_hsotg { #if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE) /* Gadget structures */ struct usb_gadget_driver *driver; - struct s3c_hsotg_plat *plat; + struct dwc2_hsotg_plat *plat; u32 phyif; int fifo_mem; @@ -815,8 +815,8 @@ struct dwc2_hsotg { unsigned int enabled:1; unsigned int connected:1; unsigned long last_rst; - struct s3c_hsotg_ep *eps_in[MAX_EPS_CHANNELS]; - struct s3c_hsotg_ep *eps_out[MAX_EPS_CHANNELS]; + struct dwc2_hsotg_ep *eps_in[MAX_EPS_CHANNELS]; + struct dwc2_hsotg_ep *eps_out[MAX_EPS_CHANNELS]; u32 g_using_dma; u32 g_rx_fifo_sz; u32 g_np_g_tx_fifo_sz; @@ -1104,30 +1104,30 @@ extern u16 dwc2_get_otg_version(struct dwc2_hsotg *hsotg); /* Gadget defines */ #if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE) -extern int s3c_hsotg_remove(struct dwc2_hsotg *hsotg); -extern int s3c_hsotg_suspend(struct dwc2_hsotg *dwc2); -extern int s3c_hsotg_resume(struct dwc2_hsotg *dwc2); +extern int dwc2_hsotg_remove(struct dwc2_hsotg *hsotg); +extern int dwc2_hsotg_suspend(struct dwc2_hsotg *dwc2); +extern int dwc2_hsotg_resume(struct dwc2_hsotg *dwc2); extern int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq); -extern void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *dwc2, +extern void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *dwc2, bool reset); -extern void s3c_hsotg_core_connect(struct dwc2_hsotg *hsotg); -extern void s3c_hsotg_disconnect(struct dwc2_hsotg *dwc2); -extern int s3c_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode); +extern void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg); +extern void dwc2_hsotg_disconnect(struct dwc2_hsotg *dwc2); +extern int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode); #define dwc2_is_device_connected(hsotg) (hsotg->connected) #else -static inline int s3c_hsotg_remove(struct dwc2_hsotg *dwc2) +static inline int dwc2_hsotg_remove(struct dwc2_hsotg *dwc2) { return 0; } -static inline int s3c_hsotg_suspend(struct dwc2_hsotg *dwc2) +static inline int dwc2_hsotg_suspend(struct dwc2_hsotg *dwc2) { return 0; } -static inline int s3c_hsotg_resume(struct dwc2_hsotg *dwc2) +static inline int dwc2_hsotg_resume(struct dwc2_hsotg *dwc2) { return 0; } static inline int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) { return 0; } -static inline void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *dwc2, +static inline void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *dwc2, bool reset) {} -static inline void s3c_hsotg_core_connect(struct dwc2_hsotg *hsotg) {} -static inline void s3c_hsotg_disconnect(struct dwc2_hsotg *dwc2) {} -static inline int s3c_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, +static inline void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg) {} +static inline void dwc2_hsotg_disconnect(struct dwc2_hsotg *dwc2) {} +static inline int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode) { return 0; } #define dwc2_is_device_connected(hsotg) (0) diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index 927be1e8b3dc..344a859c2153 100644 --- 
a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -129,7 +129,7 @@ static void dwc2_handle_otg_intr(struct dwc2_hsotg *hsotg) gotgctl = readl(hsotg->regs + GOTGCTL); if (dwc2_is_device_mode(hsotg)) - s3c_hsotg_disconnect(hsotg); + dwc2_hsotg_disconnect(hsotg); if (hsotg->op_state == OTG_STATE_B_HOST) { hsotg->op_state = OTG_STATE_B_PERIPHERAL; @@ -322,7 +322,7 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) * Report disconnect if there is any previous session established */ if (dwc2_is_device_mode(hsotg)) - s3c_hsotg_disconnect(hsotg); + dwc2_hsotg_disconnect(hsotg); } /* diff --git a/drivers/usb/dwc2/debugfs.c b/drivers/usb/dwc2/debugfs.c index ef2ee3d9a25d..cee3d771b75e 100644 --- a/drivers/usb/dwc2/debugfs.c +++ b/drivers/usb/dwc2/debugfs.c @@ -57,7 +57,7 @@ static ssize_t testmode_write(struct file *file, const char __user *ubuf, size_t testmode = 0; spin_lock_irqsave(&hsotg->lock, flags); - s3c_hsotg_set_test_mode(hsotg, testmode); + dwc2_hsotg_set_test_mode(hsotg, testmode); spin_unlock_irqrestore(&hsotg->lock, flags); return count; } @@ -256,9 +256,9 @@ static const char *decode_direction(int is_in) */ static int ep_show(struct seq_file *seq, void *v) { - struct s3c_hsotg_ep *ep = seq->private; + struct dwc2_hsotg_ep *ep = seq->private; struct dwc2_hsotg *hsotg = ep->parent; - struct s3c_hsotg_req *req; + struct dwc2_hsotg_req *req; void __iomem *regs = hsotg->regs; int index = ep->index; int show_limit = 15; @@ -326,7 +326,7 @@ static const struct file_operations ep_fops = { }; /** - * s3c_hsotg_create_debug - create debugfs directory and files + * dwc2_hsotg_create_debug - create debugfs directory and files * @hsotg: The driver state * * Create the debugfs files to allow the user to get information @@ -334,7 +334,7 @@ static const struct file_operations ep_fops = { * with the same name as the device itself, in case we end up * with multiple blocks in future systems. */ -static void s3c_hsotg_create_debug(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_create_debug(struct dwc2_hsotg *hsotg) { struct dentry *root; struct dentry *file; @@ -360,7 +360,7 @@ static void s3c_hsotg_create_debug(struct dwc2_hsotg *hsotg) /* Create one file for each out endpoint */ for (epidx = 0; epidx < hsotg->num_of_eps; epidx++) { - struct s3c_hsotg_ep *ep; + struct dwc2_hsotg_ep *ep; ep = hsotg->eps_out[epidx]; if (ep) { @@ -373,7 +373,7 @@ static void s3c_hsotg_create_debug(struct dwc2_hsotg *hsotg) } /* Create one file for each in endpoint. 
EP0 is handled with out eps */ for (epidx = 1; epidx < hsotg->num_of_eps; epidx++) { - struct s3c_hsotg_ep *ep; + struct dwc2_hsotg_ep *ep; ep = hsotg->eps_in[epidx]; if (ep) { @@ -386,10 +386,10 @@ static void s3c_hsotg_create_debug(struct dwc2_hsotg *hsotg) } } #else -static inline void s3c_hsotg_create_debug(struct dwc2_hsotg *hsotg) {} +static inline void dwc2_hsotg_create_debug(struct dwc2_hsotg *hsotg) {} #endif -/* s3c_hsotg_delete_debug is removed as cleanup is done in dwc2_debugfs_exit */ +/* dwc2_hsotg_delete_debug is removed as cleanup is done in dwc2_debugfs_exit */ #define dump_register(nm) \ { \ @@ -737,7 +737,7 @@ int dwc2_debugfs_init(struct dwc2_hsotg *hsotg) } /* Add gadget debugfs nodes */ - s3c_hsotg_create_debug(hsotg); + dwc2_hsotg_create_debug(hsotg); hsotg->regset = devm_kzalloc(hsotg->dev, sizeof(*hsotg->regset), GFP_KERNEL); diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 3ee5b4c77a1f..2ed9ad84d979 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -39,14 +39,14 @@ #include "hw.h" /* conversion functions */ -static inline struct s3c_hsotg_req *our_req(struct usb_request *req) +static inline struct dwc2_hsotg_req *our_req(struct usb_request *req) { - return container_of(req, struct s3c_hsotg_req, req); + return container_of(req, struct dwc2_hsotg_req, req); } -static inline struct s3c_hsotg_ep *our_ep(struct usb_ep *ep) +static inline struct dwc2_hsotg_ep *our_ep(struct usb_ep *ep) { - return container_of(ep, struct s3c_hsotg_ep, ep); + return container_of(ep, struct dwc2_hsotg_ep, ep); } static inline struct dwc2_hsotg *to_hsotg(struct usb_gadget *gadget) @@ -64,7 +64,7 @@ static inline void __bic32(void __iomem *ptr, u32 val) writel(readl(ptr) & ~val, ptr); } -static inline struct s3c_hsotg_ep *index_to_ep(struct dwc2_hsotg *hsotg, +static inline struct dwc2_hsotg_ep *index_to_ep(struct dwc2_hsotg *hsotg, u32 ep_index, u32 dir_in) { if (dir_in) @@ -74,7 +74,7 @@ static inline struct s3c_hsotg_ep *index_to_ep(struct dwc2_hsotg *hsotg, } /* forward declaration of functions */ -static void s3c_hsotg_dump(struct dwc2_hsotg *hsotg); +static void dwc2_hsotg_dump(struct dwc2_hsotg *hsotg); /** * using_dma - return the DMA status of the driver.
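
Before the gadget.c interrupt-mask hunks that follow, it may help to see the pattern the renamed dwc2_hsotg_en_gsint()/dwc2_hsotg_disable_gsint() helpers implement. This is a standalone sketch, not driver code: the regs pointer stands in for the mapped register base, and the GINTMSK offset value is assumed for illustration.

    #include <linux/io.h>
    #include <linux/types.h>

    #define GINTMSK 0x018 /* core interrupt mask register; offset assumed */

    static void gsint_set(void __iomem *regs, u32 ints, bool enable)
    {
            u32 gsintmsk = readl(regs + GINTMSK);

            if (enable)
                    gsintmsk |= ints;   /* unmask the requested interrupts */
            else
                    gsintmsk &= ~ints;  /* mask them again */

            writel(gsintmsk, regs + GINTMSK);
    }
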
@@ -101,11 +101,11 @@ static inline bool using_dma(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_en_gsint - enable one or more of the general interrupt + * dwc2_hsotg_en_gsint - enable one or more of the general interrupt * @hsotg: The device state * @ints: A bitmask of the interrupts to enable */ -static void s3c_hsotg_en_gsint(struct dwc2_hsotg *hsotg, u32 ints) +static void dwc2_hsotg_en_gsint(struct dwc2_hsotg *hsotg, u32 ints) { u32 gsintmsk = readl(hsotg->regs + GINTMSK); u32 new_gsintmsk; @@ -119,11 +119,11 @@ static void s3c_hsotg_en_gsint(struct dwc2_hsotg *hsotg, u32 ints) } /** - * s3c_hsotg_disable_gsint - disable one or more of the general interrupt + * dwc2_hsotg_disable_gsint - disable one or more of the general interrupt * @hsotg: The device state * @ints: A bitmask of the interrupts to enable */ -static void s3c_hsotg_disable_gsint(struct dwc2_hsotg *hsotg, u32 ints) +static void dwc2_hsotg_disable_gsint(struct dwc2_hsotg *hsotg, u32 ints) { u32 gsintmsk = readl(hsotg->regs + GINTMSK); u32 new_gsintmsk; @@ -135,7 +135,7 @@ static void s3c_hsotg_disable_gsint(struct dwc2_hsotg *hsotg, u32 ints) } /** - * s3c_hsotg_ctrl_epint - enable/disable an endpoint irq + * dwc2_hsotg_ctrl_epint - enable/disable an endpoint irq * @hsotg: The device state * @ep: The endpoint index * @dir_in: True if direction is in. @@ -144,7 +144,7 @@ static void s3c_hsotg_disable_gsint(struct dwc2_hsotg *hsotg, u32 ints) * Set or clear the mask for an individual endpoint's interrupt * request. */ -static void s3c_hsotg_ctrl_epint(struct dwc2_hsotg *hsotg, +static void dwc2_hsotg_ctrl_epint(struct dwc2_hsotg *hsotg, unsigned int ep, unsigned int dir_in, unsigned int en) { @@ -166,10 +166,10 @@ static void s3c_hsotg_ctrl_epint(struct dwc2_hsotg *hsotg, } /** - * s3c_hsotg_init_fifo - initialise non-periodic FIFOs + * dwc2_hsotg_init_fifo - initialise non-periodic FIFOs * @hsotg: The device instance. */ -static void s3c_hsotg_init_fifo(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg) { unsigned int ep; unsigned int addr; @@ -248,12 +248,12 @@ static void s3c_hsotg_init_fifo(struct dwc2_hsotg *hsotg) * * Allocate a new USB request structure appropriate for the specified endpoint */ -static struct usb_request *s3c_hsotg_ep_alloc_request(struct usb_ep *ep, +static struct usb_request *dwc2_hsotg_ep_alloc_request(struct usb_ep *ep, gfp_t flags) { - struct s3c_hsotg_req *req; + struct dwc2_hsotg_req *req; - req = kzalloc(sizeof(struct s3c_hsotg_req), flags); + req = kzalloc(sizeof(struct dwc2_hsotg_req), flags); if (!req) return NULL; @@ -269,23 +269,23 @@ static struct usb_request *s3c_hsotg_ep_alloc_request(struct usb_ep *ep, * Returns true if the endpoint is in periodic mode, meaning it is being * used for an Interrupt or ISO transfer. */ -static inline int is_ep_periodic(struct s3c_hsotg_ep *hs_ep) +static inline int is_ep_periodic(struct dwc2_hsotg_ep *hs_ep) { return hs_ep->periodic; } /** - * s3c_hsotg_unmap_dma - unmap the DMA memory being used for the request + * dwc2_hsotg_unmap_dma - unmap the DMA memory being used for the request * @hsotg: The device state. * @hs_ep: The endpoint for the request * @hs_req: The request being processed. * - * This is the reverse of s3c_hsotg_map_dma(), called for the completion + * This is the reverse of dwc2_hsotg_map_dma(), called for the completion * of a request to ensure the buffer is ready for access by the caller. 
*/ -static void s3c_hsotg_unmap_dma(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, - struct s3c_hsotg_req *hs_req) +static void dwc2_hsotg_unmap_dma(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, + struct dwc2_hsotg_req *hs_req) { struct usb_request *req = &hs_req->req; @@ -297,7 +297,7 @@ static void s3c_hsotg_unmap_dma(struct dwc2_hsotg *hsotg, } /** - * s3c_hsotg_write_fifo - write packet Data to the TxFIFO + * dwc2_hsotg_write_fifo - write packet Data to the TxFIFO * @hsotg: The controller state. * @hs_ep: The endpoint we're going to write for. * @hs_req: The request to write data for. @@ -312,9 +312,9 @@ static void s3c_hsotg_unmap_dma(struct dwc2_hsotg *hsotg, * * This routine is only needed for PIO */ -static int s3c_hsotg_write_fifo(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, - struct s3c_hsotg_req *hs_req) +static int dwc2_hsotg_write_fifo(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, + struct dwc2_hsotg_req *hs_req) { bool periodic = is_ep_periodic(hs_ep); u32 gnptxsts = readl(hsotg->regs + GNPTXSTS); @@ -348,7 +348,7 @@ static int s3c_hsotg_write_fifo(struct dwc2_hsotg *hsotg, * previous data has been completely sent. */ if (hs_ep->fifo_load != 0) { - s3c_hsotg_en_gsint(hsotg, GINTSTS_PTXFEMP); + dwc2_hsotg_en_gsint(hsotg, GINTSTS_PTXFEMP); return -ENOSPC; } @@ -369,7 +369,7 @@ static int s3c_hsotg_write_fifo(struct dwc2_hsotg *hsotg, __func__, can_write); if (can_write <= 0) { - s3c_hsotg_en_gsint(hsotg, GINTSTS_PTXFEMP); + dwc2_hsotg_en_gsint(hsotg, GINTSTS_PTXFEMP); return -ENOSPC; } } else if (hsotg->dedicated_fifos && hs_ep->index != 0) { @@ -383,7 +383,7 @@ static int s3c_hsotg_write_fifo(struct dwc2_hsotg *hsotg, "%s: no queue slots available (0x%08x)\n", __func__, gnptxsts); - s3c_hsotg_en_gsint(hsotg, GINTSTS_NPTXFEMP); + dwc2_hsotg_en_gsint(hsotg, GINTSTS_NPTXFEMP); return -ENOSPC; } @@ -414,7 +414,7 @@ static int s3c_hsotg_write_fifo(struct dwc2_hsotg *hsotg, /* it's needed only when we do not use dedicated fifos */ if (!hsotg->dedicated_fifos) - s3c_hsotg_en_gsint(hsotg, + dwc2_hsotg_en_gsint(hsotg, periodic ? GINTSTS_PTXFEMP : GINTSTS_NPTXFEMP); } @@ -443,7 +443,7 @@ static int s3c_hsotg_write_fifo(struct dwc2_hsotg *hsotg, /* it's needed only when we do not use dedicated fifos */ if (!hsotg->dedicated_fifos) - s3c_hsotg_en_gsint(hsotg, + dwc2_hsotg_en_gsint(hsotg, periodic ? GINTSTS_PTXFEMP : GINTSTS_NPTXFEMP); } @@ -475,7 +475,7 @@ static int s3c_hsotg_write_fifo(struct dwc2_hsotg *hsotg, * Return the maximum data that can be queued in one go on a given endpoint * so that transfers that are too long can be split. */ -static unsigned get_ep_limit(struct s3c_hsotg_ep *hs_ep) +static unsigned get_ep_limit(struct dwc2_hsotg_ep *hs_ep) { int index = hs_ep->index; unsigned maxsize; @@ -508,7 +508,7 @@ static unsigned get_ep_limit(struct s3c_hsotg_ep *hs_ep) } /** - * s3c_hsotg_start_req - start a USB request from an endpoint's queue + * dwc2_hsotg_start_req - start a USB request from an endpoint's queue * @hsotg: The controller state. * @hs_ep: The endpoint to process a request for * @hs_req: The request to start. @@ -517,9 +517,9 @@ static unsigned get_ep_limit(struct s3c_hsotg_ep *hs_ep) * Start the given request running by setting the endpoint registers * appropriately, and writing any data to the FIFOs. 
*/ -static void s3c_hsotg_start_req(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, - struct s3c_hsotg_req *hs_req, +static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, + struct dwc2_hsotg_req *hs_req, bool continuing) { struct usb_request *ureq = &hs_req->req; @@ -625,7 +625,7 @@ static void s3c_hsotg_start_req(struct dwc2_hsotg *hsotg, /* * write DMA address to control register, buffer already - * synced by s3c_hsotg_ep_queue(). + * synced by dwc2_hsotg_ep_queue(). */ dma_reg = dir_in ? DIEPDMA(index) : DOEPDMA(index); @@ -659,7 +659,7 @@ static void s3c_hsotg_start_req(struct dwc2_hsotg *hsotg, /* set these anyway, we may need them for non-periodic in */ hs_ep->fifo_load = 0; - s3c_hsotg_write_fifo(hsotg, hs_ep, hs_req); + dwc2_hsotg_write_fifo(hsotg, hs_ep, hs_req); } /* @@ -685,11 +685,11 @@ static void s3c_hsotg_start_req(struct dwc2_hsotg *hsotg, __func__, readl(hsotg->regs + epctrl_reg)); /* enable ep interrupts */ - s3c_hsotg_ctrl_epint(hsotg, hs_ep->index, hs_ep->dir_in, 1); + dwc2_hsotg_ctrl_epint(hsotg, hs_ep->index, hs_ep->dir_in, 1); } /** - * s3c_hsotg_map_dma - map the DMA memory being used for the request + * dwc2_hsotg_map_dma - map the DMA memory being used for the request * @hsotg: The device state. * @hs_ep: The endpoint the request is on. * @req: The request being processed. @@ -700,11 +700,11 @@ static void s3c_hsotg_start_req(struct dwc2_hsotg *hsotg, * DMA memory, then we map the memory and mark our request to allow us to * cleanup on completion. */ -static int s3c_hsotg_map_dma(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, +static int dwc2_hsotg_map_dma(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, struct usb_request *req) { - struct s3c_hsotg_req *hs_req = our_req(req); + struct dwc2_hsotg_req *hs_req = our_req(req); int ret; /* if the length is zero, ignore the DMA data */ @@ -724,8 +724,8 @@ dma_error: return -EIO; } -static int s3c_hsotg_handle_unaligned_buf_start(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, struct s3c_hsotg_req *hs_req) +static int dwc2_hsotg_handle_unaligned_buf_start(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, struct dwc2_hsotg_req *hs_req) { void *req_buf = hs_req->req.buf; @@ -755,8 +755,8 @@ static int s3c_hsotg_handle_unaligned_buf_start(struct dwc2_hsotg *hsotg, return 0; } -static void s3c_hsotg_handle_unaligned_buf_complete(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, struct s3c_hsotg_req *hs_req) +static void dwc2_hsotg_handle_unaligned_buf_complete(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, struct dwc2_hsotg_req *hs_req) { /* If dma is not being used or buffer was aligned */ if (!using_dma(hsotg) || !hs_req->saved_req_buf) @@ -777,11 +777,11 @@ static void s3c_hsotg_handle_unaligned_buf_complete(struct dwc2_hsotg *hsotg, hs_req->saved_req_buf = NULL; } -static int s3c_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, +static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags) { - struct s3c_hsotg_req *hs_req = our_req(req); - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_req *hs_req = our_req(req); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hs = hs_ep->parent; bool first; int ret; @@ -802,13 +802,13 @@ static int s3c_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, req->actual = 0; req->status = -EINPROGRESS; - ret = s3c_hsotg_handle_unaligned_buf_start(hs, hs_ep, hs_req); + ret = dwc2_hsotg_handle_unaligned_buf_start(hs, hs_ep, 
hs_req); if (ret) return ret; /* if we're using DMA, sync the buffers as necessary */ if (using_dma(hs)) { - ret = s3c_hsotg_map_dma(hs, hs_ep, req); + ret = dwc2_hsotg_map_dma(hs, hs_ep, req); if (ret) return ret; } first = list_empty(&hs_ep->queue); list_add_tail(&hs_req->queue, &hs_ep->queue); if (first) - s3c_hsotg_start_req(hs, hs_ep, hs_req, false); + dwc2_hsotg_start_req(hs, hs_ep, hs_req, false); return 0; } -static int s3c_hsotg_ep_queue_lock(struct usb_ep *ep, struct usb_request *req, +static int dwc2_hsotg_ep_queue_lock(struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags) { - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hs = hs_ep->parent; unsigned long flags = 0; int ret = 0; spin_lock_irqsave(&hs->lock, flags); - ret = s3c_hsotg_ep_queue(ep, req, gfp_flags); + ret = dwc2_hsotg_ep_queue(ep, req, gfp_flags); spin_unlock_irqrestore(&hs->lock, flags); return ret; } -static void s3c_hsotg_ep_free_request(struct usb_ep *ep, +static void dwc2_hsotg_ep_free_request(struct usb_ep *ep, struct usb_request *req) { - struct s3c_hsotg_req *hs_req = our_req(req); + struct dwc2_hsotg_req *hs_req = our_req(req); kfree(hs_req); } /** - * s3c_hsotg_complete_oursetup - setup completion callback + * dwc2_hsotg_complete_oursetup - setup completion callback * @ep: The endpoint the request was on. * @req: The request completed. * * Called on completion of any requests the driver itself * submitted that need cleaning up. */ -static void s3c_hsotg_complete_oursetup(struct usb_ep *ep, +static void dwc2_hsotg_complete_oursetup(struct usb_ep *ep, struct usb_request *req) { - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hsotg = hs_ep->parent; dev_dbg(hsotg->dev, "%s: ep %p, req %p\n", __func__, ep, req); - s3c_hsotg_ep_free_request(ep, req); + dwc2_hsotg_ep_free_request(ep, req); } /** @@ -872,10 +872,10 @@ static void s3c_hsotg_complete_oursetup(struct usb_ep *ep, * Convert the given wIndex into a pointer to a driver endpoint * structure, or return NULL if it is not a valid endpoint. */ -static struct s3c_hsotg_ep *ep_from_windex(struct dwc2_hsotg *hsotg, +static struct dwc2_hsotg_ep *ep_from_windex(struct dwc2_hsotg *hsotg, u32 windex) { - struct s3c_hsotg_ep *ep; + struct dwc2_hsotg_ep *ep; int dir = (windex & USB_DIR_IN) ? 1 : 0; int idx = windex & 0x7F; @@ -894,12 +894,12 @@ static struct s3c_hsotg_ep *ep_from_windex(struct dwc2_hsotg *hsotg, } /** - * s3c_hsotg_set_test_mode - Enable usb Test Modes + * dwc2_hsotg_set_test_mode - Enable usb Test Modes * @hsotg: The driver state. * @testmode: requested usb test mode * Enable usb Test Mode requested by the Host. */ -int s3c_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode) +int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode) { int dctl = readl(hsotg->regs + DCTL); @@ -920,7 +920,7 @@ int s3c_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode) } /** - * s3c_hsotg_send_reply - send reply to control request + * dwc2_hsotg_send_reply - send reply to control request * @hsotg: The device state * @ep: Endpoint 0 * @buff: Buffer for request * @length: Length of reply. * * Create a request and queue it on the given endpoint. This is useful as * an internal method of sending replies to certain control requests, etc.
*/ -static int s3c_hsotg_send_reply(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *ep, +static int dwc2_hsotg_send_reply(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *ep, void *buff, int length) { @@ -939,7 +939,7 @@ static int s3c_hsotg_send_reply(struct dwc2_hsotg *hsotg, dev_dbg(hsotg->dev, "%s: buff %p, len %d\n", __func__, buff, length); - req = s3c_hsotg_ep_alloc_request(&ep->ep, GFP_ATOMIC); + req = dwc2_hsotg_ep_alloc_request(&ep->ep, GFP_ATOMIC); hsotg->ep0_reply = req; if (!req) { dev_warn(hsotg->dev, "%s: cannot alloc req\n", __func__); @@ -953,12 +953,12 @@ static int s3c_hsotg_send_reply(struct dwc2_hsotg *hsotg, * STATUS stage. */ req->zero = 0; - req->complete = s3c_hsotg_complete_oursetup; + req->complete = dwc2_hsotg_complete_oursetup; if (length) memcpy(req->buf, buff, length); - ret = s3c_hsotg_ep_queue(&ep->ep, req, GFP_ATOMIC); + ret = dwc2_hsotg_ep_queue(&ep->ep, req, GFP_ATOMIC); if (ret) { dev_warn(hsotg->dev, "%s: cannot queue req\n", __func__); return ret; @@ -968,15 +968,15 @@ static int s3c_hsotg_send_reply(struct dwc2_hsotg *hsotg, } /** - * s3c_hsotg_process_req_status - process request GET_STATUS + * dwc2_hsotg_process_req_status - process request GET_STATUS * @hsotg: The device state * @ctrl: USB control request */ -static int s3c_hsotg_process_req_status(struct dwc2_hsotg *hsotg, +static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg, struct usb_ctrlrequest *ctrl) { - struct s3c_hsotg_ep *ep0 = hsotg->eps_out[0]; - struct s3c_hsotg_ep *ep; + struct dwc2_hsotg_ep *ep0 = hsotg->eps_out[0]; + struct dwc2_hsotg_ep *ep; __le16 reply; int ret; @@ -1013,7 +1013,7 @@ static int s3c_hsotg_process_req_status(struct dwc2_hsotg *hsotg, if (le16_to_cpu(ctrl->wLength) != 2) return -EINVAL; - ret = s3c_hsotg_send_reply(hsotg, ep0, &reply, 2); + ret = dwc2_hsotg_send_reply(hsotg, ep0, &reply, 2); if (ret) { dev_err(hsotg->dev, "%s: failed to send reply\n", __func__); return ret; @@ -1022,7 +1022,7 @@ static int s3c_hsotg_process_req_status(struct dwc2_hsotg *hsotg, return 1; } -static int s3c_hsotg_ep_sethalt(struct usb_ep *ep, int value); +static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value); /** * get_ep_head - return the first request on the endpoint @@ -1030,27 +1030,27 @@ static int s3c_hsotg_ep_sethalt(struct usb_ep *ep, int value); * * Get the first request on the endpoint. 
*/ -static struct s3c_hsotg_req *get_ep_head(struct s3c_hsotg_ep *hs_ep) +static struct dwc2_hsotg_req *get_ep_head(struct dwc2_hsotg_ep *hs_ep) { if (list_empty(&hs_ep->queue)) return NULL; - return list_first_entry(&hs_ep->queue, struct s3c_hsotg_req, queue); + return list_first_entry(&hs_ep->queue, struct dwc2_hsotg_req, queue); } /** - * s3c_hsotg_process_req_feature - process request {SET,CLEAR}_FEATURE + * dwc2_hsotg_process_req_feature - process request {SET,CLEAR}_FEATURE * @hsotg: The device state * @ctrl: USB control request */ -static int s3c_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, +static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, struct usb_ctrlrequest *ctrl) { - struct s3c_hsotg_ep *ep0 = hsotg->eps_out[0]; - struct s3c_hsotg_req *hs_req; + struct dwc2_hsotg_ep *ep0 = hsotg->eps_out[0]; + struct dwc2_hsotg_req *hs_req; bool restart; bool set = (ctrl->bRequest == USB_REQ_SET_FEATURE); - struct s3c_hsotg_ep *ep; + struct dwc2_hsotg_ep *ep; int ret; bool halted; u32 recip; @@ -1074,7 +1074,7 @@ static int s3c_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, return -EINVAL; hsotg->test_mode = wIndex >> 8; - ret = s3c_hsotg_send_reply(hsotg, ep0, NULL, 0); + ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); if (ret) { dev_err(hsotg->dev, "%s: failed to send reply\n", __func__); @@ -1098,9 +1098,9 @@ static int s3c_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, case USB_ENDPOINT_HALT: halted = ep->halted; - s3c_hsotg_ep_sethalt(&ep->ep, set); + dwc2_hsotg_ep_sethalt(&ep->ep, set); - ret = s3c_hsotg_send_reply(hsotg, ep0, NULL, 0); + ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); if (ret) { dev_err(hsotg->dev, "%s: failed to send reply\n", __func__); @@ -1134,7 +1134,7 @@ static int s3c_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, restart = !list_empty(&ep->queue); if (restart) { hs_req = get_ep_head(ep); - s3c_hsotg_start_req(hsotg, ep, + dwc2_hsotg_start_req(hsotg, ep, hs_req, false); } } @@ -1152,17 +1152,17 @@ static int s3c_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, return 1; } -static void s3c_hsotg_enqueue_setup(struct dwc2_hsotg *hsotg); +static void dwc2_hsotg_enqueue_setup(struct dwc2_hsotg *hsotg); /** - * s3c_hsotg_stall_ep0 - stall ep0 + * dwc2_hsotg_stall_ep0 - stall ep0 * @hsotg: The device state * * Set stall for ep0 as response for setup request. */ -static void s3c_hsotg_stall_ep0(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_stall_ep0(struct dwc2_hsotg *hsotg) { - struct s3c_hsotg_ep *ep0 = hsotg->eps_out[0]; + struct dwc2_hsotg_ep *ep0 = hsotg->eps_out[0]; u32 reg; u32 ctrl; @@ -1187,11 +1187,11 @@ static void s3c_hsotg_stall_ep0(struct dwc2_hsotg *hsotg) * complete won't be called, so we enqueue * setup request here */ - s3c_hsotg_enqueue_setup(hsotg); + dwc2_hsotg_enqueue_setup(hsotg); } /** - * s3c_hsotg_process_control - process a control request + * dwc2_hsotg_process_control - process a control request * @hsotg: The device state * @ctrl: The control request received * @@ -1199,10 +1199,10 @@ static void s3c_hsotg_stall_ep0(struct dwc2_hsotg *hsotg) * needs to work out what to do next (and whether to pass it on to the * gadget driver). 
*/ -static void s3c_hsotg_process_control(struct dwc2_hsotg *hsotg, +static void dwc2_hsotg_process_control(struct dwc2_hsotg *hsotg, struct usb_ctrlrequest *ctrl) { - struct s3c_hsotg_ep *ep0 = hsotg->eps_out[0]; + struct dwc2_hsotg_ep *ep0 = hsotg->eps_out[0]; int ret = 0; u32 dcfg; @@ -1233,16 +1233,16 @@ static void s3c_hsotg_process_control(struct dwc2_hsotg *hsotg, dev_info(hsotg->dev, "new address %d\n", ctrl->wValue); - ret = s3c_hsotg_send_reply(hsotg, ep0, NULL, 0); + ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); return; case USB_REQ_GET_STATUS: - ret = s3c_hsotg_process_req_status(hsotg, ctrl); + ret = dwc2_hsotg_process_req_status(hsotg, ctrl); break; case USB_REQ_CLEAR_FEATURE: case USB_REQ_SET_FEATURE: - ret = s3c_hsotg_process_req_feature(hsotg, ctrl); + ret = dwc2_hsotg_process_req_feature(hsotg, ctrl); break; } } @@ -1263,21 +1263,21 @@ static void s3c_hsotg_process_control(struct dwc2_hsotg *hsotg, */ if (ret < 0) - s3c_hsotg_stall_ep0(hsotg); + dwc2_hsotg_stall_ep0(hsotg); } /** - * s3c_hsotg_complete_setup - completion of a setup transfer + * dwc2_hsotg_complete_setup - completion of a setup transfer * @ep: The endpoint the request was on. * @req: The request completed. * * Called on completion of any requests the driver itself submitted for * EP0 setup packets */ -static void s3c_hsotg_complete_setup(struct usb_ep *ep, +static void dwc2_hsotg_complete_setup(struct usb_ep *ep, struct usb_request *req) { - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hsotg = hs_ep->parent; if (req->status < 0) { @@ -1287,23 +1287,23 @@ static void s3c_hsotg_complete_setup(struct usb_ep *ep, spin_lock(&hsotg->lock); if (req->actual == 0) - s3c_hsotg_enqueue_setup(hsotg); + dwc2_hsotg_enqueue_setup(hsotg); else - s3c_hsotg_process_control(hsotg, req->buf); + dwc2_hsotg_process_control(hsotg, req->buf); spin_unlock(&hsotg->lock); } /** - * s3c_hsotg_enqueue_setup - start a request for EP0 packets + * dwc2_hsotg_enqueue_setup - start a request for EP0 packets * @hsotg: The device state. * * Enqueue a request on EP0 if necessary to receive any SETUP packets * received from the host.
*/ -static void s3c_hsotg_enqueue_setup(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_enqueue_setup(struct dwc2_hsotg *hsotg) { struct usb_request *req = hsotg->ctrl_req; - struct s3c_hsotg_req *hs_req = our_req(req); + struct dwc2_hsotg_req *hs_req = our_req(req); int ret; dev_dbg(hsotg->dev, "%s: queueing setup request\n", __func__); @@ -1311,7 +1311,7 @@ static void s3c_hsotg_enqueue_setup(struct dwc2_hsotg *hsotg) req->zero = 0; req->length = 8; req->buf = hsotg->ctrl_buff; - req->complete = s3c_hsotg_complete_setup; + req->complete = dwc2_hsotg_complete_setup; if (!list_empty(&hs_req->queue)) { dev_dbg(hsotg->dev, "%s already queued???\n", __func__); @@ -1322,7 +1322,7 @@ static void s3c_hsotg_enqueue_setup(struct dwc2_hsotg *hsotg) hsotg->eps_out[0]->send_zlp = 0; hsotg->ep0_state = DWC2_EP0_SETUP; - ret = s3c_hsotg_ep_queue(&hsotg->eps_out[0]->ep, req, GFP_ATOMIC); + ret = dwc2_hsotg_ep_queue(&hsotg->eps_out[0]->ep, req, GFP_ATOMIC); if (ret < 0) { dev_err(hsotg->dev, "%s: failed queue (%d)\n", __func__, ret); /* @@ -1332,8 +1332,8 @@ static void s3c_hsotg_enqueue_setup(struct dwc2_hsotg *hsotg) } } -static void s3c_hsotg_program_zlp(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep) +static void dwc2_hsotg_program_zlp(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep) { u32 ctrl; u8 index = hs_ep->index; @@ -1359,7 +1359,7 @@ static void s3c_hsotg_program_zlp(struct dwc2_hsotg *hsotg, } /** - * s3c_hsotg_complete_request - complete a request given to us + * dwc2_hsotg_complete_request - complete a request given to us * @hsotg: The device state. * @hs_ep: The endpoint the request was on. * @hs_req: The request to complete. @@ -1371,9 +1371,9 @@ static void s3c_hsotg_program_zlp(struct dwc2_hsotg *hsotg, * * Note, expects the ep to already be locked as appropriate. */ -static void s3c_hsotg_complete_request(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, - struct s3c_hsotg_req *hs_req, +static void dwc2_hsotg_complete_request(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, + struct dwc2_hsotg_req *hs_req, int result) { bool restart; @@ -1394,13 +1394,13 @@ static void s3c_hsotg_complete_request(struct dwc2_hsotg *hsotg, if (hs_req->req.status == -EINPROGRESS) hs_req->req.status = result; - s3c_hsotg_handle_unaligned_buf_complete(hsotg, hs_ep, hs_req); + dwc2_hsotg_handle_unaligned_buf_complete(hsotg, hs_ep, hs_req); hs_ep->req = NULL; list_del_init(&hs_req->queue); if (using_dma(hsotg)) - s3c_hsotg_unmap_dma(hsotg, hs_ep, hs_req); + dwc2_hsotg_unmap_dma(hsotg, hs_ep, hs_req); /* * call the complete request with the locks off, just in case the @@ -1423,13 +1423,13 @@ static void s3c_hsotg_complete_request(struct dwc2_hsotg *hsotg, restart = !list_empty(&hs_ep->queue); if (restart) { hs_req = get_ep_head(hs_ep); - s3c_hsotg_start_req(hsotg, hs_ep, hs_req, false); + dwc2_hsotg_start_req(hsotg, hs_ep, hs_req, false); } } } /** - * s3c_hsotg_rx_data - receive data from the FIFO for an endpoint + * dwc2_hsotg_rx_data - receive data from the FIFO for an endpoint * @hsotg: The device state. * @ep_idx: The endpoint index for the data * @size: The size of data in the fifo, in bytes @@ -1438,10 +1438,10 @@ static void s3c_hsotg_complete_request(struct dwc2_hsotg *hsotg, * endpoint, so sort out whether we need to read the data into a request * that has been made for that endpoint. 
*/ -static void s3c_hsotg_rx_data(struct dwc2_hsotg *hsotg, int ep_idx, int size) +static void dwc2_hsotg_rx_data(struct dwc2_hsotg *hsotg, int ep_idx, int size) { - struct s3c_hsotg_ep *hs_ep = hsotg->eps_out[ep_idx]; - struct s3c_hsotg_req *hs_req = hs_ep->req; + struct dwc2_hsotg_ep *hs_ep = hsotg->eps_out[ep_idx]; + struct dwc2_hsotg_req *hs_req = hs_ep->req; void __iomem *fifo = hsotg->regs + EPFIFO(ep_idx); int to_read; int max_req; @@ -1492,7 +1492,7 @@ static void s3c_hsotg_rx_data(struct dwc2_hsotg *hsotg, int ep_idx, int size) } /** - * s3c_hsotg_ep0_zlp - send/receive zero-length packet on control endpoint + * dwc2_hsotg_ep0_zlp - send/receive zero-length packet on control endpoint * @hsotg: The device instance * @dir_in: If IN zlp * @@ -1503,17 +1503,17 @@ static void s3c_hsotg_rx_data(struct dwc2_hsotg *hsotg, int ep_idx, int size) * currently believed that we do not need to wait for any space in * the TxFIFO. */ -static void s3c_hsotg_ep0_zlp(struct dwc2_hsotg *hsotg, bool dir_in) +static void dwc2_hsotg_ep0_zlp(struct dwc2_hsotg *hsotg, bool dir_in) { /* eps_out[0] is used in both directions */ hsotg->eps_out[0]->dir_in = dir_in; hsotg->ep0_state = dir_in ? DWC2_EP0_STATUS_IN : DWC2_EP0_STATUS_OUT; - s3c_hsotg_program_zlp(hsotg, hsotg->eps_out[0]); + dwc2_hsotg_program_zlp(hsotg, hsotg->eps_out[0]); } /** - * s3c_hsotg_handle_outdone - handle receiving OutDone/SetupDone from RXFIFO + * dwc2_hsotg_handle_outdone - handle receiving OutDone/SetupDone from RXFIFO * @hsotg: The device instance * @epnum: The endpoint received from * @@ -1521,11 +1521,11 @@ static void s3c_hsotg_ep0_zlp(struct dwc2_hsotg *hsotg, bool dir_in) * transfer for an OUT endpoint has been completed, either by a short * packet or by the finish of a transfer. */ -static void s3c_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum) +static void dwc2_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum) { u32 epsize = readl(hsotg->regs + DOEPTSIZ(epnum)); - struct s3c_hsotg_ep *hs_ep = hsotg->eps_out[epnum]; - struct s3c_hsotg_req *hs_req = hs_ep->req; + struct dwc2_hsotg_ep *hs_ep = hsotg->eps_out[epnum]; + struct dwc2_hsotg_req *hs_req = hs_ep->req; struct usb_request *req = &hs_req->req; unsigned size_left = DXEPTSIZ_XFERSIZE_GET(epsize); int result = 0; @@ -1537,8 +1537,8 @@ static void s3c_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum) if (epnum == 0 && hsotg->ep0_state == DWC2_EP0_STATUS_OUT) { dev_dbg(hsotg->dev, "zlp packet received\n"); - s3c_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); - s3c_hsotg_enqueue_setup(hsotg); + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); + dwc2_hsotg_enqueue_setup(hsotg); return; } @@ -1562,7 +1562,7 @@ static void s3c_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum) /* if there is more request to do, schedule new transfer */ if (req->actual < req->length && size_left == 0) { - s3c_hsotg_start_req(hsotg, hs_ep, hs_req, true); + dwc2_hsotg_start_req(hsotg, hs_ep, hs_req, true); return; } @@ -1578,20 +1578,20 @@ static void s3c_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum) if (epnum == 0 && hsotg->ep0_state == DWC2_EP0_DATA_OUT) { /* Move to STATUS IN */ - s3c_hsotg_ep0_zlp(hsotg, true); + dwc2_hsotg_ep0_zlp(hsotg, true); return; } - s3c_hsotg_complete_request(hsotg, hs_ep, hs_req, result); + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, result); } /** - * s3c_hsotg_read_frameno - read current frame number + * dwc2_hsotg_read_frameno - read current frame number * @hsotg: The device instance * * Return the current 
frame number */ -static u32 s3c_hsotg_read_frameno(struct dwc2_hsotg *hsotg) +static u32 dwc2_hsotg_read_frameno(struct dwc2_hsotg *hsotg) { u32 dsts; @@ -1603,7 +1603,7 @@ static u32 s3c_hsotg_read_frameno(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_handle_rx - RX FIFO has data + * dwc2_hsotg_handle_rx - RX FIFO has data * @hsotg: The device instance * * The IRQ handler has detected that the RX FIFO has some data in it @@ -1618,7 +1618,7 @@ static u32 s3c_hsotg_read_frameno(struct dwc2_hsotg *hsotg) * as the actual data should be sent to the memory directly and we turn * on the completion interrupts to get notifications of transfer completion. */ -static void s3c_hsotg_handle_rx(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_handle_rx(struct dwc2_hsotg *hsotg) { u32 grxstsr = readl(hsotg->regs + GRXSTSP); u32 epnum, status, size; @@ -1641,55 +1641,55 @@ static void s3c_hsotg_handle_rx(struct dwc2_hsotg *hsotg) case GRXSTS_PKTSTS_OUTDONE: dev_dbg(hsotg->dev, "OutDone (Frame=0x%08x)\n", - s3c_hsotg_read_frameno(hsotg)); + dwc2_hsotg_read_frameno(hsotg)); if (!using_dma(hsotg)) - s3c_hsotg_handle_outdone(hsotg, epnum); + dwc2_hsotg_handle_outdone(hsotg, epnum); break; case GRXSTS_PKTSTS_SETUPDONE: dev_dbg(hsotg->dev, "SetupDone (Frame=0x%08x, DOPEPCTL=0x%08x)\n", - s3c_hsotg_read_frameno(hsotg), + dwc2_hsotg_read_frameno(hsotg), readl(hsotg->regs + DOEPCTL(0))); /* - * Call s3c_hsotg_handle_outdone here if it was not called from + * Call dwc2_hsotg_handle_outdone here if it was not called from * GRXSTS_PKTSTS_OUTDONE. That is, if the core didn't * generate GRXSTS_PKTSTS_OUTDONE for setup packet. */ if (hsotg->ep0_state == DWC2_EP0_SETUP) - s3c_hsotg_handle_outdone(hsotg, epnum); + dwc2_hsotg_handle_outdone(hsotg, epnum); break; case GRXSTS_PKTSTS_OUTRX: - s3c_hsotg_rx_data(hsotg, epnum, size); + dwc2_hsotg_rx_data(hsotg, epnum, size); break; case GRXSTS_PKTSTS_SETUPRX: dev_dbg(hsotg->dev, "SetupRX (Frame=0x%08x, DOPEPCTL=0x%08x)\n", - s3c_hsotg_read_frameno(hsotg), + dwc2_hsotg_read_frameno(hsotg), readl(hsotg->regs + DOEPCTL(0))); WARN_ON(hsotg->ep0_state != DWC2_EP0_SETUP); - s3c_hsotg_rx_data(hsotg, epnum, size); + dwc2_hsotg_rx_data(hsotg, epnum, size); break; default: dev_warn(hsotg->dev, "%s: unknown status %08x\n", __func__, grxstsr); - s3c_hsotg_dump(hsotg); + dwc2_hsotg_dump(hsotg); break; } } /** - * s3c_hsotg_ep0_mps - turn max packet size into register setting + * dwc2_hsotg_ep0_mps - turn max packet size into register setting * @mps: The maximum packet size in bytes. */ -static u32 s3c_hsotg_ep0_mps(unsigned int mps) +static u32 dwc2_hsotg_ep0_mps(unsigned int mps) { switch (mps) { case 64: @@ -1708,7 +1708,7 @@ static u32 s3c_hsotg_ep0_mps(unsigned int mps) } /** - * s3c_hsotg_set_ep_maxpacket - set endpoint's max-packet field + * dwc2_hsotg_set_ep_maxpacket - set endpoint's max-packet field * @hsotg: The driver state. * @ep: The index number of the endpoint * @mps: The maximum packet size in bytes @@ -1716,10 +1716,10 @@ static u32 s3c_hsotg_ep0_mps(unsigned int mps) * Configure the maximum packet size for the given endpoint, updating * the hardware control registers to reflect this. 
*/ -static void s3c_hsotg_set_ep_maxpacket(struct dwc2_hsotg *hsotg, +static void dwc2_hsotg_set_ep_maxpacket(struct dwc2_hsotg *hsotg, unsigned int ep, unsigned int mps, unsigned int dir_in) { - struct s3c_hsotg_ep *hs_ep; + struct dwc2_hsotg_ep *hs_ep; void __iomem *regs = hsotg->regs; u32 mpsval; u32 mcval; @@ -1731,7 +1731,7 @@ static void s3c_hsotg_set_ep_maxpacket(struct dwc2_hsotg *hsotg, if (ep == 0) { /* EP0 is a special case */ - mpsval = s3c_hsotg_ep0_mps(mps); + mpsval = dwc2_hsotg_ep0_mps(mps); if (mpsval > 3) goto bad_mps; hs_ep->ep.maxpacket = mps; @@ -1766,11 +1766,11 @@ bad_mps: } /** - * s3c_hsotg_txfifo_flush - flush Tx FIFO + * dwc2_hsotg_txfifo_flush - flush Tx FIFO * @hsotg: The driver state * @idx: The index for the endpoint (0..15) */ -static void s3c_hsotg_txfifo_flush(struct dwc2_hsotg *hsotg, unsigned int idx) +static void dwc2_hsotg_txfifo_flush(struct dwc2_hsotg *hsotg, unsigned int idx) { int timeout; int val; @@ -1799,17 +1799,17 @@ static void s3c_hsotg_txfifo_flush(struct dwc2_hsotg *hsotg, unsigned int idx) } /** - * s3c_hsotg_trytx - check to see if anything needs transmitting + * dwc2_hsotg_trytx - check to see if anything needs transmitting * @hsotg: The driver state * @hs_ep: The driver endpoint to check. * * Check to see if there is a request that has data to send, and if so * make an attempt to write data into the FIFO. */ -static int s3c_hsotg_trytx(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep) +static int dwc2_hsotg_trytx(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep) { - struct s3c_hsotg_req *hs_req = hs_ep->req; + struct dwc2_hsotg_req *hs_req = hs_ep->req; if (!hs_ep->dir_in || !hs_req) { /** @@ -1817,7 +1817,7 @@ static int s3c_hsotg_trytx(struct dwc2_hsotg *hsotg, * for endpoints, excepting ep0 */ if (hs_ep->index != 0) - s3c_hsotg_ctrl_epint(hsotg, hs_ep->index, + dwc2_hsotg_ctrl_epint(hsotg, hs_ep->index, hs_ep->dir_in, 0); return 0; } @@ -1825,24 +1825,24 @@ static int s3c_hsotg_trytx(struct dwc2_hsotg *hsotg, if (hs_req->req.actual < hs_req->req.length) { dev_dbg(hsotg->dev, "trying to write more for ep%d\n", hs_ep->index); - return s3c_hsotg_write_fifo(hsotg, hs_ep, hs_req); + return dwc2_hsotg_write_fifo(hsotg, hs_ep, hs_req); } return 0; } /** - * s3c_hsotg_complete_in - complete IN transfer + * dwc2_hsotg_complete_in - complete IN transfer * @hsotg: The device state. * @hs_ep: The endpoint that has just completed. * * An IN transfer has been completed, update the transfer's state and then * call the relevant completion routines. 
*/ -static void s3c_hsotg_complete_in(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep) +static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep) { - struct s3c_hsotg_req *hs_req = hs_ep->req; + struct dwc2_hsotg_req *hs_req = hs_ep->req; u32 epsize = readl(hsotg->regs + DIEPTSIZ(hs_ep->index)); int size_left, size_done; @@ -1854,19 +1854,19 @@ static void s3c_hsotg_complete_in(struct dwc2_hsotg *hsotg, /* Finish ZLP handling for IN EP0 transactions */ if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_STATUS_IN) { dev_dbg(hsotg->dev, "zlp packet sent\n"); - s3c_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); if (hsotg->test_mode) { int ret; - ret = s3c_hsotg_set_test_mode(hsotg, hsotg->test_mode); + ret = dwc2_hsotg_set_test_mode(hsotg, hsotg->test_mode); if (ret < 0) { dev_dbg(hsotg->dev, "Invalid Test #%d\n", hsotg->test_mode); - s3c_hsotg_stall_ep0(hsotg); + dwc2_hsotg_stall_ep0(hsotg); return; } } - s3c_hsotg_enqueue_setup(hsotg); + dwc2_hsotg_enqueue_setup(hsotg); return; } @@ -1895,13 +1895,13 @@ static void s3c_hsotg_complete_in(struct dwc2_hsotg *hsotg, if (!size_left && hs_req->req.actual < hs_req->req.length) { dev_dbg(hsotg->dev, "%s trying more for req...\n", __func__); - s3c_hsotg_start_req(hsotg, hs_ep, hs_req, true); + dwc2_hsotg_start_req(hsotg, hs_ep, hs_req, true); return; } /* Zlp for all endpoints, for ep0 only in DATA IN stage */ if (hs_ep->send_zlp) { - s3c_hsotg_program_zlp(hsotg, hs_ep); + dwc2_hsotg_program_zlp(hsotg, hs_ep); hs_ep->send_zlp = 0; /* transfer will be completed on next complete interrupt */ return; @@ -1909,25 +1909,25 @@ static void s3c_hsotg_complete_in(struct dwc2_hsotg *hsotg, if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_DATA_IN) { /* Move to STATUS OUT */ - s3c_hsotg_ep0_zlp(hsotg, false); + dwc2_hsotg_ep0_zlp(hsotg, false); return; } - s3c_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); } /** - * s3c_hsotg_epint - handle an in/out endpoint interrupt + * dwc2_hsotg_epint - handle an in/out endpoint interrupt * @hsotg: The driver state * @idx: The index for the endpoint (0..15) * @dir_in: Set if this is an IN endpoint * * Process and clear any interrupt pending for an individual endpoint */ -static void s3c_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, +static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, int dir_in) { - struct s3c_hsotg_ep *hs_ep = index_to_ep(hsotg, idx, dir_in); + struct dwc2_hsotg_ep *hs_ep = index_to_ep(hsotg, idx, dir_in); u32 epint_reg = dir_in ? DIEPINT(idx) : DOEPINT(idx); u32 epctl_reg = dir_in ? DIEPCTL(idx) : DOEPCTL(idx); u32 epsiz_reg = dir_in ? DIEPTSIZ(idx) : DOEPTSIZ(idx); @@ -1972,17 +1972,17 @@ static void s3c_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, * at completing IN requests here */ if (dir_in) { - s3c_hsotg_complete_in(hsotg, hs_ep); + dwc2_hsotg_complete_in(hsotg, hs_ep); if (idx == 0 && !hs_ep->req) - s3c_hsotg_enqueue_setup(hsotg); + dwc2_hsotg_enqueue_setup(hsotg); } else if (using_dma(hsotg)) { /* * We're using DMA, we need to fire an OutDone here * as we ignore the RXFIFO. 
*/ - s3c_hsotg_handle_outdone(hsotg, idx); + dwc2_hsotg_handle_outdone(hsotg, idx); } } @@ -1992,7 +1992,7 @@ static void s3c_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, if (dir_in) { int epctl = readl(hsotg->regs + epctl_reg); - s3c_hsotg_txfifo_flush(hsotg, hs_ep->fifo_index); + dwc2_hsotg_txfifo_flush(hsotg, hs_ep->fifo_index); if ((epctl & DXEPCTL_STALL) && (epctl & DXEPCTL_EPTYPE_BULK)) { @@ -2021,7 +2021,7 @@ static void s3c_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, if (dir_in) WARN_ON_ONCE(1); else - s3c_hsotg_handle_outdone(hsotg, 0); + dwc2_hsotg_handle_outdone(hsotg, 0); } } @@ -2047,19 +2047,19 @@ static void s3c_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, dev_dbg(hsotg->dev, "%s: ep%d: TxFIFOEmpty\n", __func__, idx); if (!using_dma(hsotg)) - s3c_hsotg_trytx(hsotg, hs_ep); + dwc2_hsotg_trytx(hsotg, hs_ep); } } } /** - * s3c_hsotg_irq_enumdone - Handle EnumDone interrupt (enumeration done) + * dwc2_hsotg_irq_enumdone - Handle EnumDone interrupt (enumeration done) * @hsotg: The device state. * * Handle updating the device settings after the enumeration phase has * been completed. */ -static void s3c_hsotg_irq_enumdone(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_irq_enumdone(struct dwc2_hsotg *hsotg) { u32 dsts = readl(hsotg->regs + DSTS); int ep0_mps = 0, ep_mps = 8; @@ -2113,19 +2113,19 @@ static void s3c_hsotg_irq_enumdone(struct dwc2_hsotg *hsotg) if (ep0_mps) { int i; /* Initialize ep0 for both in and out directions */ - s3c_hsotg_set_ep_maxpacket(hsotg, 0, ep0_mps, 1); - s3c_hsotg_set_ep_maxpacket(hsotg, 0, ep0_mps, 0); + dwc2_hsotg_set_ep_maxpacket(hsotg, 0, ep0_mps, 1); + dwc2_hsotg_set_ep_maxpacket(hsotg, 0, ep0_mps, 0); for (i = 1; i < hsotg->num_of_eps; i++) { if (hsotg->eps_in[i]) - s3c_hsotg_set_ep_maxpacket(hsotg, i, ep_mps, 1); + dwc2_hsotg_set_ep_maxpacket(hsotg, i, ep_mps, 1); if (hsotg->eps_out[i]) - s3c_hsotg_set_ep_maxpacket(hsotg, i, ep_mps, 0); + dwc2_hsotg_set_ep_maxpacket(hsotg, i, ep_mps, 0); } } /* ensure after enumeration our EP0 is active */ - s3c_hsotg_enqueue_setup(hsotg); + dwc2_hsotg_enqueue_setup(hsotg); dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n", readl(hsotg->regs + DIEPCTL0), @@ -2142,34 +2142,34 @@ static void s3c_hsotg_irq_enumdone(struct dwc2_hsotg *hsotg) * completed with the given result code. */ static void kill_all_requests(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *ep, + struct dwc2_hsotg_ep *ep, int result) { - struct s3c_hsotg_req *req, *treq; + struct dwc2_hsotg_req *req, *treq; unsigned size; ep->req = NULL; list_for_each_entry_safe(req, treq, &ep->queue, queue) - s3c_hsotg_complete_request(hsotg, ep, req, + dwc2_hsotg_complete_request(hsotg, ep, req, result); if (!hsotg->dedicated_fifos) return; size = (readl(hsotg->regs + DTXFSTS(ep->index)) & 0xffff) * 4; if (size < ep->fifo_size) - s3c_hsotg_txfifo_flush(hsotg, ep->fifo_index); + dwc2_hsotg_txfifo_flush(hsotg, ep->fifo_index); } /** - * s3c_hsotg_disconnect - disconnect service + * dwc2_hsotg_disconnect - disconnect service * @hsotg: The device state. * * The device has been disconnected. Remove all current * transactions and signal the gadget driver that this * has happened. 
*/ -void s3c_hsotg_disconnect(struct dwc2_hsotg *hsotg) +void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg) { unsigned ep; @@ -2192,13 +2192,13 @@ void s3c_hsotg_disconnect(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_irq_fifoempty - TX FIFO empty interrupt handler + * dwc2_hsotg_irq_fifoempty - TX FIFO empty interrupt handler * @hsotg: The device state: * @periodic: True if this is a periodic FIFO interrupt */ -static void s3c_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic) +static void dwc2_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic) { - struct s3c_hsotg_ep *ep; + struct dwc2_hsotg_ep *ep; int epno, ret; /* look through for any more data to transmit */ @@ -2215,7 +2215,7 @@ static void s3c_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic) (!periodic && ep->periodic)) continue; - ret = s3c_hsotg_trytx(hsotg, ep); + ret = dwc2_hsotg_trytx(hsotg, ep); if (ret < 0) break; } @@ -2227,12 +2227,12 @@ static void s3c_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic) GINTSTS_RXFLVL) /** - * s3c_hsotg_corereset - issue softreset to the core + * dwc2_hsotg_corereset - issue softreset to the core * @hsotg: The device state * * Issue a soft reset to the core, and await the core finishing it. */ -static int s3c_hsotg_corereset(struct dwc2_hsotg *hsotg) +static int dwc2_hsotg_corereset(struct dwc2_hsotg *hsotg) { int timeout; u32 grstctl; @@ -2275,18 +2275,18 @@ static int s3c_hsotg_corereset(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_core_init - issue softreset to the core + * dwc2_hsotg_core_init - issue softreset to the core * @hsotg: The device state * * Issue a soft reset to the core, and await the core finishing it. */ -void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, +void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, bool is_usb_reset) { u32 val; if (!is_usb_reset) - s3c_hsotg_corereset(hsotg); + dwc2_hsotg_corereset(hsotg); /* * we must now enable ep0 ready for host detection and then @@ -2298,7 +2298,7 @@ void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, writel(hsotg->phyif | GUSBCFG_TOUTCAL(7) | (val << GUSBCFG_USBTRDTIM_SHIFT), hsotg->regs + GUSBCFG); - s3c_hsotg_init_fifo(hsotg); + dwc2_hsotg_init_fifo(hsotg); if (!is_usb_reset) __orr32(hsotg->regs + DCTL, DCTL_SFTDISCON); @@ -2359,7 +2359,7 @@ void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, readl(hsotg->regs + DOEPCTL0)); /* enable in and out endpoint interrupts */ - s3c_hsotg_en_gsint(hsotg, GINTSTS_OEPINT | GINTSTS_IEPINT); + dwc2_hsotg_en_gsint(hsotg, GINTSTS_OEPINT | GINTSTS_IEPINT); /* * Enable the RXFIFO when in slave mode, as this is how we collect @@ -2367,11 +2367,11 @@ void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, * things we cannot process, so do not use it. 
*/ if (!using_dma(hsotg)) - s3c_hsotg_en_gsint(hsotg, GINTSTS_RXFLVL); + dwc2_hsotg_en_gsint(hsotg, GINTSTS_RXFLVL); /* Enable interrupts for EP0 in and out */ - s3c_hsotg_ctrl_epint(hsotg, 0, 0, 1); - s3c_hsotg_ctrl_epint(hsotg, 0, 1, 1); + dwc2_hsotg_ctrl_epint(hsotg, 0, 0, 1); + dwc2_hsotg_ctrl_epint(hsotg, 0, 1, 1); if (!is_usb_reset) { __orr32(hsotg->regs + DCTL, DCTL_PWRONPRGDONE); @@ -2390,16 +2390,16 @@ void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, writel(DXEPTSIZ_MC(1) | DXEPTSIZ_PKTCNT(1) | DXEPTSIZ_XFERSIZE(8), hsotg->regs + DOEPTSIZ0); - writel(s3c_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) | + writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) | DXEPCTL_CNAK | DXEPCTL_EPENA | DXEPCTL_USBACTEP, hsotg->regs + DOEPCTL0); /* enable, but don't activate EP0in */ - writel(s3c_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) | + writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) | DXEPCTL_USBACTEP, hsotg->regs + DIEPCTL0); - s3c_hsotg_enqueue_setup(hsotg); + dwc2_hsotg_enqueue_setup(hsotg); dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n", readl(hsotg->regs + DIEPCTL0), @@ -2417,24 +2417,24 @@ void s3c_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, hsotg->last_rst = jiffies; } -static void s3c_hsotg_core_disconnect(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg) { /* set the soft-disconnect bit */ __orr32(hsotg->regs + DCTL, DCTL_SFTDISCON); } -void s3c_hsotg_core_connect(struct dwc2_hsotg *hsotg) +void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg) { /* remove the soft-disconnect and let's go */ __bic32(hsotg->regs + DCTL, DCTL_SFTDISCON); } /** - * s3c_hsotg_irq - handle device interrupt + * dwc2_hsotg_irq - handle device interrupt * @irq: The IRQ number triggered * @pw: The pw value when registered the handler. */ -static irqreturn_t s3c_hsotg_irq(int irq, void *pw) +static irqreturn_t dwc2_hsotg_irq(int irq, void *pw) { struct dwc2_hsotg *hsotg = pw; int retry_count = 8; @@ -2454,7 +2454,7 @@ irq_retry: if (gintsts & GINTSTS_ENUMDONE) { writel(GINTSTS_ENUMDONE, hsotg->regs + GINTSTS); - s3c_hsotg_irq_enumdone(hsotg); + dwc2_hsotg_irq_enumdone(hsotg); } if (gintsts & (GINTSTS_OEPINT | GINTSTS_IEPINT)) { @@ -2472,13 +2472,13 @@ irq_retry: for (ep = 0; ep < hsotg->num_of_eps && daint_out; ep++, daint_out >>= 1) { if (daint_out & 1) - s3c_hsotg_epint(hsotg, ep, 0); + dwc2_hsotg_epint(hsotg, ep, 0); } for (ep = 0; ep < hsotg->num_of_eps && daint_in; ep++, daint_in >>= 1) { if (daint_in & 1) - s3c_hsotg_epint(hsotg, ep, 1); + dwc2_hsotg_epint(hsotg, ep, 1); } } @@ -2505,7 +2505,7 @@ irq_retry: writel(GINTSTS_USBRST, hsotg->regs + GINTSTS); /* Report disconnection if it is not already done. 
*/ - s3c_hsotg_disconnect(hsotg); + dwc2_hsotg_disconnect(hsotg); if (usb_status & GOTGCTL_BSESVLD) { if (time_after(jiffies, hsotg->last_rst + @@ -2515,7 +2515,7 @@ irq_retry: -ECONNRESET); hsotg->lx_state = DWC2_L0; - s3c_hsotg_core_init_disconnected(hsotg, true); + dwc2_hsotg_core_init_disconnected(hsotg, true); } } } @@ -2531,8 +2531,8 @@ irq_retry: * it needs re-enabling */ - s3c_hsotg_disable_gsint(hsotg, GINTSTS_NPTXFEMP); - s3c_hsotg_irq_fifoempty(hsotg, false); + dwc2_hsotg_disable_gsint(hsotg, GINTSTS_NPTXFEMP); + dwc2_hsotg_irq_fifoempty(hsotg, false); } if (gintsts & GINTSTS_PTXFEMP) { @@ -2540,18 +2540,18 @@ irq_retry: /* See note in GINTSTS_NPTxFEmp */ - s3c_hsotg_disable_gsint(hsotg, GINTSTS_PTXFEMP); - s3c_hsotg_irq_fifoempty(hsotg, true); + dwc2_hsotg_disable_gsint(hsotg, GINTSTS_PTXFEMP); + dwc2_hsotg_irq_fifoempty(hsotg, true); } if (gintsts & GINTSTS_RXFLVL) { /* * note, since GINTSTS_RxFLvl doubles as FIFO-not-empty, - * we need to retry s3c_hsotg_handle_rx if this is still + * we need to retry dwc2_hsotg_handle_rx if this is still * set. */ - s3c_hsotg_handle_rx(hsotg); + dwc2_hsotg_handle_rx(hsotg); } if (gintsts & GINTSTS_ERLYSUSP) { @@ -2570,7 +2570,7 @@ irq_retry: writel(DCTL_CGOUTNAK, hsotg->regs + DCTL); - s3c_hsotg_dump(hsotg); + dwc2_hsotg_dump(hsotg); } if (gintsts & GINTSTS_GINNAKEFF) { @@ -2578,7 +2578,7 @@ irq_retry: writel(DCTL_CGNPINNAK, hsotg->regs + DCTL); - s3c_hsotg_dump(hsotg); + dwc2_hsotg_dump(hsotg); } /* @@ -2595,16 +2595,16 @@ irq_retry: } /** - * s3c_hsotg_ep_enable - enable the given endpoint + * dwc2_hsotg_ep_enable - enable the given endpoint * @ep: The USB endpint to configure * @desc: The USB endpoint descriptor to configure with. * * This is called from the USB gadget code's usb_ep_enable(). */ -static int s3c_hsotg_ep_enable(struct usb_ep *ep, +static int dwc2_hsotg_ep_enable(struct usb_ep *ep, const struct usb_endpoint_descriptor *desc) { - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hsotg = hs_ep->parent; unsigned long flags; unsigned int index = hs_ep->index; @@ -2631,7 +2631,7 @@ static int s3c_hsotg_ep_enable(struct usb_ep *ep, mps = usb_endpoint_maxp(desc); - /* note, we handle this here instead of s3c_hsotg_set_ep_maxpacket */ + /* note, we handle this here instead of dwc2_hsotg_set_ep_maxpacket */ epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index); epctrl = readl(hsotg->regs + epctrl_reg); @@ -2660,7 +2660,7 @@ static int s3c_hsotg_ep_enable(struct usb_ep *ep, epctrl |= DXEPCTL_SNAK; /* update the endpoint state */ - s3c_hsotg_set_ep_maxpacket(hsotg, hs_ep->index, mps, dir_in); + dwc2_hsotg_set_ep_maxpacket(hsotg, hs_ep->index, mps, dir_in); /* default, set to non-periodic */ hs_ep->isochronous = 0; @@ -2752,7 +2752,7 @@ static int s3c_hsotg_ep_enable(struct usb_ep *ep, __func__, readl(hsotg->regs + epctrl_reg)); /* enable the endpoint interrupt */ - s3c_hsotg_ctrl_epint(hsotg, index, dir_in, 1); + dwc2_hsotg_ctrl_epint(hsotg, index, dir_in, 1); error: spin_unlock_irqrestore(&hsotg->lock, flags); @@ -2760,12 +2760,12 @@ error: } /** - * s3c_hsotg_ep_disable - disable given endpoint + * dwc2_hsotg_ep_disable - disable given endpoint * @ep: The endpoint to disable. 
*/ -static int s3c_hsotg_ep_disable(struct usb_ep *ep) +static int dwc2_hsotg_ep_disable(struct usb_ep *ep) { - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hsotg = hs_ep->parent; int dir_in = hs_ep->dir_in; int index = hs_ep->index; @@ -2797,7 +2797,7 @@ static int s3c_hsotg_ep_disable(struct usb_ep *ep) writel(ctrl, hsotg->regs + epctrl_reg); /* disable endpoint interrupts */ - s3c_hsotg_ctrl_epint(hsotg, hs_ep->index, hs_ep->dir_in, 0); + dwc2_hsotg_ctrl_epint(hsotg, hs_ep->index, hs_ep->dir_in, 0); /* terminate all requests with shutdown */ kill_all_requests(hsotg, hs_ep, -ESHUTDOWN); @@ -2811,9 +2811,9 @@ static int s3c_hsotg_ep_disable(struct usb_ep *ep) * @ep: The endpoint to check. * @test: The request to test if it is on the endpoint. */ -static bool on_list(struct s3c_hsotg_ep *ep, struct s3c_hsotg_req *test) +static bool on_list(struct dwc2_hsotg_ep *ep, struct dwc2_hsotg_req *test) { - struct s3c_hsotg_req *req, *treq; + struct dwc2_hsotg_req *req, *treq; list_for_each_entry_safe(req, treq, &ep->queue, queue) { if (req == test) @@ -2824,14 +2824,14 @@ static bool on_list(struct s3c_hsotg_ep *ep, struct s3c_hsotg_req *test) } /** - * s3c_hsotg_ep_dequeue - dequeue given endpoint + * dwc2_hsotg_ep_dequeue - dequeue given endpoint * @ep: The endpoint to dequeue. * @req: The request to be removed from a queue. */ -static int s3c_hsotg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) +static int dwc2_hsotg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) { - struct s3c_hsotg_req *hs_req = our_req(req); - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_req *hs_req = our_req(req); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hs = hs_ep->parent; unsigned long flags; @@ -2844,20 +2844,20 @@ static int s3c_hsotg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) return -EINVAL; } - s3c_hsotg_complete_request(hs, hs_ep, hs_req, -ECONNRESET); + dwc2_hsotg_complete_request(hs, hs_ep, hs_req, -ECONNRESET); spin_unlock_irqrestore(&hs->lock, flags); return 0; } /** - * s3c_hsotg_ep_sethalt - set halt on a given endpoint + * dwc2_hsotg_ep_sethalt - set halt on a given endpoint * @ep: The endpoint to set halt. * @value: Set or unset the halt. */ -static int s3c_hsotg_ep_sethalt(struct usb_ep *ep, int value) +static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value) { - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hs = hs_ep->parent; int index = hs_ep->index; u32 epreg; @@ -2868,7 +2868,7 @@ static int s3c_hsotg_ep_sethalt(struct usb_ep *ep, int value) if (index == 0) { if (value) - s3c_hsotg_stall_ep0(hs); + dwc2_hsotg_stall_ep0(hs); else dev_warn(hs->dev, "%s: can't clear halt on ep0\n", __func__); @@ -2914,43 +2914,43 @@ static int s3c_hsotg_ep_sethalt(struct usb_ep *ep, int value) } /** - * s3c_hsotg_ep_sethalt_lock - set halt on a given endpoint with lock held + * dwc2_hsotg_ep_sethalt_lock - set halt on a given endpoint with lock held * @ep: The endpoint to set halt. * @value: Set or unset the halt. 
*/ -static int s3c_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value) +static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value) { - struct s3c_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hs = hs_ep->parent; unsigned long flags = 0; int ret = 0; spin_lock_irqsave(&hs->lock, flags); - ret = s3c_hsotg_ep_sethalt(ep, value); + ret = dwc2_hsotg_ep_sethalt(ep, value); spin_unlock_irqrestore(&hs->lock, flags); return ret; } -static struct usb_ep_ops s3c_hsotg_ep_ops = { - .enable = s3c_hsotg_ep_enable, - .disable = s3c_hsotg_ep_disable, - .alloc_request = s3c_hsotg_ep_alloc_request, - .free_request = s3c_hsotg_ep_free_request, - .queue = s3c_hsotg_ep_queue_lock, - .dequeue = s3c_hsotg_ep_dequeue, - .set_halt = s3c_hsotg_ep_sethalt_lock, +static struct usb_ep_ops dwc2_hsotg_ep_ops = { + .enable = dwc2_hsotg_ep_enable, + .disable = dwc2_hsotg_ep_disable, + .alloc_request = dwc2_hsotg_ep_alloc_request, + .free_request = dwc2_hsotg_ep_free_request, + .queue = dwc2_hsotg_ep_queue_lock, + .dequeue = dwc2_hsotg_ep_dequeue, + .set_halt = dwc2_hsotg_ep_sethalt_lock, /* note, don't believe we have any call for the fifo routines */ }; /** - * s3c_hsotg_phy_enable - enable platform phy dev + * dwc2_hsotg_phy_enable - enable platform phy dev * @hsotg: The driver state * * A wrapper for platform code responsible for controlling * low-level USB code */ -static void s3c_hsotg_phy_enable(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_phy_enable(struct dwc2_hsotg *hsotg) { struct platform_device *pdev = to_platform_device(hsotg->dev); @@ -2967,13 +2967,13 @@ static void s3c_hsotg_phy_enable(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_phy_disable - disable platform phy dev + * dwc2_hsotg_phy_disable - disable platform phy dev * @hsotg: The driver state * * A wrapper for platform code responsible for controlling * low-level USB code */ -static void s3c_hsotg_phy_disable(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_phy_disable(struct dwc2_hsotg *hsotg) { struct platform_device *pdev = to_platform_device(hsotg->dev); @@ -2988,10 +2988,10 @@ static void s3c_hsotg_phy_disable(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_init - initalize the usb core + * dwc2_hsotg_init - initalize the usb core * @hsotg: The driver state */ -static void s3c_hsotg_init(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_init(struct dwc2_hsotg *hsotg) { u32 trdtim; /* unmask subset of endpoint interrupts */ @@ -3015,7 +3015,7 @@ static void s3c_hsotg_init(struct dwc2_hsotg *hsotg) readl(hsotg->regs + GRXFSIZ), readl(hsotg->regs + GNPTXFSIZ)); - s3c_hsotg_init_fifo(hsotg); + dwc2_hsotg_init_fifo(hsotg); /* set the PLL on, remove the HNP/SRP and set the PHY */ trdtim = (hsotg->phyif == GUSBCFG_PHYIF8) ? 9 : 5; @@ -3028,14 +3028,14 @@ static void s3c_hsotg_init(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_udc_start - prepare the udc for work + * dwc2_hsotg_udc_start - prepare the udc for work * @gadget: The usb gadget state * @driver: The usb gadget driver * * Perform initialization to prepare udc device and driver * to work. 
*/ -static int s3c_hsotg_udc_start(struct usb_gadget *gadget, +static int dwc2_hsotg_udc_start(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { struct dwc2_hsotg *hsotg = to_hsotg(gadget); @@ -3077,13 +3077,13 @@ static int s3c_hsotg_udc_start(struct usb_gadget *gadget, goto err; } - s3c_hsotg_phy_enable(hsotg); + dwc2_hsotg_phy_enable(hsotg); if (!IS_ERR_OR_NULL(hsotg->uphy)) otg_set_peripheral(hsotg->uphy->otg, &hsotg->gadget); spin_lock_irqsave(&hsotg->lock, flags); - s3c_hsotg_init(hsotg); - s3c_hsotg_core_init_disconnected(hsotg, false); + dwc2_hsotg_init(hsotg); + dwc2_hsotg_core_init_disconnected(hsotg, false); hsotg->enabled = 0; spin_unlock_irqrestore(&hsotg->lock, flags); @@ -3100,13 +3100,13 @@ err: } /** - * s3c_hsotg_udc_stop - stop the udc + * dwc2_hsotg_udc_stop - stop the udc * @gadget: The usb gadget state * @driver: The usb gadget driver * * Stop udc hw block and stay tunned for future transmissions */ -static int s3c_hsotg_udc_stop(struct usb_gadget *gadget) +static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) { struct dwc2_hsotg *hsotg = to_hsotg(gadget); unsigned long flags = 0; @@ -3120,9 +3120,9 @@ static int s3c_hsotg_udc_stop(struct usb_gadget *gadget) /* all endpoints should be shutdown */ for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) - s3c_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); + dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) - s3c_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); + dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); } spin_lock_irqsave(&hsotg->lock, flags); @@ -3135,7 +3135,7 @@ static int s3c_hsotg_udc_stop(struct usb_gadget *gadget) if (!IS_ERR_OR_NULL(hsotg->uphy)) otg_set_peripheral(hsotg->uphy->otg, NULL); - s3c_hsotg_phy_disable(hsotg); + dwc2_hsotg_phy_disable(hsotg); regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); @@ -3147,24 +3147,24 @@ static int s3c_hsotg_udc_stop(struct usb_gadget *gadget) } /** - * s3c_hsotg_gadget_getframe - read the frame number + * dwc2_hsotg_gadget_getframe - read the frame number * @gadget: The usb gadget state * * Read the {micro} frame number */ -static int s3c_hsotg_gadget_getframe(struct usb_gadget *gadget) +static int dwc2_hsotg_gadget_getframe(struct usb_gadget *gadget) { - return s3c_hsotg_read_frameno(to_hsotg(gadget)); + return dwc2_hsotg_read_frameno(to_hsotg(gadget)); } /** - * s3c_hsotg_pullup - connect/disconnect the USB PHY + * dwc2_hsotg_pullup - connect/disconnect the USB PHY * @gadget: The usb gadget state * @is_on: Current state of the USB PHY * * Connect/Disconnect the USB PHY pullup */ -static int s3c_hsotg_pullup(struct usb_gadget *gadget, int is_on) +static int dwc2_hsotg_pullup(struct usb_gadget *gadget, int is_on) { struct dwc2_hsotg *hsotg = to_hsotg(gadget); unsigned long flags = 0; @@ -3176,11 +3176,11 @@ static int s3c_hsotg_pullup(struct usb_gadget *gadget, int is_on) if (is_on) { clk_enable(hsotg->clk); hsotg->enabled = 1; - s3c_hsotg_core_init_disconnected(hsotg, false); - s3c_hsotg_core_connect(hsotg); + dwc2_hsotg_core_init_disconnected(hsotg, false); + dwc2_hsotg_core_connect(hsotg); } else { - s3c_hsotg_core_disconnect(hsotg); - s3c_hsotg_disconnect(hsotg); + dwc2_hsotg_core_disconnect(hsotg); + dwc2_hsotg_disconnect(hsotg); hsotg->enabled = 0; clk_disable(hsotg->clk); } @@ -3192,7 +3192,7 @@ static int s3c_hsotg_pullup(struct usb_gadget *gadget, int is_on) return 0; } -static int s3c_hsotg_vbus_session(struct usb_gadget *gadget, int is_active) +static int dwc2_hsotg_vbus_session(struct usb_gadget 
*gadget, int is_active) { struct dwc2_hsotg *hsotg = to_hsotg(gadget); unsigned long flags; @@ -3211,12 +3211,12 @@ static int s3c_hsotg_vbus_session(struct usb_gadget *gadget, int is_active) } /* Kill any ep0 requests as controller will be reinitialized */ kill_all_requests(hsotg, hsotg->eps_out[0], -ECONNRESET); - s3c_hsotg_core_init_disconnected(hsotg, false); + dwc2_hsotg_core_init_disconnected(hsotg, false); if (hsotg->enabled) - s3c_hsotg_core_connect(hsotg); + dwc2_hsotg_core_connect(hsotg); } else { - s3c_hsotg_core_disconnect(hsotg); - s3c_hsotg_disconnect(hsotg); + dwc2_hsotg_core_disconnect(hsotg); + dwc2_hsotg_disconnect(hsotg); } spin_unlock_irqrestore(&hsotg->lock, flags); @@ -3224,13 +3224,13 @@ static int s3c_hsotg_vbus_session(struct usb_gadget *gadget, int is_active) } /** - * s3c_hsotg_vbus_draw - report bMaxPower field + * dwc2_hsotg_vbus_draw - report bMaxPower field * @gadget: The usb gadget state * @mA: Amount of current * * Report how much power the device may consume to the phy. */ -static int s3c_hsotg_vbus_draw(struct usb_gadget *gadget, unsigned mA) +static int dwc2_hsotg_vbus_draw(struct usb_gadget *gadget, unsigned mA) { struct dwc2_hsotg *hsotg = to_hsotg(gadget); @@ -3239,17 +3239,17 @@ static int s3c_hsotg_vbus_draw(struct usb_gadget *gadget, unsigned mA) return usb_phy_set_power(hsotg->uphy, mA); } -static const struct usb_gadget_ops s3c_hsotg_gadget_ops = { - .get_frame = s3c_hsotg_gadget_getframe, - .udc_start = s3c_hsotg_udc_start, - .udc_stop = s3c_hsotg_udc_stop, - .pullup = s3c_hsotg_pullup, - .vbus_session = s3c_hsotg_vbus_session, - .vbus_draw = s3c_hsotg_vbus_draw, +static const struct usb_gadget_ops dwc2_hsotg_gadget_ops = { + .get_frame = dwc2_hsotg_gadget_getframe, + .udc_start = dwc2_hsotg_udc_start, + .udc_stop = dwc2_hsotg_udc_stop, + .pullup = dwc2_hsotg_pullup, + .vbus_session = dwc2_hsotg_vbus_session, + .vbus_draw = dwc2_hsotg_vbus_draw, }; /** - * s3c_hsotg_initep - initialise a single endpoint + * dwc2_hsotg_initep - initialise a single endpoint * @hsotg: The device state. * @hs_ep: The endpoint to be initialised. * @epnum: The endpoint number @@ -3258,8 +3258,8 @@ static const struct usb_gadget_ops s3c_hsotg_gadget_ops = { * creation) to give to the gadget driver. Setup the endpoint name, any * direction information and other state that may be required. */ -static void s3c_hsotg_initep(struct dwc2_hsotg *hsotg, - struct s3c_hsotg_ep *hs_ep, +static void dwc2_hsotg_initep(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, int epnum, bool dir_in) { @@ -3287,7 +3287,7 @@ static void s3c_hsotg_initep(struct dwc2_hsotg *hsotg, hs_ep->parent = hsotg; hs_ep->ep.name = hs_ep->name; usb_ep_set_maxpacket_limit(&hs_ep->ep, epnum ? 
1024 : EP0_MPS_LIMIT); - hs_ep->ep.ops = &s3c_hsotg_ep_ops; + hs_ep->ep.ops = &dwc2_hsotg_ep_ops; if (epnum == 0) { hs_ep->ep.caps.type_control = true; @@ -3317,12 +3317,12 @@ static void s3c_hsotg_initep(struct dwc2_hsotg *hsotg, } /** - * s3c_hsotg_hw_cfg - read HW configuration registers + * dwc2_hsotg_hw_cfg - read HW configuration registers * @param: The device state * * Read the USB core HW configuration registers */ -static int s3c_hsotg_hw_cfg(struct dwc2_hsotg *hsotg) +static int dwc2_hsotg_hw_cfg(struct dwc2_hsotg *hsotg) { u32 cfg; u32 ep_type; @@ -3335,11 +3335,11 @@ static int s3c_hsotg_hw_cfg(struct dwc2_hsotg *hsotg) /* Add ep0 */ hsotg->num_of_eps++; - hsotg->eps_in[0] = devm_kzalloc(hsotg->dev, sizeof(struct s3c_hsotg_ep), + hsotg->eps_in[0] = devm_kzalloc(hsotg->dev, sizeof(struct dwc2_hsotg_ep), GFP_KERNEL); if (!hsotg->eps_in[0]) return -ENOMEM; - /* Same s3c_hsotg_ep is used in both directions for ep0 */ + /* Same dwc2_hsotg_ep is used in both directions for ep0 */ hsotg->eps_out[0] = hsotg->eps_in[0]; cfg = readl(hsotg->regs + GHWCFG1); @@ -3348,14 +3348,14 @@ static int s3c_hsotg_hw_cfg(struct dwc2_hsotg *hsotg) /* Direction in or both */ if (!(ep_type & 2)) { hsotg->eps_in[i] = devm_kzalloc(hsotg->dev, - sizeof(struct s3c_hsotg_ep), GFP_KERNEL); + sizeof(struct dwc2_hsotg_ep), GFP_KERNEL); if (!hsotg->eps_in[i]) return -ENOMEM; } /* Direction out or both */ if (!(ep_type & 1)) { hsotg->eps_out[i] = devm_kzalloc(hsotg->dev, - sizeof(struct s3c_hsotg_ep), GFP_KERNEL); + sizeof(struct dwc2_hsotg_ep), GFP_KERNEL); if (!hsotg->eps_out[i]) return -ENOMEM; } @@ -3375,10 +3375,10 @@ static int s3c_hsotg_hw_cfg(struct dwc2_hsotg *hsotg) } /** - * s3c_hsotg_dump - dump state of the udc + * dwc2_hsotg_dump - dump state of the udc * @param: The device state */ -static void s3c_hsotg_dump(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_dump(struct dwc2_hsotg *hsotg) { #ifdef DEBUG struct device *dev = hsotg->dev; @@ -3427,7 +3427,7 @@ static void s3c_hsotg_dump(struct dwc2_hsotg *hsotg) } #ifdef CONFIG_OF -static void s3c_hsotg_of_probe(struct dwc2_hsotg *hsotg) +static void dwc2_hsotg_of_probe(struct dwc2_hsotg *hsotg) { struct device_node *np = hsotg->dev->of_node; u32 len = 0; @@ -3468,7 +3468,7 @@ rx_fifo: &hsotg->g_np_g_tx_fifo_sz); } #else -static inline void s3c_hsotg_of_probe(struct dwc2_hsotg *hsotg) { } +static inline void dwc2_hsotg_of_probe(struct dwc2_hsotg *hsotg) { } #endif /** @@ -3479,7 +3479,7 @@ static inline void s3c_hsotg_of_probe(struct dwc2_hsotg *hsotg) { } int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) { struct device *dev = hsotg->dev; - struct s3c_hsotg_plat *plat = dev->platform_data; + struct dwc2_hsotg_plat *plat = dev->platform_data; int epnum; int ret; int i; @@ -3488,14 +3488,14 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) /* Set default UTMI width */ hsotg->phyif = GUSBCFG_PHYIF16; - s3c_hsotg_of_probe(hsotg); + dwc2_hsotg_of_probe(hsotg); /* Initialize to legacy fifo configuration values */ hsotg->g_rx_fifo_sz = 2048; hsotg->g_np_g_tx_fifo_sz = 1024; memcpy(&hsotg->g_tx_fifo_sz[1], p_tx_fifo, sizeof(p_tx_fifo)); /* Device tree specific probe */ - s3c_hsotg_of_probe(hsotg); + dwc2_hsotg_of_probe(hsotg); /* Dump fifo information */ dev_dbg(dev, "NonPeriodic TXFIFO size: %d\n", hsotg->g_np_g_tx_fifo_sz); @@ -3531,7 +3531,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) } hsotg->gadget.max_speed = USB_SPEED_HIGH; - hsotg->gadget.ops = &s3c_hsotg_gadget_ops; + hsotg->gadget.ops = &dwc2_hsotg_gadget_ops; 
hsotg->gadget.name = dev_name(dev); if (hsotg->dr_mode == USB_DR_MODE_OTG) hsotg->gadget.is_otg = 1; @@ -3548,7 +3548,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) /* regulators */ for (i = 0; i < ARRAY_SIZE(hsotg->supplies); i++) - hsotg->supplies[i].supply = s3c_hsotg_supply_names[i]; + hsotg->supplies[i].supply = dwc2_hsotg_supply_names[i]; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(hsotg->supplies), hsotg->supplies); @@ -3566,7 +3566,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) } /* usb phy enable */ - s3c_hsotg_phy_enable(hsotg); + dwc2_hsotg_phy_enable(hsotg); /* * Force Device mode before initialization. @@ -3581,14 +3581,14 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) */ msleep(25); - s3c_hsotg_corereset(hsotg); - ret = s3c_hsotg_hw_cfg(hsotg); + dwc2_hsotg_corereset(hsotg); + ret = dwc2_hsotg_hw_cfg(hsotg); if (ret) { dev_err(hsotg->dev, "Hardware configuration failed: %d\n", ret); goto err_clk; } - s3c_hsotg_init(hsotg); + dwc2_hsotg_init(hsotg); /* Switch back to default configuration */ __bic32(hsotg->regs + GUSBCFG, GUSBCFG_FORCEDEVMODE); @@ -3609,10 +3609,10 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) goto err_supplies; } - ret = devm_request_irq(hsotg->dev, irq, s3c_hsotg_irq, IRQF_SHARED, + ret = devm_request_irq(hsotg->dev, irq, dwc2_hsotg_irq, IRQF_SHARED, dev_name(hsotg->dev), hsotg); if (ret < 0) { - s3c_hsotg_phy_disable(hsotg); + dwc2_hsotg_phy_disable(hsotg); clk_disable_unprepare(hsotg->clk); regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); @@ -3635,7 +3635,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) /* allocate EP0 request */ - hsotg->ctrl_req = s3c_hsotg_ep_alloc_request(&hsotg->eps_out[0]->ep, + hsotg->ctrl_req = dwc2_hsotg_ep_alloc_request(&hsotg->eps_out[0]->ep, GFP_KERNEL); if (!hsotg->ctrl_req) { dev_err(dev, "failed to allocate ctrl req\n"); @@ -3646,15 +3646,15 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) /* initialise the endpoints now the core has been initialised */ for (epnum = 0; epnum < hsotg->num_of_eps; epnum++) { if (hsotg->eps_in[epnum]) - s3c_hsotg_initep(hsotg, hsotg->eps_in[epnum], + dwc2_hsotg_initep(hsotg, hsotg->eps_in[epnum], epnum, 1); if (hsotg->eps_out[epnum]) - s3c_hsotg_initep(hsotg, hsotg->eps_out[epnum], + dwc2_hsotg_initep(hsotg, hsotg->eps_out[epnum], epnum, 0); } /* disable power and clock */ - s3c_hsotg_phy_disable(hsotg); + dwc2_hsotg_phy_disable(hsotg); ret = regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); @@ -3667,12 +3667,12 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) if (ret) goto err_supplies; - s3c_hsotg_dump(hsotg); + dwc2_hsotg_dump(hsotg); return 0; err_supplies: - s3c_hsotg_phy_disable(hsotg); + dwc2_hsotg_phy_disable(hsotg); err_clk: clk_disable_unprepare(hsotg->clk); @@ -3680,10 +3680,10 @@ err_clk: } /** - * s3c_hsotg_remove - remove function for hsotg driver + * dwc2_hsotg_remove - remove function for hsotg driver * @pdev: The platform information for the driver */ -int s3c_hsotg_remove(struct dwc2_hsotg *hsotg) +int dwc2_hsotg_remove(struct dwc2_hsotg *hsotg) { usb_del_gadget_udc(&hsotg->gadget); clk_disable_unprepare(hsotg->clk); @@ -3691,7 +3691,7 @@ int s3c_hsotg_remove(struct dwc2_hsotg *hsotg) return 0; } -int s3c_hsotg_suspend(struct dwc2_hsotg *hsotg) +int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) { unsigned long flags; int ret = 0; @@ -3709,18 +3709,18 @@ int s3c_hsotg_suspend(struct dwc2_hsotg *hsotg) spin_lock_irqsave(&hsotg->lock, flags); if 
(hsotg->enabled) - s3c_hsotg_core_disconnect(hsotg); - s3c_hsotg_disconnect(hsotg); + dwc2_hsotg_core_disconnect(hsotg); + dwc2_hsotg_disconnect(hsotg); hsotg->gadget.speed = USB_SPEED_UNKNOWN; spin_unlock_irqrestore(&hsotg->lock, flags); - s3c_hsotg_phy_disable(hsotg); + dwc2_hsotg_phy_disable(hsotg); for (ep = 0; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) - s3c_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); + dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) - s3c_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); + dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); } ret = regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), @@ -3733,7 +3733,7 @@ int s3c_hsotg_suspend(struct dwc2_hsotg *hsotg) return ret; } -int s3c_hsotg_resume(struct dwc2_hsotg *hsotg) +int dwc2_hsotg_resume(struct dwc2_hsotg *hsotg) { unsigned long flags; int ret = 0; @@ -3751,12 +3751,12 @@ int s3c_hsotg_resume(struct dwc2_hsotg *hsotg) ret = regulator_bulk_enable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); - s3c_hsotg_phy_enable(hsotg); + dwc2_hsotg_phy_enable(hsotg); spin_lock_irqsave(&hsotg->lock, flags); - s3c_hsotg_core_init_disconnected(hsotg, false); + dwc2_hsotg_core_init_disconnected(hsotg, false); if (hsotg->enabled) - s3c_hsotg_core_connect(hsotg); + dwc2_hsotg_core_connect(hsotg); spin_unlock_irqrestore(&hsotg->lock, flags); } mutex_unlock(&hsotg->init_mutex); diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index f845c41fe9e5..007a3d5a0642 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -1382,8 +1382,8 @@ static void dwc2_conn_id_status_change(struct work_struct *work) hsotg->op_state = OTG_STATE_B_PERIPHERAL; dwc2_core_init(hsotg, false, -1); dwc2_enable_global_interrupts(hsotg); - s3c_hsotg_core_init_disconnected(hsotg, false); - s3c_hsotg_core_connect(hsotg); + dwc2_hsotg_core_init_disconnected(hsotg, false); + dwc2_hsotg_core_connect(hsotg); } else { /* A-Device connector (Host Mode) */ dev_dbg(hsotg->dev, "connId A\n"); diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 90935304185a..3d1f82def2f3 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -130,7 +130,7 @@ static int dwc2_driver_remove(struct platform_device *dev) if (hsotg->hcd_enabled) dwc2_hcd_remove(hsotg); if (hsotg->gadget_enabled) - s3c_hsotg_remove(hsotg); + dwc2_hsotg_remove(hsotg); return 0; } @@ -269,7 +269,7 @@ static int dwc2_driver_probe(struct platform_device *dev) retval = dwc2_hcd_init(hsotg, irq); if (retval) { if (hsotg->gadget_enabled) - s3c_hsotg_remove(hsotg); + dwc2_hsotg_remove(hsotg); return retval; } hsotg->hcd_enabled = 1; @@ -288,7 +288,7 @@ static int __maybe_unused dwc2_suspend(struct device *dev) int ret = 0; if (dwc2_is_device_mode(dwc2)) { - ret = s3c_hsotg_suspend(dwc2); + ret = dwc2_hsotg_suspend(dwc2); } else { if (dwc2->lx_state == DWC2_L0) return 0; @@ -305,7 +305,7 @@ static int __maybe_unused dwc2_resume(struct device *dev) int ret = 0; if (dwc2_is_device_mode(dwc2)) { - ret = s3c_hsotg_resume(dwc2); + ret = dwc2_hsotg_resume(dwc2); } else { phy_power_on(dwc2->phy); phy_init(dwc2->phy); diff --git a/include/linux/platform_data/s3c-hsotg.h b/include/linux/platform_data/s3c-hsotg.h index 3f1cbf95ec3b..3982586ba6df 100644 --- a/include/linux/platform_data/s3c-hsotg.h +++ b/include/linux/platform_data/s3c-hsotg.h @@ -17,19 +17,19 @@ struct platform_device; -enum s3c_hsotg_dmamode { +enum dwc2_hsotg_dmamode { S3C_HSOTG_DMA_NONE, /* do not use DMA at-all */ S3C_HSOTG_DMA_ONLY, /* always use DMA */ 
S3C_HSOTG_DMA_DRV, /* DMA is chosen by driver */ }; /** - * struct s3c_hsotg_plat - platform data for high-speed otg/udc + * struct dwc2_hsotg_plat - platform data for high-speed otg/udc * @dma: Whether to use DMA or not. * @is_osc: The clock source is an oscillator, not a crystal */ -struct s3c_hsotg_plat { - enum s3c_hsotg_dmamode dma; +struct dwc2_hsotg_plat { + enum dwc2_hsotg_dmamode dma; unsigned int is_osc:1; int phy_type; @@ -37,6 +37,6 @@ struct s3c_hsotg_plat { int (*phy_exit)(struct platform_device *pdev, int type); }; -extern void s3c_hsotg_set_platdata(struct s3c_hsotg_plat *pd); +extern void dwc2_hsotg_set_platdata(struct dwc2_hsotg_plat *pd); #endif /* __LINUX_USB_S3C_HSOTG_H */ -- cgit v1.2.3 From 428163d703712d11cacfddaf30f40b18ccc50042 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 10 Aug 2015 16:46:19 +0200 Subject: usb: gadget: at91_udc: move at91_udc_data in at91_udc.h struct at91_udc_data is now used only inside the driver, so move it into the driver's own header. Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/at91_udc.h | 8 ++++++++ include/linux/platform_data/atmel.h | 9 --------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/at91_udc.h b/drivers/usb/gadget/udc/at91_udc.h index 2679c8b217cc..0a433e6b346b 100644 --- a/drivers/usb/gadget/udc/at91_udc.h +++ b/drivers/usb/gadget/udc/at91_udc.h @@ -112,6 +112,14 @@ struct at91_udc_caps { void (*pullup)(struct at91_udc *udc, int is_on); }; +struct at91_udc_data { + int vbus_pin; /* high == host powering us */ + u8 vbus_active_low; /* vbus polarity */ + u8 vbus_polled; /* Use polling, not interrupt */ + int pullup_pin; /* active == D+ pulled up */ + u8 pullup_active_low; /* true == pullup_pin is active low */ +}; + /* * driver is non-SMP, and just blocks IRQs whenever it needs * access protection for chip registers or driver state diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..9127ebbaa487 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -25,15 +25,6 @@ */ #define ATMEL_MAX_UART 7 - /* USB Device */ -struct at91_udc_data { - int vbus_pin; /* high == host powering us */ - u8 vbus_active_low; /* vbus polarity */ - u8 vbus_polled; /* Use polling, not interrupt */ - int pullup_pin; /* active == D+ pulled up */ - u8 pullup_active_low; /* true == pullup_pin is active low */ -}; - /* Compact Flash */ struct at91_cf_data { int irq_pin; /* I/O IRQ */ -- cgit v1.2.3 From b67f628c84329a9ce82dbff5fde196dc4624e7c2 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Wed, 16 Sep 2015 12:10:41 +0200 Subject: usb: gadget: epautoconf: add usb_ep_autoconfig_release() function This patch introduces the usb_ep_autoconfig_release() function, which allows an endpoint previously obtained from usb_ep_autoconfig() to be released during USB function bind.
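For illustration, a function's bind() path could hand an endpoint back when the rest of its setup fails; the descriptor and helper names below are hypothetical and only sketch the intended use:

	static int f_example_bind(struct usb_configuration *c,
				  struct usb_function *f)
	{
		struct usb_composite_dev *cdev = c->cdev;
		struct usb_ep *ep;

		/* claim an endpoint matching a (made-up) descriptor */
		ep = usb_ep_autoconfig(cdev->gadget, &fs_example_in_desc);
		if (!ep)
			return -ENODEV;

		if (f_example_alloc_requests(ep) < 0) {
			/* make the endpoint available to other functions */
			usb_ep_autoconfig_release(ep);
			return -ENOMEM;
		}

		return 0;
	}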
Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/epautoconf.c | 17 +++++++++++++++++ include/linux/usb/gadget.h | 2 ++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c index 0f4ece44d703..30fdab0ae383 100644 --- a/drivers/usb/gadget/epautoconf.c +++ b/drivers/usb/gadget/epautoconf.c @@ -171,6 +171,23 @@ struct usb_ep *usb_ep_autoconfig( } EXPORT_SYMBOL_GPL(usb_ep_autoconfig); +/** + * usb_ep_autoconfig_release - releases endpoint and sets it to initial state + * @ep: endpoint which should be released + * + * This function can be used during function bind for endpoints obtained + * from usb_ep_autoconfig(). It unclaims endpoint claimed by + * usb_ep_autoconfig() to make it available for other functions. Endpoint + * which was released is no longer valid and shouldn't be used in + * context of function which released it. + */ +void usb_ep_autoconfig_release(struct usb_ep *ep) +{ + ep->claimed = false; + ep->driver_data = NULL; +} +EXPORT_SYMBOL_GPL(usb_ep_autoconfig_release); + /** * usb_ep_autoconfig_reset - reset endpoint autoconfig state * @gadget: device for which autoconfig state will be reset diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c14a69b36d27..3f299e2b6942 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1233,6 +1233,8 @@ extern struct usb_ep *usb_ep_autoconfig_ss(struct usb_gadget *, struct usb_endpoint_descriptor *, struct usb_ss_ep_comp_descriptor *); +extern void usb_ep_autoconfig_release(struct usb_ep *); + extern void usb_ep_autoconfig_reset(struct usb_gadget *); #endif /* __LINUX_USB_GADGET_H */ -- cgit v1.2.3 From b0bac2581c1918cc4ab0aca01977ad69f0bc127a Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Wed, 16 Sep 2015 12:10:42 +0200 Subject: usb: gadget: introduce 'enabled' flag in struct usb_ep This patch introduces an 'enabled' flag in struct usb_ep, and modifies the usb_ep_enable() and usb_ep_disable() functions to encapsulate the endpoint's enabled/disabled state. It helps to avoid enabling endpoints which are already enabled, and disabling endpoints which are already disabled. From now on, USB functions don't have to remember the current endpoint enable/disable state, as this state is now handled automatically, which makes this API less bug-prone.
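For illustration, with the flag in place repeated enable/disable calls collapse into harmless no-ops, so a function's set_alt()/disable() paths need no private bookkeeping (sketch only; descriptor setup and error handling elided):

	int ret;

	ret = usb_ep_enable(ep);	/* enables hardware, sets ep->enabled */
	ret = usb_ep_enable(ep);	/* already enabled: returns 0, no-op */
	ret = usb_ep_disable(ep);	/* disables hardware, clears ep->enabled */
	ret = usb_ep_disable(ep);	/* already disabled: returns 0, no-op */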
Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 3f299e2b6942..3d583a10b926 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -215,6 +215,7 @@ struct usb_ep { struct list_head ep_list; struct usb_ep_caps caps; bool claimed; + bool enabled; unsigned maxpacket:16; unsigned maxpacket_limit:16; unsigned max_streams:16; @@ -264,7 +265,18 @@ static inline void usb_ep_set_maxpacket_limit(struct usb_ep *ep, */ static inline int usb_ep_enable(struct usb_ep *ep) { - return ep->ops->enable(ep, ep->desc); + int ret; + + if (ep->enabled) + return 0; + + ret = ep->ops->enable(ep, ep->desc); + if (ret) + return ret; + + ep->enabled = true; + + return 0; } /** @@ -281,7 +293,18 @@ static inline int usb_ep_enable(struct usb_ep *ep) */ static inline int usb_ep_disable(struct usb_ep *ep) { - return ep->ops->disable(ep); + int ret; + + if (!ep->enabled) + return 0; + + ret = ep->ops->disable(ep); + if (ret) + return ret; + + ep->enabled = false; + + return 0; } /** -- cgit v1.2.3 From 58efd4b06df4a421652cb2c8a850a9697a37915c Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 22 Sep 2015 15:31:34 +0800 Subject: usb: phy: change some comments - Replace all "transceiver" with "phy" - Replace one "OTG controller" with "phy" Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index e39f251cf861..31a8068c42a5 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -63,7 +63,7 @@ enum usb_otg_state { struct usb_phy; struct usb_otg; -/* for transceivers connected thru an ULPI interface, the user must +/* for phys connected thru an ULPI interface, the user must * provide access ops */ struct usb_phy_io_ops { @@ -92,10 +92,10 @@ struct usb_phy { u16 port_status; u16 port_change; - /* to support controllers that have multiple transceivers */ + /* to support controllers that have multiple phys */ struct list_head head; - /* initialize/shutdown the OTG controller */ + /* initialize/shutdown the phy */ int (*init)(struct usb_phy *x); void (*shutdown)(struct usb_phy *x); @@ -106,7 +106,7 @@ struct usb_phy { int (*set_power)(struct usb_phy *x, unsigned mA); - /* Set transceiver into suspend mode */ + /* Set phy into suspend mode */ int (*set_suspend)(struct usb_phy *x, int suspend); -- cgit v1.2.3 From 63863b988eeca2823ce76b28b104e0b8366cafec Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 21 Sep 2015 11:14:32 +0300 Subject: usb: common: of_usb_get_maximum_speed to usb_get_maximum_speed By using the unified device property interface, the function can be made available for all platforms and not just the ones using DT. 
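For illustration, a controller driver can now make the same call whether the "maximum-speed" property comes from a device tree node or from built-in device properties; the probe function below is a hypothetical sketch, not part of this patch:

	static int example_probe(struct platform_device *pdev)
	{
		enum usb_device_speed speed;

		speed = usb_get_maximum_speed(&pdev->dev);
		if (speed == USB_SPEED_UNKNOWN)
			speed = USB_SPEED_HIGH;	/* no property: pick a default */

		dev_dbg(&pdev->dev, "maximum speed: %s\n",
			usb_speed_string(speed));
		return 0;
	}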
Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi --- drivers/usb/chipidea/core.c | 2 +- drivers/usb/common/common.c | 44 ++++++++++++++++++-------------------------- drivers/usb/dwc3/core.c | 3 ++- drivers/usb/musb/musb_dsps.c | 2 +- include/linux/usb/ch9.h | 11 ++++++++++- include/linux/usb/of.h | 6 ------ 6 files changed, 32 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 3feebf7f31f0..ce7153232425 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -648,7 +648,7 @@ static int ci_get_platdata(struct device *dev, return ret; } - if (of_usb_get_maximum_speed(dev->of_node) == USB_SPEED_FULL) + if (usb_get_maximum_speed(dev) == USB_SPEED_FULL) platdata->flags |= CI_HDRC_FORCE_FULLSPEED; platdata->itc_setting = 1; diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index 9e39286a4e5a..b25a111903ab 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -60,6 +60,24 @@ const char *usb_speed_string(enum usb_device_speed speed) } EXPORT_SYMBOL_GPL(usb_speed_string); +enum usb_device_speed usb_get_maximum_speed(struct device *dev) +{ + const char *maximum_speed; + int err; + int i; + + err = device_property_read_string(dev, "maximum-speed", &maximum_speed); + if (err < 0) + return USB_SPEED_UNKNOWN; + + for (i = 0; i < ARRAY_SIZE(speed_names); i++) + if (strcmp(maximum_speed, speed_names[i]) == 0) + return i; + + return USB_SPEED_UNKNOWN; +} +EXPORT_SYMBOL_GPL(usb_get_maximum_speed); + const char *usb_state_string(enum usb_device_state state) { static const char *const names[] = { @@ -113,32 +131,6 @@ enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) } EXPORT_SYMBOL_GPL(of_usb_get_dr_mode); -/** - * of_usb_get_maximum_speed - Get maximum requested speed for a given USB - * controller. - * @np: Pointer to the given device_node - * - * The function gets the maximum speed string from property "maximum-speed", - * and returns the corresponding enum usb_device_speed. 
- */ -enum usb_device_speed of_usb_get_maximum_speed(struct device_node *np) -{ - const char *maximum_speed; - int err; - int i; - - err = of_property_read_string(np, "maximum-speed", &maximum_speed); - if (err < 0) - return USB_SPEED_UNKNOWN; - - for (i = 0; i < ARRAY_SIZE(speed_names); i++) - if (strcmp(maximum_speed, speed_names[i]) == 0) - return i; - - return USB_SPEED_UNKNOWN; -} -EXPORT_SYMBOL_GPL(of_usb_get_maximum_speed); - /** * of_usb_host_tpl_support - to get if Targeted Peripheral List is supported * for given targeted hosts (non-PC hosts) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index cfbcebe2cf8f..61a56357baf0 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -870,8 +870,9 @@ static int dwc3_probe(struct platform_device *pdev) */ hird_threshold = 12; + dwc->maximum_speed = usb_get_maximum_speed(dev); + if (node) { - dwc->maximum_speed = of_usb_get_maximum_speed(node); dwc->has_lpm_erratum = of_property_read_bool(node, "snps,has-lpm-erratum"); of_property_read_u8(node, "snps,lpm-nyet-threshold", diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index c8b2ec9a79d6..c6a69eaf280f 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -747,7 +747,7 @@ static int dsps_create_musb_pdev(struct dsps_glue *glue, if (!ret && val) config->multipoint = true; - config->maximum_speed = of_usb_get_maximum_speed(dn); + config->maximum_speed = usb_get_maximum_speed(&parent->dev); switch (config->maximum_speed) { case USB_SPEED_LOW: case USB_SPEED_FULL: diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 27603bcbb9b9..6cc96bb12ddc 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -32,9 +32,9 @@ #ifndef __LINUX_USB_CH9_H #define __LINUX_USB_CH9_H +#include <linux/device.h> #include <uapi/linux/usb/ch9.h> - /** * usb_speed_string() - Returns human readable-name of the speed. * @speed: The speed to return human-readable name for. If it's not * @@ -43,6 +43,15 @@ */ extern const char *usb_speed_string(enum usb_device_speed speed); +/** + * usb_get_maximum_speed - Get maximum requested speed for a given USB + * controller. + * @dev: Pointer to the given USB controller device + * + * The function gets the maximum speed string from property "maximum-speed", + * and returns the corresponding enum usb_device_speed. + */ +extern enum usb_device_speed usb_get_maximum_speed(struct device *dev); /** * usb_state_string - Returns human readable name for the state.
diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 8c5a818ec244..ff23fea49fca 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -13,7 +13,6 @@ #if IS_ENABLED(CONFIG_OF) enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np); -enum usb_device_speed of_usb_get_maximum_speed(struct device_node *np); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); @@ -23,11 +22,6 @@ static inline enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) return USB_DR_MODE_UNKNOWN; } -static inline enum usb_device_speed -of_usb_get_maximum_speed(struct device_node *np) -{ - return USB_SPEED_UNKNOWN; -} static inline bool of_usb_host_tpl_support(struct device_node *np) { return false; -- cgit v1.2.3 From 06e7114f0d8297278eb24f4e9bee3393a94bd8ce Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 21 Sep 2015 11:14:34 +0300 Subject: usb: common: of_usb_get_dr_mode to usb_get_dr_mode By using the unified device property interface, the function can be made available for all platforms and not just the ones using DT. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi --- drivers/usb/chipidea/core.c | 2 +- drivers/usb/common/common.c | 15 ++++----------- drivers/usb/dwc2/platform.c | 2 +- drivers/usb/dwc3/core.c | 2 +- drivers/usb/dwc3/dwc3-st.c | 2 +- drivers/usb/musb/musb_dsps.c | 2 +- drivers/usb/musb/sunxi.c | 2 +- drivers/usb/phy/phy-msm-usb.c | 2 +- drivers/usb/phy/phy-tegra-usb.c | 2 +- include/linux/usb/of.h | 6 ------ include/linux/usb/otg.h | 9 +++++++++ 11 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index ce7153232425..bf2599757f55 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -611,7 +611,7 @@ static int ci_get_platdata(struct device *dev, platdata->phy_mode = of_usb_get_phy_mode(dev->of_node); if (!platdata->dr_mode) - platdata->dr_mode = of_usb_get_dr_mode(dev->of_node); + platdata->dr_mode = usb_get_dr_mode(dev); if (platdata->dr_mode == USB_DR_MODE_UNKNOWN) platdata->dr_mode = USB_DR_MODE_OTG; diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index b25a111903ab..673d53038ed2 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -99,7 +99,6 @@ const char *usb_state_string(enum usb_device_state state) } EXPORT_SYMBOL_GPL(usb_state_string); -#ifdef CONFIG_OF static const char *const usb_dr_modes[] = { [USB_DR_MODE_UNKNOWN] = "", [USB_DR_MODE_HOST] = "host", @@ -107,19 +106,12 @@ static const char *const usb_dr_modes[] = { [USB_DR_MODE_OTG] = "otg", }; -/** - * of_usb_get_dr_mode - Get dual role mode for given device_node - * @np: Pointer to the given device_node - * - * The function gets phy interface string from property 'dr_mode', - * and returns the correspondig enum usb_dr_mode - */ -enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) +enum usb_dr_mode usb_get_dr_mode(struct device *dev) { const char *dr_mode; int err, i; - err = of_property_read_string(np, "dr_mode", &dr_mode); + err = device_property_read_string(dev, "dr_mode", &dr_mode); if (err < 0) return USB_DR_MODE_UNKNOWN; @@ -129,8 +121,9 @@ enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) return USB_DR_MODE_UNKNOWN; } -EXPORT_SYMBOL_GPL(of_usb_get_dr_mode); +EXPORT_SYMBOL_GPL(usb_get_dr_mode); +#ifdef CONFIG_OF /** * of_usb_host_tpl_support - to get if Targeted Peripheral List is supported * for given 
targeted hosts (non-PC hosts) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 3d1f82def2f3..28abd1f90900 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -220,7 +220,7 @@ static int dwc2_driver_probe(struct platform_device *dev) dev_dbg(&dev->dev, "mapped PA %08lx to VA %p\n", (unsigned long)res->start, hsotg->regs); - hsotg->dr_mode = of_usb_get_dr_mode(dev->dev.of_node); + hsotg->dr_mode = usb_get_dr_mode(&dev->dev); /* * Attempt to find a generic PHY, then look for an old style diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 61a56357baf0..407151465645 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -871,6 +871,7 @@ static int dwc3_probe(struct platform_device *pdev) hird_threshold = 12; dwc->maximum_speed = usb_get_maximum_speed(dev); + dwc->dr_mode = usb_get_dr_mode(dev); if (node) { dwc->has_lpm_erratum = of_property_read_bool(node, @@ -886,7 +887,6 @@ static int dwc3_probe(struct platform_device *pdev) dwc->needs_fifo_resize = of_property_read_bool(node, "tx-fifo-resize"); - dwc->dr_mode = of_usb_get_dr_mode(node); dwc->disable_scramble_quirk = of_property_read_bool(node, "snps,disable_scramble_quirk"); diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 02d47d50905b..5c0adb9c6fb2 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -268,7 +268,7 @@ static int st_dwc3_probe(struct platform_device *pdev) goto undo_softreset; } - dwc3_data->dr_mode = of_usb_get_dr_mode(child_pdev->dev.of_node); + dwc3_data->dr_mode = usb_get_dr_mode(&child_pdev->dev); /* * Configure the USB port as device or host according to the static diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index c6a69eaf280f..eeb7d9ecf7df 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -666,7 +666,7 @@ static int get_musb_port_mode(struct device *dev) { enum usb_dr_mode mode; - mode = of_usb_get_dr_mode(dev->of_node); + mode = usb_get_dr_mode(dev); switch (mode) { case USB_DR_MODE_HOST: return MUSB_PORT_MODE_HOST; diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index f9f6304ad854..f11b8f681b30 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -617,7 +617,7 @@ static int sunxi_musb_probe(struct platform_device *pdev) return -ENOMEM; memset(&pdata, 0, sizeof(pdata)); - switch (of_usb_get_dr_mode(np)) { + switch (usb_get_dr_mode(&pdev->dev)) { #if defined CONFIG_USB_MUSB_DUAL_ROLE || defined CONFIG_USB_MUSB_HOST case USB_DR_MODE_HOST: pdata.mode = MUSB_PORT_MODE_HOST; diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c index c58c3c0dbe35..80eb991c2506 100644 --- a/drivers/usb/phy/phy-msm-usb.c +++ b/drivers/usb/phy/phy-msm-usb.c @@ -1529,7 +1529,7 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg) if (IS_ERR(motg->phy_rst)) motg->phy_rst = NULL; - pdata->mode = of_usb_get_dr_mode(node); + pdata->mode = usb_get_dr_mode(&pdev->dev); if (pdata->mode == USB_DR_MODE_UNKNOWN) pdata->mode = USB_DR_MODE_OTG; diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c index ab025b00964c..5fe4a5704bde 100644 --- a/drivers/usb/phy/phy-tegra-usb.c +++ b/drivers/usb/phy/phy-tegra-usb.c @@ -1029,7 +1029,7 @@ static int tegra_usb_phy_probe(struct platform_device *pdev) } if (of_find_property(np, "dr_mode", NULL)) - tegra_phy->mode = of_usb_get_dr_mode(np); + tegra_phy->mode = usb_get_dr_mode(&pdev->dev); else tegra_phy->mode = 
USB_DR_MODE_HOST; diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index ff23fea49fca..c3fe9e48ce27 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -12,16 +12,10 @@ #include #if IS_ENABLED(CONFIG_OF) -enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); #else -static inline enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) -{ - return USB_DR_MODE_UNKNOWN; -} - static inline bool of_usb_host_tpl_support(struct device_node *np) { return false; diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index bd1dcf816100..67929df86df5 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -119,4 +119,13 @@ enum usb_dr_mode { USB_DR_MODE_OTG, }; +/** + * usb_get_dr_mode - Get dual role mode for given device + * @dev: Pointer to the given device + * + * The function gets phy interface string from property 'dr_mode', + * and returns the corresponding enum usb_dr_mode + */ +extern enum usb_dr_mode usb_get_dr_mode(struct device *dev); + #endif /* __LINUX_USB_OTG_H */ -- cgit v1.2.3 From 13c23cd18bd12ddbf00beddd136e3cd33b4f2dfa Mon Sep 17 00:00:00 2001 From: Franklin S Cooper Jr Date: Fri, 11 Sep 2015 17:30:34 -0700 Subject: Input: edt-ft5x06 - switch to newer gpio framework The old gpio framework used here does not properly honor the ACTIVE_LOW and ACTIVE_HIGH flags, while the newer gpiod framework takes these flags into account when setting gpio values. Since the values previously written were based on voltage rather than logic level, they are changed to reflect this difference. Also use gpiod_set_value_cansleep() since the wake and reset pins can be provided by bus-based io expanders. Switch from msleep(5) to usleep_range(5000, 6000) to avoid a checkpatch warning.
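For illustration, a minimal sketch of the power-up sequence this change leads to (edt_example_power_up() is a hypothetical helper written for this note, not part of the patch; it assumes the descriptors were obtained with devm_gpiod_get_optional() as in the probe path below):

#include <linux/delay.h>
#include <linux/gpio/consumer.h>

static void edt_example_power_up(struct gpio_desc *wake_gpio,
				 struct gpio_desc *reset_gpio)
{
	if (wake_gpio) {
		usleep_range(5000, 6000);
		/* logic 1 = "awake"; gpiod maps it to the right
		 * voltage level based on the ACTIVE_* flag */
		gpiod_set_value_cansleep(wake_gpio, 1);
	}

	if (reset_gpio) {
		usleep_range(5000, 6000);
		/* logic 0 = "reset deasserted", even for an
		 * active-low reset line */
		gpiod_set_value_cansleep(reset_gpio, 0);
		msleep(300);
	}
}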
Signed-off-by: Franklin S Cooper Jr Signed-off-by: Dmitry Torokhov --- .../bindings/input/touchscreen/edt-ft5x06.txt | 4 +- drivers/input/touchscreen/edt-ft5x06.c | 129 ++++++--------------- include/linux/input/edt-ft5x06.h | 3 - 3 files changed, 40 insertions(+), 96 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt index 76db96704a60..9330d4d34cb9 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt @@ -50,6 +50,6 @@ Example: pinctrl-0 = <&edt_ft5x06_pins>; interrupt-parent = <&gpio2>; interrupts = <5 0>; - reset-gpios = <&gpio2 6 1>; - wake-gpios = <&gpio4 9 0>; + reset-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; + wake-gpios = <&gpio4 9 GPIO_ACTIVE_HIGH>; }; diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 48de1e8b3c93..5738722873bb 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -34,8 +34,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -91,9 +90,9 @@ struct edt_ft5x06_ts_data { u16 num_x; u16 num_y; - int reset_pin; - int irq_pin; - int wake_pin; + struct gpio_desc *reset_gpio; + struct gpio_desc *wake_gpio; + struct gpio_desc *irq_gpio; #if defined(CONFIG_DEBUG_FS) struct dentry *debug_dir; @@ -752,45 +751,6 @@ edt_ft5x06_ts_teardown_debugfs(struct edt_ft5x06_ts_data *tsdata) #endif /* CONFIG_DEBUGFS */ -static int edt_ft5x06_ts_reset(struct i2c_client *client, - struct edt_ft5x06_ts_data *tsdata) -{ - int error; - - if (gpio_is_valid(tsdata->wake_pin)) { - error = devm_gpio_request_one(&client->dev, - tsdata->wake_pin, GPIOF_OUT_INIT_LOW, - "edt-ft5x06 wake"); - if (error) { - dev_err(&client->dev, - "Failed to request GPIO %d as wake pin, error %d\n", - tsdata->wake_pin, error); - return error; - } - - msleep(5); - gpio_set_value(tsdata->wake_pin, 1); - } - if (gpio_is_valid(tsdata->reset_pin)) { - /* this pulls reset down, enabling the low active reset */ - error = devm_gpio_request_one(&client->dev, - tsdata->reset_pin, GPIOF_OUT_INIT_LOW, - "edt-ft5x06 reset"); - if (error) { - dev_err(&client->dev, - "Failed to request GPIO %d as reset pin, error %d\n", - tsdata->reset_pin, error); - return error; - } - - msleep(5); - gpio_set_value(tsdata->reset_pin, 1); - msleep(300); - } - - return 0; -} - static int edt_ft5x06_ts_identify(struct i2c_client *client, struct edt_ft5x06_ts_data *tsdata, char *fw_version) @@ -931,30 +891,6 @@ edt_ft5x06_ts_set_regs(struct edt_ft5x06_ts_data *tsdata) } } -#ifdef CONFIG_OF -static int edt_ft5x06_i2c_ts_probe_dt(struct device *dev, - struct edt_ft5x06_ts_data *tsdata) -{ - struct device_node *np = dev->of_node; - - /* - * irq_pin is not needed for DT setup. 
- * irq is associated via 'interrupts' property in DT - */ - tsdata->irq_pin = -EINVAL; - tsdata->reset_pin = of_get_named_gpio(np, "reset-gpios", 0); - tsdata->wake_pin = of_get_named_gpio(np, "wake-gpios", 0); - - return 0; -} -#else -static inline int edt_ft5x06_i2c_ts_probe_dt(struct device *dev, - struct edt_ft5x06_ts_data *tsdata) -{ - return -ENODEV; -} -#endif - static int edt_ft5x06_ts_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -973,32 +909,42 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, return -ENOMEM; } - if (!pdata) { - error = edt_ft5x06_i2c_ts_probe_dt(&client->dev, tsdata); - if (error) { - dev_err(&client->dev, - "DT probe failed and no platform data present\n"); - return error; - } - } else { - tsdata->reset_pin = pdata->reset_pin; - tsdata->irq_pin = pdata->irq_pin; - tsdata->wake_pin = -EINVAL; + tsdata->reset_gpio = devm_gpiod_get_optional(&client->dev, + "reset", GPIOD_OUT_HIGH); + if (IS_ERR(tsdata->reset_gpio)) { + error = PTR_ERR(tsdata->reset_gpio); + dev_err(&client->dev, + "Failed to request GPIO reset pin, error %d\n", error); + return error; } - error = edt_ft5x06_ts_reset(client, tsdata); - if (error) + tsdata->wake_gpio = devm_gpiod_get_optional(&client->dev, + "wake", GPIOD_OUT_LOW); + if (IS_ERR(tsdata->wake_gpio)) { + error = PTR_ERR(tsdata->wake_gpio); + dev_err(&client->dev, + "Failed to request GPIO wake pin, error %d\n", error); return error; + } - if (gpio_is_valid(tsdata->irq_pin)) { - error = devm_gpio_request_one(&client->dev, tsdata->irq_pin, - GPIOF_IN, "edt-ft5x06 irq"); - if (error) { - dev_err(&client->dev, - "Failed to request GPIO %d, error %d\n", - tsdata->irq_pin, error); - return error; - } + tsdata->irq_gpio = devm_gpiod_get_optional(&client->dev, + "irq", GPIOD_IN); + if (IS_ERR(tsdata->irq_gpio)) { + error = PTR_ERR(tsdata->irq_gpio); + dev_err(&client->dev, + "Failed to request GPIO irq pin, error %d\n", error); + return error; + } + + if (tsdata->wake_gpio) { + usleep_range(5000, 6000); + gpiod_set_value_cansleep(tsdata->wake_gpio, 1); + } + + if (tsdata->reset_gpio) { + usleep_range(5000, 6000); + gpiod_set_value_cansleep(tsdata->reset_gpio, 0); + msleep(300); } input = devm_input_allocate_device(&client->dev); @@ -1074,7 +1020,8 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, dev_dbg(&client->dev, "EDT FT5x06 initialized: IRQ %d, WAKE pin %d, Reset pin %d.\n", - client->irq, tsdata->wake_pin, tsdata->reset_pin); + client->irq, desc_to_gpio(tsdata->wake_gpio), + desc_to_gpio(tsdata->reset_gpio)); return 0; diff --git a/include/linux/input/edt-ft5x06.h b/include/linux/input/edt-ft5x06.h index 8a1e0d1a0124..5ca5b4cc308c 100644 --- a/include/linux/input/edt-ft5x06.h +++ b/include/linux/input/edt-ft5x06.h @@ -10,9 +10,6 @@ */ struct edt_ft5x06_platform_data { - int irq_pin; - int reset_pin; - /* startup defaults for operational parameters */ bool use_parameters; u8 gain; -- cgit v1.2.3 From 22ddbacc4bb54ef8577c46114227c06c31e3c47d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 12 Sep 2015 09:37:03 -0700 Subject: Input: edt-ft5x06 - remove support for platform data We do not have any users of platform data in the tree and all newer platforms are either DT or ACPI, so let's drop handling of platform data. 
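As a rough illustration of the resulting DT-only path, this is what the EDT_GET_PROP() expansion below boils down to (example_get_defaults() is a hypothetical standalone variant for this note; it assumes the driver's existing edt_ft5x06_register_write() helper and the documented "gain", "threshold" and "offset" u32 properties):

#include <linux/of.h>

static void example_get_defaults(struct device_node *np,
				 struct edt_ft5x06_ts_data *tsdata)
{
	struct edt_reg_addr *reg_addr = &tsdata->reg_addr;
	u32 val;

	/* each property is optional; only write the register if set */
	if (!of_property_read_u32(np, "threshold", &val))
		edt_ft5x06_register_write(tsdata, reg_addr->reg_threshold, val);
	if (!of_property_read_u32(np, "gain", &val))
		edt_ft5x06_register_write(tsdata, reg_addr->reg_gain, val);
	if (!of_property_read_u32(np, "offset", &val))
		edt_ft5x06_register_write(tsdata, reg_addr->reg_offset, val);
}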
Tested-by: Franklin S Cooper Jr Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/edt-ft5x06.c | 41 ++++------------------------------ include/linux/input/edt-ft5x06.h | 21 ----------------- 2 files changed, 4 insertions(+), 58 deletions(-) delete mode 100644 include/linux/input/edt-ft5x06.h (limited to 'include/linux') diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index df76f19673e8..134c66c431b8 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -37,7 +37,6 @@ #include #include #include -#include #define MAX_SUPPORT_POINTS 5 @@ -809,21 +808,14 @@ static int edt_ft5x06_ts_identify(struct i2c_client *client, return 0; } -#define EDT_ATTR_CHECKSET(name, reg) \ -do { \ - if (pdata->name >= edt_ft5x06_attr_##name.limit_low && \ - pdata->name <= edt_ft5x06_attr_##name.limit_high) \ - edt_ft5x06_register_write(tsdata, reg, pdata->name); \ -} while (0) - #define EDT_GET_PROP(name, reg) { \ u32 val; \ if (of_property_read_u32(np, #name, &val) == 0) \ edt_ft5x06_register_write(tsdata, reg, val); \ } -static void edt_ft5x06_ts_get_dt_defaults(struct device_node *np, - struct edt_ft5x06_ts_data *tsdata) +static void edt_ft5x06_ts_get_defaults(struct device_node *np, + struct edt_ft5x06_ts_data *tsdata) { struct edt_reg_addr *reg_addr = &tsdata->reg_addr; @@ -832,23 +824,6 @@ static void edt_ft5x06_ts_get_dt_defaults(struct device_node *np, EDT_GET_PROP(offset, reg_addr->reg_offset); } -static void -edt_ft5x06_ts_get_defaults(struct edt_ft5x06_ts_data *tsdata, - const struct edt_ft5x06_platform_data *pdata) -{ - struct edt_reg_addr *reg_addr = &tsdata->reg_addr; - - if (!pdata->use_parameters) - return; - - /* pick up defaults from the platform data */ - EDT_ATTR_CHECKSET(threshold, reg_addr->reg_threshold); - EDT_ATTR_CHECKSET(gain, reg_addr->reg_gain); - EDT_ATTR_CHECKSET(offset, reg_addr->reg_offset); - if (reg_addr->reg_report_rate != NO_REGISTER) - EDT_ATTR_CHECKSET(report_rate, reg_addr->reg_report_rate); -} - static void edt_ft5x06_ts_get_parameters(struct edt_ft5x06_ts_data *tsdata) { @@ -893,8 +868,6 @@ edt_ft5x06_ts_set_regs(struct edt_ft5x06_ts_data *tsdata) static int edt_ft5x06_ts_probe(struct i2c_client *client, const struct i2c_device_id *id) { - const struct edt_ft5x06_platform_data *pdata = - dev_get_platdata(&client->dev); struct edt_ft5x06_ts_data *tsdata; struct input_dev *input; int error; @@ -955,12 +928,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, } edt_ft5x06_ts_set_regs(tsdata); - - if (!pdata) - edt_ft5x06_ts_get_dt_defaults(client->dev.of_node, tsdata); - else - edt_ft5x06_ts_get_defaults(tsdata, pdata); - + edt_ft5x06_ts_get_defaults(client->dev.of_node, tsdata); edt_ft5x06_ts_get_parameters(tsdata); dev_dbg(&client->dev, @@ -976,8 +944,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, input_set_abs_params(input, ABS_MT_POSITION_Y, 0, tsdata->num_y * 64 - 1, 0, 0); - if (!pdata) - touchscreen_parse_properties(input, true); + touchscreen_parse_properties(input, true); error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, INPUT_MT_DIRECT); if (error) { diff --git a/include/linux/input/edt-ft5x06.h b/include/linux/input/edt-ft5x06.h deleted file mode 100644 index 5ca5b4cc308c..000000000000 --- a/include/linux/input/edt-ft5x06.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _EDT_FT5X06_H -#define _EDT_FT5X06_H - -/* - * Copyright (c) 2012 Simon Budig, - * - * This program is free software; you can redistribute it and/or modify it - * under the 
terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - */ - -struct edt_ft5x06_platform_data { - /* startup defaults for operational parameters */ - bool use_parameters; - u8 gain; - u8 threshold; - u8 offset; - u8 report_rate; -}; - -#endif /* _EDT_FT5X06_H */ -- cgit v1.2.3 From 8cb7cf56c9fe5412de238465b27ef35b4d2801aa Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 30 Mar 2015 10:59:52 +0100 Subject: firmware: add support for ARM System Control and Power Interface(SCPI) protocol This patch adds support for System Control and Power Interface (SCPI) Message Protocol used between the Application Cores(AP) and the System Control Processor(SCP). The MHU peripheral provides a mechanism for inter-processor communication between SCP's M3 processor and AP. SCP offers control and management of the core/cluster power states, various power domain DVFS including the core/cluster, certain system clocks configuration, thermal sensors and many others. This protocol driver provides an interface for all the client drivers using SCPI to make use of the features offered by the SCP. Signed-off-by: Sudeep Holla Reviewed-by: Jon Medhurst (Tixy) Cc: Jassi Brar Cc: Liviu Dudau Cc: Lorenzo Pieralisi --- MAINTAINERS | 2 + drivers/firmware/Kconfig | 19 ++ drivers/firmware/Makefile | 1 + drivers/firmware/arm_scpi.c | 711 ++++++++++++++++++++++++++++++++++++++++++ include/linux/scpi_protocol.h | 61 ++++ 5 files changed, 794 insertions(+) create mode 100644 drivers/firmware/arm_scpi.c create mode 100644 include/linux/scpi_protocol.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index be7d5c14729b..9598821d6a37 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9154,6 +9154,8 @@ M: Sudeep Holla L: linux-arm-kernel@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/arm/arm,scpi.txt +F: drivers/firmware/arm_scpi.c +F: include/linux/scpi_protocol.h SCSI CDROM DRIVER M: Jens Axboe diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index d8de6a8dd4de..800b7439606e 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -8,6 +8,25 @@ menu "Firmware Drivers" config ARM_PSCI_FW bool +config ARM_SCPI_PROTOCOL + tristate "ARM System Control and Power Interface (SCPI) Message Protocol" + depends on ARM_MHU + help + System Control and Power Interface (SCPI) Message Protocol is + defined for the purpose of communication between the Application + Cores(AP) and the System Control Processor(SCP). The MHU peripheral + provides a mechanism for inter-processor communication between SCP + and AP. + + SCP controls most of the power management on the Application + Processors. It offers control and management of: the core/cluster + power states, various power domain DVFS including the core/cluster, + certain system clocks configuration, thermal sensors and many + others. + + This protocol library provides an interface for all the client drivers + making use of the features offered by the SCP. + config EDD tristate "BIOS Enhanced Disk Drive calls determine boot disk" depends on X86 diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 000830fc6707..e4c3a3fa1580 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -2,6 +2,7 @@ # Makefile for the linux kernel. 
# obj-$(CONFIG_ARM_PSCI_FW) += psci.o +obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o obj-$(CONFIG_DMI) += dmi_scan.o obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o obj-$(CONFIG_EDD) += edd.o diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c new file mode 100644 index 000000000000..cb75c750ca54 --- /dev/null +++ b/drivers/firmware/arm_scpi.c @@ -0,0 +1,711 @@ +/* + * System Control and Power Interface (SCPI) Message Protocol driver + * + * SCPI Message Protocol is used between the System Control Processor(SCP) + * and the Application Processors(AP). The Message Handling Unit(MHU) + * provides a mechanism for inter-processor communication between SCP's + * Cortex M3 and AP. + * + * SCP offers control and management of the core/cluster power states, + * various power domain DVFS including the core/cluster, certain system + * clocks configuration, thermal sensors and many others. + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CMD_ID_SHIFT 0 +#define CMD_ID_MASK 0x7f +#define CMD_TOKEN_ID_SHIFT 8 +#define CMD_TOKEN_ID_MASK 0xff +#define CMD_DATA_SIZE_SHIFT 16 +#define CMD_DATA_SIZE_MASK 0x1ff +#define PACK_SCPI_CMD(cmd_id, tx_sz) \ + ((((cmd_id) & CMD_ID_MASK) << CMD_ID_SHIFT) | \ + (((tx_sz) & CMD_DATA_SIZE_MASK) << CMD_DATA_SIZE_SHIFT)) +#define ADD_SCPI_TOKEN(cmd, token) \ + ((cmd) |= (((token) & CMD_TOKEN_ID_MASK) << CMD_TOKEN_ID_SHIFT)) + +#define CMD_SIZE(cmd) (((cmd) >> CMD_DATA_SIZE_SHIFT) & CMD_DATA_SIZE_MASK) +#define CMD_UNIQ_MASK (CMD_TOKEN_ID_MASK << CMD_TOKEN_ID_SHIFT | CMD_ID_MASK) +#define CMD_XTRACT_UNIQ(cmd) ((cmd) & CMD_UNIQ_MASK) + +#define SCPI_SLOT 0 + +#define MAX_DVFS_DOMAINS 8 +#define MAX_DVFS_OPPS 8 +#define DVFS_LATENCY(hdr) (le32_to_cpu(hdr) >> 16) +#define DVFS_OPP_COUNT(hdr) ((le32_to_cpu(hdr) >> 8) & 0xff) + +#define PROTOCOL_REV_MINOR_BITS 16 +#define PROTOCOL_REV_MINOR_MASK ((1U << PROTOCOL_REV_MINOR_BITS) - 1) +#define PROTOCOL_REV_MAJOR(x) ((x) >> PROTOCOL_REV_MINOR_BITS) +#define PROTOCOL_REV_MINOR(x) ((x) & PROTOCOL_REV_MINOR_MASK) + +#define FW_REV_MAJOR_BITS 24 +#define FW_REV_MINOR_BITS 16 +#define FW_REV_PATCH_MASK ((1U << FW_REV_MINOR_BITS) - 1) +#define FW_REV_MINOR_MASK ((1U << FW_REV_MAJOR_BITS) - 1) +#define FW_REV_MAJOR(x) ((x) >> FW_REV_MAJOR_BITS) +#define FW_REV_MINOR(x) (((x) & FW_REV_MINOR_MASK) >> FW_REV_MINOR_BITS) +#define FW_REV_PATCH(x) ((x) & FW_REV_PATCH_MASK) + +#define MAX_RX_TIMEOUT (msecs_to_jiffies(20)) + +enum scpi_error_codes { + SCPI_SUCCESS = 0, /* Success */ + SCPI_ERR_PARAM = 1, /* Invalid parameter(s) */ + SCPI_ERR_ALIGN = 2, /* Invalid alignment */ + SCPI_ERR_SIZE = 3, /* Invalid size */ + SCPI_ERR_HANDLER = 4, /* Invalid handler/callback */ + SCPI_ERR_ACCESS = 5, /* Invalid access/permission denied */ + SCPI_ERR_RANGE = 6, /* Value 
out of range */ + SCPI_ERR_TIMEOUT = 7, /* Timeout has occurred */ + SCPI_ERR_NOMEM = 8, /* Invalid memory area or pointer */ + SCPI_ERR_PWRSTATE = 9, /* Invalid power state */ + SCPI_ERR_SUPPORT = 10, /* Not supported or disabled */ + SCPI_ERR_DEVICE = 11, /* Device error */ + SCPI_ERR_BUSY = 12, /* Device busy */ + SCPI_ERR_MAX +}; + +enum scpi_std_cmd { + SCPI_CMD_INVALID = 0x00, + SCPI_CMD_SCPI_READY = 0x01, + SCPI_CMD_SCPI_CAPABILITIES = 0x02, + SCPI_CMD_SET_CSS_PWR_STATE = 0x03, + SCPI_CMD_GET_CSS_PWR_STATE = 0x04, + SCPI_CMD_SET_SYS_PWR_STATE = 0x05, + SCPI_CMD_SET_CPU_TIMER = 0x06, + SCPI_CMD_CANCEL_CPU_TIMER = 0x07, + SCPI_CMD_DVFS_CAPABILITIES = 0x08, + SCPI_CMD_GET_DVFS_INFO = 0x09, + SCPI_CMD_SET_DVFS = 0x0a, + SCPI_CMD_GET_DVFS = 0x0b, + SCPI_CMD_GET_DVFS_STAT = 0x0c, + SCPI_CMD_CLOCK_CAPABILITIES = 0x0d, + SCPI_CMD_GET_CLOCK_INFO = 0x0e, + SCPI_CMD_SET_CLOCK_VALUE = 0x0f, + SCPI_CMD_GET_CLOCK_VALUE = 0x10, + SCPI_CMD_PSU_CAPABILITIES = 0x11, + SCPI_CMD_GET_PSU_INFO = 0x12, + SCPI_CMD_SET_PSU = 0x13, + SCPI_CMD_GET_PSU = 0x14, + SCPI_CMD_SENSOR_CAPABILITIES = 0x15, + SCPI_CMD_SENSOR_INFO = 0x16, + SCPI_CMD_SENSOR_VALUE = 0x17, + SCPI_CMD_SENSOR_CFG_PERIODIC = 0x18, + SCPI_CMD_SENSOR_CFG_BOUNDS = 0x19, + SCPI_CMD_SENSOR_ASYNC_VALUE = 0x1a, + SCPI_CMD_SET_DEVICE_PWR_STATE = 0x1b, + SCPI_CMD_GET_DEVICE_PWR_STATE = 0x1c, + SCPI_CMD_COUNT +}; + +struct scpi_xfer { + u32 slot; /* has to be first element */ + u32 cmd; + u32 status; + const void *tx_buf; + void *rx_buf; + unsigned int tx_len; + unsigned int rx_len; + struct list_head node; + struct completion done; +}; + +struct scpi_chan { + struct mbox_client cl; + struct mbox_chan *chan; + void __iomem *tx_payload; + void __iomem *rx_payload; + struct list_head rx_pending; + struct list_head xfers_list; + struct scpi_xfer *xfers; + spinlock_t rx_lock; /* locking for the rx pending list */ + struct mutex xfers_lock; + u8 token; +}; + +struct scpi_drvinfo { + u32 protocol_version; + u32 firmware_version; + int num_chans; + atomic_t next_chan; + struct scpi_ops *scpi_ops; + struct scpi_chan *channels; + struct scpi_dvfs_info *dvfs[MAX_DVFS_DOMAINS]; +}; + +/* + * The SCP firmware only executes in little-endian mode, so any buffers + * shared through SCPI should have their contents converted to little-endian + */ +struct scpi_shared_mem { + __le32 command; + __le32 status; + u8 payload[0]; +} __packed; + +struct scp_capabilities { + __le32 protocol_version; + __le32 event_version; + __le32 platform_version; + __le32 commands[4]; +} __packed; + +struct clk_get_info { + __le16 id; + __le16 flags; + __le32 min_rate; + __le32 max_rate; + u8 name[20]; +} __packed; + +struct clk_get_value { + __le32 rate; +} __packed; + +struct clk_set_value { + __le16 id; + __le16 reserved; + __le32 rate; +} __packed; + +struct dvfs_info { + __le32 header; + struct { + __le32 freq; + __le32 m_volt; + } opps[MAX_DVFS_OPPS]; +} __packed; + +struct dvfs_get { + u8 index; +} __packed; + +struct dvfs_set { + u8 domain; + u8 index; +} __packed; + +static struct scpi_drvinfo *scpi_info; + +static int scpi_linux_errmap[SCPI_ERR_MAX] = { + /* better than switch case as long as return value is continuous */ + 0, /* SCPI_SUCCESS */ + -EINVAL, /* SCPI_ERR_PARAM */ + -ENOEXEC, /* SCPI_ERR_ALIGN */ + -EMSGSIZE, /* SCPI_ERR_SIZE */ + -EINVAL, /* SCPI_ERR_HANDLER */ + -EACCES, /* SCPI_ERR_ACCESS */ + -ERANGE, /* SCPI_ERR_RANGE */ + -ETIMEDOUT, /* SCPI_ERR_TIMEOUT */ + -ENOMEM, /* SCPI_ERR_NOMEM */ + -EINVAL, /* SCPI_ERR_PWRSTATE */ + -EOPNOTSUPP, /* SCPI_ERR_SUPPORT */ + 
-EIO, /* SCPI_ERR_DEVICE */ + -EBUSY, /* SCPI_ERR_BUSY */ +}; + +static inline int scpi_to_linux_errno(int errno) +{ + if (errno >= SCPI_SUCCESS && errno < SCPI_ERR_MAX) + return scpi_linux_errmap[errno]; + return -EIO; +} + +static void scpi_process_cmd(struct scpi_chan *ch, u32 cmd) +{ + unsigned long flags; + struct scpi_xfer *t, *match = NULL; + + spin_lock_irqsave(&ch->rx_lock, flags); + if (list_empty(&ch->rx_pending)) { + spin_unlock_irqrestore(&ch->rx_lock, flags); + return; + } + + list_for_each_entry(t, &ch->rx_pending, node) + if (CMD_XTRACT_UNIQ(t->cmd) == CMD_XTRACT_UNIQ(cmd)) { + list_del(&t->node); + match = t; + break; + } + /* check if wait_for_completion is in progress or timed-out */ + if (match && !completion_done(&match->done)) { + struct scpi_shared_mem *mem = ch->rx_payload; + unsigned int len = min(match->rx_len, CMD_SIZE(cmd)); + + match->status = le32_to_cpu(mem->status); + memcpy_fromio(match->rx_buf, mem->payload, len); + if (match->rx_len > len) + memset(match->rx_buf + len, 0, match->rx_len - len); + complete(&match->done); + } + spin_unlock_irqrestore(&ch->rx_lock, flags); +} + +static void scpi_handle_remote_msg(struct mbox_client *c, void *msg) +{ + struct scpi_chan *ch = container_of(c, struct scpi_chan, cl); + struct scpi_shared_mem *mem = ch->rx_payload; + u32 cmd = le32_to_cpu(mem->command); + + scpi_process_cmd(ch, cmd); +} + +static void scpi_tx_prepare(struct mbox_client *c, void *msg) +{ + unsigned long flags; + struct scpi_xfer *t = msg; + struct scpi_chan *ch = container_of(c, struct scpi_chan, cl); + struct scpi_shared_mem *mem = (struct scpi_shared_mem *)ch->tx_payload; + + if (t->tx_buf) + memcpy_toio(mem->payload, t->tx_buf, t->tx_len); + if (t->rx_buf) { + if (!(++ch->token)) + ++ch->token; + ADD_SCPI_TOKEN(t->cmd, ch->token); + spin_lock_irqsave(&ch->rx_lock, flags); + list_add_tail(&t->node, &ch->rx_pending); + spin_unlock_irqrestore(&ch->rx_lock, flags); + } + mem->command = cpu_to_le32(t->cmd); +} + +static struct scpi_xfer *get_scpi_xfer(struct scpi_chan *ch) +{ + struct scpi_xfer *t; + + mutex_lock(&ch->xfers_lock); + if (list_empty(&ch->xfers_list)) { + mutex_unlock(&ch->xfers_lock); + return NULL; + } + t = list_first_entry(&ch->xfers_list, struct scpi_xfer, node); + list_del(&t->node); + mutex_unlock(&ch->xfers_lock); + return t; +} + +static void put_scpi_xfer(struct scpi_xfer *t, struct scpi_chan *ch) +{ + mutex_lock(&ch->xfers_lock); + list_add_tail(&t->node, &ch->xfers_list); + mutex_unlock(&ch->xfers_lock); +} + +static int scpi_send_message(u8 cmd, void *tx_buf, unsigned int tx_len, + void *rx_buf, unsigned int rx_len) +{ + int ret; + u8 chan; + struct scpi_xfer *msg; + struct scpi_chan *scpi_chan; + + chan = atomic_inc_return(&scpi_info->next_chan) % scpi_info->num_chans; + scpi_chan = scpi_info->channels + chan; + + msg = get_scpi_xfer(scpi_chan); + if (!msg) + return -ENOMEM; + + msg->slot = BIT(SCPI_SLOT); + msg->cmd = PACK_SCPI_CMD(cmd, tx_len); + msg->tx_buf = tx_buf; + msg->tx_len = tx_len; + msg->rx_buf = rx_buf; + msg->rx_len = rx_len; + init_completion(&msg->done); + + ret = mbox_send_message(scpi_chan->chan, msg); + if (ret < 0 || !rx_buf) + goto out; + + if (!wait_for_completion_timeout(&msg->done, MAX_RX_TIMEOUT)) + ret = -ETIMEDOUT; + else + /* first status word */ + ret = le32_to_cpu(msg->status); +out: + if (ret < 0 && rx_buf) /* remove entry from the list if timed-out */ + scpi_process_cmd(scpi_chan, msg->cmd); + + put_scpi_xfer(msg, scpi_chan); + /* SCPI error codes > 0, translate them to Linux scale*/ + 
return ret > 0 ? scpi_to_linux_errno(ret) : ret; +} + +static u32 scpi_get_version(void) +{ + return scpi_info->protocol_version; +} + +static int +scpi_clk_get_range(u16 clk_id, unsigned long *min, unsigned long *max) +{ + int ret; + struct clk_get_info clk; + __le16 le_clk_id = cpu_to_le16(clk_id); + + ret = scpi_send_message(SCPI_CMD_GET_CLOCK_INFO, &le_clk_id, + sizeof(le_clk_id), &clk, sizeof(clk)); + if (!ret) { + *min = le32_to_cpu(clk.min_rate); + *max = le32_to_cpu(clk.max_rate); + } + return ret; +} + +static unsigned long scpi_clk_get_val(u16 clk_id) +{ + int ret; + struct clk_get_value clk; + __le16 le_clk_id = cpu_to_le16(clk_id); + + ret = scpi_send_message(SCPI_CMD_GET_CLOCK_VALUE, &le_clk_id, + sizeof(le_clk_id), &clk, sizeof(clk)); + return ret ? ret : le32_to_cpu(clk.rate); +} + +static int scpi_clk_set_val(u16 clk_id, unsigned long rate) +{ + int stat; + struct clk_set_value clk = { + .id = cpu_to_le16(clk_id), + .rate = cpu_to_le32(rate) + }; + + return scpi_send_message(SCPI_CMD_SET_CLOCK_VALUE, &clk, sizeof(clk), + &stat, sizeof(stat)); +} + +static int scpi_dvfs_get_idx(u8 domain) +{ + int ret; + struct dvfs_get dvfs; + + ret = scpi_send_message(SCPI_CMD_GET_DVFS, &domain, sizeof(domain), + &dvfs, sizeof(dvfs)); + return ret ? ret : dvfs.index; +} + +static int scpi_dvfs_set_idx(u8 domain, u8 index) +{ + int stat; + struct dvfs_set dvfs = {domain, index}; + + return scpi_send_message(SCPI_CMD_SET_DVFS, &dvfs, sizeof(dvfs), + &stat, sizeof(stat)); +} + +static int opp_cmp_func(const void *opp1, const void *opp2) +{ + const struct scpi_opp *t1 = opp1, *t2 = opp2; + + return t1->freq - t2->freq; +} + +static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain) +{ + struct scpi_dvfs_info *info; + struct scpi_opp *opp; + struct dvfs_info buf; + int ret, i; + + if (domain >= MAX_DVFS_DOMAINS) + return ERR_PTR(-EINVAL); + + if (scpi_info->dvfs[domain]) /* data already populated */ + return scpi_info->dvfs[domain]; + + ret = scpi_send_message(SCPI_CMD_GET_DVFS_INFO, &domain, sizeof(domain), + &buf, sizeof(buf)); + + if (ret) + return ERR_PTR(ret); + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + info->count = DVFS_OPP_COUNT(buf.header); + info->latency = DVFS_LATENCY(buf.header) * 1000; /* uS to nS */ + + info->opps = kcalloc(info->count, sizeof(*opp), GFP_KERNEL); + if (!info->opps) { + kfree(info); + return ERR_PTR(-ENOMEM); + } + + for (i = 0, opp = info->opps; i < info->count; i++, opp++) { + opp->freq = le32_to_cpu(buf.opps[i].freq); + opp->m_volt = le32_to_cpu(buf.opps[i].m_volt); + } + + sort(info->opps, info->count, sizeof(*opp), opp_cmp_func, NULL); + + scpi_info->dvfs[domain] = info; + return info; +} + +static struct scpi_ops scpi_ops = { + .get_version = scpi_get_version, + .clk_get_range = scpi_clk_get_range, + .clk_get_val = scpi_clk_get_val, + .clk_set_val = scpi_clk_set_val, + .dvfs_get_idx = scpi_dvfs_get_idx, + .dvfs_set_idx = scpi_dvfs_set_idx, + .dvfs_get_info = scpi_dvfs_get_info, +}; + +struct scpi_ops *get_scpi_ops(void) +{ + return scpi_info ? 
scpi_info->scpi_ops : NULL; +} +EXPORT_SYMBOL_GPL(get_scpi_ops); + +static int scpi_init_versions(struct scpi_drvinfo *info) +{ + int ret; + struct scp_capabilities caps; + + ret = scpi_send_message(SCPI_CMD_SCPI_CAPABILITIES, NULL, 0, + &caps, sizeof(caps)); + if (!ret) { + info->protocol_version = le32_to_cpu(caps.protocol_version); + info->firmware_version = le32_to_cpu(caps.platform_version); + } + return ret; +} + +static ssize_t protocol_version_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev); + + return sprintf(buf, "%d.%d\n", + PROTOCOL_REV_MAJOR(scpi_info->protocol_version), + PROTOCOL_REV_MINOR(scpi_info->protocol_version)); +} +static DEVICE_ATTR_RO(protocol_version); + +static ssize_t firmware_version_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev); + + return sprintf(buf, "%d.%d.%d\n", + FW_REV_MAJOR(scpi_info->firmware_version), + FW_REV_MINOR(scpi_info->firmware_version), + FW_REV_PATCH(scpi_info->firmware_version)); +} +static DEVICE_ATTR_RO(firmware_version); + +static struct attribute *versions_attrs[] = { + &dev_attr_firmware_version.attr, + &dev_attr_protocol_version.attr, + NULL, +}; +ATTRIBUTE_GROUPS(versions); + +static void +scpi_free_channels(struct device *dev, struct scpi_chan *pchan, int count) +{ + int i; + + for (i = 0; i < count && pchan->chan; i++, pchan++) { + mbox_free_channel(pchan->chan); + devm_kfree(dev, pchan->xfers); + devm_iounmap(dev, pchan->rx_payload); + } +} + +static int scpi_remove(struct platform_device *pdev) +{ + int i; + struct device *dev = &pdev->dev; + struct scpi_drvinfo *info = platform_get_drvdata(pdev); + + scpi_info = NULL; /* stop exporting SCPI ops through get_scpi_ops */ + + of_platform_depopulate(dev); + sysfs_remove_groups(&dev->kobj, versions_groups); + scpi_free_channels(dev, info->channels, info->num_chans); + platform_set_drvdata(pdev, NULL); + + for (i = 0; i < MAX_DVFS_DOMAINS && info->dvfs[i]; i++) { + kfree(info->dvfs[i]->opps); + kfree(info->dvfs[i]); + } + devm_kfree(dev, info->channels); + devm_kfree(dev, info); + + return 0; +} + +#define MAX_SCPI_XFERS 10 +static int scpi_alloc_xfer_list(struct device *dev, struct scpi_chan *ch) +{ + int i; + struct scpi_xfer *xfers; + + xfers = devm_kzalloc(dev, MAX_SCPI_XFERS * sizeof(*xfers), GFP_KERNEL); + if (!xfers) + return -ENOMEM; + + ch->xfers = xfers; + for (i = 0; i < MAX_SCPI_XFERS; i++, xfers++) + list_add_tail(&xfers->node, &ch->xfers_list); + return 0; +} + +static int scpi_probe(struct platform_device *pdev) +{ + int count, idx, ret; + struct resource res; + struct scpi_chan *scpi_chan; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + + scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL); + if (!scpi_info) + return -ENOMEM; + + count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells"); + if (count < 0) { + dev_err(dev, "no mboxes property in '%s'\n", np->full_name); + return -ENODEV; + } + + scpi_chan = devm_kcalloc(dev, count, sizeof(*scpi_chan), GFP_KERNEL); + if (!scpi_chan) + return -ENOMEM; + + for (idx = 0; idx < count; idx++) { + resource_size_t size; + struct scpi_chan *pchan = scpi_chan + idx; + struct mbox_client *cl = &pchan->cl; + struct device_node *shmem = of_parse_phandle(np, "shmem", idx); + + if (of_address_to_resource(shmem, 0, &res)) { + dev_err(dev, "failed to get SCPI payload mem resource\n"); + ret = -EINVAL; + goto err; + } + + size = 
resource_size(&res); + pchan->rx_payload = devm_ioremap(dev, res.start, size); + if (!pchan->rx_payload) { + dev_err(dev, "failed to ioremap SCPI payload\n"); + ret = -EADDRNOTAVAIL; + goto err; + } + pchan->tx_payload = pchan->rx_payload + (size >> 1); + + cl->dev = dev; + cl->rx_callback = scpi_handle_remote_msg; + cl->tx_prepare = scpi_tx_prepare; + cl->tx_block = true; + cl->tx_tout = 50; + cl->knows_txdone = false; /* controller can't ack */ + + INIT_LIST_HEAD(&pchan->rx_pending); + INIT_LIST_HEAD(&pchan->xfers_list); + spin_lock_init(&pchan->rx_lock); + mutex_init(&pchan->xfers_lock); + + ret = scpi_alloc_xfer_list(dev, pchan); + if (!ret) { + pchan->chan = mbox_request_channel(cl, idx); + if (!IS_ERR(pchan->chan)) + continue; + ret = PTR_ERR(pchan->chan); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get channel%d err %d\n", + idx, ret); + } +err: + scpi_free_channels(dev, scpi_chan, idx); + scpi_info = NULL; + return ret; + } + + scpi_info->channels = scpi_chan; + scpi_info->num_chans = count; + platform_set_drvdata(pdev, scpi_info); + + ret = scpi_init_versions(scpi_info); + if (ret) { + dev_err(dev, "incorrect or no SCP firmware found\n"); + scpi_remove(pdev); + return ret; + } + + _dev_info(dev, "SCP Protocol %d.%d Firmware %d.%d.%d version\n", + PROTOCOL_REV_MAJOR(scpi_info->protocol_version), + PROTOCOL_REV_MINOR(scpi_info->protocol_version), + FW_REV_MAJOR(scpi_info->firmware_version), + FW_REV_MINOR(scpi_info->firmware_version), + FW_REV_PATCH(scpi_info->firmware_version)); + scpi_info->scpi_ops = &scpi_ops; + + ret = sysfs_create_groups(&dev->kobj, versions_groups); + if (ret) + dev_err(dev, "unable to create sysfs version group\n"); + + return of_platform_populate(dev->of_node, NULL, NULL, dev); +} + +static const struct of_device_id scpi_of_match[] = { + {.compatible = "arm,scpi"}, + {}, +}; + +MODULE_DEVICE_TABLE(of, scpi_of_match); + +static struct platform_driver scpi_driver = { + .driver = { + .name = "scpi_protocol", + .of_match_table = scpi_of_match, + }, + .probe = scpi_probe, + .remove = scpi_remove, +}; +module_platform_driver(scpi_driver); + +MODULE_AUTHOR("Sudeep Holla "); +MODULE_DESCRIPTION("ARM SCPI mailbox protocol driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h new file mode 100644 index 000000000000..e7169cd54e19 --- /dev/null +++ b/include/linux/scpi_protocol.h @@ -0,0 +1,61 @@ +/* + * SCPI Message Protocol driver header + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ +#include + +struct scpi_opp { + u32 freq; + u32 m_volt; +} __packed; + +struct scpi_dvfs_info { + unsigned int count; + unsigned int latency; /* in nanoseconds */ + struct scpi_opp *opps; +}; + +/** + * struct scpi_ops - represents the various operations provided + * by SCP through SCPI message protocol + * @get_version: returns the major and minor revision on the SCPI + * message protocol + * @clk_get_range: gets clock range limit(min - max in Hz) + * @clk_get_val: gets clock value(in Hz) + * @clk_set_val: sets the clock value, setting to 0 will disable the + * clock (if supported) + * @dvfs_get_idx: gets the Operating Point of the given power domain. + * OPP is an index to the list returned by @dvfs_get_info + * @dvfs_set_idx: sets the Operating Point of the given power domain. + * OPP is an index to the list returned by @dvfs_get_info + * @dvfs_get_info: returns the DVFS capabilities of the given power + * domain. It includes the OPP list and the latency information + */ +struct scpi_ops { + u32 (*get_version)(void); + int (*clk_get_range)(u16, unsigned long *, unsigned long *); + unsigned long (*clk_get_val)(u16); + int (*clk_set_val)(u16, unsigned long); + int (*dvfs_get_idx)(u8); + int (*dvfs_set_idx)(u8, u8); + struct scpi_dvfs_info *(*dvfs_get_info)(u8); +}; + +#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) +struct scpi_ops *get_scpi_ops(void); +#else +static inline struct scpi_ops *get_scpi_ops(void) { return NULL; } +#endif -- cgit v1.2.3 From 07294cc2ea3000da706fd88c8ec7dcfadc715e14 Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Mon, 28 Sep 2015 23:59:52 +0200 Subject: USB: Added forgotten parameter description for authorized attribute in usb.h Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 3deccab2ce0a..2cbcf8db517a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -122,6 +122,8 @@ enum usb_interface_condition { * has been deferred. * @needs_binding: flag set when the driver should be re-probed or unbound * following a reset or suspend operation it doesn't support. + * @authorized: This allows to (de)authorize individual interfaces instead + * of a whole device, in contrast to the device authorization. * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. -- cgit v1.2.3 From 78ccb25861d76a8fc5c678d762180e6918834200 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 25 Sep 2015 10:49:15 +0300 Subject: net/mlx5_core: Fix wrong name in struct The name refers to syndrome so use ext_synd instead of ext_sync. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- include/linux/mlx5/device.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 292d76f2a904..6e69de00cea8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -130,7 +130,7 @@ static void print_health_info(struct mlx5_core_dev *dev) pr_info("hw_id 0x%08x\n", read_be32(&h->hw_id)); pr_info("irisc_index %d\n", readb(&h->irisc_index)); pr_info("synd 0x%x: %s\n", readb(&h->synd), hsynd_str(readb(&h->synd))); - pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_sync)); + pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_synd)); } static void poll_health(unsigned long data) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..41e9f3bd663c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -440,7 +440,7 @@ struct health_buffer { __be32 rsvd2; u8 irisc_index; u8 synd; - __be16 ext_sync; + __be16 ext_synd; }; struct mlx5_init_seg { -- cgit v1.2.3 From 55acca90da52b85299c033354e51ddaa7b73e019 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Fri, 18 Sep 2015 22:08:17 +0200 Subject: brcmfmac: Add support for the BCM4365 and BCM4366 PCIE devices. This patch adds support for the BCM4365 and BCM4366 11ac Wave2 PCIE devices. Reviewed-by: Arend Van Spriel Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/brcm80211/brcmfmac/chip.c | 110 +++++++++++++++++++-- drivers/net/wireless/brcm80211/brcmfmac/pcie.c | 22 +++++ .../net/wireless/brcm80211/include/brcm_hw_ids.h | 9 ++ include/linux/bcma/bcma.h | 2 + 4 files changed, 133 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/brcm80211/brcmfmac/chip.c index ff0c7c620c4e..ffc3ace24903 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/chip.c @@ -208,6 +208,7 @@ struct sbsocramregs { }; #define SOCRAMREGOFFS(_f) offsetof(struct sbsocramregs, _f) +#define SYSMEMREGOFFS(_f) offsetof(struct sbsocramregs, _f) #define ARMCR4_CAP (0x04) #define ARMCR4_BANKIDX (0x40) @@ -516,6 +517,9 @@ static int brcmf_chip_cores_check(struct brcmf_chip_priv *ci) case BCMA_CORE_ARM_CR4: cpu_found = true; break; + case BCMA_CORE_ARM_CA7: + cpu_found = true; + break; default: break; } @@ -614,6 +618,29 @@ static void brcmf_chip_socram_ramsize(struct brcmf_core_priv *sr, u32 *ramsize, } } +/** Return the SYS MEM size */ +static u32 brcmf_chip_sysmem_ramsize(struct brcmf_core_priv *sysmem) +{ + u32 memsize = 0; + u32 coreinfo; + u32 idx; + u32 nb; + u32 banksize; + + if (!brcmf_chip_iscoreup(&sysmem->pub)) + brcmf_chip_resetcore(&sysmem->pub, 0, 0, 0); + + coreinfo = brcmf_chip_core_read32(sysmem, SYSMEMREGOFFS(coreinfo)); + nb = (coreinfo & SRCI_SRNB_MASK) >> SRCI_SRNB_SHIFT; + + for (idx = 0; idx < nb; idx++) { + brcmf_chip_socram_banksize(sysmem, idx, &banksize); + memsize += banksize; + } + + return memsize; +} + /** Return the TCM-RAM size of the ARMCR4 core. 
*/ static u32 brcmf_chip_tcm_ramsize(struct brcmf_core_priv *cr4) { @@ -656,6 +683,9 @@ static u32 brcmf_chip_tcm_rambase(struct brcmf_chip_priv *ci) case BRCM_CC_4358_CHIP_ID: case BRCM_CC_43602_CHIP_ID: return 0x180000; + case BRCM_CC_4365_CHIP_ID: + case BRCM_CC_4366_CHIP_ID: + return 0x200000; default: brcmf_err("unknown chip: %s\n", ci->pub.name); break; @@ -678,10 +708,28 @@ static int brcmf_chip_get_raminfo(struct brcmf_chip_priv *ci) return -EINVAL; } } else { - mem = brcmf_chip_get_core(&ci->pub, BCMA_CORE_INTERNAL_MEM); - mem_core = container_of(mem, struct brcmf_core_priv, pub); - brcmf_chip_socram_ramsize(mem_core, &ci->pub.ramsize, - &ci->pub.srsize); + mem = brcmf_chip_get_core(&ci->pub, BCMA_CORE_SYS_MEM); + if (mem) { + mem_core = container_of(mem, struct brcmf_core_priv, + pub); + ci->pub.ramsize = brcmf_chip_sysmem_ramsize(mem_core); + ci->pub.rambase = brcmf_chip_tcm_rambase(ci); + if (!ci->pub.rambase) { + brcmf_err("RAM base not provided with ARM CA7 core\n"); + return -EINVAL; + } + } else { + mem = brcmf_chip_get_core(&ci->pub, + BCMA_CORE_INTERNAL_MEM); + if (!mem) { + brcmf_err("No memory cores found\n"); + return -ENOMEM; + } + mem_core = container_of(mem, struct brcmf_core_priv, + pub); + brcmf_chip_socram_ramsize(mem_core, &ci->pub.ramsize, + &ci->pub.srsize); + } } brcmf_dbg(INFO, "RAM: base=0x%x size=%d (0x%x) sr=%d (0x%x)\n", ci->pub.rambase, ci->pub.ramsize, ci->pub.ramsize, @@ -924,7 +972,7 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci) static void brcmf_chip_disable_arm(struct brcmf_chip_priv *chip, u16 id) { struct brcmf_core *core; - struct brcmf_core_priv *cr4; + struct brcmf_core_priv *cpu; u32 val; @@ -937,10 +985,11 @@ static void brcmf_chip_disable_arm(struct brcmf_chip_priv *chip, u16 id) brcmf_chip_coredisable(core, 0, 0); break; case BCMA_CORE_ARM_CR4: - cr4 = container_of(core, struct brcmf_core_priv, pub); + case BCMA_CORE_ARM_CA7: + cpu = container_of(core, struct brcmf_core_priv, pub); /* clear all IOCTL bits except HALT bit */ - val = chip->ops->read32(chip->ctx, cr4->wrapbase + BCMA_IOCTL); + val = chip->ops->read32(chip->ctx, cpu->wrapbase + BCMA_IOCTL); val &= ARMCR4_BCMA_IOCTL_CPUHALT; brcmf_chip_resetcore(core, val, ARMCR4_BCMA_IOCTL_CPUHALT, ARMCR4_BCMA_IOCTL_CPUHALT); @@ -1162,6 +1211,33 @@ static bool brcmf_chip_cr4_set_active(struct brcmf_chip_priv *chip, u32 rstvec) return true; } +static inline void +brcmf_chip_ca7_set_passive(struct brcmf_chip_priv *chip) +{ + struct brcmf_core *core; + + brcmf_chip_disable_arm(chip, BCMA_CORE_ARM_CA7); + + core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_80211); + brcmf_chip_resetcore(core, D11_BCMA_IOCTL_PHYRESET | + D11_BCMA_IOCTL_PHYCLOCKEN, + D11_BCMA_IOCTL_PHYCLOCKEN, + D11_BCMA_IOCTL_PHYCLOCKEN); +} + +static bool brcmf_chip_ca7_set_active(struct brcmf_chip_priv *chip, u32 rstvec) +{ + struct brcmf_core *core; + + chip->ops->activate(chip->ctx, &chip->pub, rstvec); + + /* restore ARM */ + core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_ARM_CA7); + brcmf_chip_resetcore(core, ARMCR4_BCMA_IOCTL_CPUHALT, 0, 0); + + return true; +} + void brcmf_chip_set_passive(struct brcmf_chip *pub) { struct brcmf_chip_priv *chip; @@ -1175,8 +1251,16 @@ void brcmf_chip_set_passive(struct brcmf_chip *pub) brcmf_chip_cr4_set_passive(chip); return; } - - brcmf_chip_cm3_set_passive(chip); + arm = brcmf_chip_get_core(pub, BCMA_CORE_ARM_CA7); + if (arm) { + brcmf_chip_ca7_set_passive(chip); + return; + } + arm = brcmf_chip_get_core(pub, BCMA_CORE_ARM_CM3); + if (arm) { + 
brcmf_chip_cm3_set_passive(chip); + return; + } } bool brcmf_chip_set_active(struct brcmf_chip *pub, u32 rstvec) @@ -1190,8 +1274,14 @@ bool brcmf_chip_set_active(struct brcmf_chip *pub, u32 rstvec) arm = brcmf_chip_get_core(pub, BCMA_CORE_ARM_CR4); if (arm) return brcmf_chip_cr4_set_active(chip, rstvec); + arm = brcmf_chip_get_core(pub, BCMA_CORE_ARM_CA7); + if (arm) + return brcmf_chip_ca7_set_active(chip, rstvec); + arm = brcmf_chip_get_core(pub, BCMA_CORE_ARM_CM3); + if (arm) + return brcmf_chip_cm3_set_active(chip); - return brcmf_chip_cm3_set_active(chip); + return false; } bool brcmf_chip_sr_capable(struct brcmf_chip *pub) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c index 451022e4cdea..30baf352e234 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c @@ -55,6 +55,10 @@ enum brcmf_pcie_state { #define BRCMF_PCIE_43570_NVRAM_NAME "brcm/brcmfmac43570-pcie.txt" #define BRCMF_PCIE_4358_FW_NAME "brcm/brcmfmac4358-pcie.bin" #define BRCMF_PCIE_4358_NVRAM_NAME "brcm/brcmfmac4358-pcie.txt" +#define BRCMF_PCIE_4365_FW_NAME "brcm/brcmfmac4365b-pcie.bin" +#define BRCMF_PCIE_4365_NVRAM_NAME "brcm/brcmfmac4365b-pcie.txt" +#define BRCMF_PCIE_4366_FW_NAME "brcm/brcmfmac4366b-pcie.bin" +#define BRCMF_PCIE_4366_NVRAM_NAME "brcm/brcmfmac4366b-pcie.txt" #define BRCMF_PCIE_FW_UP_TIMEOUT 2000 /* msec */ @@ -204,6 +208,10 @@ MODULE_FIRMWARE(BRCMF_PCIE_43570_FW_NAME); MODULE_FIRMWARE(BRCMF_PCIE_43570_NVRAM_NAME); MODULE_FIRMWARE(BRCMF_PCIE_4358_FW_NAME); MODULE_FIRMWARE(BRCMF_PCIE_4358_NVRAM_NAME); +MODULE_FIRMWARE(BRCMF_PCIE_4365_FW_NAME); +MODULE_FIRMWARE(BRCMF_PCIE_4365_NVRAM_NAME); +MODULE_FIRMWARE(BRCMF_PCIE_4366_FW_NAME); +MODULE_FIRMWARE(BRCMF_PCIE_4366_NVRAM_NAME); struct brcmf_pcie_console { @@ -1440,6 +1448,14 @@ static int brcmf_pcie_get_fwnames(struct brcmf_pciedev_info *devinfo) fw_name = BRCMF_PCIE_4358_FW_NAME; nvram_name = BRCMF_PCIE_4358_NVRAM_NAME; break; + case BRCM_CC_4365_CHIP_ID: + fw_name = BRCMF_PCIE_4365_FW_NAME; + nvram_name = BRCMF_PCIE_4365_NVRAM_NAME; + break; + case BRCM_CC_4366_CHIP_ID: + fw_name = BRCMF_PCIE_4366_FW_NAME; + nvram_name = BRCMF_PCIE_4366_NVRAM_NAME; + break; default: brcmf_err("Unsupported chip 0x%04x\n", devinfo->ci->chip); return -ENODEV; @@ -1973,6 +1989,12 @@ static struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_2G_DEVICE_ID), BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_5G_DEVICE_ID), BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_RAW_DEVICE_ID), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_DEVICE_ID), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_2G_DEVICE_ID), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_5G_DEVICE_ID), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4366_DEVICE_ID), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4366_2G_DEVICE_ID), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4366_5G_DEVICE_ID), { /* end: all zeroes */ } }; diff --git a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h index 8278376aee40..d823734a4713 100644 --- a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h @@ -48,6 +48,8 @@ #define BRCM_CC_43570_CHIP_ID 43570 #define BRCM_CC_4358_CHIP_ID 0x4358 #define BRCM_CC_43602_CHIP_ID 43602 +#define BRCM_CC_4365_CHIP_ID 0x4365 +#define BRCM_CC_4366_CHIP_ID 0x4366 /* USB Device IDs */ #define BRCM_USB_43143_DEVICE_ID 0xbd1e @@ -67,6 +69,13 @@ #define BRCM_PCIE_43602_2G_DEVICE_ID 0x43bb #define BRCM_PCIE_43602_5G_DEVICE_ID 0x43bc #define 
BRCM_PCIE_43602_RAW_DEVICE_ID 43602 +#define BRCM_PCIE_4365_DEVICE_ID 0x43ca +#define BRCM_PCIE_4365_2G_DEVICE_ID 0x43cb +#define BRCM_PCIE_4365_5G_DEVICE_ID 0x43cc +#define BRCM_PCIE_4366_DEVICE_ID 0x43c3 +#define BRCM_PCIE_4366_2G_DEVICE_ID 0x43c4 +#define BRCM_PCIE_4366_5G_DEVICE_ID 0x43c5 + /* brcmsmac IDs */ #define BCM4313_D11N2G_ID 0x4727 /* 4313 802.11n 2.4G device */ diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 2ff4a9961e1d..3feb1b2d75d8 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -151,6 +151,8 @@ struct bcma_host_ops { #define BCMA_CORE_PCIE2 0x83C /* PCI Express Gen2 */ #define BCMA_CORE_USB30_DEV 0x83D #define BCMA_CORE_ARM_CR4 0x83E +#define BCMA_CORE_ARM_CA7 0x847 +#define BCMA_CORE_SYS_MEM 0x849 #define BCMA_CORE_DEFAULT 0xFFF #define BCMA_MAX_NR_CORES 16 -- cgit v1.2.3 From 30f2136346cab91e1ffd9ee6370d76809f20487a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 21 Sep 2015 22:58:34 +0200 Subject: irqchip/gicv3-its: Add range check for number of allocated pages The number of pages for the ITS table may exceed the maximum of 256. Add a range check and limit the number to this maximum. Based on a patch from Tirumalesh Chalamarla . Signed-off-by: Tirumalesh Chalamarla Signed-off-by: Robert Richter Reviewed-by: Marc Zyngier Acked-by: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Cc: Jason Cooper Link: http://lkml.kernel.org/r/1442869119-1814-2-git-send-email-rric@kernel.org Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3-its.c | 11 ++++++++++- include/linux/irqchip/arm-gic-v3.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index ac7ae2b3cb83..d9052fdf98d7 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -822,6 +822,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its) u64 entry_size = GITS_BASER_ENTRY_SIZE(val); int order = get_order(psz); int alloc_size; + int alloc_pages; u64 tmp; void *base; @@ -856,6 +857,14 @@ static int its_alloc_tables(const char *node_name, struct its_node *its) } alloc_size = (1 << order) * PAGE_SIZE; + alloc_pages = (alloc_size / psz); + if (alloc_pages > GITS_BASER_PAGES_MAX) { + alloc_pages = GITS_BASER_PAGES_MAX; + order = get_order(GITS_BASER_PAGES_MAX * psz); + pr_warn("%s: Device Table too large, reduce its page order to %u (%u pages)\n", + node_name, order, alloc_pages); + } + base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!base) { err = -ENOMEM; @@ -884,7 +893,7 @@ retry_baser: break; } - val |= (alloc_size / psz) - 1; + val |= alloc_pages - 1; writeq_relaxed(val, its->base + GITS_BASER + i * 8); tmp = readq_relaxed(its->base + GITS_BASER + i * 8); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 9eeeb9589acf..c0c8a2ef9d90 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -231,6 +231,7 @@ #define GITS_BASER_PAGE_SIZE_16K (1UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_64K (2UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_MASK (3UL << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGES_MAX 256 #define GITS_BASER_TYPE_NONE 0 #define GITS_BASER_TYPE_DEVICE 1 -- cgit v1.2.3 From 4593fdbe7a2f44d5e64c627c715dd0bcec9bdf14 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:20 +0900 Subject: blk-mq: fix sysfs 
From 4593fdbe7a2f44d5e64c627c715dd0bcec9bdf14 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:20 +0900 Subject: blk-mq: fix sysfs registration/unregistration race There is a race between CPU hotplug handling and adding/deleting a gendisk for blk-mq, where both are trying to register and unregister the same sysfs entries. null_add_dev --> blk_mq_init_queue --> blk_mq_init_allocated_queue --> add to 'all_q_list' (*) --> add_disk --> blk_register_queue --> blk_mq_register_disk (++) null_del_dev --> del_gendisk --> blk_unregister_queue --> blk_mq_unregister_disk (--) --> blk_cleanup_queue --> blk_mq_free_queue --> del from 'all_q_list' (*) blk_mq_queue_reinit --> blk_mq_sysfs_unregister (-) --> blk_mq_sysfs_register (+) While the request queue is added to 'all_q_list' (*), blk_mq_queue_reinit() can be called for the queue anytime by the CPU hotplug callback. But blk_mq_sysfs_unregister (-) and blk_mq_sysfs_register (+) in blk_mq_queue_reinit must not be called before blk_mq_register_disk (++) or after blk_mq_unregister_disk (--) has finished, because '/sys/block/*/mq/' does not exist at those points. There is already a BLK_MQ_F_SYSFS_UP flag in hctx->flags which can be used to track this sysfs state, but it only fixes the issue partially. To fix it completely, we need a per-queue flag instead of a per-hctx flag, with appropriate locking. So this introduces q->mq_sysfs_init_done, which is properly protected with all_q_mutex. Also, we need to ensure that blk_mq_map_swqueue() is called with all_q_mutex held. Since hctx->nr_ctx is reset temporarily and updated in blk_mq_map_swqueue(), we should avoid blk_mq_register_hctx() seeing the temporary hctx->nr_ctx value during CPU hotplug handling or while adding/deleting a gendisk. Signed-off-by: Akinobu Mita Reviewed-by: Ming Lei Cc: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 30 ++++++++++++++++++++++-------- block/blk-mq.c | 6 +++--- include/linux/blk-mq.h | 1 - include/linux/blkdev.h | 2 ++ 4 files changed, 27 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 279c5d674edf..189f5ae6522a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -343,7 +343,7 @@ static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) struct blk_mq_ctx *ctx; int i; - if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) + if (!hctx->nr_ctx) return; hctx_for_each_ctx(hctx, ctx, i) @@ -358,7 +358,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) struct blk_mq_ctx *ctx; int i, ret; - if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) + if (!hctx->nr_ctx) return 0; ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num); @@ -381,6 +381,8 @@ void blk_mq_unregister_disk(struct gendisk *disk) struct blk_mq_ctx *ctx; int i, j; + blk_mq_disable_hotplug(); + queue_for_each_hw_ctx(q, hctx, i) { blk_mq_unregister_hctx(hctx); @@ -395,6 +397,9 @@ void blk_mq_unregister_disk(struct gendisk *disk) kobject_put(&q->mq_kobj); kobject_put(&disk_to_dev(disk)->kobj); + + q->mq_sysfs_init_done = false; + blk_mq_enable_hotplug(); } static void blk_mq_sysfs_init(struct request_queue *q) @@ -425,27 +430,30 @@ int blk_mq_register_disk(struct gendisk *disk) struct blk_mq_hw_ctx *hctx; int ret, i; + blk_mq_disable_hotplug(); + blk_mq_sysfs_init(q); ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) - return ret; + goto out; kobject_uevent(&q->mq_kobj, KOBJ_ADD); queue_for_each_hw_ctx(q, hctx, i) { - hctx->flags |= BLK_MQ_F_SYSFS_UP; ret = blk_mq_register_hctx(hctx); if (ret) break; } - if (ret) { + if (ret) blk_mq_unregister_disk(disk); -
return ret; - } + else + q->mq_sysfs_init_done = true; +out: + blk_mq_enable_hotplug(); - return 0; + return ret; } EXPORT_SYMBOL_GPL(blk_mq_register_disk); @@ -454,6 +462,9 @@ void blk_mq_sysfs_unregister(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + if (!q->mq_sysfs_init_done) + return; + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); } @@ -463,6 +474,9 @@ int blk_mq_sysfs_register(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i, ret = 0; + if (!q->mq_sysfs_init_done) + return ret; + queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2fd7283ec62b..0262131ac5f2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2035,13 +2035,13 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_hctxs; mutex_lock(&all_q_mutex); - list_add_tail(&q->all_q_node, &all_q_list); - mutex_unlock(&all_q_mutex); + list_add_tail(&q->all_q_node, &all_q_list); blk_mq_add_queue_tag_set(set, q); - blk_mq_map_swqueue(q); + mutex_unlock(&all_q_mutex); + return q; err_hctxs: diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 37d1602c4f7a..b80ba4572a31 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -145,7 +145,6 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, - BLK_MQ_F_SYSFS_UP = 1 << 3, BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99da9ebc7377..19c2e947d4d1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -456,6 +456,8 @@ struct request_queue { struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; struct bio_set *bio_split; + + bool mq_sysfs_init_done; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From d815d90bbbc08777c0e3a36f57b97fc4a4fb3150 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:28 -0500 Subject: netfilter: Push struct net down into nf_afinfo.reroute The network namespace is needed when routing a packet. Stop making nf_afinfo.reroute guess which network namespace is the proper namespace to route the packet in. Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- net/ipv4/netfilter.c | 2 +- net/ipv6/netfilter.c | 2 +- net/netfilter/nf_queue.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 987c74cd523c..165ab2d14734 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -283,7 +283,7 @@ struct nf_afinfo { struct flowi *fl, bool strict); void (*saveroute)(const struct sk_buff *skb, struct nf_queue_entry *entry); - int (*reroute)(struct sk_buff *skb, + int (*reroute)(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry); int route_key_size; }; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 61eafc9b4545..9e07e6f23398 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -104,7 +104,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, } } -static int nf_ip_reroute(struct sk_buff *skb, +static int nf_ip_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index b4de08a83e0b..26911b93dc7a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -93,7 +93,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, } } -static int nf_ip6_reroute(struct sk_buff *skb, +static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 9f3c3c25fa73..34f628e16a4c 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -199,7 +199,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(entry->state.pf); - if (!afinfo || afinfo->reroute(skb, entry) < 0) + if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0) verdict = NF_DROP; } -- cgit v1.2.3 From e45f50660ee5fd38a540afabb7c0f65d063db631 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:30 -0500 Subject: ipv4: Pass struct net into ip_route_me_harder Don't make ip_route_me_harder guess which network namespace it is routing in, pass the network namespace in. Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 2 +- net/ipv4/netfilter.c | 5 ++--- net/ipv4/netfilter/ipt_SYNPROXY.c | 4 +++- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_reject_ipv4.c | 2 +- net/ipv4/netfilter/nft_chain_route_ipv4.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 2 +- 8 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 6e4591bb54d4..98c03b2462b5 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -6,7 +6,7 @@ #include -int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 9e07e6f23398..c3776ff6749f 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,9 +17,8 @@ #include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; @@ -116,7 +115,7 @@ static int nf_ip_reroute(struct net *net, struct sk_buff *skb, skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(skb, RTN_UNSPEC); + return ip_route_me_harder(net, skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 0060d9abd514..6a6e762ab27f 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -45,6 +45,8 @@ synproxy_send_tcp(const struct synproxy_net *snet, struct iphdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { + struct net *net = nf_ct_net(snet->tmpl); + nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; @@ -52,7 +54,7 @@ synproxy_send_tcp(const struct synproxy_net *snet, skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; if (nfct) { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 2d6fc911866f..ba5d392a13c4 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -67,7 +67,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index bc3b9dcbf080..5075b7ecd26d 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -431,7 +431,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(skb, RTN_UNSPEC); + err = 
ip_route_me_harder(state->net, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index fb337406b1d2..2f5e925d3264 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -129,7 +129,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; /* "Never happens" */ diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 9f486b302108..2375b0a8be46 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -53,7 +53,7 @@ static unsigned int nf_route_table_hook(void *priv, iph->daddr != daddr || skb->mark != mark || iph->tos != tos) - if (ip_route_me_harder(skb, RTN_UNSPEC)) + if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) ret = NF_DROP; } return ret; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index fb6b6c87d841..800b085242a8 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -720,7 +720,7 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) return 1; return 0; -- cgit v1.2.3 From 5f5d74d723146c5b97c7318b5851af15b30e3304 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:31 -0500 Subject: ipv6: Pass struct net into ip6_route_me_harder Don't make ip6_route_me_harder guess which network namespace it is routing in, pass the network namespace in. Signed-off-by: Eric W. 
Biederman Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 2 +- net/ipv6/netfilter.c | 5 ++--- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 2 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 771574677e83..2ac8369fa96c 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -22,7 +22,7 @@ struct nf_ipv6_ops { }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sk_buff *skb); __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 26911b93dc7a..d11c46833d61 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,9 +18,8 @@ #include #include -int ip6_route_me_harder(struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); unsigned int hh_len; struct dst_entry *dst; @@ -103,7 +102,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(skb); + return ip6_route_me_harder(net, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 8745b592b2f6..abe278b07932 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -65,7 +65,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 18e835ffbef3..238e70c3f7b7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -437,7 +437,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index d42bbc1d7555..9df75bd7c94a 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -52,7 +52,7 @@ static unsigned int nf_route_table_hook(void *priv, skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + return ip6_route_me_harder(state->net, skb) == 0 ? 
ret : NF_DROP; return ret; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 800b085242a8..37dd77a3d0fb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -715,7 +715,7 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(skb) != 0) + ip6_route_me_harder(ipvs->net, skb) != 0) return 1; } else #endif -- cgit v1.2.3 From 53bb724f94f57b67213e08a4c155c8c5eb74c644 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Sep 2015 13:26:59 -0700 Subject: mtd: provide proper 32/64-bit compat_ioctl() support for BLKPG After a bit of poking around wondering why my 32-bit user-space can't seem to send a proper ioctl(BLKPG) to an MTD on my 64-bit kernel (ARM64), I noticed that struct blkpg_ioctl_arg is actually pretty unsuitable for use in the ioctl() ABI, due to its use of raw pointers, and its lack of alignment/packing restrictions (32-bit arches tend to pack the 4 fields into 4 32-bit words, whereas 64-bit arches would add padding after the third int, and make this 6 32-bit words). Anyway, this means BLKPG deserves some special compat_ioctl handling. Do the conversion in a small shim for MTD. block/compat_ioctl.c already has compat support for the block subsystem, but it does so by re-marshalling data to/from user-space (see compat_blkpg_ioctl()). Personally, I think this approach is cleaner. Tested only on MTD, with an ARM32 user space on an ARM64 kernel. Signed-off-by: Brian Norris --- drivers/mtd/mtdchar.c | 42 +++++++++++++++++++++++++++++++++--------- include/linux/blkpg.h | 21 +++++++++++++++++++++ include/uapi/linux/blkpg.h | 6 +++--- 3 files changed, 57 insertions(+), 12 deletions(-) create mode 100644 include/linux/blkpg.h (limited to 'include/linux') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 55fa27ecf4e1..6d19835b80a9 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -498,21 +498,17 @@ static int shrink_ecclayout(const struct nand_ecclayout *from, } static int mtdchar_blkpg_ioctl(struct mtd_info *mtd, - struct blkpg_ioctl_arg __user *arg) + struct blkpg_ioctl_arg *arg) { - struct blkpg_ioctl_arg a; struct blkpg_partition p; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg))) + if (copy_from_user(&p, arg->data, sizeof(p))) return -EFAULT; - if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition))) - return -EFAULT; - - switch (a.op) { + switch (arg->op) { case BLKPG_ADD_PARTITION: /* Only master mtd device must be used to add partitions */ @@ -966,8 +962,13 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) case BLKPG: { - ret = mtdchar_blkpg_ioctl(mtd, - (struct blkpg_ioctl_arg __user *)arg); + struct blkpg_ioctl_arg __user *blk_arg = argp; + struct blkpg_ioctl_arg a; + + if (copy_from_user(&a, blk_arg, sizeof(a))) + ret = -EFAULT; + else + ret = mtdchar_blkpg_ioctl(mtd, &a); break; } @@ -1046,6 +1047,29 @@ static long mtdchar_compat_ioctl(struct file *file, unsigned int cmd, &buf_user->start); break; } + + case BLKPG: + { + /* Convert from blkpg_compat_ioctl_arg to blkpg_ioctl_arg */ + struct blkpg_compat_ioctl_arg __user *uarg = argp; + struct blkpg_compat_ioctl_arg compat_arg; + struct blkpg_ioctl_arg a; + + if (copy_from_user(&compat_arg, uarg, sizeof(compat_arg))) { + ret = -EFAULT; + break; + } + + memset(&a, 0, sizeof(a)); + a.op = compat_arg.op; + a.flags =
compat_arg.flags; + a.datalen = compat_arg.datalen; + a.data = compat_ptr(compat_arg.data); + + ret = mtdchar_blkpg_ioctl(mtd, &a); + break; + } + default: ret = mtdchar_ioctl(file, cmd, (unsigned long)argp); } diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h new file mode 100644 index 000000000000..bef124fde61e --- /dev/null +++ b/include/linux/blkpg.h @@ -0,0 +1,21 @@ +#ifndef _LINUX_BLKPG_H +#define _LINUX_BLKPG_H + +/* + * Partition table and disk geometry handling + */ + +#include +#include + +#ifdef CONFIG_COMPAT +/* For 32-bit/64-bit compatibility of struct blkpg_ioctl_arg */ +struct blkpg_compat_ioctl_arg { + compat_int_t op; + compat_int_t flags; + compat_int_t datalen; + compat_uptr_t data; +}; +#endif + +#endif /* _LINUX_BLKPG_H */ diff --git a/include/uapi/linux/blkpg.h b/include/uapi/linux/blkpg.h index a8519446c111..63739a035085 100644 --- a/include/uapi/linux/blkpg.h +++ b/include/uapi/linux/blkpg.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_BLKPG_H -#define _LINUX_BLKPG_H +#ifndef _UAPI__LINUX_BLKPG_H +#define _UAPI__LINUX_BLKPG_H /* * Partition table and disk geometry handling @@ -56,4 +56,4 @@ struct blkpg_partition { char volname[BLKPG_VOLNAMELTH]; /* volume label */ }; -#endif /* _LINUX_BLKPG_H */ +#endif /* _UAPI__LINUX_BLKPG_H */ -- cgit v1.2.3
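The compat shim above exists because the ioctl payload embeds a raw pointer. The following standalone sketch illustrates the layout mismatch and the field-by-field conversion; the struct names, the fixed-width stand-ins for compat_int_t/compat_uptr_t, and the printed sizes are assumptions modelled on an LP64 target, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

struct native_arg {          /* what a 64-bit kernel sees: 24 bytes on LP64 */
	int op;
	int flags;
	int datalen;
	void *data;          /* 8-byte pointer forces 4 bytes of padding */
};

struct compat_arg {          /* what a 32-bit caller actually passed: 16 bytes */
	int32_t op;          /* compat_int_t stand-in */
	int32_t flags;
	int32_t datalen;
	uint32_t data;       /* compat_uptr_t stand-in: 32-bit user pointer */
};

int main(void)
{
	struct compat_arg c = { 3, 0, 152, 0x1000 };
	struct native_arg a = {
		.op = c.op,                        /* re-marshal each field */
		.flags = c.flags,
		.datalen = c.datalen,
		.data = (void *)(uintptr_t)c.data, /* like compat_ptr() */
	};

	printf("compat layout: %zu bytes, native layout: %zu bytes\n",
	       sizeof(c), sizeof(a));
	printf("op=%d datalen=%d data=%p\n", a.op, a.datalen, a.data);
	return 0;
}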
From 2094acbb714e24e464c810c2d8fa57493fcb25a6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 28 Sep 2015 11:10:31 -0700 Subject: net/ipv4: Pass proto as u8 instead of u16 in ip_check_mc_rcu This patch updates ip_check_mc_rcu so that protocol is passed as a u8 instead of a u16. The motivation is just to avoid any unneeded type transitions, since some systems will require an instruction to zero-extend a u8 field to a u16. Also it makes it a bit more readable as to the fact that protocol is a u8, so there are no byte ordering changes needed to pass it. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- net/ipv4/igmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 908429216d9f..9c9de11549a7 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -110,7 +110,7 @@ struct ip_mc_list { #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) -extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); +extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d38b8b61eaee..de6d4c8ba600 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2569,7 +2569,7 @@ void ip_mc_drop_socket(struct sock *sk) } /* called with rcu_read_lock() */ -int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) +int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u8 proto) { struct ip_mc_list *im; struct ip_mc_list __rcu **mc_hash; -- cgit v1.2.3 From 31b33dfb0a144469dd805514c9e63f4993729a48 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 28 Sep 2015 17:24:25 -0700 Subject: skbuff: Fix skb checksum partial check. Earlier patch 6ae459bda tried to detect a void checksum-partial skb by comparing the pull length to the checksum offset. But it does not work for all cases, since the checksum offset depends on updates to skb->data. The following patch fixes it by validating the checksum start offset after the skb->data pointer is updated. A negative checksum start offset means there is no need to checksum. Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") Reported-by: Andrew Vagin Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b0a30a6e31c..4398411236f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2708,7 +2708,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) <= len) + skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dad4dd37e2aa..fab4599ba8b2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2958,11 +2958,12 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags); */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { + unsigned char *data = skb->data; + BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; + __skb_pull(skb, len); + skb_postpull_rcsum(skb, data, len); + return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); -- cgit v1.2.3
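The reordering in skb_pull_rcsum() is easy to miss, so here is a toy standalone model of the fix; the struct and helpers are stand-ins for illustration, not the sk_buff API. The checksum fixup has to see the data pointer as it was before the pull, so the corrected version saves it first, advances the buffer, and only then calls the post-pull hook with the saved pointer.

#include <assert.h>
#include <stdio.h>

struct toy_skb {
	unsigned char *data;
	unsigned int len;
};

/* stand-in for skb_postpull_rcsum(): must receive the pre-pull start */
static void toy_postpull_rcsum(const unsigned char *start, unsigned int len)
{
	printf("adjust checksum over %u bytes at %p\n", len,
	       (const void *)start);
}

static unsigned char *toy_pull_rcsum(struct toy_skb *skb, unsigned int len)
{
	unsigned char *data = skb->data;    /* save before mutating */

	assert(len <= skb->len);
	skb->data += len;                   /* the pull, like __skb_pull() */
	skb->len -= len;
	toy_postpull_rcsum(data, len);      /* pass the saved pointer */
	return skb->data;
}

int main(void)
{
	unsigned char frame[64] = { 0 };
	struct toy_skb skb = { frame, sizeof(frame) };

	toy_pull_rcsum(&skb, 14);           /* e.g. strip an Ethernet header */
	printf("remaining length: %u\n", skb.len);
	return 0;
}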
From 0536fcc039a8926ec12ec587f41a83f7acafeb82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Sep 2015 07:42:52 -0700 Subject: tcp: prepare fastopen code for upcoming listener changes While auditing the TCP stack for upcoming 'lockless' listener changes, I found I had to change fastopen_init_queue() to properly init the object before publishing it. Otherwise another CPU could try to lock the spinlock before it gets properly initialized. Instead of adding appropriate barriers, just remove the dynamic memory allocations: - The structure is 28 bytes on 64-bit arches. Using an additional 8 bytes for holding a pointer seems overkill. - Two listeners can share the same cache line and performance would suffer. If we really want to save a few bytes, we would instead dynamically allocate the whole struct request_sock_queue in the future. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 22 ++++------------------ include/net/request_sock.h | 7 ++----- net/core/request_sock.c | 9 ++++++++- net/ipv4/af_inet.c | 10 +++------- net/ipv4/inet_connection_sock.c | 17 ++++++++--------- net/ipv4/tcp.c | 14 ++------------ net/ipv4/tcp_fastopen.c | 10 +++++----- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 9 files changed, 35 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fcb573be75d9..e442e6e9a365 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -382,25 +382,11 @@ static inline bool tcp_passive_fastopen(const struct sock *sk) tcp_sk(sk)->fastopen_rsk != NULL); } -extern void tcp_sock_destruct(struct sock *sk); - -static inline int fastopen_init_queue(struct sock *sk, int backlog) +static inline void fastopen_queue_tune(struct sock *sk, int backlog) { - struct request_sock_queue *queue = - &inet_csk(sk)->icsk_accept_queue; - - if (queue->fastopenq == NULL) { - queue->fastopenq = kzalloc( - sizeof(struct fastopen_queue), - sk->sk_allocation); - if (queue->fastopenq == NULL) - return -ENOMEM; - - sk->sk_destruct = tcp_sock_destruct; - spin_lock_init(&queue->fastopenq->lock); - } - queue->fastopenq->max_qlen = backlog; - return 0; + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + + queue->fastopenq.max_qlen = backlog; } static inline void tcp_saved_syn_free(struct tcp_sock *tp) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c146b5284786..d2544de329bd 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -180,11 +180,8 @@ struct request_sock_queue { struct request_sock *rskq_accept_tail; u8 rskq_defer_accept; struct listen_sock *listen_opt; - struct fastopen_queue *fastopenq; /* This is non-NULL iff TFO has been - * enabled on this listener. Check - * max_qlen != 0 in fastopen_queue - * to determine if TFO is enabled - * right at this moment. + struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine * if TFO is enabled. */ /* temporary alignment, our goal is to get rid of this lock */ diff --git a/net/core/request_sock.c b/net/core/request_sock.c index b42f0e26f89e..e22cfa4ed25f 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -59,6 +59,13 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); spin_lock_init(&queue->syn_wait_lock); + + spin_lock_init(&queue->fastopenq.lock); + queue->fastopenq.rskq_rst_head = NULL; + queue->fastopenq.rskq_rst_tail = NULL; + queue->fastopenq.qlen = 0; + queue->fastopenq.max_qlen = 0; + queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; lopt->max_qlen_log = ilog2(nr_table_entries); @@ -174,7 +181,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, struct sock *lsk = req->rsk_listener; struct fastopen_queue *fastopenq; - fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; + fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8a556643b874..3af85eecbe11 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -219,17 +219,13 @@ int inet_listen(struct socket *sock, int backlog) * shutdown() (rather than close()). 
*/ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && - !inet_csk(sk)->icsk_accept_queue.fastopenq) { + !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - err = fastopen_init_queue(sk, backlog); + fastopen_queue_tune(sk, backlog); else if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT2) != 0) - err = fastopen_init_queue(sk, + fastopen_queue_tune(sk, ((uint)sysctl_tcp_fastopen) >> 16); - else - err = 0; - if (err) - goto out; tcp_fastopen_init_key_once(true); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 694a5e8f4f9f..e1527882a578 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -335,9 +335,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) sk_acceptq_removed(sk); if (sk->sk_protocol == IPPROTO_TCP && - tcp_rsk(req)->tfo_listener && - queue->fastopenq) { - spin_lock_bh(&queue->fastopenq->lock); + tcp_rsk(req)->tfo_listener) { + spin_lock_bh(&queue->fastopenq.lock); if (tcp_rsk(req)->tfo_listener) { /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to @@ -348,7 +347,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) req->sk = NULL; req = NULL; } - spin_unlock_bh(&queue->fastopenq->lock); + spin_unlock_bh(&queue->fastopenq.lock); } out: release_sock(sk); @@ -886,12 +885,12 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); reqsk_put(req); } - if (queue->fastopenq) { + if (queue->fastopenq.rskq_rst_head) { /* Free all the reqs queued in rskq_rst_head. */ - spin_lock_bh(&queue->fastopenq->lock); - acc_req = queue->fastopenq->rskq_rst_head; - queue->fastopenq->rskq_rst_head = NULL; - spin_unlock_bh(&queue->fastopenq->lock); + spin_lock_bh(&queue->fastopenq.lock); + acc_req = queue->fastopenq.rskq_rst_head; + queue->fastopenq.rskq_rst_head = NULL; + spin_unlock_bh(&queue->fastopenq.lock); while ((req = acc_req) != NULL) { acc_req = req->dl_next; reqsk_put(req); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b8b8fa184f75..3c96fa87ff9e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2253,13 +2253,6 @@ int tcp_disconnect(struct sock *sk, int flags) } EXPORT_SYMBOL(tcp_disconnect); -void tcp_sock_destruct(struct sock *sk) -{ - inet_sock_destruct(sk); - - kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); -} - static inline bool tcp_can_repair_sock(const struct sock *sk) { return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && @@ -2581,7 +2574,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, TCPF_LISTEN))) { tcp_fastopen_init_key_once(true); - err = fastopen_init_queue(sk, val); + fastopen_queue_tune(sk, val); } else { err = -EINVAL; } @@ -2849,10 +2842,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - if (icsk->icsk_accept_queue.fastopenq) - val = icsk->icsk_accept_queue.fastopenq->max_qlen; - else - val = 0; + val = icsk->icsk_accept_queue.fastopenq.max_qlen; break; case TCP_TIMESTAMP: diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index db43c6286cf7..f69f436fcbcc 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -142,9 +142,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, if (!child) return NULL; - spin_lock(&queue->fastopenq->lock); - queue->fastopenq->qlen++; - spin_unlock(&queue->fastopenq->lock); + spin_lock(&queue->fastopenq.lock); + queue->fastopenq.qlen++; + spin_unlock(&queue->fastopenq.lock); /* Initialize the child 
socket. Have to fix some values to take * into account the child is a Fast Open socket and is created * @@ -237,8 +237,8 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * between qlen overflow causing Fast Open to be disabled * temporarily vs a server not supporting Fast Open at all. */ - fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - if (!fastopenq || fastopenq->max_qlen == 0) + fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; + if (fastopenq->max_qlen == 0) return false; if (fastopenq->qlen >= fastopenq->max_qlen) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f551e9e862db..64ece718d66c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2186,7 +2186,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_sock *inet = inet_sk(sk); - struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; + const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 97bc26e0cd0f..0ac64f47f882 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1672,7 +1672,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; + const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1716,7 +1716,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, sp->sk_state == TCP_LISTEN ? - (fastopenq ? fastopenq->max_qlen : 0) : + fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } -- cgit v1.2.3
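The allocation removal above follows a general pattern: when a sub-object is small and its lifetime matches its parent's, embedding it avoids both the failure path and the publish-before-init race. A hedged standalone sketch of that pattern follows, using a pthread mutex in place of the kernel spinlock and simplified types that only mirror the shape of fastopen_queue.

#include <pthread.h>
#include <stdio.h>

struct sketch_fastopen_queue {
	pthread_mutex_t lock;  /* the kernel uses a spinlock here */
	int qlen;
	int max_qlen;          /* 0 means TCP Fast Open disabled */
};

struct sketch_accept_queue {
	/* embedded, not a pointer: no allocation, no NULL checks, and the
	 * lock is initialized before any other thread can see the queue */
	struct sketch_fastopen_queue fastopenq;
};

static void sketch_queue_alloc(struct sketch_accept_queue *q)
{
	pthread_mutex_init(&q->fastopenq.lock, NULL);
	q->fastopenq.qlen = 0;
	q->fastopenq.max_qlen = 0;
}

/* analogous to fastopen_queue_tune(): enabling TFO is now just a store */
static void sketch_queue_tune(struct sketch_accept_queue *q, int backlog)
{
	q->fastopenq.max_qlen = backlog;
}

int main(void)
{
	struct sketch_accept_queue q;

	sketch_queue_alloc(&q);
	sketch_queue_tune(&q, 16);
	printf("TFO %s, max_qlen=%d\n",
	       q.fastopenq.max_qlen ? "enabled" : "disabled",
	       q.fastopenq.max_qlen);
	return 0;
}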
From 007979eaf94d1c888d8c7cf8a5250c2c6c9bd98e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:10 -0700 Subject: net: Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER and update the name of the netif_is_vrf and netif_index_is_vrf macros. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 6 +++--- include/linux/netdevice.h | 14 +++++++------- include/net/route.h | 2 +- include/net/vrf.h | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/udp.c | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4ecb3a3e516a..2d7418e0b908 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -438,7 +438,7 @@ out_fail: static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { - if (netif_is_vrf(port_dev) || vrf_is_slave(port_dev)) + if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev); @@ -591,7 +591,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); - dev->priv_flags |= IFF_VRF_MASTER; + dev->priv_flags |= IFF_L3MDEV_MASTER; err = -ENOMEM; vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL); @@ -657,7 +657,7 @@ static int vrf_device_event(struct notifier_block *unused, struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr); struct net_device *vrf_dev; - if (!vrf_ptr || netif_is_vrf(dev)) + if (!vrf_ptr || netif_is_l3_master(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d2ffeafc9998..99c33e83822f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1258,7 +1258,7 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device - * @IFF_VRF_MASTER: device is a VRF master + * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master */ @@ -1283,7 +1283,7 @@ enum netdev_priv_flags { IFF_XMIT_DST_RELEASE_PERM = 1<<17, IFF_IPVLAN_MASTER = 1<<18, IFF_IPVLAN_SLAVE = 1<<19, - IFF_VRF_MASTER = 1<<20, + IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, }; @@ -1308,7 +1308,7 @@ enum netdev_priv_flags { #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM #define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE -#define IFF_VRF_MASTER IFF_VRF_MASTER +#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH @@ -3824,9 +3824,9 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } -static inline bool netif_is_vrf(const struct net_device *dev) +static inline bool netif_is_l3_master(const struct net_device *dev) { - return dev->priv_flags & IFF_VRF_MASTER; + return dev->priv_flags & IFF_L3MDEV_MASTER; } static inline bool netif_is_bridge_master(const struct net_device *dev) @@ -3839,7 +3839,7 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_index_is_vrf(struct net *net, int ifindex) +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { bool rc = false; @@ -3853,7 +3853,7 @@ static inline bool netif_index_is_vrf(struct net *net, int ifindex) dev = dev_get_by_index_rcu(net, ifindex); if (dev) - rc = netif_is_vrf(dev); + rc = netif_is_l3_master(dev); rcu_read_unlock(); #endif diff --git a/include/net/route.h b/include/net/route.h index d1bd90bb3187..a565d0dad12c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -256,7 +256,7 @@ 
static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (netif_index_is_vrf(sock_net(sk), oif)) + if (netif_index_is_l3_master(sock_net(sk), oif)) flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, diff --git a/include/net/vrf.h b/include/net/vrf.h index 593e6094ddd4..34bb3f69def2 100644 --- a/include/net/vrf.h +++ b/include/net/vrf.h @@ -43,7 +43,7 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev) if (!dev) return 0; - if (netif_is_vrf(dev)) { + if (netif_is_l3_master(dev)) { ifindex = dev->ifindex; } else { vrf_ptr = rcu_dereference(dev->vrf_ptr); @@ -125,7 +125,7 @@ static inline u32 vrf_dev_table_rtnl(const struct net_device *dev) return tb_id; } -/* caller has already checked netif_is_vrf(dev) */ +/* caller has already checked netif_is_l3_master(dev) */ static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) { struct rtable *rth = ERR_PTR(-ENETUNREACH); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 06d2c87ed505..aff6766922e8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1571,7 +1571,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_vrf(net, skb->skb_iif)) + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; flowi4_init_output(&fl4, oif, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c84a6664b30..a670f894ce13 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2124,7 +2124,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - if (netif_is_vrf(dev_out) && + if (netif_is_l3_master(dev_out) && !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { rth = vrf_dev_get_rth(dev_out); goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f7d1d5e19e95..156ba75b6000 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1021,7 +1021,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) * device lookup source address from VRF table. This mimics * behavior of ip_route_connect{_init}. */ - if (netif_index_is_vrf(net, ipc.oif)) { + if (netif_index_is_l3_master(net, ipc.oif)) { flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, (flow_flags | FLOWI_FLAG_VRFSRC | -- cgit v1.2.3 From 1b69c6d0ae90b7f1a4f61d5c8209d5cb7a55f849 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:11 -0700 Subject: net: Introduce L3 Master device abstraction L3 master devices allow users of the abstraction to influence FIB lookups for enslaved devices. Current API provides a means for the master device to return a specific FIB table for an enslaved device, to return an rtable/custom dst and influence the OIF used for fib lookups. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- MAINTAINERS | 7 +++ include/linux/netdevice.h | 3 ++ include/net/l3mdev.h | 125 ++++++++++++++++++++++++++++++++++++++++++++++ net/Kconfig | 1 + net/Makefile | 3 ++ net/l3mdev/Kconfig | 10 ++++ net/l3mdev/Makefile | 5 ++ net/l3mdev/l3mdev.c | 92 ++++++++++++++++++++++++++++++++++ 8 files changed, 246 insertions(+) create mode 100644 include/net/l3mdev.h create mode 100644 net/l3mdev/Kconfig create mode 100644 net/l3mdev/Makefile create mode 100644 net/l3mdev/l3mdev.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index bcd263de4827..3f2d7a9d0bbf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6095,6 +6095,13 @@ F: Documentation/auxdisplay/ks0108 F: drivers/auxdisplay/ks0108.c F: include/linux/ks0108.h +L3MDEV +M: David Ahern +L: netdev@vger.kernel.org +S: Maintained +F: net/l3mdev +F: include/net/l3mdev.h + LAPB module L: linux-x25@vger.kernel.org S: Orphan diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 99c33e83822f..c7f14794fe14 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1587,6 +1587,9 @@ struct net_device { #ifdef CONFIG_NET_SWITCHDEV const struct switchdev_ops *switchdev_ops; #endif +#ifdef CONFIG_NET_L3_MASTER_DEV + const struct l3mdev_ops *l3mdev_ops; +#endif const struct header_ops *header_ops; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h new file mode 100644 index 000000000000..e382c777bab8 --- /dev/null +++ b/include/net/l3mdev.h @@ -0,0 +1,125 @@ +/* + * include/net/l3mdev.h - L3 master device API + * Copyright (c) 2015 Cumulus Networks + * Copyright (c) 2015 David Ahern + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#ifndef _NET_L3MDEV_H_ +#define _NET_L3MDEV_H_ + +/** + * struct l3mdev_ops - l3mdev operations + * + * @l3mdev_fib_table: Get FIB table id to use for lookups + * + * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device + */ + +struct l3mdev_ops { + u32 (*l3mdev_fib_table)(const struct net_device *dev); + struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, + const struct flowi4 *fl4); +}; + +#ifdef CONFIG_NET_L3_MASTER_DEV + +int l3mdev_master_ifindex_rcu(struct net_device *dev); +static inline int l3mdev_master_ifindex(struct net_device *dev) +{ + int ifindex; + + rcu_read_lock(); + ifindex = l3mdev_master_ifindex_rcu(dev); + rcu_read_unlock(); + + return ifindex; +} + +/* get index of an interface to use for FIB lookups. For devices + * enslaved to an L3 master device FIB lookups are based on the + * master index + */ +static inline int l3mdev_fib_oif_rcu(struct net_device *dev) +{ + return l3mdev_master_ifindex_rcu(dev) ? 
: dev->ifindex; +} + +static inline int l3mdev_fib_oif(struct net_device *dev) +{ + int oif; + + rcu_read_lock(); + oif = l3mdev_fib_oif_rcu(dev); + rcu_read_unlock(); + + return oif; +} + +u32 l3mdev_fib_table_rcu(const struct net_device *dev); +u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); +static inline u32 l3mdev_fib_table(const struct net_device *dev) +{ + u32 tb_id; + + rcu_read_lock(); + tb_id = l3mdev_fib_table_rcu(dev); + rcu_read_unlock(); + + return tb_id; +} + +static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, + const struct flowi4 *fl4) +{ + if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable) + return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4); + + return NULL; +} + +#else + +static inline int l3mdev_master_ifindex_rcu(struct net_device *dev) +{ + return 0; +} +static inline int l3mdev_master_ifindex(struct net_device *dev) +{ + return 0; +} + +static inline int l3mdev_fib_oif_rcu(struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} +static inline int l3mdev_fib_oif(struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) +{ + return 0; +} +static inline u32 l3mdev_fib_table(const struct net_device *dev) +{ + return 0; +} +static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) +{ + return 0; +} + +static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, + const struct flowi4 *fl4) +{ + return NULL; +} + +#endif + +#endif /* _NET_L3MDEV_H_ */ diff --git a/net/Kconfig b/net/Kconfig index 7021c1bf44d6..127da94ae25e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,6 +232,7 @@ source "net/netlink/Kconfig" source "net/mpls/Kconfig" source "net/hsr/Kconfig" source "net/switchdev/Kconfig" +source "net/l3mdev/Kconfig" config RPS bool diff --git a/net/Makefile b/net/Makefile index 3995613e5510..a5d04098dfce 100644 --- a/net/Makefile +++ b/net/Makefile @@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ endif +ifneq ($(CONFIG_NET_L3_MASTER_DEV),) +obj-y += l3mdev/ +endif diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig new file mode 100644 index 000000000000..5d47325037bc --- /dev/null +++ b/net/l3mdev/Kconfig @@ -0,0 +1,10 @@ +# +# Configuration for L3 master device support +# + +config NET_L3_MASTER_DEV + bool "L3 Master device support" + depends on INET || IPV6 + ---help--- + This module provides glue between core networking code and device + drivers to support L3 master devices like VRF. diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile new file mode 100644 index 000000000000..84a53a6f609a --- /dev/null +++ b/net/l3mdev/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the L3 device API +# + +obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c new file mode 100644 index 000000000000..ddf75ad41713 --- /dev/null +++ b/net/l3mdev/l3mdev.c @@ -0,0 +1,92 @@ +/* + * net/l3mdev/l3mdev.c - L3 master device implementation + * Copyright (c) 2015 Cumulus Networks + * Copyright (c) 2015 David Ahern + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include + +/** + * l3mdev_master_ifindex - get index of L3 master device + * @dev: targeted interface + */ + +int l3mdev_master_ifindex_rcu(struct net_device *dev) +{ + int ifindex = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + ifindex = dev->ifindex; + } else if (dev->flags & IFF_SLAVE) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && netif_is_l3_master(master)) + ifindex = master->ifindex; + } + + return ifindex; +} +EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); + +/** + * l3mdev_fib_table - get FIB table id associated with an L3 + * master interface + * @dev: targeted interface + */ + +u32 l3mdev_fib_table_rcu(const struct net_device *dev) +{ + u32 tb_id = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + if (dev->l3mdev_ops->l3mdev_fib_table) + tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); + } else if (dev->flags & IFF_SLAVE) { + /* Users of netdev_master_upper_dev_get_rcu need non-const, + * but current inet_*type functions take a const + */ + struct net_device *_dev = (struct net_device *) dev; + const struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(_dev); + if (master && netif_is_l3_master(master) && + master->l3mdev_ops->l3mdev_fib_table) + tb_id = master->l3mdev_ops->l3mdev_fib_table(master); + } + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu); + +u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + u32 tb_id = 0; + + if (!ifindex) + return 0; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + tb_id = l3mdev_fib_table_rcu(dev); + + rcu_read_unlock(); + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); -- cgit v1.2.3
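To show how a master device is expected to plug into this API, here is a hypothetical, self-contained sketch; the toy_* types and the VRF-style callback are illustrative only (the real consumer is the VRF driver), but the dispatch mirrors l3mdev_fib_table_rcu() above: if the device is flagged as an L3 master and provides the hook, the FIB lookup is redirected to the table it owns.

#include <stdio.h>

struct toy_net_device;

struct toy_l3mdev_ops {
	unsigned int (*l3mdev_fib_table)(const struct toy_net_device *dev);
};

#define TOY_IFF_L3MDEV_MASTER (1u << 20)

struct toy_net_device {
	unsigned int priv_flags;
	unsigned int tb_id;      /* FIB table owned by this master */
	const struct toy_l3mdev_ops *l3mdev_ops;
};

static unsigned int toy_vrf_fib_table(const struct toy_net_device *dev)
{
	return dev->tb_id;       /* a VRF-like driver returns its own table */
}

static const struct toy_l3mdev_ops toy_vrf_l3mdev_ops = {
	.l3mdev_fib_table = toy_vrf_fib_table,
};

/* mirrors the master branch of l3mdev_fib_table_rcu() */
static unsigned int toy_fib_table(const struct toy_net_device *dev)
{
	if ((dev->priv_flags & TOY_IFF_L3MDEV_MASTER) &&
	    dev->l3mdev_ops && dev->l3mdev_ops->l3mdev_fib_table)
		return dev->l3mdev_ops->l3mdev_fib_table(dev);
	return 0;                /* 0: fall back to the default table */
}

int main(void)
{
	struct toy_net_device vrf = {
		.priv_flags = TOY_IFF_L3MDEV_MASTER,
		.tb_id = 10,
		.l3mdev_ops = &toy_vrf_l3mdev_ops,
	};

	printf("slave lookups use FIB table %u\n", toy_fib_table(&vrf));
	return 0;
}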
From 93a7e7e837af6846052481da974320c19ab82e5c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:16 -0700 Subject: net: Remove the now unused vrf_ptr Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 32 ++------------------------------ include/linux/netdevice.h | 2 -- include/net/vrf.h | 6 ------ 3 files changed, 2 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 72f1892ebad0..df872f4efb0d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -396,18 +396,15 @@ static void __vrf_insert_slave(struct slave_queue *queue, struct slave *slave) static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { - struct net_vrf_dev *vrf_ptr = kmalloc(sizeof(*vrf_ptr), GFP_KERNEL); struct slave *slave = kzalloc(sizeof(*slave), GFP_KERNEL); struct net_vrf *vrf = netdev_priv(dev); struct slave_queue *queue = &vrf->queue; int ret = -ENOMEM; - if (!slave || !vrf_ptr) + if (!slave) goto out_fail; slave->dev = port_dev; - vrf_ptr->ifindex = dev->ifindex; - vrf_ptr->tb_id = vrf->tb_id; /* register the packet handler for slave ports */ ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev); @@ -424,7 +421,6 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) port_dev->flags |= IFF_SLAVE; __vrf_insert_slave(queue, slave); - rcu_assign_pointer(port_dev->vrf_ptr, vrf_ptr); cycle_netdev(port_dev); return 0; @@ -432,7 +428,6 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) out_unregister: netdev_rx_handler_unregister(port_dev); out_fail: - kfree(vrf_ptr); kfree(slave); return ret; } @@ -448,21 +443,15 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) /* inverse of do_vrf_add_slave */ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { - struct net_vrf_dev *vrf_ptr = rtnl_dereference(port_dev->vrf_ptr); struct net_vrf *vrf = netdev_priv(dev); struct slave_queue *queue = &vrf->queue; struct slave *slave; - RCU_INIT_POINTER(port_dev->vrf_ptr, NULL); - netdev_upper_dev_unlink(port_dev, dev); port_dev->flags &= ~IFF_SLAVE; netdev_rx_handler_unregister(port_dev); - /* after netdev_rx_handler_unregister for synchronize_rcu */ - kfree(vrf_ptr); - cycle_netdev(port_dev); slave = __vrf_find_slave_dev(queue, port_dev); @@ -601,10 +590,6 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[]) static void vrf_dellink(struct net_device *dev, struct list_head *head) { - struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr); - - RCU_INIT_POINTER(dev->vrf_ptr, NULL); - kfree_rcu(vrf_ptr, rcu); unregister_netdevice_queue(dev, head); } @@ -612,7 +597,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_vrf *vrf = netdev_priv(dev); - struct net_vrf_dev *vrf_ptr; int err; if (!data || !data[IFLA_VRF_TABLE]) @@ -622,24 +606,13 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_L3MDEV_MASTER; - err = -ENOMEM; - vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL); - if (!vrf_ptr) - goto out_fail; - - vrf_ptr->ifindex = dev->ifindex; - vrf_ptr->tb_id = vrf->tb_id; - err = register_netdevice(dev); if (err < 0) goto out_fail; - rcu_assign_pointer(dev->vrf_ptr, vrf_ptr); - return 0; out_fail: - kfree(vrf_ptr); free_netdev(dev); return err; } @@ -683,10 +656,9 @@ static int vrf_device_event(struct notifier_block *unused, /* only care about unregister events to drop slave references */ if (event == NETDEV_UNREGISTER) { - struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr); struct net_device *vrf_dev; - if (!vrf_ptr || 
netif_is_l3_master(dev)) + if (netif_is_l3_master(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c7f14794fe14..72bf9e37a2f0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1427,7 +1427,6 @@ enum netdev_priv_flags { * @dn_ptr: DECnet specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data - * @vrf_ptr: VRF specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * * @last_rx: Time of last Rx @@ -1649,7 +1648,6 @@ struct net_device { struct dn_dev __rcu *dn_ptr; struct inet6_dev __rcu *ip6_ptr; void *ax25_ptr; - struct net_vrf_dev __rcu *vrf_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; #if IS_ENABLED(CONFIG_MPLS_ROUTING) diff --git a/include/net/vrf.h b/include/net/vrf.h index 5bba1535ba73..e83fc38770dd 100644 --- a/include/net/vrf.h +++ b/include/net/vrf.h @@ -11,12 +11,6 @@ #ifndef __LINUX_NET_VRF_H #define __LINUX_NET_VRF_H -struct net_vrf_dev { - struct rcu_head rcu; - int ifindex; /* ifindex of master dev */ - u32 tb_id; /* table id for VRF */ -}; - struct slave { struct list_head list; struct net_device *dev; -- cgit v1.2.3 From 9478d12d33ad12d29c5343ae7346b51bc1f4c5a9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:18 -0700 Subject: net: Move netif_index_is_l3_master to l3mdev.h Change CONFIG dependency to CONFIG_NET_L3_MASTER_DEV as well. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 21 --------------------- include/net/l3mdev.h | 24 ++++++++++++++++++++++++ include/net/route.h | 1 + 3 files changed, 25 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 72bf9e37a2f0..b9450784ae06 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3840,27 +3840,6 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - bool rc = false; - -#if IS_ENABLED(CONFIG_NET_VRF) - struct net_device *dev; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_l3_master(dev); - - rcu_read_unlock(); -#endif - return rc; -} - /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e382c777bab8..87cee05a0a17 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -81,6 +81,25 @@ static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, return NULL; } +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + struct net_device *dev; + bool rc = false; + + if (ifindex == 0) + return false; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = netif_is_l3_master(dev); + + rcu_read_unlock(); + + return rc; +} + #else static inline int l3mdev_master_ifindex_rcu(struct net_device *dev) @@ -120,6 +139,11 @@ static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, return NULL; } +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + return false; +} + #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/include/net/route.h b/include/net/route.h index a565d0dad12c..e211dc167db1 100644 --- 
a/include/net/route.h +++ b/include/net/route.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 7d8c6e391575ee86c870b88635a163743fca9eac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Jun 2015 22:12:04 -0500 Subject: ipv6: Pass struct net through ip6_fragment Signed-off-by: Eric W. Biederman --- include/linux/netfilter_ipv6.h | 4 ++-- include/net/ip6_route.h | 4 ++-- net/bridge/br_netfilter_hooks.c | 2 +- net/ipv6/ip6_output.c | 16 +++++++--------- net/ipv6/xfrm6_output.c | 10 ++++++++-- net/openvswitch/actions.c | 2 +- 6 files changed, 21 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 2ac8369fa96c..47c6b04c28c0 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -17,8 +17,8 @@ struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); + int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); }; #ifdef CONFIG_NETFILTER diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 297629aadb19..2bfb2ad2fab1 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -173,8 +173,8 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } -int ip6_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); +int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 00e356c236cf..815994d5b02d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -786,7 +786,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff data->size); if (v6ops) - return v6ops->fragment(sk, skb, br_nf_push_frag_xmit_sk); + return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); kfree_skb(skb); return -EMSGSIZE; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a598fe2c0849..caf7d14a1bdd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,11 +56,10 @@ #include #include -static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; - struct net *net = dev_net(dev); struct neighbour *neigh; struct in6_addr *nexthop; int ret; @@ -126,9 +125,9 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(sk, skb, ip6_finish_output2); + return ip6_fragment(net, sk, skb, ip6_finish_output2); else - return ip6_finish_output2(sk, skb); + return ip6_finish_output2(net, sk, skb); } int ip6_output(struct sock *sk, struct sk_buff *skb) @@ -554,8 +553,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); 
} -int ip6_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -568,7 +567,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - struct net *net = dev_net(skb_dst(skb)->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -688,7 +686,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ip6_copy_metadata(frag, skb); } - err = output(sk, skb); + err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -816,7 +814,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(sk, frag); + err = output(net, sk, frag); if (err) goto fail; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0c3e9ffcf231..335066a64b45 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -131,6 +131,12 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } +static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + return x->outer_mode->afinfo->output_finish(sk, skb); +} + static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -160,8 +166,8 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (x->props.mode == XFRM_MODE_TUNNEL && ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(sk, skb, - x->outer_mode->afinfo->output_finish); + return ip6_fragment(net, sk, skb, + __xfrm6_output_finish); } return x->outer_mode->afinfo->output_finish(sk, skb); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b281b2b76c3f..f33c627f97b3 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -727,7 +727,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, skb_dst_set_noref(skb, &ovs_rt.dst); IP6CB(skb)->frag_max_size = mru; - v6ops->fragment(skb->sk, skb, ovs_vport_output_sk); + v6ops->fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", -- cgit v1.2.3 From ce6f7496050e29108bcefe0413df1789648fb38f Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 29 Sep 2015 22:57:24 +0900 Subject: extcon: gpio: Add the missing supported_cable parameter to devm_extcon_dev_allocate() The commit 2a9de9c0 ("extcon: Use the unique id for external connector instead of string") defines the unique id of each external connector to identify the type of external connector instead of string name. So, devm_extcon_dev_allocate() should include the second parameter (unsigned int *supported_cable). This patch adds the supported_cable parameter which is passed by platform data. 
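For board code, the practical effect is that the platform data must now carry the connector type. A minimal sketch of such platform data (the device name, GPIO number and timing values below are illustrative, not taken from this patch):

	#include <linux/extcon.h>
	#include <linux/extcon/extcon-gpio.h>
	#include <linux/interrupt.h>

	static struct gpio_extcon_platform_data usb_cable_pdata = {
		.name		 = "usb-cable",	/* illustrative name */
		.extcon_id	 = EXTCON_USB,	/* the new field: unique connector id */
		.gpio		 = 42,		/* illustrative GPIO number */
		.gpio_active_low = false,
		.debounce	 = 20,		/* ms */
		.irq_flags	 = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
	};

The driver then hands &pdata->extcon_id straight to devm_extcon_dev_allocate(), as the hunk below shows.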
Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-gpio.c | 6 ++---- include/linux/extcon/extcon-gpio.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c index 2a89c89dfaf5..ba927df2fb91 100644 --- a/drivers/extcon/extcon-gpio.c +++ b/drivers/extcon/extcon-gpio.c @@ -70,17 +70,15 @@ static int gpio_extcon_probe(struct platform_device *pdev) if (!pdata) return -EBUSY; - if (!pdata->irq_flags) { - dev_err(&pdev->dev, "IRQ flag is not specified.\n"); + if (!pdata->irq_flags || pdata->extcon_id > EXTCON_NONE) return -EINVAL; - } data = devm_kzalloc(&pdev->dev, sizeof(struct gpio_extcon_data), GFP_KERNEL); if (!data) return -ENOMEM; - data->edev = devm_extcon_dev_allocate(&pdev->dev, NULL); + data->edev = devm_extcon_dev_allocate(&pdev->dev, &pdata->extcon_id); if (IS_ERR(data->edev)) { dev_err(&pdev->dev, "failed to allocate extcon device\n"); return -ENOMEM; diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h index 0b17ad43fbfc..232bb8f80b51 100644 --- a/include/linux/extcon/extcon-gpio.h +++ b/include/linux/extcon/extcon-gpio.h @@ -26,6 +26,7 @@ /** * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device. * @name: The name of this GPIO extcon device. + * @extcon_id: The unique id of specific external connector. * @gpio: Corresponding GPIO. * @gpio_active_low: Boolean describing whether gpio active state is 1 or 0 * If true, low state of gpio means active. @@ -45,6 +46,7 @@ */ struct gpio_extcon_platform_data { const char *name; + unsigned int extcon_id; unsigned gpio; bool gpio_active_low; unsigned long debounce; -- cgit v1.2.3 From 6ba129974a3d6bcb236cbd96aa1db6001d163678 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 29 Sep 2015 22:59:55 +0900 Subject: extcon: gpio: Fix minor coding style and remove the unused fields. This patch fixes the minor coding style about indentation and removes the unused fields from struct gpio_extcon_platform_data. Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-gpio.c | 11 ++++------- include/linux/extcon/extcon-gpio.h | 22 ++++------------------ 2 files changed, 8 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c index ba927df2fb91..27de6aee890f 100644 --- a/drivers/extcon/extcon-gpio.c +++ b/drivers/extcon/extcon-gpio.c @@ -1,7 +1,5 @@ /* - * drivers/extcon/extcon_gpio.c - * - * Single-state GPIO extcon driver based on extcon class + * extcon_gpio.c - Single-state GPIO extcon driver based on extcon class * * Copyright (C) 2008 Google, Inc. * Author: Mike Lockwood @@ -17,8 +15,7 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * -*/ + */ #include #include @@ -37,7 +34,7 @@ struct gpio_extcon_data { struct delayed_work work; unsigned long debounce_jiffies; - struct gpio_extcon_platform_data *pdata; + struct gpio_extcon_pdata *pdata; }; static void gpio_extcon_work(struct work_struct *work) @@ -64,7 +61,7 @@ static irqreturn_t gpio_irq_handler(int irq, void *dev_id) static int gpio_extcon_probe(struct platform_device *pdev) { - struct gpio_extcon_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct gpio_extcon_pdata *pdata = dev_get_platdata(&pdev->dev); struct gpio_extcon_data *data; int ret; diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h index 232bb8f80b51..7cacafb78b09 100644 --- a/include/linux/extcon/extcon-gpio.h +++ b/include/linux/extcon/extcon-gpio.h @@ -1,5 +1,5 @@ /* - * External connector (extcon) class generic GPIO driver + * Single-state GPIO extcon driver based on extcon class * * Copyright (C) 2012 Samsung Electronics * Author: MyungJoo Ham @@ -16,16 +16,14 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * -*/ + */ #ifndef __EXTCON_GPIO_H__ #define __EXTCON_GPIO_H__ __FILE__ #include /** - * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device. - * @name: The name of this GPIO extcon device. + * struct gpio_extcon_pdata - A simple GPIO-controlled extcon device. * @extcon_id: The unique id of specific external connector. * @gpio: Corresponding GPIO. * @gpio_active_low: Boolean describing whether gpio active state is 1 or 0 @@ -33,28 +31,16 @@ * If false, high state of gpio means active. * @debounce: Debounce time for GPIO IRQ in ms. * @irq_flags: IRQ Flags (e.g., IRQF_TRIGGER_LOW). - * @state_on: print_state is overriden with state_on if attached. - * If NULL, default method of extcon class is used. - * @state_off: print_state is overriden with state_off if detached. - * If NUll, default method of extcon class is used. * @check_on_resume: Boolean describing whether to check the state of gpio * while resuming from sleep. - * - * Note that in order for state_on or state_off to be valid, both state_on - * and state_off should be not NULL. If at least one of them is NULL, - * the print_state is not overriden. */ -struct gpio_extcon_platform_data { - const char *name; +struct gpio_extcon_pdata { unsigned int extcon_id; unsigned gpio; bool gpio_active_low; unsigned long debounce; unsigned long irq_flags; - /* if NULL, "0" or "1" will be printed */ - const char *state_on; - const char *state_off; bool check_on_resume; }; -- cgit v1.2.3 From 9ae7ce00cc1353155b1914bfc40e8362efef7d1c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 29 Sep 2015 18:21:18 +0900 Subject: usb: renesas_usbhs: fix build warning if 64-bit architecture This patch fixes the following warning in a 64-bit build environment: ./drivers/usb/renesas_usbhs/common.c:496:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] dparam->type = of_id ?
(u32)of_id->data : 0; Acked-by: Geert Uytterhoeven Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/common.c | 2 +- include/linux/usb/renesas_usbhs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 7b98e1d9194c..0ce398c5482e 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -493,7 +493,7 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev) return NULL; dparam = &info->driver_param; - dparam->type = of_id ? (u32)of_id->data : 0; + dparam->type = of_id ? (uintptr_t)of_id->data : 0; if (!of_property_read_u32(dev->of_node, "renesas,buswait", &tmp)) dparam->buswait_bwait = tmp; gpio = of_get_named_gpio_flags(dev->of_node, "renesas,enable-gpio", 0, diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 3dd5a781da99..bfb74723f151 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -157,7 +157,7 @@ struct renesas_usbhs_driver_param { */ int pio_dma_border; /* default is 64byte */ - u32 type; + uintptr_t type; u32 enable_gpio; /* -- cgit v1.2.3 From 8a3e33cf92c7b7ae25c589eccd1a69ab11cc4353 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Wed, 30 Sep 2015 21:10:25 +0200 Subject: ata: ahci: find eSATA ports and flag them as removable If the AHCI ports' HPCP or ESP bits are set, the port should be considered external (e.g. eSATA) and is marked as removable. Userspace tools like udisks then treat it like a USB drive. With this patch applied, when I plug a drive into the eSATA port, KDE pops up a window asking what to do with the drive(s), just like it does for any random USB stick. Removability is indicated to the upper layers by way of the SCSI RMB bit, as I haven't found another way to signal userspace to treat a SATA disk like any USB stick.
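Condensed into one place, the mechanism is two small hooks (both taken verbatim from the diff that follows, pulled together here only to make the flow easier to see):

	/* ahci_port_init(): flag the port as external */
	tmp = readl(port_mmio + PORT_CMD);
	if ((tmp & PORT_CMD_HPCP) ||			/* hot-plug capable port */
	    ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)))	/* eSATA */
		ap->pflags |= ATA_PFLAG_EXTERNAL;

	/* ata_scsiop_inq_std(): report such devices as removable */
	if (ata_id_removable(args->id) ||
	    (args->dev->link->ap->pflags & ATA_PFLAG_EXTERNAL))
		hdr[1] |= (1 << 7);			/* SCSI RMB bit */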
Signed-off-by: Manuel Lauss Signed-off-by: Tejun Heo --- drivers/ata/ahci.h | 2 ++ drivers/ata/libahci.c | 7 +++++++ drivers/ata/libata-scsi.c | 7 +++++-- include/linux/libata.h | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 5b8e8a0fab48..45586c1dbbdc 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -181,6 +181,8 @@ enum { PORT_CMD_ALPE = (1 << 26), /* Aggressive Link PM enable */ PORT_CMD_ATAPI = (1 << 24), /* Device is ATAPI */ PORT_CMD_FBSCP = (1 << 22), /* FBS Capable Port */ + PORT_CMD_ESP = (1 << 21), /* External Sata Port */ + PORT_CMD_HPCP = (1 << 18), /* HotPlug Capable Port */ PORT_CMD_PMP = (1 << 17), /* PMP attached */ PORT_CMD_LIST_ON = (1 << 15), /* cmd list DMA engine running */ PORT_CMD_FIS_ON = (1 << 14), /* FIS DMA engine running */ diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index d256a66158be..2fa551a5146e 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1117,6 +1117,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, int port_no, void __iomem *mmio, void __iomem *port_mmio) { + struct ahci_host_priv *hpriv = ap->host->private_data; const char *emsg = NULL; int rc; u32 tmp; @@ -1138,6 +1139,12 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, writel(tmp, port_mmio + PORT_IRQ_STAT); writel(1 << port_no, mmio + HOST_IRQ_STAT); + + /* mark esata ports */ + tmp = readl(port_mmio + PORT_CMD); + if ((tmp & PORT_CMD_HPCP) || + ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))) + ap->pflags |= ATA_PFLAG_EXTERNAL; } void ahci_init_controller(struct ata_host *host) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 0d7f0da3a269..183a57bff935 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2015,8 +2015,11 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) VPRINTK("ENTER\n"); - /* set scsi removable (RMB) bit per ata bit */ - if (ata_id_removable(args->id)) + /* set scsi removable (RMB) bit per ata bit, or if the + * AHCI port says it's external (Hotplug-capable, eSATA). + */ + if (ata_id_removable(args->id) || + (args->dev->link->ap->pflags & ATA_PFLAG_EXTERNAL)) hdr[1] |= (1 << 7); if (args->dev->class == ATA_DEV_ZAC) { diff --git a/include/linux/libata.h b/include/linux/libata.h index c9cfbcdb8d14..83577f8fd15b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -254,6 +254,7 @@ enum { ATA_PFLAG_PIO32 = (1 << 20), /* 32bit PIO */ ATA_PFLAG_PIO32CHANGE = (1 << 21), /* 32bit PIO can be turned on/off */ + ATA_PFLAG_EXTERNAL = (1 << 22), /* eSATA/external port */ /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ -- cgit v1.2.3 From e647b532275bb357e87272e052fccf5fcdb36a17 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:12 +0100 Subject: ACPI: Add early device probing infrastructure IRQ controllers and timers are the two types of device the kernel requires before being able to use the device driver model. ACPI so far lacks a proper probing infrastructure similar to the one we have with DT, where we're able to declare IRQ chips and clocksources inside the driver code, and let the core code pick it up and call us back on a match. This leads to all kind of really ugly hacks all over the arm64 code and even in the ACPI layer. 
In order to allow some basic probing based on the ACPI tables, introduce "struct acpi_probe_entry" which contains just enough data and callbacks to match a table, an optional subtable, and call a probe function. A driver can, at build time, register itself and expect to be called if the right entry exists in the ACPI table. An acpi_probe_device_table() is provided, taking an identifier for a set of acpi_probe_entries and iterating over the registered entries. Signed-off-by: Marc Zyngier Reviewed-by: Lorenzo Pieralisi Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 39 +++++++++++++++++++++++ include/asm-generic/vmlinux.lds.h | 10 ++++++ include/linux/acpi.h | 66 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index d1ce377db3e9..1d6b55a6f501 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1913,3 +1913,42 @@ int __init acpi_scan_init(void) mutex_unlock(&acpi_scan_lock); return result; } + +static struct acpi_probe_entry *ape; +static int acpi_probe_count; +static DEFINE_SPINLOCK(acpi_probe_lock); + +static int __init acpi_match_madt(struct acpi_subtable_header *header, + const unsigned long end) +{ + if (!ape->subtable_valid || ape->subtable_valid(header, ape)) + if (!ape->probe_subtbl(header, end)) + acpi_probe_count++; + + return 0; +} + +int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr) +{ + int count = 0; + + if (acpi_disabled) + return 0; + + spin_lock(&acpi_probe_lock); + for (ape = ap_head; nr; ape++, nr--) { + if (ACPI_COMPARE_NAME(ACPI_SIG_MADT, ape->id)) { + acpi_probe_count = 0; + acpi_table_parse_madt(ape->type, acpi_match_madt, 0); + count += acpi_probe_count; + } else { + int res; + res = acpi_table_parse(ape->id, ape->probe_table); + if (!res) + count++; + } + } + spin_unlock(&acpi_probe_lock); + + return count; +} diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 1781e54ea6d3..efd7ed1eafae 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -181,6 +181,16 @@ #define CPUIDLE_METHOD_OF_TABLES() OF_TABLE(CONFIG_CPU_IDLE, cpuidle_method) #define EARLYCON_OF_TABLES() OF_TABLE(CONFIG_SERIAL_EARLYCON, earlycon) +#ifdef CONFIG_ACPI +#define ACPI_PROBE_TABLE(name) \ + .
= ALIGN(8); \ + VMLINUX_SYMBOL(__##name##_acpi_probe_table) = .; \ + *(__##name##_acpi_probe_table) \ + VMLINUX_SYMBOL(__##name##_acpi_probe_table_end) = .; +#else +#define ACPI_PROBE_TABLE(name) +#endif + #define KERNEL_DTB() \ STRUCT_ALIGN(); \ VMLINUX_SYMBOL(__dtb_start) = .; \ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 865d948c60e6..815f5f62513f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -773,6 +773,61 @@ int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, struct fwnode_handle *acpi_get_next_subnode(struct device *dev, struct fwnode_handle *subnode); + +struct acpi_probe_entry; +typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, + struct acpi_probe_entry *); + +#define ACPI_TABLE_ID_LEN 5 + +/** + * struct acpi_probe_entry - boot-time probing entry + * @id: ACPI table name + * @type: Optional subtable type to match + * (if @id contains subtables) + * @subtable_valid: Optional callback to check the validity of + * the subtable + * @probe_table: Callback to the driver being probed when table + * match is successful + * @probe_subtbl: Callback to the driver being probed when table and + * subtable match (and optional callback is successful) + * @driver_data: Sideband data provided back to the driver + */ +struct acpi_probe_entry { + __u8 id[ACPI_TABLE_ID_LEN]; + __u8 type; + acpi_probe_entry_validate_subtbl subtable_valid; + union { + acpi_tbl_table_handler probe_table; + acpi_tbl_entry_handler probe_subtbl; + }; + kernel_ulong_t driver_data; +}; + +#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ + static const struct acpi_probe_entry __acpi_probe_##name \ + __used __section(__##table##_acpi_probe_table) \ + = { \ + .id = table_id, \ + .type = subtable, \ + .subtable_valid = valid, \ + .probe_table = (acpi_tbl_table_handler)fn, \ + .driver_data = data, \ + } + +#define ACPI_PROBE_TABLE(name) __##name##_acpi_probe_table +#define ACPI_PROBE_TABLE_END(name) __##name##_acpi_probe_table_end + +int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr); + +#define acpi_probe_device_table(t) \ + ({ \ + extern struct acpi_probe_entry ACPI_PROBE_TABLE(t), \ + ACPI_PROBE_TABLE_END(t); \ + __acpi_probe_device_table(&ACPI_PROBE_TABLE(t), \ + (&ACPI_PROBE_TABLE_END(t) - \ + &ACPI_PROBE_TABLE(t))); \ + }) #else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, @@ -831,6 +886,17 @@ static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, { return NULL; } + +#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, validate, data, fn) \ + static const void * __acpi_table_##name[] \ + __attribute__((unused)) \ + = { (void *) table_id, \ + (void *) subtable, \ + (void *) valid, \ + (void *) fn, \ + (void *) data } + +#define acpi_probe_device_table(t) ({ int __r = 0; __r;}) #endif #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 46e589a391809627144e6bee93d71d73fe915db2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:13 +0100 Subject: irqchip / ACPI: Add probing infrastructure for ACPI-based irqchips DT enjoys a rather nice probing infrastructure for irqchips, while ACPI is so far stuck into a very distant past. This patch introduces a declarative API, allowing irqchips to be self-contained and be called when a particular entry is matched in the MADT table. 
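A driver-side sketch of the declarative style this enables (my_irqchip and its init function are hypothetical; the callback signature follows the acpi_tbl_entry_handler type used by the probe-entry infrastructure, and the GIC conversion later in this series uses exactly this pattern):

	static int __init my_irqchip_madt_init(struct acpi_subtable_header *header,
					       const unsigned long end)
	{
		/* map registers, create the irq domain, set the root handler */
		return 0;
	}
	IRQCHIP_ACPI_DECLARE(my_irqchip, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
			     NULL, 0, my_irqchip_madt_init);

The entry is matched against MADT subtables when irqchip_init() calls acpi_probe_device_table(irqchip), with no architecture code having to know about the individual driver.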
Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/irq.h | 11 ----------- drivers/irqchip/irqchip.c | 8 +++++--- include/asm-generic/vmlinux.lds.h | 1 + include/linux/acpi_irq.h | 10 ---------- include/linux/irqchip.h | 17 +++++++++++++++++ 5 files changed, 23 insertions(+), 24 deletions(-) delete mode 100644 include/linux/acpi_irq.h (limited to 'include/linux') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index bbb251b14746..1a1037a32f5c 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -10,15 +10,4 @@ struct pt_regs; extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); -static inline void acpi_irq_init(void) -{ - /* - * Hardcode ACPI IRQ chip initialization to GICv2 for now. - * Proper irqchip infrastructure will be implemented along with - * incoming GICv2m|GICv3|ITS bits. - */ - acpi_gic_init(); -} -#define acpi_irq_init acpi_irq_init - #endif diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c index afd1af3dfe5a..1ee773e98f5f 100644 --- a/drivers/irqchip/irqchip.c +++ b/drivers/irqchip/irqchip.c @@ -8,7 +8,7 @@ * warranty of any kind, whether express or implied. */ -#include +#include #include #include #include @@ -27,6 +27,8 @@ extern struct of_device_id __irqchip_of_table[]; void __init irqchip_init(void) { of_irq_init(__irqchip_of_table); - - acpi_irq_init(); +#if defined(CONFIG_ARM64) && defined(CONFIG_ACPI) + acpi_gic_init(); /* Temporary hack */ +#endif + acpi_probe_device_table(irqchip); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index efd7ed1eafae..cd836cd1ea24 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -524,6 +524,7 @@ CPUIDLE_METHOD_OF_TABLES() \ KERNEL_DTB() \ IRQCHIP_OF_MATCH_TABLE() \ + ACPI_PROBE_TABLE(irqchip) \ EARLYCON_TABLE() \ EARLYCON_OF_TABLES() diff --git a/include/linux/acpi_irq.h b/include/linux/acpi_irq.h deleted file mode 100644 index f10c87265855..000000000000 --- a/include/linux/acpi_irq.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_ACPI_IRQ_H -#define _LINUX_ACPI_IRQ_H - -#include - -#ifndef acpi_irq_init -static inline void acpi_irq_init(void) { } -#endif - -#endif /* _LINUX_ACPI_IRQ_H */ diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 638887376e58..89c34b200671 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -11,6 +11,7 @@ #ifndef _LINUX_IRQCHIP_H #define _LINUX_IRQCHIP_H +#include #include /* @@ -25,6 +26,22 @@ */ #define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) +/* + * This macro must be used by the different irqchip drivers to declare + * the association between their version and their initialization function. + * + * @name: name that must be unique accross all IRQCHIP_ACPI_DECLARE of the + * same file. + * @subtable: Subtable to be identified in MADT + * @validate: Function to be called on that subtable to check its validity. + * Can be NULL. + * @data: data to be checked by the validate function. 
+ * @fn: initialization function + */ +#define IRQCHIP_ACPI_DECLARE(name, subtable, validate, data, fn) \ + ACPI_DECLARE_PROBE_ENTRY(irqchip, name, ACPI_SIG_MADT, \ + subtable, validate, data, fn) + #ifdef CONFIG_IRQCHIP void irqchip_init(void); #else -- cgit v1.2.3 From f26527b1428f379fbd7edf779854c3b41bc0b3e5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:14 +0100 Subject: irqchip / GIC: Convert the GIC driver to ACPI probing Now that we have a basic infrastructure to register irqchips and call them on discovery of a matching entry in MADT, convert the GIC driver to this new probing method. It ends up being a code deletion party, which is a rather good thing. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/acpi.h | 1 - arch/arm64/include/asm/irq.h | 2 -- arch/arm64/kernel/acpi.c | 25 ------------- drivers/irqchip/irq-gic.c | 69 ++++++++++++++++++------------------ drivers/irqchip/irqchip.c | 3 -- include/linux/irqchip/arm-gic-acpi.h | 31 ---------------- 6 files changed, 35 insertions(+), 96 deletions(-) delete mode 100644 include/linux/irqchip/arm-gic-acpi.h (limited to 'include/linux') diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 208cec08a74f..6894205797a3 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,7 +12,6 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H -#include #include #include diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 1a1037a32f5c..94c53674a31d 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,8 +1,6 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H -#include - #include struct pt_regs; diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 19de7537e7d3..d6463bba2360 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -205,28 +205,3 @@ void __init acpi_boot_table_init(void) disable_acpi(); } } - -void __init acpi_gic_init(void) -{ - struct acpi_table_header *table; - acpi_status status; - acpi_size tbl_size; - int err; - - if (acpi_disabled) - return; - - status = acpi_get_table_with_size(ACPI_SIG_MADT, 0, &table, &tbl_size); - if (ACPI_FAILURE(status)) { - const char *msg = acpi_format_exception(status); - - pr_err("Failed to get MADT table, %s\n", msg); - return; - } - - err = gic_v2_acpi_init(table); - if (err) - pr_err("Failed to initialize GIC IRQ controller"); - - early_acpi_os_unmap_memory((char *)table, tbl_size); -} diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 982c09c2d791..d4add30d1d46 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -1195,7 +1194,7 @@ IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init); #endif #ifdef CONFIG_ACPI -static phys_addr_t dist_phy_base, cpu_phy_base __initdata; +static phys_addr_t cpu_phy_base __initdata; static int __init gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, @@ -1223,60 +1222,56 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, return 0; } -static int __init -gic_acpi_parse_madt_distributor(struct acpi_subtable_header *header, - const unsigned long end) +/* The things you have to do to just *count* something... 
*/ +static int __init acpi_dummy_func(struct acpi_subtable_header *header, + const unsigned long end) { - struct acpi_madt_generic_distributor *dist; + return 0; +} - dist = (struct acpi_madt_generic_distributor *)header; +static bool __init acpi_gic_redist_is_present(void) +{ + return acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, + acpi_dummy_func, 0) > 0; +} - if (BAD_MADT_ENTRY(dist, end)) - return -EINVAL; +static bool __init gic_validate_dist(struct acpi_subtable_header *header, + struct acpi_probe_entry *ape) +{ + struct acpi_madt_generic_distributor *dist; + dist = (struct acpi_madt_generic_distributor *)header; - dist_phy_base = dist->base_address; - return 0; + return (dist->version == ape->driver_data && + (dist->version != ACPI_MADT_GIC_VERSION_NONE || + !acpi_gic_redist_is_present())); } -int __init -gic_v2_acpi_init(struct acpi_table_header *table) +#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K) +#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K) + +static int __init gic_v2_acpi_init(struct acpi_subtable_header *header, + const unsigned long end) { + struct acpi_madt_generic_distributor *dist; void __iomem *cpu_base, *dist_base; int count; /* Collect CPU base addresses */ - count = acpi_parse_entries(ACPI_SIG_MADT, - sizeof(struct acpi_table_madt), - gic_acpi_parse_madt_cpu, table, - ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, + gic_acpi_parse_madt_cpu, 0); if (count <= 0) { pr_err("No valid GICC entries exist\n"); return -EINVAL; } - /* - * Find distributor base address. We expect one distributor entry since - * ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade. - */ - count = acpi_parse_entries(ACPI_SIG_MADT, - sizeof(struct acpi_table_madt), - gic_acpi_parse_madt_distributor, table, - ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0); - if (count <= 0) { - pr_err("No valid GICD entries exist\n"); - return -EINVAL; - } else if (count > 1) { - pr_err("More than one GICD entry detected\n"); - return -EINVAL; - } - cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE); if (!cpu_base) { pr_err("Unable to map GICC registers\n"); return -ENOMEM; } - dist_base = ioremap(dist_phy_base, ACPI_GICV2_DIST_MEM_SIZE); + dist = (struct acpi_madt_generic_distributor *)header; + dist_base = ioremap(dist->base_address, ACPI_GICV2_DIST_MEM_SIZE); if (!dist_base) { pr_err("Unable to map GICD registers\n"); iounmap(cpu_base); @@ -1302,4 +1297,10 @@ gic_v2_acpi_init(struct acpi_table_header *table) acpi_irq_model = ACPI_IRQ_MODEL_GIC; return 0; } +IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, + gic_validate_dist, ACPI_MADT_GIC_VERSION_V2, + gic_v2_acpi_init); +IRQCHIP_ACPI_DECLARE(gic_v2_maybe, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, + gic_validate_dist, ACPI_MADT_GIC_VERSION_NONE, + gic_v2_acpi_init); #endif diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c index 1ee773e98f5f..2b35e68bea82 100644 --- a/drivers/irqchip/irqchip.c +++ b/drivers/irqchip/irqchip.c @@ -27,8 +27,5 @@ extern struct of_device_id __irqchip_of_table[]; void __init irqchip_init(void) { of_irq_init(__irqchip_of_table); -#if defined(CONFIG_ARM64) && defined(CONFIG_ACPI) - acpi_gic_init(); /* Temporary hack */ -#endif acpi_probe_device_table(irqchip); } diff --git a/include/linux/irqchip/arm-gic-acpi.h b/include/linux/irqchip/arm-gic-acpi.h deleted file mode 100644 index de3419ed3937..000000000000 --- a/include/linux/irqchip/arm-gic-acpi.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2014, Linaro Ltd. 
- * Author: Tomasz Nowicki - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef ARM_GIC_ACPI_H_ -#define ARM_GIC_ACPI_H_ - -#ifdef CONFIG_ACPI - -/* - * Hard code here, we can not get memory size from MADT (but FDT does), - * Actually no need to do that, because this size can be inferred - * from GIC spec. - */ -#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K) -#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K) - -struct acpi_table_header; - -int gic_v2_acpi_init(struct acpi_table_header *table); -void acpi_gic_init(void); -#else -static inline void acpi_gic_init(void) { } -#endif - -#endif /* ARM_GIC_ACPI_H_ */ -- cgit v1.2.3 From c625f76a9910b9d51df5d6ca40a8da0684326996 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:15 +0100 Subject: clocksource / ACPI: Add probing infrastructure for ACPI-based clocksources DT enjoys a rather nice probing infrastructure for clocksources, while ACPI is so far stuck into a very distant past. This patch introduces a declarative API, allowing clocksources to be self-contained and be called when parsing the GTDT table. Signed-off-by: Marc Zyngier Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/clocksource/clksrc-of.c | 4 ++++ include/asm-generic/vmlinux.lds.h | 1 + include/linux/clocksource.h | 3 +++ 3 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c index 0093a8e49e14..a2105bd620a0 100644 --- a/drivers/clocksource/clksrc-of.c +++ b/drivers/clocksource/clksrc-of.c @@ -14,6 +14,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -38,6 +39,9 @@ void __init clocksource_of_init(void) init_func(np); clocksources++; } + + clocksources += acpi_probe_device_table(clksrc); + if (!clocksources) pr_crit("%s: no matching clocksources found\n", __func__); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index cd836cd1ea24..c4bd0e2c173c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -525,6 +525,7 @@ KERNEL_DTB() \ IRQCHIP_OF_MATCH_TABLE() \ ACPI_PROBE_TABLE(irqchip) \ + ACPI_PROBE_TABLE(clksrc) \ EARLYCON_TABLE() \ EARLYCON_OF_TABLES() diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 278dd279a7a8..8cde04803d91 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -258,4 +258,7 @@ void acpi_generic_timer_init(void); static inline void acpi_generic_timer_init(void) { } #endif +#define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ + ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn) + #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3 From aad83b15aa21f2d9e46b978b27bc63989e61202d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:16 +0100 Subject: clocksource: Add new CLKSRC_{PROBE,ACPI} config symbols The clocksource probing infrastructure currently depends on CONFIG_CLKSRC_OF, which depends on CONFIG_OF. In order to make this infrastructure selectable even if CONFIG_OF is not selected, introduce a new CONFIG_CLKSRC_PROBE (which allow the infrastructure to be compiled in), and CONFIG_CLKSRC_ACPI (which is the pendent of CONFIG_CLKSRC_OF for ACPI). Signed-off-by: Marc Zyngier Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. 
Wysocki --- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/Makefile | 2 +- include/linux/clocksource.h | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index a7726db13abb..fefedf15be03 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -2,6 +2,14 @@ menu "Clock Source drivers" config CLKSRC_OF bool + select CLKSRC_PROBE + +config CLKSRC_ACPI + bool + select CLKSRC_PROBE + +config CLKSRC_PROBE + bool config CLKSRC_I8253 bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 5c00863c3e33..b47be92cd7ef 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_CLKSRC_OF) += clksrc-of.o +obj-$(CONFIG_CLKSRC_PROBE) += clksrc-of.o obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 8cde04803d91..6eab6abf89b3 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -246,7 +246,7 @@ extern int clocksource_i8253_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ OF_DECLARE_1(clksrc, name, compat, fn) -#ifdef CONFIG_CLKSRC_OF +#ifdef CONFIG_CLKSRC_PROBE extern void clocksource_of_init(void); #else static inline void clocksource_of_init(void) {} -- cgit v1.2.3 From ae281cbd2689200329afe2474b2f39f3f6eb54b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:17 +0100 Subject: clocksource / arm_arch_timer: Convert to ACPI probing It is now absolutely trivial to convert the arch timer driver to use ACPI probing, just like its DT counterpart. Let's enjoy another crapectomy. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. 
Wysocki --- arch/arm64/kernel/time.c | 6 ------ drivers/clocksource/Kconfig | 1 + drivers/clocksource/arm_arch_timer.c | 10 +--------- include/linux/clocksource.h | 6 ------ 4 files changed, 2 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 149151fb42bb..819a105d3085 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -71,12 +71,6 @@ void __init time_init(void) tick_setup_hrtimer_broadcast(); - /* - * Since ACPI or FDT will only one be available in the system, - * we can use acpi_generic_timer_init() here safely - */ - acpi_generic_timer_init(); - arch_timer_rate = arch_timer_get_rate(); if (!arch_timer_rate) panic("Unable to initialise architected timer.\n"); diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index fefedf15be03..1a9c5dc1e5f9 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -131,6 +131,7 @@ config CLKSRC_STM32 config ARM_ARCH_TIMER bool select CLKSRC_OF if OF + select CLKSRC_ACPI if ACPI config ARM_ARCH_TIMER_EVTSTREAM bool "Support for ARM architected timer event stream generation" diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index d6e3e49399dd..c64d543d64bf 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -864,13 +864,5 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table) arch_timer_init(); return 0; } - -/* Initialize all the generic timers presented in GTDT */ -void __init acpi_generic_timer_init(void) -{ - if (acpi_disabled) - return; - - acpi_table_parse(ACPI_SIG_GTDT, arch_timer_acpi_init); -} +CLOCKSOURCE_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init); #endif diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6eab6abf89b3..116645f746c1 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -252,12 +252,6 @@ extern void clocksource_of_init(void); static inline void clocksource_of_init(void) {} #endif -#ifdef CONFIG_ACPI -void acpi_generic_timer_init(void); -#else -static inline void acpi_generic_timer_init(void) { } -#endif - #define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn) -- cgit v1.2.3 From 3722ed2380ad6e89eaf81fcf93f06d605e740435 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:18 +0100 Subject: clocksource: cosmetic: Drop OF 'dependency' from symbols Seeing the 'of' characters in a symbol that is being called from ACPI seems to freak out people. So let's do a bit of pointless renaming so that these folks do feel at home. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. 
Wysocki --- arch/arm/kernel/time.c | 2 +- arch/arm/mach-omap2/timer.c | 4 +-- arch/arm/mach-rockchip/rockchip.c | 2 +- arch/arm/mach-shmobile/setup-r8a7779.c | 2 +- arch/arm/mach-shmobile/setup-rcar-gen2.c | 2 +- arch/arm/mach-spear/spear13xx.c | 2 +- arch/arm/mach-sunxi/sunxi.c | 2 +- arch/arm/mach-u300/core.c | 2 +- arch/arm/mach-ux500/timer.c | 2 +- arch/arm/mach-zynq/common.c | 2 +- arch/arm64/kernel/time.c | 2 +- arch/microblaze/kernel/setup.c | 2 +- arch/mips/pistachio/time.c | 2 +- arch/mips/ralink/clk.c | 2 +- arch/nios2/kernel/time.c | 2 +- arch/xtensa/kernel/time.c | 2 +- drivers/clocksource/Makefile | 2 +- drivers/clocksource/clksrc-of.c | 47 -------------------------------- drivers/clocksource/clksrc-probe.c | 47 ++++++++++++++++++++++++++++++++ include/linux/clocksource.h | 4 +-- 20 files changed, 67 insertions(+), 67 deletions(-) delete mode 100644 drivers/clocksource/clksrc-of.c create mode 100644 drivers/clocksource/clksrc-probe.c (limited to 'include/linux') diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index a66e37e211a9..97b22fa7cb3a 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -120,6 +120,6 @@ void __init time_init(void) #ifdef CONFIG_COMMON_CLK of_clk_init(NULL); #endif - clocksource_of_init(); + clocksource_probe(); } } diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index a55655127ef2..bef41837bf7f 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -647,7 +647,7 @@ static OMAP_SYS_32K_TIMER_INIT(4, 1, "timer_32k_ck", "ti,timer-alwon", void __init omap4_local_timer_init(void) { omap4_sync32k_timer_init(); - clocksource_of_init(); + clocksource_probe(); } #else void __init omap4_local_timer_init(void) @@ -663,7 +663,7 @@ void __init omap5_realtime_timer_init(void) omap4_sync32k_timer_init(); realtime_counter_init(); - clocksource_of_init(); + clocksource_probe(); } #endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */ diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c index b6cf3b449428..251c7b9c5f9b 100644 --- a/arch/arm/mach-rockchip/rockchip.c +++ b/arch/arm/mach-rockchip/rockchip.c @@ -67,7 +67,7 @@ static void __init rockchip_timer_init(void) } of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); } static void __init rockchip_dt_init(void) diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index 6bfa6407a27c..1e572a903f8e 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -97,7 +97,7 @@ static u32 __init r8a7779_read_mode_pins(void) static void __init r8a7779_init_time(void) { r8a7779_clocks_init(r8a7779_read_mode_pins()); - clocksource_of_init(); + clocksource_probe(); } static const char *const r8a7779_compat_dt[] __initconst = { diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index aa3339258d9c..9eccde3c7b13 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -128,7 +128,7 @@ void __init rcar_gen2_timer_init(void) #endif /* CONFIG_ARM_ARCH_TIMER */ rcar_gen2_clocks_init(mode); - clocksource_of_init(); + clocksource_probe(); } struct memory_reserve_config { diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c index b7afce6795f4..ca2f6a82a414 100644 --- a/arch/arm/mach-spear/spear13xx.c +++ b/arch/arm/mach-spear/spear13xx.c @@ -124,5 +124,5 @@ void __init spear13xx_timer_init(void) clk_put(pclk); spear_setup_of_timer(); 
- clocksource_of_init(); + clocksource_probe(); } diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 65bab2876343..223c9e99380d 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -46,7 +46,7 @@ static void __init sun6i_timer_init(void) of_clk_init(NULL); if (IS_ENABLED(CONFIG_RESET_CONTROLLER)) sun6i_reset_init(); - clocksource_of_init(); + clocksource_probe(); } DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family") diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index 35670b15f281..546338bbacf8 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -408,7 +408,7 @@ static const char * u300_board_compat[] = { DT_MACHINE_START(U300_DT, "U300 S335/B335 (Device Tree)") .map_io = u300_map_io, .init_irq = u300_init_irq_dt, - .init_time = clocksource_of_init, + .init_time = clocksource_probe, .init_machine = u300_init_machine_dt, .restart = u300_restart, .dt_compat = u300_board_compat, diff --git a/arch/arm/mach-ux500/timer.c b/arch/arm/mach-ux500/timer.c index ff28d8ad1ed7..8d2d233f8e6c 100644 --- a/arch/arm/mach-ux500/timer.c +++ b/arch/arm/mach-ux500/timer.c @@ -44,5 +44,5 @@ void __init ux500_timer_init(void) dt_fail: clksrc_dbx500_prcmu_init(prcmu_timer_base); - clocksource_of_init(); + clocksource_probe(); } diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 5a6e4e20ca0a..6f39d03cc27e 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -154,7 +154,7 @@ static void __init zynq_timer_init(void) zynq_clock_init(); of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); } static struct map_desc zynq_cortex_a9_scu_map __initdata = { diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 819a105d3085..13339b6ffc1a 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -67,7 +67,7 @@ void __init time_init(void) u32 arch_timer_rate; of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); tick_setup_hrtimer_broadcast(); diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index ab5b488e1fde..89a2a9394927 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -194,7 +194,7 @@ void __init time_init(void) { of_clk_init(NULL); setup_cpuinfo_clk(); - clocksource_of_init(); + clocksource_probe(); } #ifdef CONFIG_DEBUG_FS diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 8a377346f0ca..1022201b2beb 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -39,7 +39,7 @@ void __init plat_time_init(void) struct clk *clk; of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); np = of_get_cpu_node(0, NULL); if (!np) { diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c index feb5a9bf98b4..25c4a61779f1 100644 --- a/arch/mips/ralink/clk.c +++ b/arch/mips/ralink/clk.c @@ -75,5 +75,5 @@ void __init plat_time_init(void) pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000); mips_hpt_frequency = clk_get_rate(clk) / 2; clk_put(clk); - clocksource_of_init(); + clocksource_probe(); } diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index bbc3f9157f9c..e835dda2bfe2 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -324,7 +324,7 @@ void __init time_init(void) if (count < 2) panic("%d timer is found, it needs 2 timers in system\n", count); - clocksource_of_init(); + clocksource_probe(); } CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); 
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index b97767dbc7c8..b9ad9feadc2d 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -148,7 +148,7 @@ void __init time_init(void) local_timer_setup(0); setup_irq(this_cpu_ptr(&ccount_timer)->evt.irq, &timer_irqaction); sched_clock_register(ccount_sched_clock_read, 32, ccount_freq); - clocksource_of_init(); + clocksource_probe(); } /* diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index b47be92cd7ef..51856d50bccc 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_CLKSRC_PROBE) += clksrc-of.o +obj-$(CONFIG_CLKSRC_PROBE) += clksrc-probe.o obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c deleted file mode 100644 index a2105bd620a0..000000000000 --- a/drivers/clocksource/clksrc-of.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -extern struct of_device_id __clksrc_of_table[]; - -static const struct of_device_id __clksrc_of_table_sentinel - __used __section(__clksrc_of_table_end); - -void __init clocksource_of_init(void) -{ - struct device_node *np; - const struct of_device_id *match; - of_init_fn_1 init_func; - unsigned clocksources = 0; - - for_each_matching_node_and_match(np, __clksrc_of_table, &match) { - if (!of_device_is_available(np)) - continue; - - init_func = match->data; - init_func(np); - clocksources++; - } - - clocksources += acpi_probe_device_table(clksrc); - - if (!clocksources) - pr_crit("%s: no matching clocksources found\n", __func__); -} diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c new file mode 100644 index 000000000000..7cb6c923a836 --- /dev/null +++ b/drivers/clocksource/clksrc-probe.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include + +extern struct of_device_id __clksrc_of_table[]; + +static const struct of_device_id __clksrc_of_table_sentinel + __used __section(__clksrc_of_table_end); + +void __init clocksource_probe(void) +{ + struct device_node *np; + const struct of_device_id *match; + of_init_fn_1 init_func; + unsigned clocksources = 0; + + for_each_matching_node_and_match(np, __clksrc_of_table, &match) { + if (!of_device_is_available(np)) + continue; + + init_func = match->data; + init_func(np); + clocksources++; + } + + clocksources += acpi_probe_device_table(clksrc); + + if (!clocksources) + pr_crit("%s: no matching clocksources found\n", __func__); +} diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 116645f746c1..7784b597e959 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -247,9 +247,9 @@ extern int clocksource_i8253_init(void); OF_DECLARE_1(clksrc, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE -extern void clocksource_of_init(void); +extern void clocksource_probe(void); #else -static inline void clocksource_of_init(void) {} +static inline void clocksource_probe(void) {} #endif #define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn) -- cgit v1.2.3 From 9290a16cf19301224556bc7bcb913c0c2a45bb9a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 21 Aug 2015 11:48:37 +0000 Subject: dmaengine: OF DMAEngine API based on CONFIG_DMA_OF instead of CONFIG_OF 5fa422c ("dmaengine: move drivers/of/dma.c -> drivers/dma/of-dma.c") moved the OF-based DMAEngine code to of-dma.c, which is built under CONFIG_DMA_OF. But the OF-based DMAEngine API in of_dma.h is still guarded by CONFIG_OF, so the kernel can't find the OF DMAEngine API when .config has CONFIG_OF but not CONFIG_DMA_OF. This patch tidies that up. Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 98ba7525929e..36112cdd665a 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -34,7 +34,7 @@ struct of_dma_filter_info { dma_filter_fn filter_fn; }; -#ifdef CONFIG_OF +#ifdef CONFIG_DMA_OF extern int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) (struct of_phandle_args *, struct of_dma *), -- cgit v1.2.3 From f4829a9b7a61e159367350008a608b062c4f6840 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Sep 2015 21:01:50 +0200 Subject: blk-mq: fix racy updates of rq->errors blk_mq_complete_request may be a no-op if the request has already been completed by other means (e.g. a timeout or cancellation), but currently drivers have to set rq->errors before calling blk_mq_complete_request, which might leave us with the wrong error value. Add an error parameter to blk_mq_complete_request so that we can defer setting rq->errors until we know we won the race to complete the request.
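For drivers the conversion is mechanical; a before/after sketch of a typical completion path (condensed from the call sites updated below, not a new interface):

	/* before: racy - rq->errors gets written even when someone else,
	 * e.g. the timeout handler, already completed the request */
	rq->errors = -EIO;
	blk_mq_complete_request(rq);

	/* after: the error is stored only if we win the race in
	 * blk_mark_rq_complete() */
	blk_mq_complete_request(rq, -EIO);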
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq.c | 12 ++++++------ drivers/block/loop.c | 11 +++++------ drivers/block/null_blk.c | 2 +- drivers/block/nvme-core.c | 16 +++++++--------- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 19 ++++++++++--------- drivers/scsi/scsi_lib.c | 2 +- include/linux/blk-mq.h | 2 +- 8 files changed, 32 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 31c0c6259c4c..2306330530e8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -393,14 +393,16 @@ void __blk_mq_complete_request(struct request *rq) * Ends all I/O on a request. It does not handle partial completions. * The actual completion happens out-of-order, through a IPI handler. **/ -void blk_mq_complete_request(struct request *rq) +void blk_mq_complete_request(struct request *rq, int error) { struct request_queue *q = rq->q; if (unlikely(blk_should_fake_timeout(q))) return; - if (!blk_mark_rq_complete(rq)) + if (!blk_mark_rq_complete(rq)) { + rq->errors = error; __blk_mq_complete_request(rq); + } } EXPORT_SYMBOL(blk_mq_complete_request); @@ -616,10 +618,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, * If a request wasn't started before the queue was * marked dying, kill it here or it'll go unnoticed. */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_complete_request(rq); - } + if (unlikely(blk_queue_dying(rq->q))) + blk_mq_complete_request(rq, -EIO); return; } if (rq->cmd_flags & REQ_NO_TIMEOUT) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f9889b6bc02c..674f800a3b57 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1486,17 +1486,16 @@ static void loop_handle_cmd(struct loop_cmd *cmd) { const bool write = cmd->rq->cmd_flags & REQ_WRITE; struct loop_device *lo = cmd->rq->q->queuedata; - int ret = -EIO; + int ret = 0; - if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) + if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { + ret = -EIO; goto failed; + } ret = do_req_filebacked(lo, cmd->rq); - failed: - if (ret) - cmd->rq->errors = -EIO; - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(cmd->rq, ret ? 
-EIO : 0); } static void loop_queue_write_work(struct work_struct *work) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index a295b98c6bae..1c9e4fe5aa44 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -289,7 +289,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) case NULL_IRQ_SOFTIRQ: switch (queue_mode) { case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(cmd->rq, cmd->rq->errors); break; case NULL_Q_RQ: blk_complete_request(cmd->rq); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 30758bdf69ea..6f04771f1019 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -618,16 +618,15 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, spin_unlock_irqrestore(req->q->queue_lock, flags); return; } + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { if (cmd_rq->ctx == CMD_CTX_CANCELLED) - req->errors = -EINTR; - else - req->errors = status; + status = -EINTR; } else { - req->errors = nvme_error_status(status); + status = nvme_error_status(status); } - } else - req->errors = 0; + } + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { u32 result = le32_to_cpup(&cqe->result); req->special = (void *)(uintptr_t)result; @@ -650,7 +649,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req); + blk_mq_complete_request(req, status); } /* length is in bytes. gfp flags indicates whether we may sleep. */ @@ -863,8 +862,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8) && req->cmd_type != REQ_TYPE_DRV_PRIV) { - req->errors = -EFAULT; - blk_mq_complete_request(req); + blk_mq_complete_request(req, -EFAULT); return BLK_MQ_RQ_QUEUE_OK; } } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index e93899cc6f60..6ca35495a5be 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -144,7 +144,7 @@ static void virtblk_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { - blk_mq_complete_request(vbr->req); + blk_mq_complete_request(vbr->req, vbr->req->errors); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 0823a96902f8..611170896b8c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1142,6 +1142,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) RING_IDX i, rp; unsigned long flags; struct blkfront_info *info = (struct blkfront_info *)dev_id; + int error; spin_lock_irqsave(&info->io_lock, flags); @@ -1182,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) continue; } - req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; + error = (bret->status == BLKIF_RSP_OKAY) ? 
0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); } - blk_mq_complete_request(req); + blk_mq_complete_request(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && info->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; } - if (unlikely(req->errors)) { - if (req->errors == -EOPNOTSUPP) - req->errors = 0; + if (unlikely(error)) { + if (error == -EOPNOTSUPP) + error = 0; info->feature_flush = 0; xlvbd_flush(info); } @@ -1223,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - blk_mq_complete_request(req); + blk_mq_complete_request(req, error); break; default: BUG(); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cbfc5990052b..126a48c6431e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1957,7 +1957,7 @@ static int scsi_mq_prep_fn(struct request *req) static void scsi_mq_done(struct scsi_cmnd *cmd) { trace_scsi_dispatch_cmd_done(cmd); - blk_mq_complete_request(cmd->request); + blk_mq_complete_request(cmd->request, cmd->request->errors); } static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b80ba4572a31..c1b5c867ff07 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -214,7 +214,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_abort_requeue_list(struct request_queue *q); -void blk_mq_complete_request(struct request *rq); +void blk_mq_complete_request(struct request *rq, int error); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 0bf6cd5b9531bcc29c0a5e504b6ce2984c6fd8d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Sep 2015 21:01:51 +0200 Subject: blk-mq: factor out a helper to iterate all tags for a request_queue And replace blk_mq_tag_busy_iter with it - its driver users were converted to a new helper a while ago, and within the block layer we only need the new version.
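[ Note: a hedged usage sketch of the new helper; the counting callback below is hypothetical and not part of this patch, but its signature matches the busy_iter_fn that blk_mq_check_expired() implements:

	/* Hypothetical busy_iter_fn: counts requests that currently hold a tag. */
	static void count_busy(struct blk_mq_hw_ctx *hctx, struct request *rq,
			       void *priv, bool reserved)
	{
		unsigned int *busy = priv;

		(*busy)++;
	}

	unsigned int busy = 0;

	/* Visits reserved and normal tags of every mapped hardware context of q. */
	blk_mq_queue_tag_busy_iter(q, count_busy, &busy);
]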
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 27 ++++++++++++++++++++------- block/blk-mq-tag.h | 2 ++ block/blk-mq.c | 14 +++----------- include/linux/blk-mq.h | 2 -- 4 files changed, 25 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 9115c6d59948..ed96474d75cb 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -471,17 +471,30 @@ void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, } EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); -void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { - struct blk_mq_tags *tags = hctx->tags; + struct blk_mq_hw_ctx *hctx; + int i; + + + queue_for_each_hw_ctx(q, hctx, i) { + struct blk_mq_tags *tags = hctx->tags; + + /* + * If no software queues are currently mapped to this + * hardware queue, there's nothing to check + */ + if (!blk_mq_hw_queue_mapped(hctx)) + continue; + + if (tags->nr_reserved_tags) + bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true); + bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, + false); + } - if (tags->nr_reserved_tags) - bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true); - bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, - false); } -EXPORT_SYMBOL(blk_mq_tag_busy_iter); static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt) { diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 9eb2cf4f01cb..d468a79f2c4a 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -58,6 +58,8 @@ extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag); extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, + void *priv); enum { BLK_MQ_TAG_CACHE_MIN = 1, diff --git a/block/blk-mq.c b/block/blk-mq.c index 2306330530e8..7785ae96267a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -641,24 +641,16 @@ static void blk_mq_rq_timer(unsigned long priv) .next = 0, .next_set = 0, }; - struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) { - /* - * If not software queues are currently mapped to this - * hardware queue, there's nothing to check - */ - if (!blk_mq_hw_queue_mapped(hctx)) - continue; - - blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data); - } + blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data); if (data.next_set) { data.next = blk_rq_timeout(round_jiffies_up(data.next)); mod_timer(&q->timeout, data.next); } else { + struct blk_mq_hw_ctx *hctx; + queue_for_each_hw_ctx(q, hctx, i) { /* the hctx may be unmapped, so check it here */ if (blk_mq_hw_queue_mapped(hctx)) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c1b5c867ff07..5e7d43ab61c0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -223,8 +223,6 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, - void *priv); void
blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); -- cgit v1.2.3 From f1e0bb0ad473a32d1b7e6d285ae9f7e47710bb5e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Sep 2015 17:32:13 +0800 Subject: genirq: Introduce generic irq migration for cpu hotunplug ARM and ARM64 have almost identical code for migrating interrupts on cpu hotunplug. Provide a generic version which can be used by both. The new code addresses a shortcoming in the ARM[64] variants, which fail to update the affinity setting in some cases. The solution is to use the core function irq_do_set_affinity() instead of open coding it. [ tglx: Added copyright notice and license boilerplate. Rewrote subject and changelog. ] Signed-off-by: Yang Yingliang Acked-by: Russell King - ARM Linux Cc: Jiang Liu Cc: Marc Zyngier Cc: Mark Rutland Cc: Will Deacon Cc: Hanjun Guo Cc: Link: http://lkml.kernel.org/r/1443087135-17044-2-git-send-email-yangyingliang@huawei.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 ++ kernel/irq/Kconfig | 4 +++ kernel/irq/Makefile | 1 + kernel/irq/cpuhotplug.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 kernel/irq/cpuhotplug.c (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 11bf09288ddb..45cc7299bb61 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -452,6 +452,8 @@ extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); +extern void irq_migrate_all_off_this_cpu(void); + #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 9a76e3beda54..3b48dab80164 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -30,6 +30,10 @@ config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ config GENERIC_PENDING_IRQ bool +# Support for generic irq migrating off cpu before the cpu is offline. +config GENERIC_IRQ_MIGRATION + bool + # Alpha specific irq affinity mechanism config AUTO_IRQ_AFFINITY bool diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index d12123526e2b..2fc9cbdf35b6 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o +obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o obj-$(CONFIG_PM_SLEEP) += pm.o obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c new file mode 100644 index 000000000000..80f4f4e56fed --- /dev/null +++ b/kernel/irq/cpuhotplug.c @@ -0,0 +1,82 @@ +/* + * Generic cpu hotunplug interrupt migration code copied from the + * arch/arm implementation + * + * Copyright (C) Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#include <linux/interrupt.h> +#include <linux/ratelimit.h> +#include <linux/irq.h> + +#include "internals.h" + +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->common->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || + !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) { + pr_warn_ratelimited("IRQ%u: unable to set affinity\n", d->irq); + } else { + int r = irq_do_set_affinity(d, affinity, false); + if (r) + pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", + d->irq, r); + } + + return ret; +} + +/** + * irq_migrate_all_off_this_cpu - Migrate irqs away from offline cpu + * + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void irq_migrate_all_off_this_cpu(void) +{ + unsigned int irq; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_active_irq(irq) { + bool affinity_broken; + + desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + irq, smp_processor_id()); + } + + local_irq_restore(flags); +} -- cgit v1.2.3 From 49df6397edfc5a8ba8ca813b51fb9729d8e94b40 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:40 -0700 Subject: KVM: x86: Split the APIC from the rest of IRQCHIP. First patch in a series which enables the relocation of the PIC/IOAPIC to userspace. Adds capability KVM_CAP_SPLIT_IRQCHIP; KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the rest of the irqchip. Compile tested for x86. Signed-off-by: Steve Rutherford Suggested-by: Andrew Honig Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 17 +++++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/i8254.c | 4 +++- arch/x86/kvm/ioapic.h | 8 ++++++++ arch/x86/kvm/irq.h | 11 ++++++++++- arch/x86/kvm/irq_comm.c | 9 ++++++++- arch/x86/kvm/lapic.c | 6 ++++-- arch/x86/kvm/x86.c | 23 +++++++++++++++++++++-- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + 10 files changed, 75 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d9ecceea5a02..43e0816d0de1 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3627,6 +3627,23 @@ struct { KVM handlers should exit to userspace with rc = -EREMOTE. +7.5 KVM_CAP_SPLIT_IRQCHIP + +Architectures: x86 +Parameters: None +Returns: 0 on success, -1 on error + +Create a local apic for each processor in the kernel. This can be used +instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the +IOAPIC and PIC (and also the PIT, even though this has to be enabled +separately). + +This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in-kernel +IOAPIC or PIC. This also enables in-kernel routing of interrupt requests.
+ +Fails if VCPU has already been created, or if the irqchip is already in the +kernel (i.e. KVM_CREATE_IRQCHIP has already been called). + 8. Other capabilities. ---------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a0ef289d5a86..befcf555bddc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -684,6 +684,8 @@ struct kvm_arch { u32 bsp_vcpu_id; u64 disabled_quirks; + + bool irqchip_split; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index f90952f64e79..08116ff227cc 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -35,6 +35,7 @@ #include #include +#include "ioapic.h" #include "irq.h" #include "i8254.h" #include "x86.h" @@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; s64 interval; - if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) + if (!ioapic_in_kernel(kvm) || + ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) return; interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index bf36d66a1951..a8842c0dee73 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -97,6 +97,14 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } +static inline int ioapic_in_kernel(struct kvm *kvm) +{ + int ret; + + ret = (ioapic_irqchip(kvm) != NULL); + return ret; +} + void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 9e6e7e04de98..2f9703dcd913 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -83,13 +83,22 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) return kvm->arch.vpic; } +static inline int irqchip_split(struct kvm *kvm) +{ + return kvm->arch.irqchip_split; +} + static inline int irqchip_in_kernel(struct kvm *kvm) { struct kvm_pic *vpic = pic_irqchip(kvm); + bool ret; + + ret = (vpic != NULL); + ret |= irqchip_split(kvm); /* Read vpic before kvm->irq_routing. 
*/ smp_rmb(); - return vpic != NULL; + return ret; } static inline int lapic_in_kernel(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 9efff9e5b58c..67f6b62a6814 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) goto unlock; } clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); - if (!irqchip_in_kernel(kvm)) + if (!ioapic_in_kernel(kvm)) goto unlock; kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) return kvm_set_irq_routing(kvm, default_routing, ARRAY_SIZE(default_routing), 0); } + +static const struct kvm_irq_routing_entry empty_routing[] = {}; + +int kvm_setup_empty_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, empty_routing, 0, 0); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c4bcc86d6dc4..e05946c36b87 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -209,7 +209,8 @@ out: if (old) kfree_rcu(old, rcu); - kvm_vcpu_request_scan_ioapic(kvm); + if (ioapic_in_kernel(kvm)) + kvm_vcpu_request_scan_ioapic(kvm); } static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) @@ -1845,7 +1846,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_rtc_eoi_tracking_restore_one(vcpu); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_rtc_eoi_tracking_restore_one(vcpu); } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 28c98c8f9d1c..f720774a4797 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2448,6 +2448,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_DISABLE_QUIRKS: case KVM_CAP_SET_BOOT_CPU_ID: + case KVM_CAP_SPLIT_IRQCHIP: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3555,6 +3556,24 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.disabled_quirks = cap->args[0]; r = 0; break; + case KVM_CAP_SPLIT_IRQCHIP: { + mutex_lock(&kvm->lock); + r = -EEXIST; + if (irqchip_in_kernel(kvm)) + goto split_irqchip_unlock; + if (atomic_read(&kvm->online_vcpus)) + goto split_irqchip_unlock; + r = kvm_setup_empty_irq_routing(kvm); + if (r) + goto split_irqchip_unlock; + /* Pairs with irqchip_in_kernel. 
*/ + smp_wmb(); + kvm->arch.irqchip_split = true; + r = 0; +split_irqchip_unlock: + mutex_unlock(&kvm->lock); + break; + } default: r = -EINVAL; break; @@ -3668,7 +3687,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm)) + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) goto get_irqchip_out; r = kvm_vm_ioctl_get_irqchip(kvm, chip); if (r) @@ -3692,7 +3711,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm)) + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) goto set_irqchip_out; r = kvm_vm_ioctl_set_irqchip(kvm, chip); if (r) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..354f147647ab 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1002,6 +1002,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #endif int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a9256f0331ae..ed00f8fc9ea2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -824,6 +824,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MULTI_ADDRESS_SPACE 118 #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 +#define KVM_CAP_SPLIT_IRQCHIP 121 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 7543a635aa09eb138b2cbf60ac3ff19503ae6954 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:41 -0700 Subject: KVM: x86: Add KVM exit for IOAPIC EOIs Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI level-triggered IOAPIC interrupts. Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs to be informed (which is identical to the EOI_EXIT_BITMAP field used by modern x86 processors, but can also be used to elide kvm IOAPIC EOI exits on older processors). [Note: A prototype using ResampleFDs found that decoupling the EOI from the VCPU's thread made it possible for the VCPU to not see a recent EOI after reentering the guest. This does not match real hardware.] Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 12 ++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/lapic.c | 24 +++++++++++++++++------- arch/x86/kvm/x86.c | 11 +++++++++++ include/linux/kvm_host.h | 2 +- include/uapi/linux/kvm.h | 5 +++++ 6 files changed, 48 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 43e0816d0de1..0d14bf5db534 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3309,6 +3309,18 @@ Valid values for 'type' are: to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. + /* KVM_EXIT_IOAPIC_EOI */ + struct { + __u8 vector; + } eoi; + +Indicates that the VCPU's in-kernel local APIC received an EOI for a +level-triggered IOAPIC interrupt. This exit only triggers when the +IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled); +the userspace IOAPIC should process the EOI and retrigger the interrupt if +it is still asserted. Vector is the LAPIC interrupt vector for which the +EOI was received. + /* Fix the size of the union. 
*/ char padding[256]; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index befcf555bddc..af09fa1d1be7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -574,6 +574,8 @@ struct kvm_vcpu_arch { struct { bool pv_unhalted; } pv; + + int pending_ioapic_eoi; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e05946c36b87..ef70f6f3a37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -877,15 +877,25 @@ static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - if (kvm_ioapic_handles_vector(apic, vector)) { - int trigger_mode; - if (apic_test_vector(vector, apic->regs + APIC_TMR)) - trigger_mode = IOAPIC_LEVEL_TRIG; - else - trigger_mode = IOAPIC_EDGE_TRIG; + int trigger_mode; + + /* Eoi the ioapic only if the ioapic doesn't own the vector. */ + if (!kvm_ioapic_handles_vector(apic, vector)) + return; - kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); + /* Request a KVM exit to inform the userspace IOAPIC. */ + if (irqchip_split(apic->vcpu->kvm)) { + apic->vcpu->arch.pending_ioapic_eoi = vector; + kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); + return; } + + if (apic_test_vector(vector, apic->regs + APIC_TMR)) + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; + + kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); } static int apic_set_eoi(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f720774a4797..27429daa054e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6271,6 +6271,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_pmu_handle_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_pmu_deliver_pmi(vcpu); + if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { + BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); + if (test_bit(vcpu->arch.pending_ioapic_eoi, + (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; + vcpu->run->eoi.vector = + vcpu->arch.pending_ioapic_eoi; + r = 0; + goto out; + } + } if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 354f147647ab..3b33215ed447 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,6 +140,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 +#define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -1146,4 +1147,3 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif - diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ed00f8fc9ea2..12e3afbf0f47 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -183,6 +183,7 @@ struct kvm_s390_skeys { #define KVM_EXIT_EPR 23 #define KVM_EXIT_SYSTEM_EVENT 24 #define KVM_EXIT_S390_STSI 25 +#define KVM_EXIT_IOAPIC_EOI 26 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -333,6 +334,10 @@ struct kvm_run { __u8 sel1; __u16 sel2; } s390_stsi; + /* KVM_EXIT_IOAPIC_EOI */ + struct { + __u8 vector; + } eoi; /* Fix the size of the union. 
*/ char padding[256]; }; -- cgit v1.2.3 From b053b2aef25d00773fa6762dcd4b7f5c9c42d171 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:32:35 -0700 Subject: KVM: x86: Add EOI exit bitmap inference In order to support a userspace IOAPIC interacting with an in kernel APIC, the EOI exit bitmaps need to be configurable. If the IOAPIC is in userspace (i.e. the irqchip has been split), the EOI exit bitmaps will be set whenever the GSI Routes are configured. In particular, the low MSI routes are reservable for userspace IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the destination vector of the route will be set for the destination VCPU. The intention is for the userspace IOAPICs to use the reservable MSI routes to inject interrupts into the guest. This is a slight abuse of the notion of an MSI Route, given that MSIs classically bypass the IOAPIC. It might be worthwhile to add an additional route type to improve clarity. Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 9 ++++++--- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/ioapic.h | 2 ++ arch/x86/kvm/irq_comm.c | 42 +++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/lapic.c | 3 +-- arch/x86/kvm/x86.c | 9 ++++++++- include/linux/kvm_host.h | 16 +++++++++++++++ virt/kvm/irqchip.c | 12 ++--------- 8 files changed, 78 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0d14bf5db534..89e71648d748 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3642,7 +3642,7 @@ KVM handlers should exit to userspace with rc = -EREMOTE. 7.5 KVM_CAP_SPLIT_IRQCHIP Architectures: x86 -Parameters: None +Parameters: args[0] - number of routes reserved for userspace IOAPICs Returns: 0 on success, -1 on error Create a local apic for each processor in the kernel. This can be used @@ -3650,8 +3650,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the IOAPIC and PIC (and also the PIT, even though this has to be enabled separately). -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in-kernel -IOAPIC or PIC. This also enables in-kernel routing of interrupt requests. +This capability also enables in-kernel routing of interrupt requests; +when KVM_CAP_SPLIT_IRQCHIP is enabled, only routes of KVM_IRQ_ROUTING_MSI type +are used in the IRQ routing table. The first args[0] MSI routes are reserved +for the IOAPIC pins. Whenever the LAPIC receives an EOI for these routes, +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace. Fails if VCPU has already been created, or if the irqchip is already in the kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
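[ Note: a hedged userspace sketch, not part of this patch, of how a VMM would enable the split irqchip; vm_fd and the pin count of 24 are assumptions for illustration:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdio.h>

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_SPLIT_IRQCHIP,
		.args[0] = 24,	/* MSI routes to reserve for userspace IOAPIC pins */
	};

	/* vm_fd comes from KVM_CREATE_VM; this must be issued before any
	 * KVM_CREATE_VCPU call, or it fails as described above.
	 */
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
		perror("KVM_ENABLE_CAP(KVM_CAP_SPLIT_IRQCHIP)");
]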
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index af09fa1d1be7..7a5f9debbcd8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -688,6 +688,7 @@ struct kvm_arch { u64 disabled_quirks; bool irqchip_split; + u8 nr_reserved_ioapic_pins; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index a8842c0dee73..084617d37c74 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -9,6 +9,7 @@ struct kvm; struct kvm_vcpu; #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 67f6b62a6814..177460998bb0 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm) { return kvm_set_irq_routing(kvm, empty_routing, 0, 0); } + +void kvm_arch_irq_routing_update(struct kvm *kvm) +{ + if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) + return; + kvm_make_scan_ioapic_request(kvm); +} + +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_kernel_irq_routing_entry *entry; + struct kvm_irq_routing_table *table; + u32 i, nr_ioapic_pins; + int idx; + + /* kvm->irq_routing must be read after clearing + * KVM_SCAN_IOAPIC. */ + smp_mb(); + idx = srcu_read_lock(&kvm->irq_srcu); + table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + nr_ioapic_pins = min_t(u32, table->nr_rt_entries, + kvm->arch.nr_reserved_ioapic_pins); + for (i = 0; i < nr_ioapic_pins; ++i) { + hlist_for_each_entry(entry, &table->map[i], link) { + u32 dest_id, dest_mode; + + if (entry->type != KVM_IRQ_ROUTING_MSI) + continue; + dest_id = (entry->msi.address_lo >> 12) & 0xff; + dest_mode = (entry->msi.address_lo >> 2) & 0x1; + if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id, + dest_mode)) { + u32 vector = entry->msi.data & 0xff; + + __set_bit(vector, + (unsigned long *) eoi_exit_bitmap); + } + } + } + srcu_read_unlock(&kvm->irq_srcu, idx); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ef70f6f3a37a..2f4c0d0cbe0a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -209,8 +209,7 @@ out: if (old) kfree_rcu(old, rcu); - if (ioapic_in_kernel(kvm)) - kvm_vcpu_request_scan_ioapic(kvm); + kvm_make_scan_ioapic_request(kvm); } static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 27429daa054e..4aeed2086c5e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3558,6 +3558,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; case KVM_CAP_SPLIT_IRQCHIP: { mutex_lock(&kvm->lock); + r = -EINVAL; + if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) + goto split_irqchip_unlock; r = -EEXIST; if (irqchip_in_kernel(kvm)) goto split_irqchip_unlock; @@ -3569,6 +3572,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, /* Pairs with irqchip_in_kernel. 
*/ smp_wmb(); kvm->arch.irqchip_split = true; + kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; r = 0; split_irqchip_unlock: mutex_unlock(&kvm->lock); break; @@ -6167,7 +6171,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + if (irqchip_split(vcpu->kvm)) + kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + else + kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); kvm_x86_ops->load_eoi_exitmap(vcpu); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b33215ed447..d754f2d826e9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -330,6 +330,18 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; +#endif + #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -456,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_arch_irq_routing_update(struct kvm *kvm); #else static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { } +static inline void kvm_arch_irq_routing_update(struct kvm *kvm) +{ +} #endif #ifdef CONFIG_HAVE_KVM_IRQFD diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index d7ea8e20dae4..716a1c4db528 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,16 +31,6 @@ #include #include "irq.h" -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. - */ - struct hlist_head map[0]; -}; - int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) { @@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm, kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); + kvm_arch_irq_routing_update(kvm); + synchronize_srcu_expedited(&kvm->irq_srcu); new = old; -- cgit v1.2.3 From e516cebb4f2b2057a5a421fea589079502acfff6 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 16 Sep 2015 12:29:48 +0300 Subject: kvm/x86: Hyper-V HV_X64_MSR_RESET msr The HV_X64_MSR_RESET msr is used by a Hyper-V based Windows guest to have the hypervisor reset the guest VM. It is necessary to support loading of winhv.sys in the guest, which in turn is required to support Windows VMBus. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/hyperv.h | 3 +++ arch/x86/kvm/hyperv.c | 10 ++++++++++ arch/x86/kvm/x86.c | 7 +++++++ include/linux/kvm_host.h | 1 + 4 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index f0412c50c47b..dab584bf7ddf 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -153,6 +153,9 @@ /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX 0x40000002 +/* MSR used to reset the guest OS.
+ */ +#define HV_X64_MSR_RESET 0x40000003 + /* MSR used to read the per-partition time reference counter */ #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a8160d2ae362..0ad11a232474 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_TIME_REF_COUNT: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_RESET: r = true; break; } @@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, data); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_set_crash_ctl(vcpu, data, host); + case HV_X64_MSR_RESET: + if (data == 1) { + vcpu_debug(vcpu, "hyper-v reset requested\n"); + kvm_make_request(KVM_REQ_HV_RESET, vcpu); + } + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -241,6 +248,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) pdata); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_get_crash_ctl(vcpu, pdata); + case HV_X64_MSR_RESET: + data = 0; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b90ad01c617d..297b36fa3b80 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -952,6 +952,7 @@ static u32 emulated_msrs[] = { HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, + HV_X64_MSR_RESET, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -6321,6 +6322,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; + r = 0; + goto out; + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d754f2d826e9..cd0ba2e931e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -141,6 +141,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 #define KVM_REQ_IOAPIC_EOI_EXIT 28 +#define KVM_REQ_HV_RESET 29 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From f73f8173126ba68eb1c42bd9a234a51d78576ca6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 18 Sep 2015 22:29:39 +0800 Subject: virt: IRQ bypass manager When a physical I/O device is assigned to a virtual machine through facilities like VFIO and KVM, the interrupt for the device generally bounces through the host system before being injected into the VM. However, hardware technologies exist that often allow the host to be bypassed for some of these scenarios. Intel Posted Interrupts allow the specified physical edge interrupts to be directly injected into a guest when delivered to a physical processor while the vCPU is running. ARM IRQ Forwarding allows forwarded physical interrupts to be directly deactivated by the guest. The IRQ bypass manager here is meant to provide the shim to connect interrupt producers, generally the host physical device driver, with interrupt consumers, generally the hypervisor, in order to configure these bypass mechanisms. To do this, we base the connection on a shared, opaque token.
For KVM-VFIO this is expected to be an eventfd_ctx since this is the connection we already use to connect an eventfd to an irqfd on the in-kernel path. When a producer and consumer with matching tokens are found, callbacks via both registered participants allow the bypass facilities to be automatically enabled. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Feng Wu Signed-off-by: Feng Wu Signed-off-by: Paolo Bonzini --- MAINTAINERS | 7 ++ include/linux/irqbypass.h | 90 ++++++++++++++ virt/lib/Kconfig | 2 + virt/lib/Makefile | 1 + virt/lib/irqbypass.c | 257 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 357 insertions(+) create mode 100644 include/linux/irqbypass.h create mode 100644 virt/lib/Kconfig create mode 100644 virt/lib/Makefile create mode 100644 virt/lib/irqbypass.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..3b738cb8bdeb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11177,6 +11177,13 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/via/via-velocity.* +VIRT LIB +M: Alex Williamson +M: Paolo Bonzini +L: kvm@vger.kernel.org +S: Supported +F: virt/lib/ + VIVID VIRTUAL VIDEO DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h new file mode 100644 index 000000000000..1551b5b2f4c2 --- /dev/null +++ b/include/linux/irqbypass.h @@ -0,0 +1,90 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef IRQBYPASS_H +#define IRQBYPASS_H + +#include + +struct irq_bypass_consumer; + +/* + * Theory of operation + * + * The IRQ bypass manager is a simple set of lists and callbacks that allows + * IRQ producers (ex. physical interrupt sources) to be matched to IRQ + * consumers (ex. virtualization hardware that allows IRQ bypass or offload) + * via a shared token (ex. eventfd_ctx). Producers and consumers register + * independently. When a token match is found, the optional @stop callback + * will be called for each participant. The pair will then be connected via + * the @add_* callbacks, and finally the optional @start callback will allow + * any final coordination. When either participant is unregistered, the + * process is repeated using the @del_* callbacks in place of the @add_* + * callbacks. Match tokens must be unique per producer/consumer; 1:N pairings + * are not supported. + */ + +/** + * struct irq_bypass_producer - IRQ bypass producer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @irq: Linux IRQ number for the producer device + * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) + * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass producer structure represents an interrupt source for + * participation in possible host bypass, for instance an interrupt vector + * for a physical device assigned to a VM.
+ */ +struct irq_bypass_producer { + struct list_head node; + void *token; + int irq; + int (*add_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*del_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*stop)(struct irq_bypass_producer *); + void (*start)(struct irq_bypass_producer *); +}; + +/** + * struct irq_bypass_consumer - IRQ bypass consumer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @add_producer: Connect the IRQ consumer to an IRQ producer + * @del_producer: Disconnect the IRQ consumer from an IRQ producer + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass consumer structure represents an interrupt sink for + * participation in possible host bypass, for instance a hypervisor may + * support offloads to allow bypassing the host entirely or offload + * portions of the interrupt handling to the VM. + */ +struct irq_bypass_consumer { + struct list_head node; + void *token; + int (*add_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*del_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*stop)(struct irq_bypass_consumer *); + void (*start)(struct irq_bypass_consumer *); +}; + +int irq_bypass_register_producer(struct irq_bypass_producer *); +void irq_bypass_unregister_producer(struct irq_bypass_producer *); +int irq_bypass_register_consumer(struct irq_bypass_consumer *); +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); + +#endif /* IRQBYPASS_H */ diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig new file mode 100644 index 000000000000..89a414f815d2 --- /dev/null +++ b/virt/lib/Kconfig @@ -0,0 +1,2 @@ +config IRQ_BYPASS_MANAGER + tristate diff --git a/virt/lib/Makefile b/virt/lib/Makefile new file mode 100644 index 000000000000..901228d1ffbc --- /dev/null +++ b/virt/lib/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c new file mode 100644 index 000000000000..09a03b5a21ff --- /dev/null +++ b/virt/lib/irqbypass.c @@ -0,0 +1,257 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Various virtualization hardware acceleration techniques allow bypassing or + * offloading interrupts received from devices around the host kernel. Posted + * Interrupts on Intel VT-d systems can allow interrupts to be received + * directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical + * interrupts to be directly deactivated by the guest. This manager allows + * interrupt producers and consumers to find each other to enable this sort of + * bypass. 
+ */ + +#include <linux/irqbypass.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("IRQ bypass manager utility module"); + +static LIST_HEAD(producers); +static LIST_HEAD(consumers); +static DEFINE_MUTEX(lock); + +/* @lock must be held when calling connect */ +static int __connect(struct irq_bypass_producer *prod, + struct irq_bypass_consumer *cons) +{ + int ret = 0; + + if (prod->stop) + prod->stop(prod); + if (cons->stop) + cons->stop(cons); + + if (prod->add_consumer) + ret = prod->add_consumer(prod, cons); + + if (!ret) { + ret = cons->add_producer(cons, prod); + if (ret && prod->del_consumer) + prod->del_consumer(prod, cons); + } + + if (cons->start) + cons->start(cons); + if (prod->start) + prod->start(prod); + + return ret; +} + +/* @lock must be held when calling disconnect */ +static void __disconnect(struct irq_bypass_producer *prod, + struct irq_bypass_consumer *cons) +{ + if (prod->stop) + prod->stop(prod); + if (cons->stop) + cons->stop(cons); + + cons->del_producer(cons, prod); + + if (prod->del_consumer) + prod->del_consumer(prod, cons); + + if (cons->start) + cons->start(cons); + if (prod->start) + prod->start(prod); +} + +/** + * irq_bypass_register_producer - register IRQ bypass producer + * @producer: pointer to producer structure + * + * Add the provided IRQ producer to the list of producers and connect + * with any matching token found on the IRQ consumers list. + */ +int irq_bypass_register_producer(struct irq_bypass_producer *producer) +{ + struct irq_bypass_producer *tmp; + struct irq_bypass_consumer *consumer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&lock); + + list_for_each_entry(tmp, &producers, node) { + if (tmp->token == producer->token) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return -EBUSY; + } + } + + list_for_each_entry(consumer, &consumers, node) { + if (consumer->token == producer->token) { + int ret = __connect(producer, consumer); + if (ret) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; + } + break; + } + } + + list_add(&producer->node, &producers); + + mutex_unlock(&lock); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_bypass_register_producer); + +/** + * irq_bypass_unregister_producer - unregister IRQ bypass producer + * @producer: pointer to producer structure + * + * Remove a previously registered IRQ producer from the list of producers + * and disconnect it from any connected IRQ consumer. + */ +void irq_bypass_unregister_producer(struct irq_bypass_producer *producer) +{ + struct irq_bypass_producer *tmp; + struct irq_bypass_consumer *consumer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return; /* nothing in the list anyway */ + + mutex_lock(&lock); + + list_for_each_entry(tmp, &producers, node) { + if (tmp->token != producer->token) + continue; + + list_for_each_entry(consumer, &consumers, node) { + if (consumer->token == producer->token) { + __disconnect(producer, consumer); + break; + } + } + + list_del(&producer->node); + module_put(THIS_MODULE); + break; + } + + mutex_unlock(&lock); + + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer); + +/** + * irq_bypass_register_consumer - register IRQ bypass consumer + * @consumer: pointer to consumer structure + * + * Add the provided IRQ consumer to the list of consumers and connect + * with any matching token found on the IRQ producer list.
+ */ +int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) +{ + struct irq_bypass_consumer *tmp; + struct irq_bypass_producer *producer; + + if (!consumer->add_producer || !consumer->del_producer) + return -EINVAL; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&lock); + + list_for_each_entry(tmp, &consumers, node) { + if (tmp->token == consumer->token) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return -EBUSY; + } + } + + list_for_each_entry(producer, &producers, node) { + if (producer->token == consumer->token) { + int ret = __connect(producer, consumer); + if (ret) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; + } + break; + } + } + + list_add(&consumer->node, &consumers); + + mutex_unlock(&lock); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_bypass_register_consumer); + +/** + * irq_bypass_unregister_consumer - unregister IRQ bypass consumer + * @consumer: pointer to consumer structure + * + * Remove a previously registered IRQ consumer from the list of consumers + * and disconnect it from any connected IRQ producer. + */ +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) +{ + struct irq_bypass_consumer *tmp; + struct irq_bypass_producer *producer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return; /* nothing in the list anyway */ + + mutex_lock(&lock); + + list_for_each_entry(tmp, &consumers, node) { + if (tmp->token != consumer->token) + continue; + + list_for_each_entry(producer, &producers, node) { + if (producer->token == consumer->token) { + __disconnect(producer, consumer); + break; + } + } + + list_del(&consumer->node); + module_put(THIS_MODULE); + break; + } + + mutex_unlock(&lock); + + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer); -- cgit v1.2.3 From 166c9775f1f8b8f00ad1db0fa5c8fc74059d965d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:42 +0800 Subject: KVM: create kvm_irqfd.h Move _irqfd_resampler and _irqfd struct declarations in a new public header: kvm_irqfd.h. They are respectively renamed into kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes will be used by architecture specific code, in the context of IRQ bypass manager integration. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 69 ++++++++++++++++++++++++++++++++++ virt/kvm/eventfd.c | 95 ++++++++++++----------------------------------- 2 files changed, 92 insertions(+), 72 deletions(-) create mode 100644 include/linux/kvm_irqfd.h (limited to 'include/linux') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h new file mode 100644 index 000000000000..f926b39a26b6 --- /dev/null +++ b/include/linux/kvm_irqfd.h @@ -0,0 +1,69 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * irqfd: Allows an fd to be used to inject an interrupt to the guest + * Credit goes to Avi Kivity for the original idea. 
+ */ + +#ifndef __LINUX_KVM_IRQFD_H +#define __LINUX_KVM_IRQFD_H + +#include <linux/kvm_host.h> +#include <linux/poll.h> + +/* + * Resampling irqfds are a special variety of irqfds used to emulate + * level triggered interrupts. The interrupt is asserted on eventfd + * trigger. On acknowledgment through the irq ack notifier, the + * interrupt is de-asserted and userspace is notified through the + * resamplefd. All resamplers on the same gsi are de-asserted + * together, so we don't need to track the state of each individual + * user. We can also therefore share the same irq source ID. + */ +struct kvm_kernel_irqfd_resampler { + struct kvm *kvm; + /* + * List of resampling struct _irqfd objects sharing this gsi. + * RCU list modified under kvm->irqfds.resampler_lock + */ + struct list_head list; + struct kvm_irq_ack_notifier notifier; + /* + * Entry in list of kvm->irqfd.resampler_list. Use for sharing + * resamplers among irqfds on the same gsi. + * Accessed and modified under kvm->irqfds.resampler_lock + */ + struct list_head link; +}; + +struct kvm_kernel_irqfd { + /* Used for MSI fast-path */ + struct kvm *kvm; + wait_queue_t wait; + /* Update side is protected by irqfds.lock */ + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; + /* Used for level IRQ fast-path */ + int gsi; + struct work_struct inject; + /* The resampler used by this irqfd (resampler-only) */ + struct kvm_kernel_irqfd_resampler *resampler; + /* Eventfd notified on resample (resampler-only) */ + struct eventfd_ctx *resamplefd; + /* Entry in list of irqfds for a resampler (resampler-only) */ + struct list_head resampler_link; + /* Used for setup/shutdown */ + struct eventfd_ctx *eventfd; + struct list_head list; + poll_table pt; + struct work_struct shutdown; +}; + +#endif /* __LINUX_KVM_IRQFD_H */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index ac89299b8699..413f5a6b61ba 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -23,6 +23,7 @@ #include <linux/kvm_host.h> #include <linux/kvm.h> +#include <linux/kvm_irqfd.h> #include <linux/workqueue.h> #include <linux/syscalls.h> #include <linux/wait.h> @@ -39,68 +40,14 @@ #include #ifdef CONFIG_HAVE_KVM_IRQFD -/* - * -------------------------------------------------------------------- - * irqfd: Allows an fd to be used to inject an interrupt to the guest - * - * Credit goes to Avi Kivity for the original idea. - * -------------------------------------------------------------------- - */ - -/* - * Resampling irqfds are a special variety of irqfds used to emulate - * level triggered interrupts. The interrupt is asserted on eventfd - * trigger. On acknowledgement through the irq ack notifier, the - * interrupt is de-asserted and userspace is notified through the - * resamplefd. All resamplers on the same gsi are de-asserted - * together, so we don't need to track the state of each individual - * user. We can also therefore share the same irq source ID. - */ -struct _irqfd_resampler { - struct kvm *kvm; - /* - * List of resampling struct _irqfd objects sharing this gsi. - * RCU list modified under kvm->irqfds.resampler_lock - */ - struct list_head list; - struct kvm_irq_ack_notifier notifier; - /* - * Entry in list of kvm->irqfd.resampler_list. Use for sharing - * resamplers among irqfds on the same gsi.
- * Accessed and modified under kvm->irqfds.resampler_lock - */ - struct list_head link; -}; - -struct _irqfd { - /* Used for MSI fast-path */ - struct kvm *kvm; - wait_queue_t wait; - /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry irq_entry; - seqcount_t irq_entry_sc; - /* Used for level IRQ fast-path */ - int gsi; - struct work_struct inject; - /* The resampler used by this irqfd (resampler-only) */ - struct _irqfd_resampler *resampler; - /* Eventfd notified on resample (resampler-only) */ - struct eventfd_ctx *resamplefd; - /* Entry in list of irqfds for a resampler (resampler-only) */ - struct list_head resampler_link; - /* Used for setup/shutdown */ - struct eventfd_ctx *eventfd; - struct list_head list; - poll_table pt; - struct work_struct shutdown; -}; static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, inject); struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { @@ -121,12 +68,13 @@ irqfd_inject(struct work_struct *work) static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; int idx; - resampler = container_of(kian, struct _irqfd_resampler, notifier); + resampler = container_of(kian, + struct kvm_kernel_irqfd_resampler, notifier); kvm = resampler->kvm; kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, @@ -141,9 +89,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) } static void -irqfd_resampler_shutdown(struct _irqfd *irqfd) +irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) { - struct _irqfd_resampler *resampler = irqfd->resampler; + struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; struct kvm *kvm = resampler->kvm; mutex_lock(&kvm->irqfds.resampler_lock); @@ -168,7 +116,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) static void irqfd_shutdown(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, shutdown); u64 cnt; /* @@ -198,7 +147,7 @@ irqfd_shutdown(struct work_struct *work) /* assumes kvm->irqfds.lock is held */ static bool -irqfd_is_active(struct _irqfd *irqfd) +irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { return list_empty(&irqfd->list) ? 
false : true; } @@ -209,7 +158,7 @@ irqfd_is_active(struct _irqfd *irqfd) * assumes kvm->irqfds.lock is held */ static void -irqfd_deactivate(struct _irqfd *irqfd) +irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { BUG_ON(!irqfd_is_active(irqfd)); @@ -224,7 +173,8 @@ irqfd_deactivate(struct _irqfd *irqfd) static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); + struct kvm_kernel_irqfd *irqfd = + container_of(wait, struct kvm_kernel_irqfd, wait); unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; @@ -274,12 +224,13 @@ static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { - struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); + struct kvm_kernel_irqfd *irqfd = + container_of(pt, struct kvm_kernel_irqfd, pt); add_wait_queue(wqh, &irqfd->wait); } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) +static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; @@ -304,7 +255,7 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; @@ -340,7 +291,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) irqfd->eventfd = eventfd; if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; resamplefd = eventfd_ctx_fdget(args->resamplefd); if (IS_ERR(resamplefd)) { @@ -525,7 +476,7 @@ kvm_eventfd_init(struct kvm *kvm) static int kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd; eventfd = eventfd_ctx_fdget(args->fd); @@ -581,7 +532,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) void kvm_irqfd_release(struct kvm *kvm) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; spin_lock_irq(&kvm->irqfds.lock); @@ -604,7 +555,7 @@ kvm_irqfd_release(struct kvm *kvm) */ void kvm_irq_routing_update(struct kvm *kvm) { - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); -- cgit v1.2.3 From 1a02b27035f82091d51ecafcb9ccaac1f31d4eb2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:43 +0800 Subject: KVM: introduce kvm_arch functions for IRQ bypass This patch introduces - kvm_arch_irq_bypass_add_producer - kvm_arch_irq_bypass_del_producer - kvm_arch_irq_bypass_stop - kvm_arch_irq_bypass_start They make it possible to specialize the KVM IRQ bypass consumer when CONFIG_HAVE_KVM_IRQ_BYPASS is set. Signed-off-by: Eric Auger [Add weak implementations of the callbacks.
- Feng] Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 12 ++++++++++++ 3 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cd0ba2e931e1..b8543b02b7bc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1163,4 +1164,13 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ + +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e2c876d5a03b..9f8014dda2cf 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -47,3 +47,6 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y depends on COMPAT && !S390 + +config HAVE_KVM_IRQ_BYPASS + bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 413f5a6b61ba..c4f7abec4261 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -252,6 +252,18 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) write_seqcount_end(&irqfd->irq_entry_sc); } +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +void __attribute__((weak)) kvm_arch_irq_bypass_stop( + struct irq_bypass_consumer *cons) +{ +} + +void __attribute__((weak)) kvm_arch_irq_bypass_start( + struct irq_bypass_consumer *cons) +{ +} +#endif + static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { -- cgit v1.2.3 From 9016cfb577a15abd6a7990890ccf6bf1edf04d31 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:44 +0800 Subject: KVM: eventfd: add irq bypass consumer management This patch adds the registration/unregistration of an irq_bypass_consumer on irqfd assignment/deassignment. 
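For context, the consumer registered below only becomes active once a producer with a matching token shows up; the producer side lives in a device driver such as VFIO. Purely as an illustrative sketch (the helper name and static variable are hypothetical, not part of this patch; irq_bypass_register_producer() and the token/irq fields are those added by the IRQ bypass manager earlier in this series), the producer registration looks roughly like:

	/* Hypothetical producer registration on the device-driver side. */
	static struct irq_bypass_producer bypass_prod;

	static int publish_bypass_producer(struct eventfd_ctx *trigger, int host_irq)
	{
		bypass_prod.token = trigger;	/* must match irqfd->consumer.token */
		bypass_prod.irq = host_irq;	/* host IRQ to be bypassed */

		return irq_bypass_register_producer(&bypass_prod);
	}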
Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 2 ++ virt/kvm/eventfd.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index f926b39a26b6..0c1de05098c8 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -64,6 +64,8 @@ struct kvm_kernel_irqfd { struct list_head list; poll_table pt; struct work_struct shutdown; + struct irq_bypass_consumer consumer; + struct irq_bypass_producer *producer; }; #endif /* __LINUX_KVM_IRQFD_H */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c4f7abec4261..7df356d8f1fd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,6 +35,7 @@ #include #include #include +#include <linux/irqbypass.h> #include #include @@ -140,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) /* * It is now safe to release the object's resources */ +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irq_bypass_unregister_consumer(&irqfd->consumer); +#endif eventfd_ctx_put(irqfd->eventfd); kfree(irqfd); } @@ -391,6 +395,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) * we might race against the POLLHUP */ fdput(f); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irqfd->consumer.token = (void *)irqfd->eventfd; + irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; + irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; + irqfd->consumer.stop = kvm_arch_irq_bypass_stop; + irqfd->consumer.start = kvm_arch_irq_bypass_start; + ret = irq_bypass_register_consumer(&irqfd->consumer); + if (ret) + pr_info("irq bypass consumer (token %p) registration fails: %d\n", + irqfd->consumer.token, ret); +#endif return 0; -- cgit v1.2.3 From f70c20aaf141adb715a2d750c55154073b02a9c3 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:53 +0800 Subject: KVM: Add an arch specific hook in 'struct kvm_kernel_irqfd' This patch adds an arch specific hook, 'arch_update', to 'struct kvm_kernel_irqfd'. On the Intel side, it is used to update the IRTE when VT-d posted-interrupts is used.
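As a sketch of what an architecture override of the weak kvm_arch_update_irqfd_routing() default (added in the diff below) might look like: x86 routes it to the update_pi_irte operation in kvm_x86_ops seen later in this series; the NULL check and its fallback return value are an assumption for illustration, not code from this patch.

	/* Illustrative x86-style override of the weak default. */
	int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
					  uint32_t guest_irq, bool set)
	{
		if (!kvm_x86_ops->update_pi_irte)
			return 0;	/* assumed fallback when PI is unsupported */

		return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
	}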
Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ virt/kvm/eventfd.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8543b02b7bc..5c3f4538807f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1172,5 +1172,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7df356d8f1fd..b637965746bb 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -266,6 +266,13 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start( struct irq_bypass_consumer *cons) { } + +int __attribute__((weak)) kvm_arch_update_irqfd_routing( + struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + return 0; +} #endif static int @@ -586,9 +593,19 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_lock_irq(&kvm->irqfds.lock); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) + list_for_each_entry(irqfd, &kvm->irqfds.items, list) { irqfd_update(kvm, irqfd); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + if (irqfd->producer) { + int ret = kvm_arch_update_irqfd_routing( + irqfd->kvm, irqfd->producer->irq, + irqfd->gsi, 1); + WARN_ON(ret); + } +#endif + } + spin_unlock_irq(&kvm->irqfds.lock); } -- cgit v1.2.3 From bf9f6ac8d74969690df1485b33b7c238ca9f2269 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:55 +0800 Subject: KVM: Update Posted-Interrupts Descriptor when vCPU is blocked This patch updates the Posted-Interrupts Descriptor when a vCPU is blocked. pre-block: - Add the vCPU to the blocked per-CPU list - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR post-block: - Remove the vCPU from the per-CPU list Signed-off-by: Feng Wu [Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 12 +++ arch/x86/include/asm/kvm_host.h | 11 +++ arch/x86/kvm/vmx.c | 153 ++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 34 +++++--- include/linux/kvm_host.h | 3 + virt/kvm/kvm_main.c | 3 + 6 files changed, 206 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index d68af4dc3006..19f94a6b9bb0 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g. MMIO/PIO address->device structure mapping (kvm->buses). The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu if it is needed by multiple functions. + +Name: blocked_vcpu_on_cpu_lock +Type: spinlock_t +Arch: x86 +Protects: blocked_vcpu_on_cpu +Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts. + When VT-d posted-interrupts is supported and the VM has assigned + devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu, + protected by blocked_vcpu_on_cpu_lock. When VT-d hardware issues + a wakeup notification event because an external interrupt from an + assigned device arrives, we find the vCPU on this list and wake + it up.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 15664994b6f3..cdbdb559ecd2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -899,6 +899,17 @@ struct kvm_x86_ops { /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; + /* + * Architecture specific hooks for vCPU blocking due to + * HLT instruction. + * Returns for .pre_block(): + * - 0 means continue to block the vCPU. + * - 1 means we cannot block the vCPU since some event + * happens during this period, such as, 'ON' bit in + * posted-interrupts descriptor is set. + */ + int (*pre_block)(struct kvm_vcpu *vcpu); + void (*post_block)(struct kvm_vcpu *vcpu); int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); }; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 99f5c61954ea..c5c22831aee2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); static DEFINE_PER_CPU(struct desc_ptr, host_gdt); +/* + * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we + * can find which vCPU should be woken up. + */ +static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); + static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; static unsigned long *vmx_msr_bitmap_legacy; @@ -2986,6 +2993,8 @@ static int hardware_enable(void) return -EBUSY; INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); /* * Now we can enable the vmclear operation in kdump @@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void) ple_window_grow, INT_MIN); } +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. + */ +static void wakeup_handler(void) +{ + struct kvm_vcpu *vcpu; + int cpu = smp_processor_id(); + + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), + blocked_vcpu_list) { + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (pi_test_on(pi_desc) == 1) + kvm_vcpu_kick(vcpu); + } + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); +} + static __init int hardware_setup(void) { int r = -ENOMEM, i, msr; @@ -6231,6 +6259,8 @@ static __init int hardware_setup(void) kvm_x86_ops->enable_log_dirty_pt_masked = NULL; } + kvm_set_posted_intr_wakeup_handler(wakeup_handler); + return alloc_kvm_area(); out8: @@ -10431,6 +10461,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); } +/* + * This routine does the following things for a vCPU which is going + * to be blocked if VT-d PI is enabled. + * - Store the vCPU to the wakeup list, so when interrupts happen + * we can find the right vCPU to wake up. + * - Change the Posted-interrupt descriptor as below: + * 'NDST' <-- vcpu->pre_pcpu + * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR + * - If 'ON' is set during this process, which means at least one + * interrupt is posted for this vCPU, we cannot block it, in + * this case, return 1, otherwise, return 0.
+ * + */ +static int vmx_pre_block(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + unsigned int dest; + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return 0; + + vcpu->pre_pcpu = vcpu->cpu; + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + + do { + old.control = new.control = pi_desc->control; + + /* + * We should not block the vCPU if + * an interrupt is posted for it. + */ + if (pi_test_on(pi_desc) == 1) { + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + + return 1; + } + + WARN((pi_desc->sn == 1), + "Warning: SN field of posted-interrupts " + "is set before blocking\n"); + + /* + * Since vCPU can be preempted during this process, + * vcpu->cpu could be different with pre_pcpu, we + * need to set pre_pcpu as the destination of wakeup + * notification event, then we can find the right vCPU + * to wakeup in wakeup handler if interrupts happen + * when the vCPU is in blocked state. + */ + dest = cpu_physical_id(vcpu->pre_pcpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'wakeup vector' */ + new.nv = POSTED_INTR_WAKEUP_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + return 0; +} + +static void vmx_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + unsigned long flags; + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + do { + old.control = new.control = pi_desc->control; + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* Allow posting non-urgent interrupts */ + new.sn = 0; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + if(vcpu->pre_pcpu != -1) { + spin_lock_irqsave( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + } +} + /* * vmx_update_pi_irte - set IRTE for Posted-Interrupts * @@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = { .flush_log_dirty = vmx_flush_log_dirty, .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, + .pre_block = vmx_pre_block, + .post_block = vmx_post_block, + .pmu_ops = &intel_pmu_ops, .update_pi_irte = vmx_update_pi_irte, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b8425a769c0a..2d2c9bb0d6d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6335,6 +6335,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } } + /* + * KVM_REQ_EVENT is not set when posted interrupts are set by + * VT-d hardware, so we have to update RVI unconditionally. + */ + if (kvm_lapic_enabled(vcpu)) { + /* + * Update architecture specific hints for APIC + * virtual interrupt delivery. 
+ */ + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + kvm_lapic_find_highest_irr(vcpu)); + } + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { @@ -6351,13 +6365,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { - /* - * Update architecture specific hints for APIC - * virtual interrupt delivery. - */ - if (kvm_x86_ops->hwapic_irr_update) - kvm_x86_ops->hwapic_irr_update(vcpu, - kvm_lapic_find_highest_irr(vcpu)); update_cr8_intercept(vcpu); kvm_lapic_sync_to_vapic(vcpu); } @@ -6493,10 +6500,15 @@ out: static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { - if (!kvm_arch_vcpu_runnable(vcpu)) { + if (!kvm_arch_vcpu_runnable(vcpu) && + (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_x86_ops->post_block) + kvm_x86_ops->post_block(vcpu); + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) return 1; } @@ -6528,10 +6540,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) for (;;) { if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && - !vcpu->arch.apf.halted) + !vcpu->arch.apf.halted) { r = vcpu_enter_guest(vcpu); - else + } else { r = vcpu_block(kvm, vcpu); + } + if (r <= 0) break; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c3f4538807f..9596a2f0977b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,6 +234,9 @@ struct kvm_vcpu { unsigned long requests; unsigned long guest_debug; + int pre_pcpu; + struct list_head blocked_vcpu_list; + struct mutex mutex; struct kvm_run *run; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index afd7ae6aec65..a75502c93c3e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) init_waitqueue_head(&vcpu->wq); kvm_async_pf_vcpu_init(vcpu); + vcpu->pre_pcpu = -1; + INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { r = -ENOMEM; -- cgit v1.2.3 From 92068d17c20b218bf1e24505a3e08495476b0ebf Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 1 Oct 2015 15:54:52 +0300 Subject: genirq: Fix typo in documentation of enumeration field name It should say IRQ_NESTED_THREAD instead of IRQ_NESTED_TRHEAD. Signed-off-by: Mika Westerberg Cc: Jiang Liu Link: http://lkml.kernel.org/r/1443704093-36837-1-git-send-email-mika.westerberg@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 45cc7299bb61..7038f38a63c7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -67,7 +67,7 @@ enum irqchip_irq_state; * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context - * IRQ_NESTED_TRHEAD - Interrupt nests into another thread + * IRQ_NESTED_THREAD - Interrupt nests into another thread * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable * IRQ_IS_POLLED - Always polled by another interrupt. 
Exclude * it from the spurious interrupt detection -- cgit v1.2.3 From 9e7e2b0a6a005941a6854cdabae19c3d9e069dbe Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 1 Oct 2015 15:54:53 +0300 Subject: genirq: Remove unused functions irqd_[set|clr]_chained_irq_inprogress() These two functions are not used anywhere in the kernel source tree so remove them. Signed-off-by: Mika Westerberg Cc: Jiang Liu Link: http://lkml.kernel.org/r/1443704093-36837-2-git-send-email-mika.westerberg@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7038f38a63c7..ba72b60b57b1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -297,21 +297,6 @@ static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; } -/* - * Functions for chained handlers which can be enabled/disabled by the - * standard disable_irq/enable_irq calls. Must be called with - * irq_desc->lock held. - */ -static inline void irqd_set_chained_irq_inprogress(struct irq_data *d) -{ - __irqd_to_state(d) |= IRQD_IRQ_INPROGRESS; -} - -static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) -{ - __irqd_to_state(d) &= ~IRQD_IRQ_INPROGRESS; -} - static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) { return d->hwirq; -- cgit v1.2.3 From 7ec88e4be461590b5a3817460c34603f76d9b3ae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:28 +0200 Subject: ntp/pps: use timespec64 for hardpps() There is only one user of the hardpps function in the kernel, so it makes sense to atomically change it over to using 64-bit timestamps for y2038 safety. In the hardpps implementation, we also need to change the pps_normtime structure, which is similar to struct timespec and also requires a 64-bit seconds portion. This introduces two temporary variables in pps_kc_event() to do the conversion, they will be removed again in the next step, which seemed preferable to having a larger patch changing it all at the same time. Acked-by: Richard Cochran Acked-by: David S. 
Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- drivers/pps/kc.c | 4 +++- include/linux/timex.h | 2 +- kernel/time/ntp.c | 12 ++++++------ kernel/time/ntp_internal.h | 2 +- kernel/time/timekeeping.c | 2 +- 5 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/pps/kc.c b/drivers/pps/kc.c index e219db1f1c84..a16cea2ba980 100644 --- a/drivers/pps/kc.c +++ b/drivers/pps/kc.c @@ -113,10 +113,12 @@ void pps_kc_event(struct pps_device *pps, struct pps_event_time *ts, int event) { unsigned long flags; + struct timespec64 real = timespec_to_timespec64(ts->ts_real); + struct timespec64 raw = timespec_to_timespec64(ts->ts_raw); /* Pass some events to kernel consumer if activated */ spin_lock_irqsave(&pps_kc_hardpps_lock, flags); if (pps == pps_kc_hardpps_dev && event & pps_kc_hardpps_mode) - hardpps(&ts->ts_real, &ts->ts_raw); + hardpps(&real, &raw); spin_unlock_irqrestore(&pps_kc_hardpps_lock, flags); } diff --git a/include/linux/timex.h b/include/linux/timex.h index 9d3f1a5b6178..39c25dbebfe8 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -152,7 +152,7 @@ extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) extern int do_adjtimex(struct timex *); -extern void hardpps(const struct timespec *, const struct timespec *); +extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); void ntp_notify_cmos_timer(void); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index df68cb875248..bd4fa6271262 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -99,7 +99,7 @@ static time64_t ntp_next_leap_sec = TIME64_MAX; static int pps_valid; /* signal watchdog counter */ static long pps_tf[3]; /* phase median filter */ static long pps_jitter; /* current jitter (ns) */ -static struct timespec pps_fbase; /* beginning of the last freq interval */ +static struct timespec64 pps_fbase; /* beginning of the last freq interval */ static int pps_shift; /* current interval duration (s) (shift) */ static int pps_intcnt; /* interval counter */ static s64 pps_freq; /* frequency offset (scaled ns/s) */ @@ -773,13 +773,13 @@ int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai) * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */ struct pps_normtime { - __kernel_time_t sec; /* seconds */ + s64 sec; /* seconds */ long nsec; /* nanoseconds */ }; /* normalize the timestamp so that nsec is in the ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */ -static inline struct pps_normtime pps_normalize_ts(struct timespec ts) +static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts) { struct pps_normtime norm = { .sec = ts.tv_sec, @@ -861,7 +861,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm) pps_errcnt++; pps_dec_freq_interval(); printk_deferred(KERN_ERR - "hardpps: PPSERROR: interval too long - %ld s\n", + "hardpps: PPSERROR: interval too long - %lld s\n", freq_norm.sec); return 0; } @@ -948,7 +948,7 @@ static void hardpps_update_phase(long error) * This code is based on David Mills's reference nanokernel * implementation. It was mostly rewritten but keeps the same idea. 
*/ -void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) +void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { struct pps_normtime pts_norm, freq_norm; @@ -969,7 +969,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) } /* ok, now we have a base for frequency calculation */ - freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase)); + freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, pps_fbase)); /* check that the signal is in the range * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */ diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 65430504ca26..af924470eac0 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -9,5 +9,5 @@ extern ktime_t ntp_get_next_leap(void); extern int second_overflow(unsigned long secs); extern int ntp_validate_timex(struct timex *); extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); -extern void __hardpps(const struct timespec *, const struct timespec *); +extern void __hardpps(const struct timespec64 *, const struct timespec64 *); #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3739ac6aa473..177188b11a2e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2025,7 +2025,7 @@ int do_adjtimex(struct timex *txc) /** * hardpps() - Accessor function to NTP __hardpps function */ -void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) +void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { unsigned long flags; -- cgit v1.2.3 From 071eee45b1650d53d21c636d344bdcebd4577ed2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:29 +0200 Subject: ntp/pps: replace getnstime_raw_and_real with 64-bit version There is exactly one caller of getnstime_raw_and_real in the kernel, which is the pps_get_ts function. This changes the caller and the implementation to work on timespec64 types rather than timespec, to avoid the time_t overflow on 32-bit architectures. For consistency with the other new functions (ktime_get_seconds, ktime_get_real_*, ...), I'm renaming the function to ktime_get_raw_and_real_ts64. We still need to convert from the internal 64-bit type to 32 bit types in the caller, but this conversion is now pushed out from getnstime_raw_and_real to pps_get_ts. A follow-up patch changes the remaining pps code to completely avoid the conversion. Acked-by: Richard Cochran Acked-by: David S. 
Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/pps_kernel.h | 7 ++++++- include/linux/timekeeping.h | 4 ++-- kernel/time/timekeeping.c | 12 ++++++------ 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 1d2cd21242e8..b2fbd62ab18d 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -115,7 +115,12 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, static inline void pps_get_ts(struct pps_event_time *ts) { - getnstime_raw_and_real(&ts->ts_raw, &ts->ts_real); + struct timespec64 raw, real; + + ktime_get_raw_and_real_ts64(&raw, &real); + + ts->ts_raw = timespec64_to_timespec(raw); + ts->ts_real = timespec64_to_timespec(real); } #else /* CONFIG_NTP_PPS */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ba0ae09cbb21..ec89d846324c 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -263,8 +263,8 @@ extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); /* * PPS accessor */ -extern void getnstime_raw_and_real(struct timespec *ts_raw, - struct timespec *ts_real); +extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, + struct timespec64 *ts_real); /* * Persistent clock related interfaces diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 177188b11a2e..274ed5e88456 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -849,7 +849,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds); #ifdef CONFIG_NTP_PPS /** - * getnstime_raw_and_real - get day and raw monotonic time in timespec format + * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format * @ts_raw: pointer to the timespec to be set to raw monotonic time * @ts_real: pointer to the timespec to be set to the time of day * @@ -857,7 +857,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds); * same time atomically and stores the resulting timestamps in timespec * format. */ -void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) +void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; @@ -868,7 +868,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) do { seq = read_seqcount_begin(&tk_core.seq); - *ts_raw = timespec64_to_timespec(tk->raw_time); + *ts_raw = tk->raw_time; ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; @@ -877,10 +877,10 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) } while (read_seqcount_retry(&tk_core.seq, seq)); - timespec_add_ns(ts_raw, nsecs_raw); - timespec_add_ns(ts_real, nsecs_real); + timespec64_add_ns(ts_raw, nsecs_raw); + timespec64_add_ns(ts_real, nsecs_real); } -EXPORT_SYMBOL(getnstime_raw_and_real); +EXPORT_SYMBOL(ktime_get_raw_and_real_ts64); #endif /* CONFIG_NTP_PPS */ -- cgit v1.2.3 From ade1bdffe90e59cd257cb9bd4f5abe4de5f14911 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:31 +0200 Subject: ntp/pps: use y2038 safe types in pps_event_time The pps_event_time uses two 'timespec' structures internally, which suffer from the y2038 problem. The uses of this structure are fairly self-contained in the pps code, so this replaces them all at once. Unfortunately, this includes the sfc ethernet driver aside from the pps subsystem, so we change that one as well. 
Both touch the same data structure, and there probably is no good way to split the patch into smaller units. Acked-by: Richard Cochran Acked-by: David S. Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- drivers/net/ethernet/sfc/ptp.c | 16 ++++++++-------- drivers/pps/kapi.c | 4 ++-- drivers/pps/kc.c | 4 +--- include/linux/pps_kernel.h | 21 ++++++++------------- 4 files changed, 19 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index ad62615a93dc..fe849dbf9f80 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -646,28 +646,28 @@ static void efx_ptp_send_times(struct efx_nic *efx, struct pps_event_time *last_time) { struct pps_event_time now; - struct timespec limit; + struct timespec64 limit; struct efx_ptp_data *ptp = efx->ptp_data; - struct timespec start; + struct timespec64 start; int *mc_running = ptp->start.addr; pps_get_ts(&now); start = now.ts_real; limit = now.ts_real; - timespec_add_ns(&limit, SYNCHRONISE_PERIOD_NS); + timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS); /* Write host time for specified period or until MC is done */ - while ((timespec_compare(&now.ts_real, &limit) < 0) && + while ((timespec64_compare(&now.ts_real, &limit) < 0) && ACCESS_ONCE(*mc_running)) { - struct timespec update_time; + struct timespec64 update_time; unsigned int host_time; /* Don't update continuously to avoid saturating the PCIe bus */ update_time = now.ts_real; - timespec_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS); + timespec64_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS); do { pps_get_ts(&now); - } while ((timespec_compare(&now.ts_real, &update_time) < 0) && + } while ((timespec64_compare(&now.ts_real, &update_time) < 0) && ACCESS_ONCE(*mc_running)); /* Synchronise NIC with single word of time only */ @@ -723,7 +723,7 @@ efx_ptp_process_times(struct efx_nic *efx, MCDI_DECLARE_STRUCT_PTR(synch_buf), struct efx_ptp_data *ptp = efx->ptp_data; u32 last_sec; u32 start_sec; - struct timespec delta; + struct timespec64 delta; ktime_t mc_time; if (number_readings == 0) diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c index cdad4d95b20e..805c749ac1ad 100644 --- a/drivers/pps/kapi.c +++ b/drivers/pps/kapi.c @@ -179,8 +179,8 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, /* check event type */ BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0); - dev_dbg(pps->dev, "PPS event at %ld.%09ld\n", - ts->ts_real.tv_sec, ts->ts_real.tv_nsec); + dev_dbg(pps->dev, "PPS event at %lld.%09ld\n", + (s64)ts->ts_real.tv_sec, ts->ts_real.tv_nsec); timespec_to_pps_ktime(&ts_real, ts->ts_real); diff --git a/drivers/pps/kc.c b/drivers/pps/kc.c index a16cea2ba980..e219db1f1c84 100644 --- a/drivers/pps/kc.c +++ b/drivers/pps/kc.c @@ -113,12 +113,10 @@ void pps_kc_event(struct pps_device *pps, struct pps_event_time *ts, int event) { unsigned long flags; - struct timespec64 real = timespec_to_timespec64(ts->ts_real); - struct timespec64 raw = timespec_to_timespec64(ts->ts_raw); /* Pass some events to kernel consumer if activated */ spin_lock_irqsave(&pps_kc_hardpps_lock, flags); if (pps == pps_kc_hardpps_dev && event & pps_kc_hardpps_mode) - hardpps(&real, &raw); + hardpps(&ts->ts_real, &ts->ts_raw); spin_unlock_irqrestore(&pps_kc_hardpps_lock, flags); } diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index b2fbd62ab18d..54bf1484d41f 100644 --- a/include/linux/pps_kernel.h 
+++ b/include/linux/pps_kernel.h @@ -48,9 +48,9 @@ struct pps_source_info { struct pps_event_time { #ifdef CONFIG_NTP_PPS - struct timespec ts_raw; + struct timespec64 ts_raw; #endif /* CONFIG_NTP_PPS */ - struct timespec ts_real; + struct timespec64 ts_real; }; /* The main struct */ @@ -105,7 +105,7 @@ extern void pps_event(struct pps_device *pps, struct pps_device *pps_lookup_dev(void const *cookie); static inline void timespec_to_pps_ktime(struct pps_ktime *kt, - struct timespec ts) + struct timespec64 ts) { kt->sec = ts.tv_sec; kt->nsec = ts.tv_nsec; @@ -115,29 +115,24 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, static inline void pps_get_ts(struct pps_event_time *ts) { - struct timespec64 raw, real; - - ktime_get_raw_and_real_ts64(&raw, &real); - - ts->ts_raw = timespec64_to_timespec(raw); - ts->ts_real = timespec64_to_timespec(real); + ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real); } #else /* CONFIG_NTP_PPS */ static inline void pps_get_ts(struct pps_event_time *ts) { - getnstimeofday(&ts->ts_real); + ktime_get_real_ts64(&ts->ts_real); } #endif /* CONFIG_NTP_PPS */ /* Subtract known time delay from PPS event time(s) */ -static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec delta) +static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) { - ts->ts_real = timespec_sub(ts->ts_real, delta); + ts->ts_real = timespec64_sub(ts->ts_real, delta); #ifdef CONFIG_NTP_PPS - ts->ts_raw = timespec_sub(ts->ts_raw, delta); + ts->ts_raw = timespec64_sub(ts->ts_raw, delta); #endif } -- cgit v1.2.3 From a7f5ba40c74e67b2c8c6e15425157ab6775621d7 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 1 Oct 2015 16:58:27 +0200 Subject: mtd: nand: remove unused ->init_size() hook The ->init_size() hook was introduced to let NAND controller drivers support NAND devices that could not be described in the nand_ids table. Since then, the core has added support for extended-id parsing and full-id description, thus allowing to describe pretty much all existing NANDs. Moreover, this hook is not used by any mainline driver, and should not be used by new drivers, because detecting the NAND chip is not something controller specific. Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 5 +---- include/linux/mtd/nand.h | 6 ------ 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 37c0d9d62478..a8230b164f49 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3836,10 +3836,7 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, chip->chipsize = (uint64_t)type->chipsize << 20; - if (!type->pagesize && chip->init_size) { - /* Set the pagesize, oobsize, erasesize by the driver */ - busw = chip->init_size(mtd, chip, id_data); - } else if (!type->pagesize) { + if (!type->pagesize) { /* Decode parameters from extended ID */ nand_decode_ext_id(mtd, chip, id_data, &busw); } else { diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c4d8e308f453..50e1f94fa377 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -556,10 +556,6 @@ struct nand_buffers { * @block_markbad: [REPLACEABLE] mark a block bad * @cmd_ctrl: [BOARDSPECIFIC] hardwarespecific function for controlling * ALE/CLE/nCE. 
Also used to write command and address - * @init_size: [BOARDSPECIFIC] hardwarespecific function for setting - * mtd->oobsize, mtd->writesize and so on. - * @id_data contains the 8 bytes values of NAND_CMD_READID. - * Return with the bus width. * @dev_ready: [BOARDSPECIFIC] hardwarespecific function for accessing * device ready/busy line. If set to NULL no access to * ready/busy is available and the ready/busy information @@ -658,8 +654,6 @@ struct nand_chip { int (*block_bad)(struct mtd_info *mtd, loff_t ofs, int getchip); int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); - int (*init_size)(struct mtd_info *mtd, struct nand_chip *this, - u8 *id_data); int (*dev_ready)(struct mtd_info *mtd); void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column, int page_addr); -- cgit v1.2.3 From 96ef36e9c424b7a66413bb9229ef5afcddf4fef4 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 17 Jun 2015 14:40:38 +0200 Subject: clk: at91: cleanup PMC header file for PCR register fields Add _MASK and _OFFSET values and cleanup register fields layout. Signed-off-by: Nicolas Ferre Signed-off-by: Boris Brezillon Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-peripheral.c | 8 ++++---- include/linux/clk/at91_pmc.h | 14 ++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c index e4d7b574f1ea..0a66b959f99b 100644 --- a/drivers/clk/at91/clk-peripheral.c +++ b/drivers/clk/at91/clk-peripheral.c @@ -165,7 +165,7 @@ static int clk_sam9x5_peripheral_enable(struct clk_hw *hw) if (periph->id < PERIPHERAL_ID_MIN) return 0; - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID) | + pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK) | AT91_PMC_PCR_CMD | AT91_PMC_PCR_DIV(periph->div) | AT91_PMC_PCR_EN); @@ -180,7 +180,7 @@ static void clk_sam9x5_peripheral_disable(struct clk_hw *hw) if (periph->id < PERIPHERAL_ID_MIN) return; - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID) | + pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK) | AT91_PMC_PCR_CMD); } @@ -194,7 +194,7 @@ static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw) return 1; pmc_lock(pmc); - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID)); + pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK)); ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_EN); pmc_unlock(pmc); @@ -213,7 +213,7 @@ clk_sam9x5_peripheral_recalc_rate(struct clk_hw *hw, return parent_rate; pmc_lock(pmc); - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID)); + pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK)); tmp = pmc_read(pmc, AT91_PMC_PCR); pmc_unlock(pmc); diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 7669f7618f39..dfc59e2b64fb 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -182,13 +182,11 @@ extern void __iomem *at91_pmc_base; #define AT91_PMC_PCSR1 0x108 /* Peripheral Clock Enable Register 1 */ #define AT91_PMC_PCR 0x10c /* Peripheral Control Register [some SAM9 and SAMA5] */ -#define AT91_PMC_PCR_PID (0x3f << 0) /* Peripheral ID */ -#define AT91_PMC_PCR_CMD (0x1 << 12) /* Command (read=0, write=1) */ -#define AT91_PMC_PCR_DIV(n) ((n) << 16) /* Divisor Value */ -#define AT91_PMC_PCR_DIV0 0x0 /* Peripheral clock is MCK */ -#define AT91_PMC_PCR_DIV2 0x1 /* Peripheral clock is MCK/2 */ -#define 
AT91_PMC_PCR_DIV4 0x2 /* Peripheral clock is MCK/4 */ -#define AT91_PMC_PCR_DIV8 0x3 /* Peripheral clock is MCK/8 */ -#define AT91_PMC_PCR_EN (0x1 << 28) /* Enable */ +#define AT91_PMC_PCR_PID_MASK 0x3f +#define AT91_PMC_PCR_CMD (0x1 << 12) /* Command (read=0, write=1) */ +#define AT91_PMC_PCR_DIV_OFFSET 16 +#define AT91_PMC_PCR_DIV_MASK (0x3 << AT91_PMC_PCR_DIV_OFFSET) +#define AT91_PMC_PCR_DIV(n) ((n) << AT91_PMC_PCR_DIV_OFFSET) /* Divisor Value */ +#define AT91_PMC_PCR_EN (0x1 << 28) /* Enable */ #endif -- cgit v1.2.3 From a5752e57bb63154fe9202d8d2282bad3bae3bced Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 18 Jun 2015 14:43:29 +0200 Subject: clk: at91: add PMC sama5d2 support Add support for the new sama5d2 SoC and adapt capabilities. Signed-off-by: Nicolas Ferre Signed-off-by: Boris Brezillon Signed-off-by: Stephen Boyd --- drivers/clk/at91/pmc.c | 15 +++++++++++++++ include/dt-bindings/clock/at91.h | 1 + include/linux/clk/at91_pmc.h | 1 + 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c index d1844f1f3729..dd2bd1cba40f 100644 --- a/drivers/clk/at91/pmc.c +++ b/drivers/clk/at91/pmc.c @@ -206,6 +206,14 @@ static const struct at91_pmc_caps at91sam9x5_caps = { AT91_PMC_MOSCRCS | AT91_PMC_CFDEV, }; +static const struct at91_pmc_caps sama5d2_caps = { + .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY | + AT91_PMC_LOCKU | AT91_PMC_PCK0RDY | + AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY | + AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS | + AT91_PMC_CFDEV | AT91_PMC_GCKRDY, +}; + static const struct at91_pmc_caps sama5d3_caps = { .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY | AT91_PMC_LOCKU | AT91_PMC_PCK0RDY | @@ -436,6 +444,13 @@ static void __init of_at91sam9x5_pmc_setup(struct device_node *np) CLK_OF_DECLARE(at91sam9x5_clk_pmc, "atmel,at91sam9x5-pmc", of_at91sam9x5_pmc_setup); +static void __init of_sama5d2_pmc_setup(struct device_node *np) +{ + of_at91_pmc_setup(np, &sama5d2_caps); +} +CLK_OF_DECLARE(sama5d2_clk_pmc, "atmel,sama5d2-pmc", + of_sama5d2_pmc_setup); + static void __init of_sama5d3_pmc_setup(struct device_node *np) { of_at91_pmc_setup(np, &sama5d3_caps); diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 0b4cb999a3f7..ab3ee241d10c 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -18,5 +18,6 @@ #define AT91_PMC_MOSCSELS 16 /* Main Oscillator Selection */ #define AT91_PMC_MOSCRCS 17 /* Main On-Chip RC */ #define AT91_PMC_CFDEV 18 /* Clock Failure Detector Event */ +#define AT91_PMC_GCKRDY 24 /* Generated Clocks */ #endif diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index dfc59e2b64fb..dc2a0fa62eaa 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -164,6 +164,7 @@ extern void __iomem *at91_pmc_base; #define AT91_PMC_MOSCSELS (1 << 16) /* Main Oscillator Selection [some SAM9] */ #define AT91_PMC_MOSCRCS (1 << 17) /* Main On-Chip RC [some SAM9] */ #define AT91_PMC_CFDEV (1 << 18) /* Clock Failure Detector Event [some SAM9] */ +#define AT91_PMC_GCKRDY (1 << 24) /* Generated Clocks */ #define AT91_PMC_IMR 0x6c /* Interrupt Mask Register */ #define AT91_PMC_PLLICPR 0x80 /* PLL Charge Pump Current Register */ -- cgit v1.2.3 From df70aeef60839cb2732913fa41e61aba52ca942c Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 31 Jul 2015 11:43:12 +0200 Subject: clk: at91: add generated clock driver Add a new type of clocks that can be provided to 
a peripheral. In addition to the peripheral clock, this new clock that can use several input clocks as parents can generate divided rates. This would allow a peripheral to have finer grained clocks for generating a baud rate, clocking an asynchronous part or having more options in frequency. Signed-off-by: Nicolas Ferre Signed-off-by: Boris Brezillon [sboyd@codeaurora.org: Transition to new clk_hw provider APIs] Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/at91-clock.txt | 35 +++ arch/arm/mach-at91/Kconfig | 3 + drivers/clk/at91/Makefile | 1 + drivers/clk/at91/clk-generated.c | 306 +++++++++++++++++++++ drivers/clk/at91/pmc.c | 6 + drivers/clk/at91/pmc.h | 3 + include/linux/clk/at91_pmc.h | 7 + 7 files changed, 361 insertions(+) create mode 100644 drivers/clk/at91/clk-generated.c (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt index 5ba6450693b9..181bc8ac4e3a 100644 --- a/Documentation/devicetree/bindings/clock/at91-clock.txt +++ b/Documentation/devicetree/bindings/clock/at91-clock.txt @@ -77,6 +77,9 @@ Required properties: "atmel,sama5d4-clk-h32mx": at91 h32mx clock + "atmel,sama5d2-clk-generated": + at91 generated clock + Required properties for SCKC node: - reg : defines the IO memory reserved for the SCKC. - #size-cells : shall be 0 (reg is used to encode clk id). @@ -461,3 +464,35 @@ For example: compatible = "atmel,sama5d4-clk-h32mx"; clocks = <&mck>; }; + +Required properties for generated clocks: +- #size-cells : shall be 0 (reg is used to encode clk id). +- #address-cells : shall be 1 (reg is used to encode clk id). +- clocks : shall be the generated clock source phandles. + e.g. clocks = <&clk32k>, <&main>, <&plladiv>, <&utmi>, <&mck>, <&audio_pll_pmc>; +- name: device tree node describing a specific generated clock. + * #clock-cells : from common clock binding; shall be set to 0. + * reg: peripheral id. See Atmel's datasheets to get a full + list of peripheral ids. + * atmel,clk-output-range : minimum and maximum clock frequency + (two u32 fields). 
+ +For example: + gck { + compatible = "atmel,sama5d2-clk-generated"; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk32k>, <&main>, <&plladiv>, <&utmi>, <&mck>, <&audio_pll_pmc>; + + tcb0_gclk: tcb0_gclk { + #clock-cells = <0>; + reg = <35>; + atmel,clk-output-range = <0 83000000>; + }; + + pwm_gclk: pwm_gclk { + #clock-cells = <0>; + reg = <38>; + atmel,clk-output-range = <0 83000000>; + }; + }; diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 89a755b90db2..92673006e55c 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -102,6 +102,9 @@ config HAVE_AT91_SMD config HAVE_AT91_H32MX bool +config HAVE_AT91_GENERATED_CLK + bool + config SOC_SAM_V4_V5 bool diff --git a/drivers/clk/at91/Makefile b/drivers/clk/at91/Makefile index 89a48a7bd5df..13e67bd35cff 100644 --- a/drivers/clk/at91/Makefile +++ b/drivers/clk/at91/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_HAVE_AT91_UTMI) += clk-utmi.o obj-$(CONFIG_HAVE_AT91_USB_CLK) += clk-usb.o obj-$(CONFIG_HAVE_AT91_SMD) += clk-smd.o obj-$(CONFIG_HAVE_AT91_H32MX) += clk-h32mx.o +obj-$(CONFIG_HAVE_AT91_GENERATED_CLK) += clk-generated.o diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c new file mode 100644 index 000000000000..abc80949e1dd --- /dev/null +++ b/drivers/clk/at91/clk-generated.c @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2015 Atmel Corporation, + * Nicolas Ferre + * + * Based on clk-programmable & clk-peripheral drivers by Boris BREZILLON. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "pmc.h" + +#define PERIPHERAL_MAX 64 +#define PERIPHERAL_ID_MIN 2 + +#define GENERATED_SOURCE_MAX 6 +#define GENERATED_MAX_DIV 255 + +struct clk_generated { + struct clk_hw hw; + struct at91_pmc *pmc; + struct clk_range range; + u32 id; + u32 gckdiv; + u8 parent_id; +}; + +#define to_clk_generated(hw) \ + container_of(hw, struct clk_generated, hw) + +static int clk_generated_enable(struct clk_hw *hw) +{ + struct clk_generated *gck = to_clk_generated(hw); + struct at91_pmc *pmc = gck->pmc; + u32 tmp; + + pr_debug("GCLK: %s, gckdiv = %d, parent id = %d\n", + __func__, gck->gckdiv, gck->parent_id); + + pmc_lock(pmc); + pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); + tmp = pmc_read(pmc, AT91_PMC_PCR) & + ~(AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK); + pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_GCKCSS(gck->parent_id) + | AT91_PMC_PCR_CMD + | AT91_PMC_PCR_GCKDIV(gck->gckdiv) + | AT91_PMC_PCR_GCKEN); + pmc_unlock(pmc); + return 0; +} + +static void clk_generated_disable(struct clk_hw *hw) +{ + struct clk_generated *gck = to_clk_generated(hw); + struct at91_pmc *pmc = gck->pmc; + u32 tmp; + + pmc_lock(pmc); + pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); + tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_GCKEN; + pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD); + pmc_unlock(pmc); +} + +static int clk_generated_is_enabled(struct clk_hw *hw) +{ + struct clk_generated *gck = to_clk_generated(hw); + struct at91_pmc *pmc = gck->pmc; + int ret; + + pmc_lock(pmc); + pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); + ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_GCKEN); + pmc_unlock(pmc); + + return ret; +} + +static 
unsigned long +clk_generated_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_generated *gck = to_clk_generated(hw); + + return DIV_ROUND_CLOSEST(parent_rate, gck->gckdiv + 1); +} + +static int clk_generated_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_generated *gck = to_clk_generated(hw); + struct clk_hw *parent = NULL; + long best_rate = -EINVAL; + unsigned long tmp_rate, min_rate; + int best_diff = -1; + int tmp_diff; + int i; + + for (i = 0; i < clk_hw_get_num_parents(hw); i++) { + u32 div; + unsigned long parent_rate; + + parent = clk_hw_get_parent_by_index(hw, i); + if (!parent) + continue; + + parent_rate = clk_hw_get_rate(parent); + min_rate = DIV_ROUND_CLOSEST(parent_rate, GENERATED_MAX_DIV + 1); + if (!parent_rate || + (gck->range.max && min_rate > gck->range.max)) + continue; + + for (div = 1; div < GENERATED_MAX_DIV + 2; div++) { + tmp_rate = DIV_ROUND_CLOSEST(parent_rate, div); + tmp_diff = abs(req->rate - tmp_rate); + + if (best_diff < 0 || best_diff > tmp_diff) { + best_rate = tmp_rate; + best_diff = tmp_diff; + req->best_parent_rate = parent_rate; + req->best_parent_hw = parent; + } + + if (!best_diff || tmp_rate < req->rate) + break; + } + + if (!best_diff) + break; + } + + pr_debug("GCLK: %s, best_rate = %ld, parent clk: %s @ %ld\n", + __func__, best_rate, + __clk_get_name((req->best_parent_hw)->clk), + req->best_parent_rate); + + if (best_rate < 0) + return best_rate; + + req->rate = best_rate; + return 0; +} + +/* No modification of hardware as we have the flag CLK_SET_PARENT_GATE set */ +static int clk_generated_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_generated *gck = to_clk_generated(hw); + + if (index >= clk_hw_get_num_parents(hw)) + return -EINVAL; + + gck->parent_id = index; + return 0; +} + +static u8 clk_generated_get_parent(struct clk_hw *hw) +{ + struct clk_generated *gck = to_clk_generated(hw); + + return gck->parent_id; +} + +/* No modification of hardware as we have the flag CLK_SET_RATE_GATE set */ +static int clk_generated_set_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate) +{ + struct clk_generated *gck = to_clk_generated(hw); + u32 div; + + if (!rate) + return -EINVAL; + + if (gck->range.max && rate > gck->range.max) + return -EINVAL; + + div = DIV_ROUND_CLOSEST(parent_rate, rate); + if (div > GENERATED_MAX_DIV + 1 || !div) + return -EINVAL; + + gck->gckdiv = div - 1; + return 0; +} + +static const struct clk_ops generated_ops = { + .enable = clk_generated_enable, + .disable = clk_generated_disable, + .is_enabled = clk_generated_is_enabled, + .recalc_rate = clk_generated_recalc_rate, + .determine_rate = clk_generated_determine_rate, + .get_parent = clk_generated_get_parent, + .set_parent = clk_generated_set_parent, + .set_rate = clk_generated_set_rate, +}; + +/** + * clk_generated_startup - Initialize a given clock to its default parent and + * divisor parameter. + * + * @gck: Generated clock to set the startup parameters for. + * + * Take parameters from the hardware and update local clock configuration + * accordingly. 
+ */ +static void clk_generated_startup(struct clk_generated *gck) +{ + struct at91_pmc *pmc = gck->pmc; + u32 tmp; + + pmc_lock(pmc); + pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); + tmp = pmc_read(pmc, AT91_PMC_PCR); + pmc_unlock(pmc); + + gck->parent_id = (tmp & AT91_PMC_PCR_GCKCSS_MASK) + >> AT91_PMC_PCR_GCKCSS_OFFSET; + gck->gckdiv = (tmp & AT91_PMC_PCR_GCKDIV_MASK) + >> AT91_PMC_PCR_GCKDIV_OFFSET; +} + +static struct clk * __init +at91_clk_register_generated(struct at91_pmc *pmc, const char *name, + const char **parent_names, u8 num_parents, + u8 id, const struct clk_range *range) +{ + struct clk_generated *gck; + struct clk *clk = NULL; + struct clk_init_data init; + + gck = kzalloc(sizeof(*gck), GFP_KERNEL); + if (!gck) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &generated_ops; + init.parent_names = parent_names; + init.num_parents = num_parents; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE; + + gck->id = id; + gck->hw.init = &init; + gck->pmc = pmc; + gck->range = *range; + + clk = clk_register(NULL, &gck->hw); + if (IS_ERR(clk)) + kfree(gck); + else + clk_generated_startup(gck); + + return clk; +} + +void __init of_sama5d2_clk_generated_setup(struct device_node *np, + struct at91_pmc *pmc) +{ + int num; + u32 id; + const char *name; + struct clk *clk; + int num_parents; + const char *parent_names[GENERATED_SOURCE_MAX]; + struct device_node *gcknp; + struct clk_range range = CLK_RANGE(0, 0); + + num_parents = of_clk_get_parent_count(np); + if (num_parents <= 0 || num_parents > GENERATED_SOURCE_MAX) + return; + + of_clk_parent_fill(np, parent_names, num_parents); + + num = of_get_child_count(np); + if (!num || num > PERIPHERAL_MAX) + return; + + for_each_child_of_node(np, gcknp) { + if (of_property_read_u32(gcknp, "reg", &id)) + continue; + + if (id < PERIPHERAL_ID_MIN || id >= PERIPHERAL_MAX) + continue; + + if (of_property_read_string(np, "clock-output-names", &name)) + name = gcknp->name; + + of_at91_get_clk_range(gcknp, "atmel,clk-output-range", + &range); + + clk = at91_clk_register_generated(pmc, name, parent_names, + num_parents, id, &range); + if (IS_ERR(clk)) + continue; + + of_clk_add_provider(gcknp, of_clk_src_simple_get, clk); + } +} diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c index dd2bd1cba40f..8476b570779b 100644 --- a/drivers/clk/at91/pmc.c +++ b/drivers/clk/at91/pmc.c @@ -376,6 +376,12 @@ static const struct of_device_id pmc_clk_ids[] __initconst = { .compatible = "atmel,sama5d4-clk-h32mx", .data = of_sama5d4_clk_h32mx_setup, }, +#endif +#if defined(CONFIG_HAVE_AT91_GENERATED_CLK) + { + .compatible = "atmel,sama5d2-clk-generated", + .data = of_sama5d2_clk_generated_setup, + }, #endif { /*sentinel*/ } }; diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 8b87771c69b2..f65739272779 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -118,4 +118,7 @@ void of_at91sam9x5_clk_smd_setup(struct device_node *np, void of_sama5d4_clk_h32mx_setup(struct device_node *np, struct at91_pmc *pmc); +void of_sama5d2_clk_generated_setup(struct device_node *np, + struct at91_pmc *pmc); + #endif /* __PMC_H_ */ diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index dc2a0fa62eaa..1e6932222e11 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -184,10 +184,17 @@ extern void __iomem *at91_pmc_base; #define AT91_PMC_PCR 0x10c /* Peripheral Control Register [some SAM9 and SAMA5] */ #define AT91_PMC_PCR_PID_MASK 0x3f +#define 
AT91_PMC_PCR_GCKCSS_OFFSET 8 +#define AT91_PMC_PCR_GCKCSS_MASK (0x7 << AT91_PMC_PCR_GCKCSS_OFFSET) +#define AT91_PMC_PCR_GCKCSS(n) ((n) << AT91_PMC_PCR_GCKCSS_OFFSET) /* GCK Clock Source Selection */ #define AT91_PMC_PCR_CMD (0x1 << 12) /* Command (read=0, write=1) */ #define AT91_PMC_PCR_DIV_OFFSET 16 #define AT91_PMC_PCR_DIV_MASK (0x3 << AT91_PMC_PCR_DIV_OFFSET) #define AT91_PMC_PCR_DIV(n) ((n) << AT91_PMC_PCR_DIV_OFFSET) /* Divisor Value */ +#define AT91_PMC_PCR_GCKDIV_OFFSET 20 +#define AT91_PMC_PCR_GCKDIV_MASK (0xff << AT91_PMC_PCR_GCKDIV_OFFSET) +#define AT91_PMC_PCR_GCKDIV(n) ((n) << AT91_PMC_PCR_GCKDIV_OFFSET) /* Generated Clock Divisor Value */ #define AT91_PMC_PCR_EN (0x1 << 28) /* Enable */ +#define AT91_PMC_PCR_GCKEN (0x1 << 29) /* GCK Enable */ #endif -- cgit v1.2.3 From 0610c25daa3e76e38ad5a8fae683a89ff9f71798 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:02 -0700 Subject: memcg: fix dirty page migration The problem starts with a file backed dirty page which is charged to a memcg. Then page migration is used to move oldpage to newpage.

Migration:
 - copies the oldpage's data to newpage
 - clears oldpage.PG_dirty
 - sets newpage.PG_dirty
 - uncharges oldpage from memcg
 - charges newpage to memcg

Clearing oldpage.PG_dirty decrements the charged memcg's dirty page count. However, because newpage is not yet charged, setting newpage.PG_dirty does not increment the memcg's dirty page count. After migration completes newpage.PG_dirty is eventually cleared, often in account_page_cleaned(). At this time newpage is charged to a memcg so the memcg's dirty page count is decremented which causes underflow because the count was not previously incremented by migration. This underflow causes balance_dirty_pages() to see a very large unsigned number of dirty memcg pages which leads to aggressive throttling of buffered writes by processes in non root memcg.

This issue:
 - can harm performance of non root memcg buffered writes.
 - can report too small (even negative) values in memory.stat[(total_)dirty] counters of all memcg, including the root.

To avoid polluting migrate.c with #ifdef CONFIG_MEMCG checks, introduce page_memcg() and set_page_memcg() helpers.

Test:
 0) setup and enter limited memcg
    mkdir /sys/fs/cgroup/test
    echo 1G > /sys/fs/cgroup/test/memory.limit_in_bytes
    echo $$ > /sys/fs/cgroup/test/cgroup.procs

 1) buffered writes baseline
    dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k
    sync
    grep ^dirty /sys/fs/cgroup/test/memory.stat

 2) buffered writes with compaction antagonist to induce migration
    yes 1 > /proc/sys/vm/compact_memory &
    rm -rf /data/tmp/foo
    dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k
    kill %
    sync
    grep ^dirty /sys/fs/cgroup/test/memory.stat

 3) buffered writes without antagonist, should match baseline
    rm -rf /data/tmp/foo
    dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k
    sync
    grep ^dirty /sys/fs/cgroup/test/memory.stat

                        (speed, dirty residue)
             unpatched                        patched
    1) 841 MB/s 0 dirty pages           886 MB/s 0 dirty pages
    2) 611 MB/s -33427456 dirty pages   793 MB/s 0 dirty pages
    3) 114 MB/s -33427456 dirty pages   891 MB/s 0 dirty pages

Notice that unpatched baseline performance (1) fell after migration (3): 841 -> 114 MB/s. In the patched kernel, post migration performance matches baseline.
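
To make the underflow concrete, here is the counter arithmetic for a single page (an illustrative walk-through derived from the migration sequence above, not a trace):

	dirty = 1	oldpage is charged and PG_dirty is set
	dirty = 0	migration clears oldpage.PG_dirty (oldpage is still charged)
	dirty = 0	migration sets newpage.PG_dirty (newpage is not charged, so no increment)
	dirty = -1	newpage.PG_dirty is cleared later (newpage is charged by then, so it decrements)

Read as an unsigned quantity, -1 is an enormous dirty page count, which is what balance_dirty_pages() ends up throttling against.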
Fixes: c4843a7593a9 ("memcg: add per cgroup dirty page accounting") Signed-off-by: Greg Thelen Reported-by: Dave Hansen Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ mm/migrate.c | 12 +++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 91c08f6f0dc9..80001de019ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return page->mem_cgroup; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ + page->mem_cgroup = memcg; +} +#else +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/mm/migrate.c b/mm/migrate.c index 7452a00bbb50..842ecd7aaf7f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (PageSwapBacked(page)) SetPageSwapBacked(newpage); + /* + * Indirectly called below, migrate_page_copy() copies PG_dirty and thus + * needs newpage's memcg set to transfer memcg dirty page accounting. + * So perform memcg migration in two steps: + * 1. set newpage->mem_cgroup (here) + * 2. clear page->mem_cgroup (below) + */ + set_page_memcg(newpage, page_memcg(page)); + mapping = page_mapping(page); if (!mapping) rc = migrate_page(mapping, newpage, page, mode); @@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page, rc = fallback_migrate_page(mapping, newpage, page, mode); if (rc != MIGRATEPAGE_SUCCESS) { + set_page_memcg(newpage, NULL); newpage->mapping = NULL; } else { - mem_cgroup_migrate(page, newpage, false); + set_page_memcg(page, NULL); if (page_was_mapped) remove_migration_ptes(page, newpage); page->mapping = NULL; -- cgit v1.2.3 From ef510194cefe0cd369ef73419cd65b0a5bb4fb5b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:13 -0700 Subject: memcg: remove pcp_counter_lock Commit 733a572e66d2 ("memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online") removed the last use of the per memcg pcp_counter_lock but forgot to remove the variable. Kill the vestigial variable. Signed-off-by: Greg Thelen Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - mm/memcontrol.c | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..6452ff4c463f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -242,7 +242,6 @@ struct mem_cgroup { * percpu counter. 
*/ struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 03cc0a742ff1..1fedbde68f59 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4185,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg_wb_domain_init(memcg, GFP_KERNEL)) goto out_free_stat; - spin_lock_init(&memcg->pcp_counter_lock); return memcg; out_free_stat: -- cgit v1.2.3 From 068654c200cc32966ce7906ca0bd096b9b97e988 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 26 May 2015 16:49:01 +0100 Subject: drivers: firmware: psci: move power_state handling to generic code Functions implemented on arm64 to check if a power_state parameter is valid and if the power_state implies context loss are not arm64 specific and should be moved to generic code so that they can be reused on arm systems too. This patch moves the functions handling the power_state parameter to generic PSCI firmware layer code. Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Sudeep Holla Tested-by: Jisheng Zhang Cc: Catalin Marinas Cc: Mark Rutland --- arch/arm64/kernel/psci.c | 14 -------------- drivers/firmware/psci.c | 15 +++++++++++++++ include/linux/psci.h | 2 ++ 3 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index aa94a88f6279..f67f35b6edb1 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -30,20 +30,6 @@ #include #include -static bool psci_power_state_loses_context(u32 state) -{ - return state & PSCI_0_2_POWER_STATE_TYPE_MASK; -} - -static bool psci_power_state_is_valid(u32 state) -{ - const u32 valid_mask = PSCI_0_2_POWER_STATE_ID_MASK | - PSCI_0_2_POWER_STATE_TYPE_MASK | - PSCI_0_2_POWER_STATE_AFFL_MASK; - - return !(state & ~valid_mask); -} - static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu) diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 0821e332c85a..3157eb0ef300 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -70,6 +70,21 @@ enum psci_function { static u32 psci_function_id[PSCI_FN_MAX]; +#define PSCI_0_2_POWER_STATE_MASK \ + (PSCI_0_2_POWER_STATE_ID_MASK | \ + PSCI_0_2_POWER_STATE_TYPE_MASK | \ + PSCI_0_2_POWER_STATE_AFFL_MASK) + +bool psci_power_state_loses_context(u32 state) +{ + return state & PSCI_0_2_POWER_STATE_TYPE_MASK; +} + +bool psci_power_state_is_valid(u32 state) +{ + return !(state & ~PSCI_0_2_POWER_STATE_MASK); +} + static int psci_to_linux_errno(int errno) { switch (errno) { diff --git a/include/linux/psci.h b/include/linux/psci.h index a682fcc91c33..12c4865457ad 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -21,6 +21,8 @@ #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 bool psci_tos_resident_on(int cpu); +bool psci_power_state_loses_context(u32 state); +bool psci_power_state_is_valid(u32 state); struct psci_operations { int (*cpu_suspend)(u32 state, unsigned long entry_point); -- cgit v1.2.3 From 934e2536b1bfe663de033298f75c1b8ff9d0c9ea Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Sep 2015 18:54:09 +0300 Subject: clk: fractional-divider: keep mwidth and nwidth internally The patch adds mwidth and nwidth fields to the struct clk_fractional_divider for further usage. While here, use GENMASK() instead of open coding this functionality. 
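
As a worked example of the equivalence (illustrative values, not part of the patch): for an m field of width 8 at shift 16, the new code in the diff below computes

	fd->mmask = GENMASK(8 - 1, 0) << 16;	/* GENMASK(7, 0) == 0xff, so mmask == 0x00ff0000 */

which is exactly what the old (BIT(8) - 1) << 16 expression produced, while the raw width is now also retained in the new mwidth field.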
Reviewed-by: Heikki Krogerus Signed-off-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- drivers/clk/clk-fractional-divider.c | 6 ++++-- include/linux/clk-provider.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c index 1af9c1e85d83..0282c7647a49 100644 --- a/drivers/clk/clk-fractional-divider.c +++ b/drivers/clk/clk-fractional-divider.c @@ -128,9 +128,11 @@ struct clk *clk_register_fractional_divider(struct device *dev, fd->reg = reg; fd->mshift = mshift; - fd->mmask = (BIT(mwidth) - 1) << mshift; + fd->mwidth = mwidth; + fd->mmask = GENMASK(mwidth - 1, 0) << mshift; fd->nshift = nshift; - fd->nmask = (BIT(nwidth) - 1) << nshift; + fd->nwidth = nwidth; + fd->nmask = GENMASK(nwidth - 1, 0) << nshift; fd->flags = clk_divider_flags; fd->lock = lock; fd->hw.init = &init; diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3ecc07d0da77..8ff43eb4b311 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -500,13 +500,14 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name, * * Clock with adjustable fractional divider affecting its output frequency. */ - struct clk_fractional_divider { struct clk_hw hw; void __iomem *reg; u8 mshift; + u8 mwidth; u32 mmask; u8 nshift; + u8 nwidth; u32 nmask; u8 flags; spinlock_t *lock; -- cgit v1.2.3 From 3c4b23dd71ea6f68be9731b68877fbc05f4fb693 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 30 Sep 2015 21:07:17 +0900 Subject: pinctrl: pinconf-generic: sort pin configuration params alphabetically Currently, the dt_params array in drivers/pinctrl/pinconf-generic.c is not sorted in the same order as the enum pin_config_param in include/linux/pinctrl/pinconf-generic.h. Sort enum pin_config_param, conf_items, dt_params, alphabetically for consistency. 
Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf-generic.c | 34 +++++++++--------- include/linux/pinctrl/pinconf-generic.h | 64 ++++++++++++++++----------------- 2 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index e63ad9fbd388..a88a55901125 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -28,25 +28,25 @@ #ifdef CONFIG_DEBUG_FS static const struct pin_config_item conf_items[] = { + PCONFDUMP(PIN_CONFIG_BIAS_BUS_HOLD, "input bias bus hold", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_DISABLE, "input bias disabled", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_HIGH_IMPEDANCE, "input bias high impedance", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_BUS_HOLD, "input bias bus hold", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, "input bias pull to pin specific state", NULL, false), - PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), + PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), + PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_STRENGTH, "output drive strength", "mA", true), + PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "usec", true), PCONFDUMP(PIN_CONFIG_INPUT_ENABLE, "input enabled", NULL, false), - PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_ENABLE, "input schmitt enabled", NULL, false), PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL, false), - PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "usec", true), - PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector", true), - PCONFDUMP(PIN_CONFIG_SLEW_RATE, "slew rate", NULL, true), + PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_ENABLE, "input schmitt enabled", NULL, false), PCONFDUMP(PIN_CONFIG_LOW_POWER_MODE, "pin low power", "mode", true), PCONFDUMP(PIN_CONFIG_OUTPUT, "pin output", "level", true), + PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector", true), + PCONFDUMP(PIN_CONFIG_SLEW_RATE, "slew rate", NULL, true), }; static void pinconf_generic_dump_one(struct pinctrl_dev *pctldev, @@ -150,27 +150,27 @@ EXPORT_SYMBOL_GPL(pinconf_generic_dump_config); #ifdef CONFIG_OF static const struct pinconf_generic_params dt_params[] = { + { "bias-bus-hold", PIN_CONFIG_BIAS_BUS_HOLD, 0 }, { "bias-disable", PIN_CONFIG_BIAS_DISABLE, 0 }, { "bias-high-impedance", PIN_CONFIG_BIAS_HIGH_IMPEDANCE, 0 }, - { "bias-bus-hold", PIN_CONFIG_BIAS_BUS_HOLD, 0 }, { "bias-pull-up", PIN_CONFIG_BIAS_PULL_UP, 1 }, - { "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 }, { "bias-pull-pin-default", PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, 1 }, - { "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 }, + { "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 }, { "drive-open-drain", PIN_CONFIG_DRIVE_OPEN_DRAIN, 0 }, { "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 }, + { "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 }, { "drive-strength", PIN_CONFIG_DRIVE_STRENGTH, 0 }, - { "input-enable", PIN_CONFIG_INPUT_ENABLE, 1 }, + { "input-debounce", PIN_CONFIG_INPUT_DEBOUNCE, 0 }, { "input-disable", PIN_CONFIG_INPUT_ENABLE, 0 }, - { 
"input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 }, + { "input-enable", PIN_CONFIG_INPUT_ENABLE, 1 }, { "input-schmitt-disable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 0 }, - { "input-debounce", PIN_CONFIG_INPUT_DEBOUNCE, 0 }, - { "power-source", PIN_CONFIG_POWER_SOURCE, 0 }, - { "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 }, + { "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 }, { "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 }, - { "output-low", PIN_CONFIG_OUTPUT, 0, }, + { "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 }, { "output-high", PIN_CONFIG_OUTPUT, 1, }, - { "slew-rate", PIN_CONFIG_SLEW_RATE, 0}, + { "output-low", PIN_CONFIG_OUTPUT, 0, }, + { "power-source", PIN_CONFIG_POWER_SOURCE, 0 }, + { "slew-rate", PIN_CONFIG_SLEW_RATE, 0 }, }; /** diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index fe65962b264f..d921afd5f109 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -20,6 +20,11 @@ /** * enum pin_config_param - possible pin configuration parameters + * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch so that it + * weakly drives the last value on a tristate bus, also known as a "bus + * holder", "bus keeper" or "repeater". This allows another device on the + * bus to change the value by driving the bus high or low and switching to + * tristate. The argument is ignored. * @PIN_CONFIG_BIAS_DISABLE: disable any pin bias on the pin, a * transition from say pull-up to pull-down implies that you disable * pull-up in the process, this setting disables all biasing. @@ -29,14 +34,6 @@ * if for example some other pin is going to drive the signal connected * to it for a while. Pins used for input are usually always high * impedance. - * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch so that it - * weakly drives the last value on a tristate bus, also known as a "bus - * holder", "bus keeper" or "repeater". This allows another device on the - * bus to change the value by driving the bus high or low and switching to - * tristate. The argument is ignored. - * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high - * impedance to VDD). If the argument is != 0 pull-up is enabled, - * if it is 0, pull-up is total, i.e. the pin is connected to VDD. * @PIN_CONFIG_BIAS_PULL_DOWN: the pin will be pulled down (usually with high * impedance to GROUND). If the argument is != 0 pull-down is enabled, * if it is 0, pull-down is total, i.e. the pin is connected to GROUND. @@ -48,10 +45,9 @@ * If the argument is != 0 pull up/down is enabled, if it is 0, the * configuration is ignored. The proper way to disable it is to use * @PIN_CONFIG_BIAS_DISABLE. - * @PIN_CONFIG_DRIVE_PUSH_PULL: the pin will be driven actively high and - * low, this is the most typical case and is typically achieved with two - * active transistors on the output. Setting this config will enable - * push-pull mode, the argument is ignored. + * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high + * impedance to VDD). If the argument is != 0 pull-up is enabled, + * if it is 0, pull-up is total, i.e. the pin is connected to VDD. * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open * collector) which means it is usually wired with other output ports * which are then pulled up with an external resistor. Setting this @@ -59,28 +55,26 @@ * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source * (open emitter). 
Setting this config will enable open source mode, the * argument is ignored. + * @PIN_CONFIG_DRIVE_PUSH_PULL: the pin will be driven actively high and + * low, this is the most typical case and is typically achieved with two + * active transistors on the output. Setting this config will enable + * push-pull mode, the argument is ignored. * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current * passed as argument. The argument is in mA. + * @PIN_CONFIG_INPUT_DEBOUNCE: this will configure the pin to debounce mode, + * which means it will wait for signals to settle when reading inputs. The + * argument gives the debounce time in usecs. Setting the + * argument to zero turns debouncing off. * @PIN_CONFIG_INPUT_ENABLE: enable the pin's input. Note that this does not * affect the pin's ability to drive output. 1 enables input, 0 disables * input. - * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. - * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, - * schmitt-trigger mode is disabled. * @PIN_CONFIG_INPUT_SCHMITT: this will configure an input pin to run in * schmitt-trigger mode. If the schmitt-trigger has adjustable hysteresis, * the threshold value is given on a custom format as argument when * setting pins to this mode. - * @PIN_CONFIG_INPUT_DEBOUNCE: this will configure the pin to debounce mode, - * which means it will wait for signals to settle when reading inputs. The - * argument gives the debounce time in usecs. Setting the - * argument to zero turns debouncing off. - * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power - * supplies, the argument to this parameter (on a custom format) tells - * the driver which alternative power source to use. - * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to - * this parameter (on a custom format) tells the driver which alternative - * slew rate to use. + * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. + * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, + * schmitt-trigger mode is disabled. * @PIN_CONFIG_LOW_POWER_MODE: this will configure the pin for low power * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 @@ -89,29 +83,35 @@ * 1 to indicate high level, argument 0 to indicate low level. (Please * see Documentation/pinctrl.txt, section "GPIO mode pitfalls" for a * discussion around this parameter.) + * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power + * supplies, the argument to this parameter (on a custom format) tells + * the driver which alternative power source to use. + * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to + * this parameter (on a custom format) tells the driver which alternative + * slew rate to use. * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if * you need to pass in custom configurations to the pin controller, use * PIN_CONFIG_END+1 as the base offset. 
*/ enum pin_config_param { + PIN_CONFIG_BIAS_BUS_HOLD, PIN_CONFIG_BIAS_DISABLE, PIN_CONFIG_BIAS_HIGH_IMPEDANCE, - PIN_CONFIG_BIAS_BUS_HOLD, - PIN_CONFIG_BIAS_PULL_UP, PIN_CONFIG_BIAS_PULL_DOWN, PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, - PIN_CONFIG_DRIVE_PUSH_PULL, + PIN_CONFIG_BIAS_PULL_UP, PIN_CONFIG_DRIVE_OPEN_DRAIN, PIN_CONFIG_DRIVE_OPEN_SOURCE, + PIN_CONFIG_DRIVE_PUSH_PULL, PIN_CONFIG_DRIVE_STRENGTH, + PIN_CONFIG_INPUT_DEBOUNCE, PIN_CONFIG_INPUT_ENABLE, - PIN_CONFIG_INPUT_SCHMITT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, - PIN_CONFIG_INPUT_DEBOUNCE, - PIN_CONFIG_POWER_SOURCE, - PIN_CONFIG_SLEW_RATE, + PIN_CONFIG_INPUT_SCHMITT_ENABLE, PIN_CONFIG_LOW_POWER_MODE, PIN_CONFIG_OUTPUT, + PIN_CONFIG_POWER_SOURCE, + PIN_CONFIG_SLEW_RATE, PIN_CONFIG_END = 0x7FFF, }; -- cgit v1.2.3 From a91263d520246b63c63e75ddfb072ee6a853fe15 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2015 01:41:50 +0200 Subject: ebpf: migrate bpf_prog's flags to bitfield As we need to add further flags to the bpf_prog structure, let's migrate both bools to a bitfield representation. The size of the base structure (excluding insns) remains unchanged at 40 bytes. Also add tags for kmemcheck, so that it doesn't throw false positives. Even if gcc were to generate suboptimal code, the bitfield is not accessed in performance-critical paths. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 2 +- arch/arm64/net/bpf_jit_comp.c | 2 +- arch/mips/net/bpf_jit.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 6 ++++-- kernel/bpf/core.c | 4 ++++ kernel/bpf/syscall.c | 4 ++-- net/core/filter.c | 2 +- 11 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 876060bcceeb..0df5fd561513 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1047,7 +1047,7 @@ void bpf_jit_compile(struct bpf_prog *fp) set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)ctx.target; - fp->jited = true; + fp->jited = 1; out: kfree(ctx.offsets); return; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c047598b09e0..a44e5293c6f5 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -744,7 +744,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)ctx.image; - prog->jited = true; + prog->jited = 1; out: kfree(ctx.offset); } diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0c4a133f6216..77cb27309db2 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1251,7 +1251,7 @@ void bpf_jit_compile(struct bpf_prog *fp) bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; - fp->jited = true; + fp->jited = 1; out: kfree(ctx.offsets); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 17cea18a09d3..04782164ee67 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -679,7 +679,7 @@ void bpf_jit_compile(struct bpf_prog *fp) ((u64 *)image)[1] = local_paca->kernel_toc; #endif fp->bpf_func = (void *)image; - fp->jited = true; + fp->jited = 1; } out: kfree(addrs); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index eeda051442c3..9a0c4c22e536 100644
--- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1310,7 +1310,7 @@ void bpf_int_jit_compile(struct bpf_prog *fp) if (jit.prg_buf) { set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.prg_buf; - fp->jited = true; + fp->jited = 1; } free_addrs: kfree(jit.addrs); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index f8b9f71b9a2b..22564f5f2364 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -812,7 +812,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (image) { bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; - fp->jited = true; + fp->jited = 1; } out: kfree(addrs); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 70efcd0940f9..75991979f667 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1109,7 +1109,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; - prog->jited = true; + prog->jited = 1; } out: kfree(addrs); diff --git a/include/linux/filter.h b/include/linux/filter.h index fa2cab985e57..bad618f316d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -326,8 +326,10 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ - bool jited; /* Is our filter JIT'ed? */ - bool gpl_compatible; /* Is our filter GPL compatible? */ + kmemcheck_bitfield_begin(meta); + u16 jited:1, /* Is our filter JIT'ed? */ + gpl_compatible:1; /* Is filter GPL compatible? */ + kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ struct bpf_prog_aux *aux; /* Auxiliary fields */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 67c380cfa9ca..c8855c2a7a48 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -82,6 +82,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) if (fp == NULL) return NULL; + kmemcheck_annotate_bitfield(fp, meta); + aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); if (aux == NULL) { vfree(fp); @@ -110,6 +112,8 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); if (fp != NULL) { + kmemcheck_annotate_bitfield(fp, meta); + memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); fp->pages = size / PAGE_SIZE; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 35bac8e8b071..2190ab14b763 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -553,10 +553,10 @@ static int bpf_prog_load(union bpf_attr *attr) goto free_prog; prog->orig_prog = NULL; - prog->jited = false; + prog->jited = 0; atomic_set(&prog->aux->refcnt, 1); - prog->gpl_compatible = is_gpl; + prog->gpl_compatible = is_gpl ? 
1 : 0; /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); diff --git a/net/core/filter.c b/net/core/filter.c index 60e3fe7c59c0..04664acb86ce 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1001,7 +1001,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, int err; fp->bpf_func = NULL; - fp->jited = false; + fp->jited = 0; err = bpf_check_classic(fp->insns, fp->len); if (err) { -- cgit v1.2.3 From c46646d0484f5d08e2bede9b45034ba5b8b489cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2015 01:41:51 +0200 Subject: sched, bpf: add helper for retrieving routing realms Using routing realms as part of the classifier is quite useful, it can be viewed as a tag for one or multiple routing entries (think of an analogy to net_cls cgroup for processes), set by user space routing daemons or via iproute2 as an indicator for traffic classifiers and later on processed in the eBPF program. Unlike actions, the classifier can inspect device flags and enable netif_keep_dst() if necessary. tc actions don't have that possibility, but in case people know what they are doing, it can be used from there as well (e.g. via devs that must keep dsts by design anyway). If a realm is set, the handler returns the non-zero realm. User space can set the full 32bit realm for the dst. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- include/uapi/linux/bpf.h | 7 +++++++ kernel/bpf/syscall.c | 2 ++ net/core/filter.c | 22 ++++++++++++++++++++++ net/sched/cls_bpf.c | 8 ++++++-- 5 files changed, 39 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index bad618f316d7..3d5fd24b321b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -328,7 +328,8 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ - gpl_compatible:1; /* Is filter GPL compatible? */ + gpl_compatible:1, /* Is filter GPL compatible? */ + dst_needed:1; /* Do we need dst entry? 
*/ kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4ec0b5488294..564f1f091991 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -280,6 +280,13 @@ enum bpf_func_id { * Return: TC_ACT_REDIRECT */ BPF_FUNC_redirect, + + /** + * bpf_get_route_realm(skb) - retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + */ + BPF_FUNC_get_route_realm, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2190ab14b763..5f35f420c12f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -402,6 +402,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog) */ BUG_ON(!prog->aux->ops->get_func_proto); + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; if (insn->imm == BPF_FUNC_tail_call) { /* mark bpf_tail_call as different opcode * to avoid conditional branch in diff --git a/net/core/filter.c b/net/core/filter.c index 04664acb86ce..45c69ce4c847 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -49,6 +49,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1478,6 +1479,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + const struct dst_entry *dst; + + dst = skb_dst((struct sk_buff *) (unsigned long) r1); + if (dst) + return dst->tclassid; +#endif + return 0; +} + +static const struct bpf_func_proto bpf_get_route_realm_proto = { + .func = bpf_get_route_realm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; @@ -1648,6 +1668,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return bpf_get_skb_set_tunnel_key_proto(); case BPF_FUNC_redirect: return &bpf_redirect_proto; + case BPF_FUNC_get_route_realm: + return &bpf_get_route_realm_proto; default: return sk_filter_func_proto(func_id); } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 7eeffaf69c75..5faaa5425f7b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -262,7 +262,8 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) return 0; } -static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog) +static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, + const struct tcf_proto *tp) { struct bpf_prog *fp; char *name = NULL; @@ -294,6 +295,9 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog) prog->bpf_name = name; prog->filter = fp; + if (fp->dst_needed) + netif_keep_dst(qdisc_dev(tp->q)); + return 0; } @@ -330,7 +334,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, prog->exts_integrated = have_exts; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : - cls_bpf_prog_from_efd(tb, prog); + cls_bpf_prog_from_efd(tb, prog, tp); if (ret < 0) { tcf_exts_destroy(&exts); return ret; -- cgit v1.2.3 From f3a6bd393c2c5d0e6b16624ba99a1c5fa07bdb0b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 30 Sep 2015 15:15:52 +0900 Subject: phylib: Add phy_set_max_speed helper Add a helper to allow ethernet drivers to limit the speed of a phy (that they are attached to). 
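
For example (a minimal sketch; the surrounding driver code is hypothetical, only phy_set_max_speed() itself comes from this patch), a MAC that cannot run gigabit could cap an attached PHY like this:

	/* after phy_connect(), cap the supported/advertised modes at 100 Mbit/s */
	err = phy_set_max_speed(phydev, SPEED_100);
	if (err)
		dev_err(&phydev->dev, "failed to set PHY max speed\n");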
This mainly involves factoring out the business-end of of_set_phy_supported() and exporting a new symbol. This code seems to be open coded in several places, in several different variants. It is envisaged that this will be used in situations where setting the "max-speed" property in DT is not appropriate, e.g. because the maximum speed is not a property of the phy hardware. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 59 ++++++++++++++++++++++++++++++-------------- include/linux/phy.h | 1 + 2 files changed, 41 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index f761288abe66..383389146099 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1239,6 +1239,44 @@ static int gen10g_resume(struct phy_device *phydev) return 0; } +static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) +{ + /* The default values for phydev->supported are provided by the PHY + * driver "features" member, we want to reset to sane defaults first + * before supporting higher speeds. + */ + phydev->supported &= PHY_DEFAULT_FEATURES; + + switch (max_speed) { + default: + return -ENOTSUPP; + case SPEED_1000: + phydev->supported |= PHY_1000BT_FEATURES; + /* fall through */ + case SPEED_100: + phydev->supported |= PHY_100BT_FEATURES; + /* fall through */ + case SPEED_10: + phydev->supported |= PHY_10BT_FEATURES; + } + + return 0; +} + +int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) +{ + int err; + + err = __set_phy_supported(phydev, max_speed); + if (err) + return err; + + phydev->advertising = phydev->supported; + + return 0; +} +EXPORT_SYMBOL(phy_set_max_speed); + static void of_set_phy_supported(struct phy_device *phydev) { struct device_node *node = phydev->dev.of_node; @@ -1250,25 +1288,8 @@ static void of_set_phy_supported(struct phy_device *phydev) if (!node) return; - if (!of_property_read_u32(node, "max-speed", &max_speed)) { - /* The default values for phydev->supported are provided by the PHY - * driver "features" member, we want to reset to sane defaults fist - * before supporting higher speeds. - */ - phydev->supported &= PHY_DEFAULT_FEATURES; - - switch (max_speed) { - default: - return; - - case SPEED_1000: - phydev->supported |= PHY_1000BT_FEATURES; - case SPEED_100: - phydev->supported |= PHY_100BT_FEATURES; - case SPEED_10: - phydev->supported |= PHY_10BT_FEATURES; - } - } + if (!of_property_read_u32(node, "max-speed", &max_speed)) + __set_phy_supported(phydev, max_speed); } /** diff --git a/include/linux/phy.h b/include/linux/phy.h index 4a4e3a092337..4c477e6ece33 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -798,6 +798,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); void phy_device_free(struct phy_device *phydev); +int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 3220befddc0da1f4e09fc790a32a9bd8427e8889 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 1 Oct 2015 18:40:33 +0300 Subject: usb: store the new usb 3.1 SuperSpeedPlus device capability descriptor If a device supports usb 3.1 SuperSpeedPlus Gen2 speeds it provides a SuperSpeedPlus device capability descriptor as a part of its BOS descriptor.
If we find one while parsing the BOS, then save it together with the other device capabilities found in the BOS. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 4 ++++ include/linux/usb.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index b9ddf0c1ffe5..7caff020106e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -853,6 +853,10 @@ int usb_get_bos_descriptor(struct usb_device *dev) dev->bos->ss_cap = (struct usb_ss_cap_descriptor *)buffer; break; + case USB_SSP_CAP_TYPE: + dev->bos->ssp_cap = + (struct usb_ssp_cap_descriptor *)buffer; + break; case CONTAINER_ID_TYPE: dev->bos->ss_id = (struct usb_ss_container_id_descriptor *)buffer; diff --git a/include/linux/usb.h b/include/linux/usb.h index 2cbcf8db517a..b9a28074210f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -328,6 +328,7 @@ struct usb_host_bos { /* wireless cap descriptor is handled by wusb */ struct usb_ext_cap_descriptor *ext_cap; struct usb_ss_cap_descriptor *ss_cap; + struct usb_ssp_cap_descriptor *ssp_cap; struct usb_ss_container_id_descriptor *ss_id; }; -- cgit v1.2.3 From 7117522520b9101af7a0602d6b0d1e67d689fc6b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 1 Oct 2015 18:40:37 +0300 Subject: usb: define HCD_USB31 speed option for hosts that support USB 3.1 features Hosts that support USB 3.1 Enhanced SuperSpeed can set their speed to HCD_USB31 to let usb core and host drivers know that the controller supports new USB 3.1 features. Make sure usb core handles HCD_USB31 hosts correctly, for now similarly to HCD_USB3. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 3 +++ include/linux/usb/hcd.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 86b3d1190500..a02d3e68476e 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -555,6 +555,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) switch (wValue & 0xff00) { case USB_DT_DEVICE << 8: switch (hcd->speed) { + case HCD_USB31: case HCD_USB3: bufp = usb3_rh_dev_descriptor; break; @@ -576,6 +577,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) break; case USB_DT_CONFIG << 8: switch (hcd->speed) { + case HCD_USB31: case HCD_USB3: bufp = ss_rh_config_descriptor; len = sizeof ss_rh_config_descriptor; @@ -2775,6 +2777,7 @@ int usb_add_hcd(struct usb_hcd *hcd, rhdev->speed = USB_SPEED_WIRELESS; break; case HCD_USB3: + case HCD_USB31: rhdev->speed = USB_SPEED_SUPER; break; default: diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 4d147277b30d..f89c24bd53a4 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -250,6 +250,7 @@ struct hc_driver { #define HCD_USB2 0x0020 /* USB 2.0 */ #define HCD_USB25 0x0030 /* Wireless USB 1.0 (USB 2.5)*/ #define HCD_USB3 0x0040 /* USB 3.0 */ +#define HCD_USB31 0x0050 /* USB 3.1 */ #define HCD_MASK 0x0070 #define HCD_BH 0x0100 /* URB complete in BH context */ -- cgit v1.2.3 From f975c5cdb53c1cfef18c28aaae8385e5d16bc104 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 29 Sep 2015 18:21:18 +0900 Subject: usb: renesas_usbhs: fix build warning if 64-bit architecture This patch fixes the following warning in a 64-bit architecture environment, where a pointer is wider than u32: ./drivers/usb/renesas_usbhs/common.c:496:25: warning: cast from pointer to integer of different size
[-Wpointer-to-int-cast] dparam->type = of_id ? (u32)of_id->data : 0; Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 2 +- include/linux/usb/renesas_usbhs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 7b98e1d9194c..0ce398c5482e 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -493,7 +493,7 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev) return NULL; dparam = &info->driver_param; - dparam->type = of_id ? (u32)of_id->data : 0; + dparam->type = of_id ? (uintptr_t)of_id->data : 0; if (!of_property_read_u32(dev->of_node, "renesas,buswait", &tmp)) dparam->buswait_bwait = tmp; gpio = of_get_named_gpio_flags(dev->of_node, "renesas,enable-gpio", 0, diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 3dd5a781da99..bfb74723f151 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -157,7 +157,7 @@ struct renesas_usbhs_driver_param { */ int pio_dma_border; /* default is 64byte */ - u32 type; + uintptr_t type; u32 enable_gpio; /* -- cgit v1.2.3 From 621a5f7ad9cd1ce7933f1d302067cbd58354173c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 26 Sep 2015 15:04:07 -0700 Subject: debugfs: Pass bool pointer to debugfs_create_bool() It's a bit odd that debugfs_create_bool() takes 'u32 *' as an argument, when all it needs is a boolean pointer. It would be better to update this API to make it accept 'bool *' instead, as that will make it more consistent and often more convenient. On top of that, a bool takes just a byte. That required updating all user sites as well, in the same commit that updates the API. regmap core was also using debugfs_{read|write}_file_bool() directly, and its variable types were updated to bool as well.
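
A minimal usage sketch against the new signature (the file and variable names are illustrative, not from the patch):

	static bool my_feature_enabled;

	static void my_debugfs_init(struct dentry *parent)
	{
		/* creates a Y/N file backed directly by the bool */
		debugfs_create_bool("my_feature", 0644, parent, &my_feature_enabled);
	}

The backing variable must now be a bool rather than a u32; reads still yield 'Y' or 'N', as the Documentation/filesystems/debugfs.txt hunk below describes.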
Signed-off-by: Viresh Kumar Acked-by: Mark Brown Acked-by: Charles Keepax Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/debugfs.txt | 2 +- arch/arm64/kernel/debug-monitors.c | 4 ++-- drivers/acpi/internal.h | 2 +- drivers/base/regmap/internal.h | 6 +++--- drivers/base/regmap/regcache-lzo.c | 4 ++-- drivers/base/regmap/regcache.c | 24 ++++++++++++------------ drivers/bluetooth/hci_qca.c | 4 ++-- drivers/iommu/amd_iommu_init.c | 2 +- drivers/iommu/amd_iommu_types.h | 2 +- drivers/misc/mei/mei_dev.h | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 4 ++-- drivers/net/wireless/ath/ath10k/core.h | 2 +- drivers/net/wireless/ath/ath5k/ath5k.h | 2 +- drivers/net/wireless/ath/ath9k/hw.c | 2 +- drivers/net/wireless/ath/ath9k/hw.h | 4 ++-- drivers/net/wireless/b43/debugfs.c | 18 +++++++++--------- drivers/net/wireless/b43/debugfs.h | 2 +- drivers/net/wireless/b43legacy/debugfs.c | 10 +++++----- drivers/net/wireless/b43legacy/debugfs.h | 2 +- drivers/net/wireless/iwlegacy/common.h | 6 +++--- drivers/net/wireless/iwlwifi/mvm/mvm.h | 6 +++--- drivers/scsi/snic/snic_trc.c | 4 ++-- drivers/scsi/snic/snic_trc.h | 2 +- drivers/uwb/uwb-debug.c | 2 +- fs/debugfs/file.c | 6 +++--- include/linux/debugfs.h | 4 ++-- include/linux/edac.h | 2 +- include/linux/fault-inject.h | 2 +- kernel/futex.c | 4 ++-- lib/dma-debug.c | 2 +- mm/failslab.c | 8 ++++---- mm/page_alloc.c | 8 ++++---- sound/soc/codecs/wm_adsp.h | 2 +- 33 files changed, 78 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 463f595733e8..4f45f71149cb 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -105,7 +105,7 @@ a variable of type size_t. Boolean values can be placed in debugfs with: struct dentry *debugfs_create_bool(const char *name, umode_t mode, - struct dentry *parent, u32 *value); + struct dentry *parent, bool *value); A read on the resulting file will yield either Y (for non-zero values) or N, followed by a newline. If written to, it will accept either upper- or diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index cebf78661a55..1c4cd4a0d7cc 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -58,7 +58,7 @@ static u32 mdscr_read(void) * Allow root to disable self-hosted debug from userspace. * This is useful if you want to connect an external JTAG debugger. 
*/ -static u32 debug_enabled = 1; +static bool debug_enabled = true; static int create_debug_debugfs_entry(void) { @@ -69,7 +69,7 @@ fs_initcall(create_debug_debugfs_entry); static int __init early_debug_disable(char *buf) { - debug_enabled = 0; + debug_enabled = false; return 0; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 9db196de003c..5a72e2b140fc 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -138,7 +138,7 @@ struct acpi_ec { unsigned long gpe; unsigned long command_addr; unsigned long data_addr; - u32 global_lock; + bool global_lock; unsigned long flags; unsigned long reference_count; struct mutex mutex; diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index cc557886ab23..5b907f2c62b9 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -122,9 +122,9 @@ struct regmap { unsigned int num_reg_defaults_raw; /* if set, only the cache is modified not the HW */ - u32 cache_only; + bool cache_only; /* if set, only the HW is modified not the cache */ - u32 cache_bypass; + bool cache_bypass; /* if set, remember to free reg_defaults_raw */ bool cache_free; @@ -132,7 +132,7 @@ struct regmap { const void *reg_defaults_raw; void *cache; /* if set, the cache contains newer data than the HW */ - u32 cache_dirty; + bool cache_dirty; /* if set, the HW registers are known to match map->reg_defaults */ bool no_sync_defaults; diff --git a/drivers/base/regmap/regcache-lzo.c b/drivers/base/regmap/regcache-lzo.c index 2d53f6f138e1..736e0d378567 100644 --- a/drivers/base/regmap/regcache-lzo.c +++ b/drivers/base/regmap/regcache-lzo.c @@ -355,9 +355,9 @@ static int regcache_lzo_sync(struct regmap *map, unsigned int min, if (ret > 0 && val == map->reg_defaults[ret].def) continue; - map->cache_bypass = 1; + map->cache_bypass = true; ret = _regmap_write(map, i, val); - map->cache_bypass = 0; + map->cache_bypass = false; if (ret) return ret; dev_dbg(map->dev, "Synced register %#x, value %#x\n", diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index 6f8a13ec32a4..4c07802986b2 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -54,11 +54,11 @@ static int regcache_hw_init(struct regmap *map) return -ENOMEM; if (!map->reg_defaults_raw) { - u32 cache_bypass = map->cache_bypass; + bool cache_bypass = map->cache_bypass; dev_warn(map->dev, "No cache defaults, reading back from HW\n"); /* Bypass the cache access till data read from HW*/ - map->cache_bypass = 1; + map->cache_bypass = true; tmp_buf = kmalloc(map->cache_size_raw, GFP_KERNEL); if (!tmp_buf) { ret = -ENOMEM; @@ -285,9 +285,9 @@ static int regcache_default_sync(struct regmap *map, unsigned int min, if (!regcache_reg_needs_sync(map, reg, val)) continue; - map->cache_bypass = 1; + map->cache_bypass = true; ret = _regmap_write(map, reg, val); - map->cache_bypass = 0; + map->cache_bypass = false; if (ret) { dev_err(map->dev, "Unable to sync register %#x. 
%d\n", reg, ret); @@ -315,7 +315,7 @@ int regcache_sync(struct regmap *map) int ret = 0; unsigned int i; const char *name; - unsigned int bypass; + bool bypass; BUG_ON(!map->cache_ops); @@ -333,7 +333,7 @@ int regcache_sync(struct regmap *map) map->async = true; /* Apply any patch first */ - map->cache_bypass = 1; + map->cache_bypass = true; for (i = 0; i < map->patch_regs; i++) { ret = _regmap_write(map, map->patch[i].reg, map->patch[i].def); if (ret != 0) { @@ -342,7 +342,7 @@ int regcache_sync(struct regmap *map) goto out; } } - map->cache_bypass = 0; + map->cache_bypass = false; if (map->cache_ops->sync) ret = map->cache_ops->sync(map, 0, map->max_register); @@ -384,7 +384,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min, { int ret = 0; const char *name; - unsigned int bypass; + bool bypass; BUG_ON(!map->cache_ops); @@ -637,11 +637,11 @@ static int regcache_sync_block_single(struct regmap *map, void *block, if (!regcache_reg_needs_sync(map, regtmp, val)) continue; - map->cache_bypass = 1; + map->cache_bypass = true; ret = _regmap_write(map, regtmp, val); - map->cache_bypass = 0; + map->cache_bypass = false; if (ret != 0) { dev_err(map->dev, "Unable to sync register %#x. %d\n", regtmp, ret); @@ -668,14 +668,14 @@ static int regcache_sync_block_raw_flush(struct regmap *map, const void **data, dev_dbg(map->dev, "Writing %zu bytes for %d registers from 0x%x-0x%x\n", count * val_bytes, count, base, cur - map->reg_stride); - map->cache_bypass = 1; + map->cache_bypass = true; ret = _regmap_raw_write(map, base, *data, count * val_bytes); if (ret) dev_err(map->dev, "Unable to sync registers %#x-%#x. %d\n", base, cur - map->reg_stride, ret); - map->cache_bypass = 0; + map->cache_bypass = false; *data = NULL; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 6b9b91267959..509477681661 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -80,8 +80,8 @@ struct qca_data { spinlock_t hci_ibs_lock; /* HCI_IBS state lock */ u8 tx_ibs_state; /* HCI_IBS transmit side power state*/ u8 rx_ibs_state; /* HCI_IBS receive side power state */ - u32 tx_vote; /* Clock must be on for TX */ - u32 rx_vote; /* Clock must be on for RX */ + bool tx_vote; /* Clock must be on for TX */ + bool rx_vote; /* Clock must be on for RX */ struct timer_list tx_idle_timer; u32 tx_idle_delay; struct timer_list wake_retrans_timer; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 5ef347a13cb5..c59314523f4c 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -138,7 +138,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -u32 amd_iommu_unmap_flush; /* if true, flush on every unmap */ +bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f65908841be0..861550a1ad1f 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -674,7 +674,7 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; * If true, the addresses will be flushed on unmap time, not when * they are reused */ -extern u32 amd_iommu_unmap_flush; +extern bool amd_iommu_unmap_flush; /* Smallest max PASID supported by any IOMMU in the system */ extern u32 amd_iommu_max_pasid; diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h 
index e25ee16c658e..d74b6aa8ae27 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -528,7 +528,7 @@ struct mei_device { DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX); unsigned long me_client_index; - u32 allow_fixed_address; + bool allow_fixed_address; struct mei_cl wd_cl; enum mei_wd_states wd_state; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index fa0c7b54ec7a..5384f999c24b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -767,8 +767,8 @@ struct adapter { bool tid_release_task_busy; struct dentry *debugfs_root; - u32 use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ - u32 trace_rss; /* 1 implies that different RSS flit per filter is + bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ + bool trace_rss; /* 1 implies that different RSS flit per filter is * used per filter else if 0 default RSS flit is * used for all 4 filters. */ diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 12542144fe12..b6a08177b6ee 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -680,7 +680,7 @@ struct ath10k { bool monitor_started; unsigned int filter_flags; unsigned long dev_flags; - u32 dfs_block_radar_events; + bool dfs_block_radar_events; /* protected by conf_mutex */ bool radar_enabled; diff --git a/drivers/net/wireless/ath/ath5k/ath5k.h b/drivers/net/wireless/ath/ath5k/ath5k.h index fa6e89e5c421..ba12f7f4061d 100644 --- a/drivers/net/wireless/ath/ath5k/ath5k.h +++ b/drivers/net/wireless/ath/ath5k/ath5k.h @@ -1367,7 +1367,7 @@ struct ath5k_hw { u8 ah_retry_long; u8 ah_retry_short; - u32 ah_use_32khz_clock; + bool ah_use_32khz_clock; u8 ah_coverage_class; bool ah_ack_bitrate_high; diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 1dd0339de372..8823fadada89 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -385,7 +385,7 @@ static void ath9k_hw_init_config(struct ath_hw *ah) ah->config.dma_beacon_response_time = 1; ah->config.sw_beacon_response_time = 6; - ah->config.cwm_ignore_extcca = 0; + ah->config.cwm_ignore_extcca = false; ah->config.analog_shiftreg = 1; ah->config.rx_intr_mitigation = true; diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index e8454db17634..52971b48ab6a 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -332,14 +332,14 @@ enum ath9k_hw_hang_checks { struct ath9k_ops_config { int dma_beacon_response_time; int sw_beacon_response_time; - u32 cwm_ignore_extcca; + bool cwm_ignore_extcca; u32 pcie_waen; u8 analog_shiftreg; u32 ofdm_trig_low; u32 ofdm_trig_high; u32 cck_trig_high; u32 cck_trig_low; - u32 enable_paprd; + bool enable_paprd; int serialize_regmode; bool rx_intr_mitigation; bool tx_intr_mitigation; diff --git a/drivers/net/wireless/b43/debugfs.c b/drivers/net/wireless/b43/debugfs.c index e807bd930647..b4bcd94aff6c 100644 --- a/drivers/net/wireless/b43/debugfs.c +++ b/drivers/net/wireless/b43/debugfs.c @@ -676,15 +676,15 @@ static void b43_add_dynamic_debug(struct b43_wldev *dev) e->dyn_debug_dentries[id] = d; \ } while (0) - add_dyn_dbg("debug_xmitpower", B43_DBG_XMITPOWER, 0); - add_dyn_dbg("debug_dmaoverflow", B43_DBG_DMAOVERFLOW, 0); - add_dyn_dbg("debug_dmaverbose", B43_DBG_DMAVERBOSE, 0); - add_dyn_dbg("debug_pwork_fast", B43_DBG_PWORK_FAST, 
0); - add_dyn_dbg("debug_pwork_stop", B43_DBG_PWORK_STOP, 0); - add_dyn_dbg("debug_lo", B43_DBG_LO, 0); - add_dyn_dbg("debug_firmware", B43_DBG_FIRMWARE, 0); - add_dyn_dbg("debug_keys", B43_DBG_KEYS, 0); - add_dyn_dbg("debug_verbose_stats", B43_DBG_VERBOSESTATS, 0); + add_dyn_dbg("debug_xmitpower", B43_DBG_XMITPOWER, false); + add_dyn_dbg("debug_dmaoverflow", B43_DBG_DMAOVERFLOW, false); + add_dyn_dbg("debug_dmaverbose", B43_DBG_DMAVERBOSE, false); + add_dyn_dbg("debug_pwork_fast", B43_DBG_PWORK_FAST, false); + add_dyn_dbg("debug_pwork_stop", B43_DBG_PWORK_STOP, false); + add_dyn_dbg("debug_lo", B43_DBG_LO, false); + add_dyn_dbg("debug_firmware", B43_DBG_FIRMWARE, false); + add_dyn_dbg("debug_keys", B43_DBG_KEYS, false); + add_dyn_dbg("debug_verbose_stats", B43_DBG_VERBOSESTATS, false); #undef add_dyn_dbg } diff --git a/drivers/net/wireless/b43/debugfs.h b/drivers/net/wireless/b43/debugfs.h index 50517b801cb4..d05377745011 100644 --- a/drivers/net/wireless/b43/debugfs.h +++ b/drivers/net/wireless/b43/debugfs.h @@ -68,7 +68,7 @@ struct b43_dfsentry { u32 shm32read_addr_next; /* Enabled/Disabled list for the dynamic debugging features. */ - u32 dyn_debug[__B43_NR_DYNDBG]; + bool dyn_debug[__B43_NR_DYNDBG]; /* Dentries for the dynamic debugging entries. */ struct dentry *dyn_debug_dentries[__B43_NR_DYNDBG]; }; diff --git a/drivers/net/wireless/b43legacy/debugfs.c b/drivers/net/wireless/b43legacy/debugfs.c index 1965edb765a2..090910ea259e 100644 --- a/drivers/net/wireless/b43legacy/debugfs.c +++ b/drivers/net/wireless/b43legacy/debugfs.c @@ -369,11 +369,11 @@ static void b43legacy_add_dynamic_debug(struct b43legacy_wldev *dev) e->dyn_debug_dentries[id] = d; \ } while (0) - add_dyn_dbg("debug_xmitpower", B43legacy_DBG_XMITPOWER, 0); - add_dyn_dbg("debug_dmaoverflow", B43legacy_DBG_DMAOVERFLOW, 0); - add_dyn_dbg("debug_dmaverbose", B43legacy_DBG_DMAVERBOSE, 0); - add_dyn_dbg("debug_pwork_fast", B43legacy_DBG_PWORK_FAST, 0); - add_dyn_dbg("debug_pwork_stop", B43legacy_DBG_PWORK_STOP, 0); + add_dyn_dbg("debug_xmitpower", B43legacy_DBG_XMITPOWER, false); + add_dyn_dbg("debug_dmaoverflow", B43legacy_DBG_DMAOVERFLOW, false); + add_dyn_dbg("debug_dmaverbose", B43legacy_DBG_DMAVERBOSE, false); + add_dyn_dbg("debug_pwork_fast", B43legacy_DBG_PWORK_FAST, false); + add_dyn_dbg("debug_pwork_stop", B43legacy_DBG_PWORK_STOP, false); #undef add_dyn_dbg } diff --git a/drivers/net/wireless/b43legacy/debugfs.h b/drivers/net/wireless/b43legacy/debugfs.h index ae3b0d0fa849..9ee32158b947 100644 --- a/drivers/net/wireless/b43legacy/debugfs.h +++ b/drivers/net/wireless/b43legacy/debugfs.h @@ -47,7 +47,7 @@ struct b43legacy_dfsentry { struct b43legacy_txstatus_log txstatlog; /* Enabled/Disabled list for the dynamic debugging features. */ - u32 dyn_debug[__B43legacy_NR_DYNDBG]; + bool dyn_debug[__B43legacy_NR_DYNDBG]; /* Dentries for the dynamic debugging entries. 
*/ struct dentry *dyn_debug_dentries[__B43legacy_NR_DYNDBG]; }; diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h index 5b972798bdff..ce52cf114fde 100644 --- a/drivers/net/wireless/iwlegacy/common.h +++ b/drivers/net/wireless/iwlegacy/common.h @@ -1425,9 +1425,9 @@ struct il_priv { #endif /* CONFIG_IWLEGACY_DEBUGFS */ struct work_struct txpower_work; - u32 disable_sens_cal; - u32 disable_chain_noise_cal; - u32 disable_tx_power_cal; + bool disable_sens_cal; + bool disable_chain_noise_cal; + bool disable_tx_power_cal; struct work_struct run_time_calib_work; struct timer_list stats_periodic; struct timer_list watchdog; diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index b95a07ec9e36..72e8a03a5293 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -649,7 +649,7 @@ struct iwl_mvm { const struct iwl_fw_bcast_filter *bcast_filters; #ifdef CONFIG_IWLWIFI_DEBUGFS struct { - u32 override; /* u32 for debugfs_create_bool */ + bool override; struct iwl_bcast_filter_cmd cmd; } dbgfs_bcast_filtering; #endif @@ -673,7 +673,7 @@ struct iwl_mvm { bool disable_power_off; bool disable_power_off_d3; - u32 scan_iter_notif_enabled; /* must be u32 for debugfs_create_bool */ + bool scan_iter_notif_enabled; struct debugfs_blob_wrapper nvm_hw_blob; struct debugfs_blob_wrapper nvm_sw_blob; @@ -729,7 +729,7 @@ struct iwl_mvm { int n_nd_channels; bool net_detect; #ifdef CONFIG_IWLWIFI_DEBUGFS - u32 d3_wake_sysassert; /* must be u32 for debugfs_create_bool */ + bool d3_wake_sysassert; bool d3_test_active; bool store_d3_resume_sram; void *d3_resume_sram; diff --git a/drivers/scsi/snic/snic_trc.c b/drivers/scsi/snic/snic_trc.c index 28a40a7ade38..f00ebf4717e0 100644 --- a/drivers/scsi/snic/snic_trc.c +++ b/drivers/scsi/snic/snic_trc.c @@ -148,7 +148,7 @@ snic_trc_init(void) trc->max_idx = (tbuf_sz / SNIC_TRC_ENTRY_SZ); trc->rd_idx = trc->wr_idx = 0; - trc->enable = 1; + trc->enable = true; SNIC_INFO("Trace Facility Enabled.\n Trace Buffer SZ %lu Pages.\n", tbuf_sz / PAGE_SIZE); ret = 0; @@ -169,7 +169,7 @@ snic_trc_free(void) { struct snic_trc *trc = &snic_glob->trc; - trc->enable = 0; + trc->enable = false; snic_trc_debugfs_term(); if (trc->buf) { diff --git a/drivers/scsi/snic/snic_trc.h b/drivers/scsi/snic/snic_trc.h index 427faee5f97e..b37f8867bfde 100644 --- a/drivers/scsi/snic/snic_trc.h +++ b/drivers/scsi/snic/snic_trc.h @@ -45,7 +45,7 @@ struct snic_trc { u32 max_idx; /* Max Index into trace buffer */ u32 rd_idx; u32 wr_idx; - u32 enable; /* Control Variable for Tracing */ + bool enable; /* Control Variable for Tracing */ struct dentry *trc_enable; /* debugfs file object */ struct dentry *trc_file; diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c index 0b1e5a9449b5..991374b13571 100644 --- a/drivers/uwb/uwb-debug.c +++ b/drivers/uwb/uwb-debug.c @@ -55,7 +55,7 @@ struct uwb_dbg { struct uwb_pal pal; - u32 accept; + bool accept; struct list_head rsvs; struct dentry *root_d; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 6c55ade071c3..b70c20fae502 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -439,7 +439,7 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { char buf[3]; - u32 *val = file->private_data; + bool *val = file->private_data; if (*val) buf[0] = 'Y'; @@ -457,7 +457,7 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, char buf[32]; size_t buf_size; bool 
bv; - u32 *val = file->private_data; + bool *val = file->private_data; buf_size = min(count, (sizeof(buf)-1)); if (copy_from_user(buf, user_buf, buf_size)) @@ -503,7 +503,7 @@ static const struct file_operations fops_bool = { * code. */ struct dentry *debugfs_create_bool(const char *name, umode_t mode, - struct dentry *parent, u32 *value) + struct dentry *parent, bool *value) { return debugfs_create_file(name, mode, parent, value, &fops_bool); } diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 9beb636b97eb..8321fe3058d6 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -92,7 +92,7 @@ struct dentry *debugfs_create_size_t(const char *name, umode_t mode, struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value); struct dentry *debugfs_create_bool(const char *name, umode_t mode, - struct dentry *parent, u32 *value); + struct dentry *parent, bool *value); struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, @@ -243,7 +243,7 @@ static inline struct dentry *debugfs_create_atomic_t(const char *name, umode_t m static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, - u32 *value) + bool *value) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/edac.h b/include/linux/edac.h index da3b72e95db3..7c6b7ba55589 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -772,7 +772,7 @@ struct mem_ctl_info { #ifdef CONFIG_EDAC_DEBUG struct dentry *debugfs; u8 fake_inject_layer[EDAC_MAX_LAYERS]; - u32 fake_inject_ue; + bool fake_inject_ue; u16 fake_inject_count; #endif }; diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 798fad9e420d..3159a7dba034 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -18,7 +18,7 @@ struct fault_attr { atomic_t times; atomic_t space; unsigned long verbose; - u32 task_filter; + bool task_filter; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; diff --git a/kernel/futex.c b/kernel/futex.c index 6e443efc65f4..395b967841b4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -267,10 +267,10 @@ static struct futex_hash_bucket *futex_queues; static struct { struct fault_attr attr; - u32 ignore_private; + bool ignore_private; } fail_futex = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_private = 0, + .ignore_private = false, }; static int __init setup_fail_futex(char *str) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index dace71fe41f7..fcb65d2a0b94 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -100,7 +100,7 @@ static LIST_HEAD(free_entries); static DEFINE_SPINLOCK(free_entries_lock); /* Global disable flag - will be set in case of an error */ -static u32 global_disable __read_mostly; +static bool global_disable __read_mostly; /* Early initialization disable flag, set at the end of dma_debug_init */ static bool dma_debug_initialized __read_mostly; diff --git a/mm/failslab.c b/mm/failslab.c index fefaabaab76d..98fb490311eb 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -3,12 +3,12 @@ static struct { struct fault_attr attr; - u32 ignore_gfp_wait; - int cache_filter; + bool ignore_gfp_wait; + bool cache_filter; } failslab = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = 1, - .cache_filter = 0, + .ignore_gfp_wait = true, + .cache_filter = false, }; bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 
48aaf7b9f253..805bbad2e24e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2159,13 +2159,13 @@ failed: static struct { struct fault_attr attr; - u32 ignore_gfp_highmem; - u32 ignore_gfp_wait; + bool ignore_gfp_highmem; + bool ignore_gfp_wait; u32 min_order; } fail_page_alloc = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = 1, - .ignore_gfp_highmem = 1, + .ignore_gfp_wait = true, + .ignore_gfp_highmem = true, .min_order = 1, };
diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index 579a6350fb01..2d117cf0e953 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -53,7 +53,7 @@ struct wm_adsp { int fw; int fw_ver; - u32 running; + bool running; struct list_head ctl_list; -- cgit v1.2.3
From f7025709e29aded887becdaf4a81072ef1f475bf Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Wed, 2 Sep 2015 15:40:02 +0200 Subject: kobject: explain what kobject's sd field is
The field became (more) unclear, especially name-wise, after sysfs_dirent was renamed to kernfs_node.
Signed-off-by: Ulf Magnusson Signed-off-by: Greg Kroah-Hartman
--- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux')
diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 637f67002c5a..e6284591599e 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -66,7 +66,7 @@ struct kobject { struct kobject *parent; struct kset *kset; struct kobj_type *ktype; - struct kernfs_node *sd; + struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; #ifdef CONFIG_DEBUG_KOBJECT_RELEASE struct delayed_work release; -- cgit v1.2.3
From 09e8b485ee4212f78bf27c8d727845919f85cf1f Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 29 Sep 2015 18:07:02 -0700 Subject: iommu: iova: Move iova cache management to the iova library
This is necessary to separate intel-iommu from the iova library.
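As a usage sketch of the refcounted interface (illustration only, not part of this patch; the module below and its domain parameters are made up, only iova_cache_get()/iova_cache_put(), init_iova_domain() and put_iova_domain() come from the iova library): each user takes a reference on the shared iova kmem cache at init time and drops it on exit, so the cache is created on the first get and destroyed on the last put:

	#include <linux/dma-mapping.h>
	#include <linux/iova.h>
	#include <linux/module.h>
	#include <linux/sizes.h>

	static struct iova_domain my_iovad;

	static int __init my_iova_user_init(void)
	{
		/* First user creates the "iommu_iova" kmem cache */
		int ret = iova_cache_get();

		if (ret)
			return ret;
		/* 4K granule, PFNs starting at 1, 32-bit boundary */
		init_iova_domain(&my_iovad, SZ_4K, 1, DMA_BIT_MASK(32) >> 12);
		return 0;
	}

	static void __exit my_iova_user_exit(void)
	{
		put_iova_domain(&my_iovad);
		iova_cache_put();	/* last user destroys the cache */
	}

	module_init(my_iova_user_init);
	module_exit(my_iova_user_exit);
	MODULE_LICENSE("GPL");

The mutex-protected user count means intel-iommu and any future user of the library can initialize and exit in any order.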
Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 6 ++-- drivers/iommu/iova.c | 83 ++++++++++++++++++++++++++------------------- include/linux/iova.h | 4 +-- 3 files changed, 54 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2d7349a3ee14..7b27bcdc30ad 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3711,7 +3711,7 @@ static inline int iommu_devinfo_cache_init(void) static int __init iommu_init_mempool(void) { int ret; - ret = iommu_iova_cache_init(); + ret = iova_cache_get(); if (ret) return ret; @@ -3725,7 +3725,7 @@ static int __init iommu_init_mempool(void) kmem_cache_destroy(iommu_domain_cache); domain_error: - iommu_iova_cache_destroy(); + iova_cache_put(); return -ENOMEM; } @@ -3734,7 +3734,7 @@ static void __init iommu_exit_mempool(void) { kmem_cache_destroy(iommu_devinfo_cache); kmem_cache_destroy(iommu_domain_cache); - iommu_iova_cache_destroy(); + iova_cache_put(); } static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b7c3d923f3e1..400f4d1b566f 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -20,40 +20,6 @@ #include #include -static struct kmem_cache *iommu_iova_cache; - -int iommu_iova_cache_init(void) -{ - int ret = 0; - - iommu_iova_cache = kmem_cache_create("iommu_iova", - sizeof(struct iova), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!iommu_iova_cache) { - pr_err("Couldn't create iova cache\n"); - ret = -ENOMEM; - } - - return ret; -} - -void iommu_iova_cache_destroy(void) -{ - kmem_cache_destroy(iommu_iova_cache); -} - -struct iova *alloc_iova_mem(void) -{ - return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); -} - -void free_iova_mem(struct iova *iova) -{ - kmem_cache_free(iommu_iova_cache, iova); -} - void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn, unsigned long pfn_32bit) @@ -242,6 +208,55 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova) rb_insert_color(&iova->node, root); } +static struct kmem_cache *iova_cache; +static unsigned int iova_cache_users; +static DEFINE_MUTEX(iova_cache_mutex); + +struct iova *alloc_iova_mem(void) +{ + return kmem_cache_alloc(iova_cache, GFP_ATOMIC); +} +EXPORT_SYMBOL(alloc_iova_mem); + +void free_iova_mem(struct iova *iova) +{ + kmem_cache_free(iova_cache, iova); +} +EXPORT_SYMBOL(free_iova_mem); + +int iova_cache_get(void) +{ + mutex_lock(&iova_cache_mutex); + if (!iova_cache_users) { + iova_cache = kmem_cache_create( + "iommu_iova", sizeof(struct iova), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!iova_cache) { + mutex_unlock(&iova_cache_mutex); + printk(KERN_ERR "Couldn't create iova cache\n"); + return -ENOMEM; + } + } + + iova_cache_users++; + mutex_unlock(&iova_cache_mutex); + + return 0; +} + +void iova_cache_put(void) +{ + mutex_lock(&iova_cache_mutex); + if (WARN_ON(!iova_cache_users)) { + mutex_unlock(&iova_cache_mutex); + return; + } + iova_cache_users--; + if (!iova_cache_users) + kmem_cache_destroy(iova_cache); + mutex_unlock(&iova_cache_mutex); +} + /** * alloc_iova - allocates an iova * @iovad: - iova domain in question diff --git a/include/linux/iova.h b/include/linux/iova.h index 3920a19d8194..92f7177db2ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -68,8 +68,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> 
iova_shift(iovad); } -int iommu_iova_cache_init(void); -void iommu_iova_cache_destroy(void); +int iova_cache_get(void); +void iova_cache_put(void); struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); -- cgit v1.2.3
From ff39988abd70bcd1b14a4c81f2d102e67b8db580 Mon Sep 17 00:00:00 2001 From: Siva Yerramreddy Date: Tue, 29 Sep 2015 18:09:37 -0700 Subject: dma: Add support to program MIC x100 status descriptors
The MIC X100 DMA engine has a special status descriptor which writes an 8 byte value to a destination location. This is used to signal completion of all DMA descriptors prior to the status descriptor. This patch adds a new DMA engine API which enables updating a destination address with an 8 byte immediate data value.
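A usage sketch (illustration only; the function, status_dma and status_val below are placeholders, and only the device_prep_dma_imm_data() hook comes from this patch): a client queues an 8 byte status write whose completion indicates that every descriptor submitted before it has finished:

	#include <linux/dmaengine.h>

	static int queue_status_write(struct dma_chan *chan,
				      dma_addr_t status_dma, u64 status_val)
	{
		struct dma_async_tx_descriptor *tx;

		/* Write the 8 byte status_val to status_dma when done */
		tx = chan->device->device_prep_dma_imm_data(chan, status_dma,
							    status_val, 0);
		if (!tx)
			return -ENOMEM;
		dmaengine_submit(tx);
		dma_async_issue_pending(chan);
		return 0;
	}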
Reviewed-by: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Lawrynowicz, Jacek Signed-off-by: Sudeep Dutt Signed-off-by: Siva Yerramreddy Signed-off-by: Greg Kroah-Hartman
--- drivers/dma/mic_x100_dma.c | 39 ++++++++++++++++++++++++++++++++++++++- include/linux/dmaengine.h | 4 ++++ 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux')
diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c index 74d9db05a5ad..068e920ecb68 100644 --- a/drivers/dma/mic_x100_dma.c +++ b/drivers/dma/mic_x100_dma.c @@ -193,8 +193,16 @@ static void mic_dma_prog_intr(struct mic_dma_chan *ch) static int mic_dma_do_dma(struct mic_dma_chan *ch, int flags, dma_addr_t src, dma_addr_t dst, size_t len) { - if (-ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) + if (len && -ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) { return -ENOMEM; + } else { + /* 3 is the maximum number of status descriptors */ + int ret = mic_dma_avail_desc_ring_space(ch, 3); + + if (ret < 0) + return ret; + } + /* Above mic_dma_prog_memcpy_desc() makes sure we have enough space */ if (flags & DMA_PREP_FENCE) { mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0, @@ -270,6 +278,33 @@ allocate_tx(struct mic_dma_chan *ch) return tx; } +/* Program a status descriptor with dst as address and value to be written */ +static struct dma_async_tx_descriptor * +mic_dma_prep_status_lock(struct dma_chan *ch, dma_addr_t dst, u64 src_val, + unsigned long flags) +{ + struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch); + int result; + + spin_lock(&mic_ch->prep_lock); + result = mic_dma_avail_desc_ring_space(mic_ch, 4); + if (result < 0) + goto error; + mic_dma_prep_status_desc(&mic_ch->desc_ring[mic_ch->head], src_val, dst, + false); + mic_dma_hw_ring_inc_head(mic_ch); + result = mic_dma_do_dma(mic_ch, flags, 0, 0, 0); + if (result < 0) + goto error; + + return allocate_tx(mic_ch); +error: + dev_err(mic_dma_ch_to_device(mic_ch), + "Error enqueueing dma status descriptor, error=%d\n", result); + spin_unlock(&mic_ch->prep_lock); + return NULL; +} + /* * Prepare a memcpy descriptor to be added to the ring. * Note that the temporary descriptor adds an extra overhead of copying the @@ -587,6 +622,8 @@ static int mic_dma_register_dma_device(struct mic_dma_device *mic_dma_dev, mic_dma_free_chan_resources; mic_dma_dev->dma_dev.device_tx_status = mic_dma_tx_status; mic_dma_dev->dma_dev.device_prep_dma_memcpy = mic_dma_prep_memcpy_lock; + mic_dma_dev->dma_dev.device_prep_dma_imm_data = + mic_dma_prep_status_lock; mic_dma_dev->dma_dev.device_prep_dma_interrupt = mic_dma_prep_interrupt_lock; mic_dma_dev->dma_dev.device_issue_pending = mic_dma_issue_pending;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 7ea9184eaa13..c47c68e535e8 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -645,6 +645,7 @@ enum dmaengine_alignment { * The function takes a buffer of size buf_len. The callback function will * be called after period_len bytes have been transferred. * @device_prep_interleaved_dma: Transfer expression in a generic way. + * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address * @device_config: Pushes a new configuration to a channel, return 0 or an error * code * @device_pause: Pauses any transfer happening on a channel. Returns @@ -727,6 +728,9 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)( + struct dma_chan *chan, dma_addr_t dst, u64 data, + unsigned long flags); int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config); -- cgit v1.2.3
From b7f944411b4a628443f84a542858a8c78847bb48 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 29 Sep 2015 18:10:44 -0700 Subject: misc: mic: SCIF poll
SCIF poll allows both user and kernel mode clients to wait on events on a SCIF endpoint. These events include availability of space or data in the SCIF ring buffer, availability of connection requests on a listening endpoint and completion of connections when using async connects.
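A kernel mode usage sketch (illustration only; the function and the two endpoints are placeholders, only struct scif_pollepd and scif_poll() come from this patch), waiting up to one second for incoming data on two connected endpoints:

	#include <linux/kernel.h>
	#include <linux/poll.h>
	#include <linux/scif.h>

	static int wait_for_data(scif_epd_t epd0, scif_epd_t epd1)
	{
		struct scif_pollepd polls[] = {
			{ .epd = epd0, .events = POLLIN },
			{ .epd = epd1, .events = POLLIN },
		};
		int i, ret;

		/* Block for at most 1000 msecs for POLLIN on either endpoint */
		ret = scif_poll(polls, ARRAY_SIZE(polls), 1000);
		if (ret <= 0)
			return ret;	/* 0 on timeout, negative on error */
		for (i = 0; i < ARRAY_SIZE(polls); i++)
			if (polls[i].revents & POLLIN)
				pr_info("data ready on endpoint %d\n", i);
		return ret;
	}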
Reviewed-by: Nikhil Rao Reviewed-by: Sudeep Dutt Signed-off-by: Ashutosh Dixit Signed-off-by: Greg Kroah-Hartman
--- drivers/misc/mic/scif/scif_api.c | 158 +++++++++++++++++++++++++++++++++++++- drivers/misc/mic/scif/scif_epd.h | 22 ++++++ drivers/misc/mic/scif/scif_fd.c | 9 +++ drivers/misc/mic/scif/scif_main.h | 2 + include/linux/scif.h | 74 ++++++++++++++++++ 5 files changed, 264 insertions(+), 1 deletion(-) (limited to 'include/linux')
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c index f39d3135a9ef..bf2d70fcd055 100644 --- a/drivers/misc/mic/scif/scif_api.c +++ b/drivers/misc/mic/scif/scif_api.c @@ -37,9 +37,21 @@ enum conn_async_state { ASYNC_CONN_FLUSH_WORK /* async work flush in progress */ }; +/* + * File operations for anonymous inode file associated with a SCIF endpoint, + * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the + * poll API in the kernel and these take in a struct file *. Since a struct + * file is not available to kernel mode SCIF, it uses an anonymous file for + * this purpose. + */ const struct file_operations scif_anon_fops = { .owner = THIS_MODULE, }; scif_epd_t scif_open(void) { struct scif_endpt *ep; + int err; might_sleep(); ep = kzalloc(sizeof(*ep), GFP_KERNEL); @@ -50,6 +62,10 @@ scif_epd_t scif_open(void) if (!ep->qp_info.qp) goto err_qp_alloc; + err = scif_anon_inode_getfile(ep); + if (err) + goto err_anon_inode; + spin_lock_init(&ep->lock); mutex_init(&ep->sendlock); mutex_init(&ep->recvlock); @@ -59,6 +75,8 @@ scif_epd_t scif_open(void) "SCIFAPI open: ep %p success\n", ep); return ep; +err_anon_inode: + kfree(ep->qp_info.qp); err_qp_alloc: kfree(ep); err_ep_alloc: @@ -279,6 +297,7 @@ int scif_close(scif_epd_t epd) } } scif_put_port(ep->port.port); + scif_anon_inode_fput(ep); scif_teardown_ep(ep); scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD); return 0; @@ -558,8 +577,10 @@ void scif_conn_handler(struct work_struct *work) list_del(&ep->conn_list); } spin_unlock(&scif_info.nb_connect_lock); - if (ep) + if (ep) { ep->conn_err = scif_conn_func(ep); + wake_up_interruptible(&ep->conn_pend_wq); + } } while (ep); } @@ -660,6 +681,7 @@ int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block) ep->remote_dev = &scif_dev[dst->node]; ep->qp_info.qp->magic = SCIFEP_MAGIC; if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + init_waitqueue_head(&ep->conn_pend_wq); spin_lock(&scif_info.nb_connect_lock); list_add_tail(&ep->conn_list, &scif_info.nb_connect_list); spin_unlock(&scif_info.nb_connect_lock); @@ -788,6 +810,10 @@ retry_connection: goto scif_accept_error_qpalloc; } + err = scif_anon_inode_getfile(cep); + if (err) + goto scif_accept_error_anon_inode; + cep->qp_info.qp->magic = SCIFEP_MAGIC; spdev = scif_get_peer_dev(cep->remote_dev); if (IS_ERR(spdev)) { @@ -858,6 +884,8 @@ retry: spin_unlock(&cep->lock); return 0; scif_accept_error_map: + scif_anon_inode_fput(cep); +scif_accept_error_anon_inode: scif_teardown_ep(cep); scif_accept_error_qpalloc: kfree(cep); @@ -1247,6 +1275,134 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags) } EXPORT_SYMBOL_GPL(scif_recv); +static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq, + poll_table *p, struct scif_endpt *ep) +{ + /* + * Because poll_wait makes a GFP_KERNEL allocation, give up the lock + * and regrab it afterwards. Because the endpoint state might have + * changed while the lock was given up, the state must be checked + * again after re-acquiring the lock. The code in __scif_pollfd(..) + * does this.
+ */ + spin_unlock(&ep->lock); + poll_wait(f, wq, p); + spin_lock(&ep->lock); +} + +unsigned int +__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep) +{ + unsigned int mask = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]); + + spin_lock(&ep->lock); + + /* Endpoint is waiting for a non-blocking connect to complete */ + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep); + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED || + ep->conn_err) + mask |= POLLOUT; + goto exit; + } + } + + /* Endpoint is listening for incoming connection requests */ + if (ep->state == SCIFEP_LISTENING) { + _scif_poll_wait(f, &ep->conwq, wait, ep); + if (ep->state == SCIFEP_LISTENING) { + if (ep->conreqcnt) + mask |= POLLIN; + goto exit; + } + } + + /* Endpoint is connected or disconnected */ + if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) { + if (poll_requested_events(wait) & POLLIN) + _scif_poll_wait(f, &ep->recvwq, wait, ep); + if (poll_requested_events(wait) & POLLOUT) + _scif_poll_wait(f, &ep->sendwq, wait, ep); + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED) { + /* Data can be read without blocking */ + if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1)) + mask |= POLLIN; + /* Data can be written without blocking */ + if (scif_rb_space(&ep->qp_info.qp->outbound_q)) + mask |= POLLOUT; + /* Return POLLHUP if endpoint is disconnected */ + if (ep->state == SCIFEP_DISCONNECTED) + mask |= POLLHUP; + goto exit; + } + } + + /* Return POLLERR if the endpoint is in none of the above states */ + mask |= POLLERR; +exit: + spin_unlock(&ep->lock); + return mask; +} + +/** + * scif_poll() - Kernel mode SCIF poll + * @ufds: Array of scif_pollepd structures containing the end points + * and events to poll on + * @nfds: Size of the ufds array + * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout + * + * The code flow in this function is based on do_poll(..) in select.c + * + * Returns the number of endpoints which have pending events or 0 in + * the event of a timeout. If a signal is used for wake up, -EINTR is + * returned. + */ +int +scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs) +{ + struct poll_wqueues table; + poll_table *pt; + int i, mask, count = 0, timed_out = timeout_msecs == 0; + u64 timeout = timeout_msecs < 0 ? 
MAX_SCHEDULE_TIMEOUT + : msecs_to_jiffies(timeout_msecs); + + poll_initwait(&table); + pt = &table.pt; + while (1) { + for (i = 0; i < nfds; i++) { + pt->_key = ufds[i].events | POLLERR | POLLHUP; + mask = __scif_pollfd(ufds[i].epd->anon, + pt, ufds[i].epd); + mask &= ufds[i].events | POLLERR | POLLHUP; + if (mask) { + count++; + pt->_qproc = NULL; + } + ufds[i].revents = mask; + } + pt->_qproc = NULL; + if (!count) { + count = table.error; + if (signal_pending(current)) + count = -EINTR; + } + if (count || timed_out) + break; + + if (!schedule_timeout_interruptible(timeout)) + timed_out = 1; + } + poll_freewait(&table); + return count; +} +EXPORT_SYMBOL_GPL(scif_poll); + int scif_get_node_ids(u16 *nodes, int len, u16 *self) { int online = 0; diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h index 331322a25213..4dc4ccdbc236 100644 --- a/drivers/misc/mic/scif/scif_epd.h +++ b/drivers/misc/mic/scif/scif_epd.h @@ -96,7 +96,9 @@ struct scif_endpt_qp_info { * @conn_port: Connection port * @conn_err: Errors during connection * @conn_async_state: Async connection + * @conn_pend_wq: Used by poll while waiting for incoming connections * @conn_list: List of async connection requests + * @anon: anonymous file for use in kernel mode scif poll */ struct scif_endpt { enum scif_epd_state state; @@ -125,7 +127,9 @@ struct scif_endpt { struct scif_port_id conn_port; int conn_err; int conn_async_state; + wait_queue_head_t conn_pend_wq; struct list_head conn_list; + struct file *anon; }; static inline int scifdev_alive(struct scif_endpt *ep) @@ -133,6 +137,22 @@ static inline int scifdev_alive(struct scif_endpt *ep) return _scifdev_alive(ep->remote_dev); } +static inline int scif_anon_inode_getfile(scif_epd_t epd) +{ + epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0); + if (IS_ERR(epd->anon)) + return PTR_ERR(epd->anon); + return 0; +} + +static inline void scif_anon_inode_fput(scif_epd_t epd) +{ + if (epd->anon) { + fput(epd->anon); + epd->anon = NULL; + } +} + void scif_cleanup_zombie_epd(void); void scif_teardown_ep(void *endpt); void scif_cleanup_ep_qp(struct scif_endpt *ep); @@ -157,4 +177,6 @@ void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg); void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg); int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block); int __scif_flush(scif_epd_t epd); +unsigned int __scif_pollfd(struct file *f, poll_table *wait, + struct scif_endpt *ep); #endif /* SCIF_EPD_H */ diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c index eccf7e7135f9..24e47f73e45a 100644 --- a/drivers/misc/mic/scif/scif_fd.c +++ b/drivers/misc/mic/scif/scif_fd.c @@ -34,6 +34,13 @@ static int scif_fdclose(struct inode *inode, struct file *f) return scif_close(priv); } +static unsigned int scif_fdpoll(struct file *f, poll_table *wait) +{ + struct scif_endpt *priv = f->private_data; + + return __scif_pollfd(f, wait, priv); +} + static int scif_fdflush(struct file *f, fl_owner_t id) { struct scif_endpt *ep = f->private_data; @@ -193,6 +200,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) spin_unlock(&scif_info.eplock); /* Free the resources automatically created from the open. 
*/ + scif_anon_inode_fput(priv); scif_teardown_ep(priv); scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD); f->private_data = newep; @@ -298,6 +306,7 @@ const struct file_operations scif_fops = { .open = scif_fdopen, .release = scif_fdclose, .unlocked_ioctl = scif_fdioctl, + .poll = scif_fdpoll, .flush = scif_fdflush, .owner = THIS_MODULE, }; diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h index 580bc63e1b23..87c13279a8f0 100644 --- a/drivers/misc/mic/scif/scif_main.h +++ b/drivers/misc/mic/scif/scif_main.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -184,6 +185,7 @@ extern struct scif_info scif_info; extern struct idr scif_ports; extern struct scif_dev *scif_dev; extern const struct file_operations scif_fops; +extern const struct file_operations scif_anon_fops; /* Size of the RB for the Node QP */ #define SCIF_NODE_QP_SIZE 0x10000 diff --git a/include/linux/scif.h b/include/linux/scif.h index 44f4f3898bbe..b1923ad2ddd3 100644 --- a/include/linux/scif.h +++ b/include/linux/scif.h @@ -93,6 +93,18 @@ enum { typedef struct scif_endpt *scif_epd_t; +/** + * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll + * @epd: SCIF endpoint + * @events: requested events + * @revents: returned events + */ +struct scif_pollepd { + scif_epd_t epd; + short events; + short revents; +}; + #define SCIF_OPEN_FAILED ((scif_epd_t)-1) #define SCIF_REGISTER_FAILED ((off_t)-1) #define SCIF_MMAP_FAILED ((void *)-1) @@ -990,4 +1002,66 @@ int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, */ int scif_get_node_ids(u16 *nodes, int len, u16 *self); +/** + * scif_poll() - Wait for some event on an endpoint + * @epds: Array of endpoint descriptors + * @nepds: Length of epds + * @timeout: Upper limit on time for which scif_poll() will block + * + * scif_poll() waits for one of a set of endpoints to become ready to perform + * an I/O operation. + * + * The epds argument specifies the endpoint descriptors to be examined and the + * events of interest for each endpoint descriptor. epds is a pointer to an + * array with one member for each open endpoint descriptor of interest. + * + * The number of items in the epds array is specified in nepds. The epd field + * of scif_pollepd is an endpoint descriptor of an open endpoint. The field + * events is a bitmask specifying the events which the application is + * interested in. The field revents is an output parameter, filled by the + * kernel with the events that actually occurred. The bits returned in revents + * can include any of those specified in events, or one of the values POLLERR, + * POLLHUP, or POLLNVAL. (These three bits are meaningless in the events + * field, and will be set in the revents field whenever the corresponding + * condition is true.) + * + * If none of the events requested (and no error) has occurred for any of the + * endpoint descriptors, then scif_poll() blocks until one of the events occurs. + * + * The timeout argument specifies an upper limit on the time for which + * scif_poll() will block, in milliseconds. Specifying a negative value in + * timeout means an infinite timeout. + * + * The following bits may be set in events and returned in revents. + * POLLIN - Data may be received without blocking. For a connected + * endpoint, this means that scif_recv() may be called without blocking. For a + * listening endpoint, this means that scif_accept() may be called without + * blocking. + * POLLOUT - Data may be sent without blocking. 
For a connected endpoint, this + * means that scif_send() may be called without blocking. POLLOUT may also be + * used to block waiting for a non-blocking connect to complete. This bit value + * has no meaning for a listening endpoint and is ignored if specified. + * + * The following bits are only returned in revents, and are ignored if set in + * events. + * POLLERR - An error occurred on the endpoint + * POLLHUP - The connection to the peer endpoint was disconnected + * POLLNVAL - The specified endpoint descriptor is invalid. + * + * Return: + * Upon successful completion, scif_poll() returns a non-negative value. A + * positive value indicates the total number of endpoint descriptors that have + * been selected (that is, endpoint descriptors for which the revents member is + * non-zero). A value of 0 indicates that the call timed out and no endpoint + * descriptors have been selected. Otherwise in user mode -1 is returned and + * errno is set to indicate the error; in kernel mode the negative of one of + * the following errors is returned. + * + * Errors: + * EINTR - A signal occurred before any requested event + * EINVAL - The nepds argument is greater than {OPEN_MAX} + * ENOMEM - There was no space to allocate file descriptor tables + */ +int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); + #endif /* __SCIF_H__ */ -- cgit v1.2.3
From d3d912eb7386b7512f131b34407978cecccb3703 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 29 Sep 2015 18:11:15 -0700 Subject: misc: mic: Add support for kernel mode SCIF clients
Add support for registration/de-registration of kernel mode SCIF clients. SCIF clients are probed against both new and existing SCIF peer devices. Similarly, the client remove method is called when SCIF peer devices are removed. Changes to the SCIF peer device framework necessitated by supporting kernel mode SCIF clients are also included in this patch.
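A minimal kernel mode client sketch (illustration only; everything except struct scif_client, struct scif_peer_dev, scif_client_register() and scif_client_unregister() is made up):

	#include <linux/module.h>
	#include <linux/scif.h>

	static void my_probe(struct scif_peer_dev *spdev)
	{
		dev_info(&spdev->dev, "peer node %d came online\n", spdev->dnode);
	}

	static void my_remove(struct scif_peer_dev *spdev)
	{
		dev_info(&spdev->dev, "peer node %d went away\n", spdev->dnode);
	}

	static struct scif_client my_client = {
		.name	= KBUILD_MODNAME,
		.probe	= my_probe,
		.remove	= my_remove,
	};

	static int __init my_client_init(void)
	{
		return scif_client_register(&my_client);
	}

	static void __exit my_client_exit(void)
	{
		scif_client_unregister(&my_client);
	}

	module_init(my_client_init);
	module_exit(my_client_exit);
	MODULE_LICENSE("GPL");

Because clients sit on a subsys_interface, probe() also runs for peer devices that were already registered before the client loaded, which is the "new and existing" behaviour described above.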
Reviewed-by: Nikhil Rao Reviewed-by: Sudeep Dutt Signed-off-by: Ashutosh Dixit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/scif/scif_api.c | 43 +++++++++ drivers/misc/mic/scif/scif_main.c | 88 ++++-------------- drivers/misc/mic/scif/scif_main.h | 5 +- drivers/misc/mic/scif/scif_nm.c | 10 +-- drivers/misc/mic/scif/scif_nodeqp.c | 65 +++++--------- drivers/misc/mic/scif/scif_peer_bus.c | 165 +++++++++++++++++++++------------- drivers/misc/mic/scif/scif_peer_bus.h | 42 +-------- include/linux/scif.h | 58 ++++++++++++ 8 files changed, 255 insertions(+), 221 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c index bf2d70fcd055..b47d56d5d93c 100644 --- a/drivers/misc/mic/scif/scif_api.c +++ b/drivers/misc/mic/scif/scif_api.c @@ -1430,3 +1430,46 @@ int scif_get_node_ids(u16 *nodes, int len, u16 *self) return online; } EXPORT_SYMBOL_GPL(scif_get_node_ids); + +static int scif_add_client_dev(struct device *dev, struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->probe) + client->probe(spdev); + return 0; +} + +static void scif_remove_client_dev(struct device *dev, + struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->remove) + client->remove(spdev); +} + +void scif_client_unregister(struct scif_client *client) +{ + subsys_interface_unregister(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_unregister); + +int scif_client_register(struct scif_client *client) +{ + struct subsys_interface *si = &client->si; + + si->name = client->name; + si->subsys = &scif_peer_bus; + si->add_dev = scif_add_client_dev; + si->remove_dev = scif_remove_client_dev; + + return subsys_interface_register(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_register); diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c index 6ce851f5c7e6..f90bd06900cb 100644 --- a/drivers/misc/mic/scif/scif_main.c +++ b/drivers/misc/mic/scif/scif_main.c @@ -80,35 +80,6 @@ irqreturn_t scif_intr_handler(int irq, void *data) return IRQ_HANDLED; } -static int scif_peer_probe(struct scif_peer_dev *spdev) -{ - struct scif_dev *scifdev = &scif_dev[spdev->dnode]; - - mutex_lock(&scif_info.conflock); - scif_info.total++; - scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid); - mutex_unlock(&scif_info.conflock); - rcu_assign_pointer(scifdev->spdev, spdev); - - /* In the future SCIF kernel client devices will be added here */ - return 0; -} - -static void scif_peer_remove(struct scif_peer_dev *spdev) -{ - struct scif_dev *scifdev = &scif_dev[spdev->dnode]; - - /* In the future SCIF kernel client devices will be removed here */ - spdev = rcu_dereference(scifdev->spdev); - if (spdev) - RCU_INIT_POINTER(scifdev->spdev, NULL); - synchronize_rcu(); - - mutex_lock(&scif_info.conflock); - scif_info.total--; - mutex_unlock(&scif_info.conflock); -} - static void scif_qp_setup_handler(struct work_struct *work) { struct scif_dev *scifdev = container_of(work, struct scif_dev, @@ -139,20 +110,13 @@ static void scif_qp_setup_handler(struct work_struct *work) } } -static int scif_setup_scifdev(struct scif_hw_dev *sdev) +static int scif_setup_scifdev(void) { + /* We support a maximum of 129 SCIF nodes including the mgmt node */ +#define MAX_SCIF_NODES 129 
int i; - u8 num_nodes; + u8 num_nodes = MAX_SCIF_NODES; - if (sdev->snode) { - struct mic_bootparam __iomem *bp = sdev->rdp; - - num_nodes = ioread8(&bp->tot_nodes); - } else { - struct mic_bootparam *bp = sdev->dp; - - num_nodes = bp->tot_nodes; - } scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL); if (!scif_dev) return -ENOMEM; @@ -163,7 +127,7 @@ static int scif_setup_scifdev(struct scif_hw_dev *sdev) scifdev->exit = OP_IDLE; init_waitqueue_head(&scifdev->disconn_wq); mutex_init(&scifdev->lock); - INIT_WORK(&scifdev->init_msg_work, scif_qp_response_ack); + INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device); INIT_DELAYED_WORK(&scifdev->p2p_dwork, scif_poll_qp_state); INIT_DELAYED_WORK(&scifdev->qp_dwork, @@ -181,27 +145,21 @@ static void scif_destroy_scifdev(void) static int scif_probe(struct scif_hw_dev *sdev) { - struct scif_dev *scifdev; + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; int rc; dev_set_drvdata(&sdev->dev, sdev); + scifdev->sdev = sdev; + if (1 == atomic_add_return(1, &g_loopb_cnt)) { - struct scif_dev *loopb_dev; + struct scif_dev *loopb_dev = &scif_dev[sdev->snode]; - rc = scif_setup_scifdev(sdev); - if (rc) - goto exit; - scifdev = &scif_dev[sdev->dnode]; - scifdev->sdev = sdev; - loopb_dev = &scif_dev[sdev->snode]; loopb_dev->sdev = sdev; rc = scif_setup_loopback_qp(loopb_dev); if (rc) - goto free_sdev; - } else { - scifdev = &scif_dev[sdev->dnode]; - scifdev->sdev = sdev; + goto exit; } + rc = scif_setup_intr_wq(scifdev); if (rc) goto destroy_loopb; @@ -237,8 +195,6 @@ destroy_intr: destroy_loopb: if (atomic_dec_and_test(&g_loopb_cnt)) scif_destroy_loopback_qp(&scif_dev[sdev->snode]); -free_sdev: - scif_destroy_scifdev(); exit: return rc; } @@ -290,13 +246,6 @@ static void scif_remove(struct scif_hw_dev *sdev) scifdev->sdev = NULL; } -static struct scif_peer_driver scif_peer_driver = { - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .probe = scif_peer_probe, - .remove = scif_peer_remove, -}; - static struct scif_hw_dev_id id_table[] = { { MIC_SCIF_DEV, SCIF_DEV_ANY_ID }, { 0 }, @@ -312,6 +261,8 @@ static struct scif_driver scif_driver = { static int _scif_init(void) { + int rc; + spin_lock_init(&scif_info.eplock); spin_lock_init(&scif_info.nb_connect_lock); spin_lock_init(&scif_info.port_lock); @@ -326,10 +277,15 @@ static int _scif_init(void) init_waitqueue_head(&scif_info.exitwq); scif_info.en_msg_log = 0; scif_info.p2p_enable = 1; + rc = scif_setup_scifdev(); + if (rc) + goto error; INIT_WORK(&scif_info.misc_work, scif_misc_handler); INIT_WORK(&scif_info.conn_work, scif_conn_handler); idr_init(&scif_ports); return 0; +error: + return rc; } static void _scif_exit(void) @@ -347,12 +303,9 @@ static int __init scif_init(void) rc = scif_peer_bus_init(); if (rc) goto exit; - rc = scif_peer_register_driver(&scif_peer_driver); - if (rc) - goto peer_bus_exit; rc = scif_register_driver(&scif_driver); if (rc) - goto unreg_scif_peer; + goto peer_bus_exit; rc = misc_register(mdev); if (rc) goto unreg_scif; @@ -360,8 +313,6 @@ static int __init scif_init(void) return 0; unreg_scif: scif_unregister_driver(&scif_driver); -unreg_scif_peer: - scif_peer_unregister_driver(&scif_peer_driver); peer_bus_exit: scif_peer_bus_exit(); exit: @@ -374,7 +325,6 @@ static void __exit scif_exit(void) scif_exit_debugfs(); misc_deregister(&scif_info.mdev); scif_unregister_driver(&scif_driver); - scif_peer_unregister_driver(&scif_peer_driver); scif_peer_bus_exit(); _scif_exit(); } diff --git a/drivers/misc/mic/scif/scif_main.h 
b/drivers/misc/mic/scif/scif_main.h index 87c13279a8f0..b0795b059803 100644 --- a/drivers/misc/mic/scif/scif_main.h +++ b/drivers/misc/mic/scif/scif_main.h @@ -140,7 +140,7 @@ struct scif_p2p_info { * @db: doorbell the peer will trigger to generate an interrupt on self * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer * @cookie: Cookie received while registering the interrupt handler - * init_msg_work: work scheduled for SCIF_INIT message processing + * @peer_add_work: Work for handling device_add for peer devices * @p2p_dwork: Delayed work to enable polling for P2P state * @qp_dwork: Delayed work for enabling polling for remote QP information * @p2p_retry: Number of times to retry polling of P2P state @@ -166,7 +166,7 @@ struct scif_dev { int db; int rdb; struct mic_irq *cookie; - struct work_struct init_msg_work; + struct work_struct peer_add_work; struct delayed_work p2p_dwork; struct delayed_work qp_dwork; int p2p_retry; @@ -183,6 +183,7 @@ struct scif_dev { extern struct scif_info scif_info; extern struct idr scif_ports; +extern struct bus_type scif_peer_bus; extern struct scif_dev *scif_dev; extern const struct file_operations scif_fops; extern const struct file_operations scif_anon_fops; diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c index 9b4c5382d6a7..f1b1b97b3636 100644 --- a/drivers/misc/mic/scif/scif_nm.c +++ b/drivers/misc/mic/scif/scif_nm.c @@ -147,14 +147,8 @@ void scif_cleanup_scifdev(struct scif_dev *dev) void scif_handle_remove_node(int node) { struct scif_dev *scifdev = &scif_dev[node]; - struct scif_peer_dev *spdev; - - rcu_read_lock(); - spdev = rcu_dereference(scifdev->spdev); - rcu_read_unlock(); - if (spdev) - scif_peer_unregister_device(spdev); - else + + if (scif_peer_unregister_device(scifdev)) scif_send_acks(scifdev); } diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c index 6dfdae3452d6..5cdb9f088350 100644 --- a/drivers/misc/mic/scif/scif_nodeqp.c +++ b/drivers/misc/mic/scif/scif_nodeqp.c @@ -259,6 +259,11 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev, &qp->remote_qp->local_write, r_buf, get_count_order(remote_size)); + /* + * Because the node QP may already be processing an INIT message, set + * the read pointer so the cached read offset isn't lost + */ + qp->remote_qp->local_read = qp->inbound_q.current_read_offset; /* * resetup the inbound_q now that we know where the * inbound_read really is. @@ -529,27 +534,6 @@ static void scif_p2p_setup(void) } } -void scif_qp_response_ack(struct work_struct *work) -{ - struct scif_dev *scifdev = container_of(work, struct scif_dev, - init_msg_work); - struct scif_peer_dev *spdev; - - /* Drop the INIT message if it has already been received */ - if (_scifdev_alive(scifdev)) - return; - - spdev = scif_peer_register_device(scifdev); - if (IS_ERR(spdev)) - return; - - if (scif_is_mgmt_node()) { - mutex_lock(&scif_info.conflock); - scif_p2p_setup(); - mutex_unlock(&scif_info.conflock); - } -} - static char *message_types[] = {"BAD", "INIT", "EXIT", @@ -682,13 +666,14 @@ scif_init(struct scif_dev *scifdev, struct scifmsg *msg) * address to complete initializing the inbound_q. */ flush_delayed_work(&scifdev->qp_dwork); - /* - * Delegate the peer device registration to a workqueue, otherwise if - * SCIF client probe (called during peer device registration) calls - * scif_connect(..), it will block the message processing thread causing - * a deadlock. 
- */ - schedule_work(&scifdev->init_msg_work); + + scif_peer_register_device(scifdev); + + if (scif_is_mgmt_node()) { + mutex_lock(&scif_info.conflock); + scif_p2p_setup(); + mutex_unlock(&scif_info.conflock); + } } /** @@ -838,13 +823,13 @@ void scif_poll_qp_state(struct work_struct *work) msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT)); return; } - scif_peer_register_device(peerdev); return; timeout: dev_err(&peerdev->sdev->dev, "%s %d remote node %d offline, state = 0x%x\n", __func__, __LINE__, peerdev->node, qp->qp_state); qp->remote_qp->qp_state = SCIF_QP_OFFLINE; + scif_peer_unregister_device(peerdev); scif_cleanup_scifdev(peerdev); } @@ -894,6 +879,9 @@ scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg) goto local_error; peerdev->rdb = msg->payload[2]; qp->remote_qp->qp_state = SCIF_QP_ONLINE; + + scif_peer_register_device(peerdev); + schedule_delayed_work(&peerdev->p2p_dwork, 0); return; local_error: @@ -1169,7 +1157,6 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev) int err = 0; void *local_q; struct scif_qp *qp; - struct scif_peer_dev *spdev; err = scif_setup_intr_wq(scifdev); if (err) @@ -1216,15 +1203,11 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev) &qp->local_write, local_q, get_count_order(SCIF_NODE_QP_SIZE)); scif_info.nodeid = scifdev->node; - spdev = scif_peer_register_device(scifdev); - if (IS_ERR(spdev)) { - err = PTR_ERR(spdev); - goto free_local_q; - } + + scif_peer_register_device(scifdev); + scif_info.loopb_dev = scifdev; return err; -free_local_q: - kfree(local_q); free_qpairs: kfree(scifdev->qpairs); destroy_loopb_wq: @@ -1243,13 +1226,7 @@ exit: */ int scif_destroy_loopback_qp(struct scif_dev *scifdev) { - struct scif_peer_dev *spdev; - - rcu_read_lock(); - spdev = rcu_dereference(scifdev->spdev); - rcu_read_unlock(); - if (spdev) - scif_peer_unregister_device(spdev); + scif_peer_unregister_device(scifdev); destroy_workqueue(scif_info.loopb_wq); scif_destroy_intr_wq(scifdev); kfree(scifdev->qpairs->outbound_q.rb_base); diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c index 589ae9ad2501..547bf7bdd5c2 100644 --- a/drivers/misc/mic/scif/scif_peer_bus.c +++ b/drivers/misc/mic/scif/scif_peer_bus.c @@ -24,93 +24,138 @@ dev_to_scif_peer(struct device *dev) return container_of(dev, struct scif_peer_dev, dev); } -static inline struct scif_peer_driver * -drv_to_scif_peer(struct device_driver *drv) -{ - return container_of(drv, struct scif_peer_driver, driver); -} +struct bus_type scif_peer_bus = { + .name = "scif_peer_bus", +}; -static int scif_peer_dev_match(struct device *dv, struct device_driver *dr) +static void scif_peer_release_dev(struct device *d) { - return !strncmp(dev_name(dv), dr->name, 4); + struct scif_peer_dev *sdev = dev_to_scif_peer(d); + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + + scif_cleanup_scifdev(scifdev); + kfree(sdev); } -static int scif_peer_dev_probe(struct device *d) +static int scif_peer_initialize_device(struct scif_dev *scifdev) { - struct scif_peer_dev *dev = dev_to_scif_peer(d); - struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver); + struct scif_peer_dev *spdev; + int ret; - return drv->probe(dev); -} + spdev = kzalloc(sizeof(*spdev), GFP_KERNEL); + if (!spdev) { + ret = -ENOMEM; + goto err; + } -static int scif_peer_dev_remove(struct device *d) -{ - struct scif_peer_dev *dev = dev_to_scif_peer(d); - struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver); + spdev->dev.parent = scifdev->sdev->dev.parent; + spdev->dev.release = 
scif_peer_release_dev; + spdev->dnode = scifdev->node; + spdev->dev.bus = &scif_peer_bus; + dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode); + + device_initialize(&spdev->dev); + get_device(&spdev->dev); + rcu_assign_pointer(scifdev->spdev, spdev); - drv->remove(dev); + mutex_lock(&scif_info.conflock); + scif_info.total++; + scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid); + mutex_unlock(&scif_info.conflock); return 0; +err: + dev_err(&scifdev->sdev->dev, + "dnode %d: initialize_device rc %d\n", scifdev->node, ret); + return ret; } -static struct bus_type scif_peer_bus = { - .name = "scif_peer_bus", - .match = scif_peer_dev_match, - .probe = scif_peer_dev_probe, - .remove = scif_peer_dev_remove, -}; - -int scif_peer_register_driver(struct scif_peer_driver *driver) +static int scif_peer_add_device(struct scif_dev *scifdev) { - driver->driver.bus = &scif_peer_bus; - return driver_register(&driver->driver); + struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev); + int ret; + + ret = device_add(&spdev->dev); + put_device(&spdev->dev); + if (ret) { + dev_err(&scifdev->sdev->dev, + "dnode %d: peer device_add failed\n", scifdev->node); + goto put_spdev; + } + dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode); + return 0; +put_spdev: + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + put_device(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return ret; } -void scif_peer_unregister_driver(struct scif_peer_driver *driver) +void scif_add_peer_device(struct work_struct *work) { - driver_unregister(&driver->driver); + struct scif_dev *scifdev = container_of(work, struct scif_dev, + peer_add_work); + + scif_peer_add_device(scifdev); } -static void scif_peer_release_dev(struct device *d) +/* + * Peer device registration is split into a device_initialize and a device_add. + * The reason for doing this is as follows: First, peer device registration + * itself cannot be done in the message processing thread and must be delegated + * to another workqueue, otherwise if SCIF client probe, called during peer + * device registration, calls scif_connect(..), it will block the message + * processing thread causing a deadlock. Next, device_initialize is done in the + * "top-half" message processing thread and device_add in the "bottom-half" + * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing + * concurrently with SCIF_INIT message processing is unable to get a reference + * on the peer device, thereby failing the connect request. 
+ */ +void scif_peer_register_device(struct scif_dev *scifdev) { - struct scif_peer_dev *sdev = dev_to_scif_peer(d); - struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + int ret; - scif_cleanup_scifdev(scifdev); - kfree(sdev); + mutex_lock(&scifdev->lock); + ret = scif_peer_initialize_device(scifdev); + if (ret) + goto exit; + schedule_work(&scifdev->peer_add_work); +exit: + mutex_unlock(&scifdev->lock); } -struct scif_peer_dev * -scif_peer_register_device(struct scif_dev *scifdev) +int scif_peer_unregister_device(struct scif_dev *scifdev) { - int ret; struct scif_peer_dev *spdev; - spdev = kzalloc(sizeof(*spdev), GFP_KERNEL); - if (!spdev) - return ERR_PTR(-ENOMEM); - - spdev->dev.parent = scifdev->sdev->dev.parent; - spdev->dev.release = scif_peer_release_dev; - spdev->dnode = scifdev->node; - spdev->dev.bus = &scif_peer_bus; + mutex_lock(&scifdev->lock); + /* Flush work to ensure device register is complete */ + flush_work(&scifdev->peer_add_work); - dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode); /* - * device_register() causes the bus infrastructure to look for a - * matching driver. + * Continue holding scifdev->lock since theoretically unregister_device + * can be called simultaneously from multiple threads */ - ret = device_register(&spdev->dev); - if (ret) - goto free_spdev; - return spdev; -free_spdev: - kfree(spdev); - return ERR_PTR(ret); -} - -void scif_peer_unregister_device(struct scif_peer_dev *sdev) -{ - device_unregister(&sdev->dev); + spdev = rcu_dereference(scifdev->spdev); + if (!spdev) { + mutex_unlock(&scifdev->lock); + return -ENODEV; + } + + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + mutex_unlock(&scifdev->lock); + + dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode); + device_unregister(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return 0; } int scif_peer_bus_init(void) diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h index 33f0dbb30152..a3b8dd2edaa5 100644 --- a/drivers/misc/mic/scif/scif_peer_bus.h +++ b/drivers/misc/mic/scif/scif_peer_bus.h @@ -19,47 +19,13 @@ #include #include - -/* - * Peer devices show up as PCIe devices for the mgmt node but not the cards. - * The mgmt node discovers all the cards on the PCIe bus and informs the other - * cards about their peers. Upon notification of a peer a node adds a peer - * device to the peer bus to maintain symmetry in the way devices are - * discovered across all nodes in the SCIF network. - */ -/** - * scif_peer_dev - representation of a peer SCIF device - * @dev: underlying device - * @dnode - The destination node which this device will communicate with. - */ -struct scif_peer_dev { - struct device dev; - u8 dnode; -}; - -/** - * scif_peer_driver - operations for a scif_peer I/O driver - * @driver: underlying device driver (populate name and owner). - * @id_table: the ids serviced by this driver. - * @probe: the function to call when a device is found. Returns 0 or -errno. - * @remove: the function to call when a device is removed. 
- */ -struct scif_peer_driver { - struct device_driver driver; - const struct scif_peer_dev_id *id_table; - - int (*probe)(struct scif_peer_dev *dev); - void (*remove)(struct scif_peer_dev *dev); -}; +#include struct scif_dev; -int scif_peer_register_driver(struct scif_peer_driver *driver); -void scif_peer_unregister_driver(struct scif_peer_driver *driver); - -struct scif_peer_dev *scif_peer_register_device(struct scif_dev *sdev); -void scif_peer_unregister_device(struct scif_peer_dev *sdev); - +void scif_add_peer_device(struct work_struct *work); +void scif_peer_register_device(struct scif_dev *sdev); +int scif_peer_unregister_device(struct scif_dev *scifdev); int scif_peer_bus_init(void); void scif_peer_bus_exit(void); #endif /* _SCIF_PEER_BUS_H */ diff --git a/include/linux/scif.h b/include/linux/scif.h index b1923ad2ddd3..fd62c051b166 100644 --- a/include/linux/scif.h +++ b/include/linux/scif.h @@ -55,6 +55,7 @@ #include #include +#include #include #define SCIF_ACCEPT_SYNC 1 @@ -105,6 +106,37 @@ struct scif_pollepd { short revents; }; +/** + * scif_peer_dev - representation of a peer SCIF device + * + * Peer devices show up as PCIe devices for the mgmt node but not the cards. + * The mgmt node discovers all the cards on the PCIe bus and informs the other + * cards about their peers. Upon notification of a peer a node adds a peer + * device to the peer bus to maintain symmetry in the way devices are + * discovered across all nodes in the SCIF network. + * + * @dev: underlying device + * @dnode - The destination node which this device will communicate with. + */ +struct scif_peer_dev { + struct device dev; + u8 dnode; +}; + +/** + * scif_client - representation of a SCIF client + * @name: client name + * @probe - client method called when a peer device is registered + * @remove - client method called when a peer device is unregistered + * @si - subsys_interface used internally for implementing SCIF clients + */ +struct scif_client { + const char *name; + void (*probe)(struct scif_peer_dev *spdev); + void (*remove)(struct scif_peer_dev *spdev); + struct subsys_interface si; +}; + #define SCIF_OPEN_FAILED ((scif_epd_t)-1) #define SCIF_REGISTER_FAILED ((off_t)-1) #define SCIF_MMAP_FAILED ((void *)-1) @@ -1064,4 +1096,30 @@ int scif_get_node_ids(u16 *nodes, int len, u16 *self); */ int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); +/** + * scif_client_register() - Register a SCIF client + * @client: client to be registered + * + * scif_client_register() registers a SCIF client. The probe() method + * of the client is called when SCIF peer devices come online and the + * remove() method is called when the peer devices disappear. + * + * Return: + * Upon successful completion, scif_client_register() returns a non-negative + * value. Otherwise the return value is the same as subsys_interface_register() + * in the kernel. + */ +int scif_client_register(struct scif_client *client); + +/** + * scif_client_unregister() - Unregister a SCIF client + * @client: client to be unregistered + * + * scif_client_unregister() unregisters a SCIF client. 
+ * + * Return: + * None + */ +void scif_client_unregister(struct scif_client *client); + #endif /* __SCIF_H__ */ -- cgit v1.2.3 From d411e79391092925457d89b77d7cd3038ba6d04b Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 29 Sep 2015 18:13:54 -0700 Subject: misc: mic: Remove COSM functionality from the MIC card driver Since card side COSM functionality, to trigger MIC device shutdowns and communicate shutdown status to the host, is now moved into a separate COSM client driver, this patch removes this functionality from the base MIC card driver. The mic_bus driver is also updated to use the device index provided by COSM rather than maintain its own device index. Reviewed-by: Nikhil Rao Reviewed-by: Sudeep Dutt Signed-off-by: Ashutosh Dixit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/bus/mic_bus.c | 22 ++-------- drivers/misc/mic/card/mic_device.c | 88 ++------------------------------------ drivers/misc/mic/card/mic_x100.c | 2 +- drivers/misc/mic/common/mic_dev.h | 13 ++++++ include/linux/mic_bus.h | 3 +- 5 files changed, 23 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c index 961ae90aae47..c64955d8cbc1 100644 --- a/drivers/misc/mic/bus/mic_bus.c +++ b/drivers/misc/mic/bus/mic_bus.c @@ -25,9 +25,6 @@ #include #include -/* Unique numbering for mbus devices. */ -static DEFINE_IDA(mbus_index_ida); - static ssize_t device_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -147,7 +144,8 @@ static void mbus_release_dev(struct device *d) struct mbus_device * mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, - struct mbus_hw_ops *hw_ops, void __iomem *mmio_va) + struct mbus_hw_ops *hw_ops, int index, + void __iomem *mmio_va) { int ret; struct mbus_device *mbdev; @@ -166,13 +164,7 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, mbdev->dev.release = mbus_release_dev; mbdev->hw_ops = hw_ops; mbdev->dev.bus = &mic_bus; - - /* Assign a unique device index and hence name. 
*/ - ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL); - if (ret < 0) - goto free_mbdev; - - mbdev->index = ret; + mbdev->index = index; dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index); /* * device_register() causes the bus infrastructure to look for a @@ -180,10 +172,8 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, */ ret = device_register(&mbdev->dev); if (ret) - goto ida_remove; + goto free_mbdev; return mbdev; -ida_remove: - ida_simple_remove(&mbus_index_ida, mbdev->index); free_mbdev: kfree(mbdev); return ERR_PTR(ret); @@ -192,10 +182,7 @@ EXPORT_SYMBOL_GPL(mbus_register_device); void mbus_unregister_device(struct mbus_device *mbdev) { - int index = mbdev->index; /* save for after device release */ - device_unregister(&mbdev->dev); - ida_simple_remove(&mbus_index_ida, index); } EXPORT_SYMBOL_GPL(mbus_unregister_device); @@ -207,7 +194,6 @@ static int __init mbus_init(void) static void __exit mbus_exit(void) { bus_unregister(&mic_bus); - ida_destroy(&mbus_index_ida); } core_initcall(mbus_init); diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c index 6338908b2252..d0edaf7e0cd5 100644 --- a/drivers/misc/mic/card/mic_device.c +++ b/drivers/misc/mic/card/mic_device.c @@ -37,71 +37,6 @@ #include "mic_virtio.h" static struct mic_driver *g_drv; -static struct mic_irq *shutdown_cookie; - -static void mic_notify_host(u8 state) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(state, &bootparam->shutdown_status); - dev_dbg(mdrv->dev, "%s %d system_state %d\n", - __func__, __LINE__, state); - mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db)); -} - -static int mic_panic_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(-1, &bootparam->h2c_config_db); - iowrite8(-1, &bootparam->h2c_shutdown_db); - mic_notify_host(MIC_CRASHED); - return NOTIFY_DONE; -} - -static struct notifier_block mic_panic = { - .notifier_call = mic_panic_event, -}; - -static irqreturn_t mic_shutdown_isr(int irq, void *data) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - mic_ack_interrupt(&g_drv->mdev); - if (ioread8(&bootparam->shutdown_card)) - orderly_poweroff(true); - return IRQ_HANDLED; -} - -static int mic_shutdown_init(void) -{ - int rc = 0; - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - int shutdown_db; - - shutdown_db = mic_next_card_db(); - shutdown_cookie = mic_request_card_irq(mic_shutdown_isr, NULL, - "Shutdown", mdrv, shutdown_db); - if (IS_ERR(shutdown_cookie)) - rc = PTR_ERR(shutdown_cookie); - else - iowrite8(shutdown_db, &bootparam->h2c_shutdown_db); - return rc; -} - -static void mic_shutdown_uninit(void) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(-1, &bootparam->h2c_shutdown_db); - mic_free_card_irq(shutdown_cookie, mdrv); -} static int __init mic_dp_init(void) { @@ -359,11 +294,7 @@ int __init mic_driver_init(struct mic_driver *mdrv) u8 node_id; g_drv = mdrv; - /* - * Unloading the card module is not supported. The MIC card module - * handles fundamental operations like host/card initiated shutdowns - * and informing the host about card crashes and cannot be unloaded. - */ + /* Unloading the card module is not supported. 
*/ if (!try_module_get(mdrv->dev->driver->owner)) { rc = -ENODEV; goto done; @@ -374,12 +305,9 @@ int __init mic_driver_init(struct mic_driver *mdrv) rc = mic_init_irq(); if (rc) goto dp_uninit; - rc = mic_shutdown_init(); - if (rc) - goto irq_uninit; if (!mic_request_dma_chans(mdrv)) { rc = -ENODEV; - goto shutdown_uninit; + goto irq_uninit; } rc = mic_devices_init(mdrv); if (rc) @@ -390,21 +318,18 @@ int __init mic_driver_init(struct mic_driver *mdrv) NULL, &scif_hw_ops, 0, node_id, &mdrv->mdev.mmio, NULL, NULL, mdrv->dp, mdrv->dma_ch, - mdrv->num_dma_ch); + mdrv->num_dma_ch, true); if (IS_ERR(mdrv->scdev)) { rc = PTR_ERR(mdrv->scdev); goto device_uninit; } mic_create_card_debug_dir(mdrv); - atomic_notifier_chain_register(&panic_notifier_list, &mic_panic); done: return rc; device_uninit: mic_devices_uninit(mdrv); dma_free: mic_free_dma_chans(mdrv); -shutdown_uninit: - mic_shutdown_uninit(); irq_uninit: mic_uninit_irq(); dp_uninit: @@ -425,13 +350,6 @@ void mic_driver_uninit(struct mic_driver *mdrv) scif_unregister_device(mdrv->scdev); mic_devices_uninit(mdrv); mic_free_dma_chans(mdrv); - /* - * Inform the host about the shutdown status i.e. poweroff/restart etc. - * The module cannot be unloaded so the only code path to call - * mic_devices_uninit(..) is the shutdown callback. - */ - mic_notify_host(system_state); - mic_shutdown_uninit(); mic_uninit_irq(); mic_dp_uninit(); module_put(mdrv->dev->driver->owner); diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c index 77fd41781c2e..b2958ce2368c 100644 --- a/drivers/misc/mic/card/mic_x100.c +++ b/drivers/misc/mic/card/mic_x100.c @@ -261,7 +261,7 @@ static int __init mic_probe(struct platform_device *pdev) mic_hw_intr_init(mdrv); platform_set_drvdata(pdev, mdrv); mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC, - NULL, &mbus_hw_ops, + NULL, &mbus_hw_ops, 0, mdrv->mdev.mmio.va); if (IS_ERR(mdrv->dma_mbdev)) { rc = PTR_ERR(mdrv->dma_mbdev); diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h index 0b58c46045dc..50776772ebdf 100644 --- a/drivers/misc/mic/common/mic_dev.h +++ b/drivers/misc/mic/common/mic_dev.h @@ -21,6 +21,19 @@ #ifndef __MIC_DEV_H__ #define __MIC_DEV_H__ +/* The maximum number of MIC devices supported in a single host system. */ +#define MIC_MAX_NUM_DEVS 128 + +/** + * enum mic_hw_family - The hardware family to which a device belongs. + */ +enum mic_hw_family { + MIC_FAMILY_X100 = 0, + MIC_FAMILY_X200, + MIC_FAMILY_UNKNOWN, + MIC_FAMILY_LAST +}; + /** * struct mic_mw - MIC memory window * diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h index d5b5f76d57ef..27d7c95fd0da 100644 --- a/include/linux/mic_bus.h +++ b/include/linux/mic_bus.h @@ -91,7 +91,8 @@ struct mbus_hw_ops { struct mbus_device * mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, - struct mbus_hw_ops *hw_ops, void __iomem *mmio_va); + struct mbus_hw_ops *hw_ops, int index, + void __iomem *mmio_va); void mbus_unregister_device(struct mbus_device *mbdev); int mbus_register_driver(struct mbus_driver *drv); -- cgit v1.2.3 From a44f2630d78bfafc8eb5ab7d8b7b8cd4642ba749 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Tue, 29 Sep 2015 18:15:03 -0700 Subject: misc: mic: SCIF RMA header file and IOCTL changes This patch updates the SCIF header file and IOCTL interface with the changes required to support RMAs. APIs added include the ability to pin pages and register those pages with SCIF. 
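As a rough sketch of the kernel-mode flow these APIs enable (the function name and the page-aligned buffer are assumptions made for illustration; the calls themselves are documented in the scif.h changes below):

/* Expose a page-aligned kernel buffer to the peer of a connected
 * endpoint; returns the registered-space offset or a negative errno. */
static off_t demo_export_buffer(scif_epd_t epd, void *kbuf, size_t len)
{
	scif_pinned_pages_t pp;
	off_t off;
	int err;

	/* Pin the pages; SCIF_MAP_KERNEL marks kbuf as a kernel address. */
	err = scif_pin_pages(kbuf, len, SCIF_PROT_READ | SCIF_PROT_WRITE,
			     SCIF_MAP_KERNEL, &pp);
	if (err)
		return err;

	/* offset 0 without SCIF_MAP_FIXED lets SCIF pick the offset. */
	off = scif_register_pinned_pages(epd, pp, 0, 0);

	/*
	 * Drop our pin reference either way: on success the window holds
	 * its own reference, so the pages stay pinned until the window is
	 * scif_unregister()'d.
	 */
	scif_unpin_pages(pp);

	return off;
}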
SCIF kernel clients can also add references to remote registered pages and access them via the CPU. The user space IOCTL interface has been updated to enable SCIF registration, RDMA/CPU copies and fence APIs for RDMA synchronization. Reviewed-by: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Sudeep Dutt Signed-off-by: Greg Kroah-Hartman --- include/linux/scif.h | 234 ++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/scif_ioctl.h | 85 +++++++++++++++ 2 files changed, 309 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scif.h b/include/linux/scif.h index fd62c051b166..49a35d6edc94 100644 --- a/include/linux/scif.h +++ b/include/linux/scif.h @@ -93,6 +93,27 @@ enum { #define SCIF_PORT_RSVD 1088 typedef struct scif_endpt *scif_epd_t; +typedef struct scif_pinned_pages *scif_pinned_pages_t; + +/** + * struct scif_range - SCIF registered range used in kernel mode + * @cookie: cookie used internally by SCIF + * @nr_pages: number of pages of PAGE_SIZE + * @prot_flags: R/W protection + * @phys_addr: Array of bus addresses + * @va: Array of kernel virtual addresses backed by the pages in the phys_addr + * array. The va is populated only when called on the host for a remote + * SCIF connection on MIC. This is required to support the use case of DMA + * between MIC and another device which is not a SCIF node e.g., an IB or + * ethernet NIC. + */ +struct scif_range { + void *cookie; + int nr_pages; + int prot_flags; + dma_addr_t *phys_addr; + void __iomem **va; +}; /** * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll @@ -389,7 +410,6 @@ int scif_close(scif_epd_t epd); * Errors: * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - An invalid address was specified for a parameter * EINVAL - flags is invalid, or len is negative * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -442,7 +462,6 @@ int scif_send(scif_epd_t epd, void *msg, int len, int flags); * EAGAIN - The destination node is returning from a low power state * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - An invalid address was specified for a parameter * EINVAL - flags is invalid, or len is negative * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -505,9 +524,6 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags); * SCIF_PROT_READ - allow read operations from the window * SCIF_PROT_WRITE - allow write operations to the window * - * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a - * fixed offset. 
- * * Return: * Upon successful completion, scif_register() returns the offset at which the * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that @@ -520,7 +536,6 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags); * EAGAIN - The mapping could not be performed due to lack of resources * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is * set in flags, and offset is not a multiple of the page size, or addr is not a * multiple of the page size, or len is not a multiple of the page size, or is @@ -803,7 +818,6 @@ int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t * EACCESS - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid * EINVAL - rma_flags is invalid * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -884,7 +898,6 @@ int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, * EACCESS - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid * EINVAL - rma_flags is invalid * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -1028,11 +1041,212 @@ int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, * online nodes in the SCIF network including 'self'; otherwise in user mode * -1 is returned and errno is set to indicate the error; in kernel mode no * errors are returned. + */ +int scif_get_node_ids(u16 *nodes, int len, u16 *self); + +/** + * scif_pin_pages() - Pin a set of pages + * @addr: Virtual address of range to pin + * @len: Length of range to pin + * @prot_flags: Page protection flags + * @map_flags: Page classification flags + * @pinned_pages: Handle to pinned pages + * + * scif_pin_pages() pins (locks in physical memory) the physical pages which + * back the range of virtual address pages starting at addr and continuing for + * len bytes. addr and len are constrained to be multiples of the page size. A + * successful scif_pin_pages() call returns a handle to pinned_pages which may + * be used in subsequent calls to scif_register_pinned_pages(). + * + * The pages will remain pinned as long as there is a reference against the + * scif_pinned_pages_t value returned by scif_pin_pages() and until + * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A + * reference is added to a scif_pinned_pages_t value each time a window is + * created by calling scif_register_pinned_pages() and passing the + * scif_pinned_pages_t value. A reference is removed from a + * scif_pinned_pages_t value each time such a window is deleted. + * + * Subsequent operations which change the memory pages to which virtual + * addresses are mapped (such as mmap(), munmap()) have no effect on the + * scif_pinned_pages_t value or windows created against it. + * + * If the process will fork(), it is recommended that the registered + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent + * problems due to copy-on-write semantics. 
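In user space, the fork() guard suggested above is a single madvise() call (a sketch; buf and len stand for whatever range is about to be pinned):

#include <stddef.h>
#include <sys/mman.h>

/* Exempt a range that is about to be pinned/registered from being
 * inherited copy-on-write by children of a fork()ing process. */
static void demo_guard_for_fork(void *buf, size_t len)
{
	madvise(buf, len, MADV_DONTFORK);
}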
+ * + * The prot_flags argument is formed by OR'ing together one or more of the + * following values. + * SCIF_PROT_READ - allow read operations against the pages + * SCIF_PROT_WRITE - allow write operations against the pages + * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a + * kernel space address. By default, addr is interpreted as a user space + * address. + * + * Return: + * Upon successful completion, scif_pin_pages() returns 0; otherwise the + * negative of one of the following errors is returned. * * Errors: - * EFAULT - Bad address + * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative + * ENOMEM - Not enough space -int scif_get_node_ids(u16 *nodes, int len, u16 *self); +int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags, + scif_pinned_pages_t *pinned_pages); + +/** + * scif_unpin_pages() - Unpin a set of pages + * @pinned_pages: Handle to pinned pages to be unpinned + * + * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new + * windows against pinned_pages. The physical pages represented by pinned_pages + * will remain pinned until all windows previously registered against + * pinned_pages are deleted (the window is scif_unregister()'d and all + * references to the window are removed; see scif_unregister()). + * + * pinned_pages must have been obtained from a previous call to scif_pin_pages(). + * After calling scif_unpin_pages(), it is an error to pass pinned_pages to + * scif_register_pinned_pages(). + * + * Return: + * Upon successful completion, scif_unpin_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + * + * Errors: + * EINVAL - pinned_pages is not valid + */ +int scif_unpin_pages(scif_pinned_pages_t pinned_pages); + +/** + * scif_register_pinned_pages() - Mark a memory region for remote access. + * @epd: endpoint descriptor + * @pinned_pages: Handle to pinned pages + * @offset: Registered address space offset + * @map_flags: Flags which control where pages are mapped + * + * The scif_register_pinned_pages() function opens a window, a range of whole + * pages of the registered address space of the endpoint epd, starting at + * offset po. The value of po, further described below, is a function of the + * parameters offset and pinned_pages, and the value of map_flags. Each page of + * the window represents a corresponding physical memory page of the range + * represented by pinned_pages; the length of the window is the same as the + * length of the range represented by pinned_pages. A successful + * scif_register_pinned_pages() call returns po as the return value. + * + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset + * exactly, and offset is constrained to be a multiple of the page size. The + * mapping established by scif_register_pinned_pages() will not replace any + * existing registration; an error is returned if any page of the new window + * would intersect an existing window. + * + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an + * implementation-defined manner to arrive at po. The po so chosen will be an + * area of the registered address space that the implementation deems suitable + * for a mapping of the required size. An offset value of 0 is interpreted as + * granting the implementation complete freedom in selecting po, subject to + * constraints described below.
A non-zero value of offset is taken to be a + * suggestion of an offset near which the mapping should be placed. When the + * implementation selects a value for po, it does not replace any extant + * window. In all cases, po will be a multiple of the page size. + * + * The physical pages which are so represented by a window are available for + * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the + * physical pages represented by the window will not be reused by the memory + * subsystem for any other purpose. Note that the same physical page may be + * represented by multiple windows. + * + * Windows created by scif_register_pinned_pages() are unregistered by + * scif_unregister(). + * + * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a + * fixed offset. + * + * Return: + * Upon successful completion, scif_register_pinned_pages() returns the offset + * at which the mapping was placed (po); otherwise the negative of one of the + * following errors is returned. + * + * Errors: + * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window + * would intersect an existing window + * EAGAIN - The mapping could not be performed due to lack of resources + * ECONNRESET - Connection reset by peer + * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and + * offset is not a multiple of the page size, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +off_t scif_register_pinned_pages(scif_epd_t epd, + scif_pinned_pages_t pinned_pages, + off_t offset, int map_flags); + +/** + * scif_get_pages() - Add references to remote registered pages + * @epd: endpoint descriptor + * @offset: remote registered offset + * @len: length of range of pages + * @pages: returned scif_range structure + * + * scif_get_pages() returns the addresses of the physical pages represented by + * those pages of the registered address space of the peer of epd, starting at + * offset and continuing for len bytes. offset and len are constrained to be + * multiples of the page size. + * + * All of the pages in the specified range [offset, offset + len - 1] must be + * within a single window of the registered address space of the peer of epd. + * + * The addresses are returned as a virtually contiguous array pointed to by the + * phys_addr component of the scif_range structure whose address is returned in + * pages. The nr_pages component of scif_range is the length of the array. The + * prot_flags component of scif_range holds the protection flag value passed + * when the pages were registered. + * + * Each physical page whose address is returned by scif_get_pages() remains + * available and will not be released for reuse until the scif_range structure + * is returned in a call to scif_put_pages(). The scif_range structure returned + * by scif_get_pages() must be unmodified. + * + * It is an error to call scif_close() on an endpoint on which a scif_range + * structure of that endpoint has not been returned to scif_put_pages(). + * + * Return: + * Upon successful completion, scif_get_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + * Errors: + * ECONNRESET - Connection reset by peer. 
+ * EINVAL - offset is not a multiple of the page size, or offset is negative, or + * len is not a multiple of the page size + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid + * for the registered address space of the peer epd + */ +int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, + struct scif_range **pages); + +/** + * scif_put_pages() - Remove references from remote registered pages + * @pages: pages to be returned + * + * scif_put_pages() releases a scif_range structure previously obtained by + * calling scif_get_pages(). The physical pages represented by pages may + * be reused when the window which represented those pages is unregistered. + * Therefore, those pages must not be accessed after calling scif_put_pages(). + * + * Return: + * Upon successful completion, scif_put_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + * Errors: + * EINVAL - pages does not point to a valid scif_range structure, or + * the scif_range structure pointed to by pages was already returned + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + */ +int scif_put_pages(struct scif_range *pages); /** * scif_poll() - Wait for some event on an endpoint diff --git a/include/uapi/linux/scif_ioctl.h b/include/uapi/linux/scif_ioctl.h index 4a94d917cf99..d9048918be52 100644 --- a/include/uapi/linux/scif_ioctl.h +++ b/include/uapi/linux/scif_ioctl.h @@ -106,6 +106,82 @@ struct scifioctl_msg { __s32 out_len; }; +/** + * struct scifioctl_reg - used for SCIF_REG IOCTL + * @addr: starting virtual address + * @len: length of range + * @offset: offset of window + * @prot: read/write protection + * @flags: flags + * @out_offset: offset returned + */ +struct scifioctl_reg { + __u64 addr; + __u64 len; + __s64 offset; + __s32 prot; + __s32 flags; + __s64 out_offset; +}; + +/** + * struct scifioctl_unreg - used for SCIF_UNREG IOCTL + * @offset: start of range to unregister + * @len: length of range to unregister + */ +struct scifioctl_unreg { + __s64 offset; + __u64 len; +}; + +/** + * struct scifioctl_copy - used for SCIF DMA copy IOCTLs + * + * @loffset: offset in local registered address space to/from + * which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to/from + * which to copy + * @addr: user virtual address to/from which to copy + * @flags: flags + * + * This structure is used for SCIF_READFROM, SCIF_WRITETO, SCIF_VREADFROM + * and SCIF_VWRITETO IOCTL's.
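To illustrate the user-space side of this structure (a hedged sketch: it assumes an already-open, connected SCIF endpoint file descriptor, and the helper name is invented):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/scif_ioctl.h>

/* Issue a DMA write of len bytes from a local registered-space offset
 * to the peer's registered space on a connected SCIF endpoint. */
static int demo_writeto(int scif_fd, long long loff, long long roff,
			unsigned long long len)
{
	struct scifioctl_copy copy;

	memset(&copy, 0, sizeof(copy));
	copy.loffset = loff;	/* local registered offset (source) */
	copy.roffset = roff;	/* remote registered offset (destination) */
	copy.len = len;

	/* addr and flags stay 0: registered-to-registered copy. */
	return ioctl(scif_fd, SCIF_WRITETO, &copy);
}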
+ */ +struct scifioctl_copy { + __s64 loffset; + __u64 len; + __s64 roffset; + __u64 addr; + __s32 flags; +}; + +/** + * struct scifioctl_fence_mark - used for SCIF_FENCE_MARK IOCTL + * @flags: flags + * @mark: fence handle which is a pointer to a __s32 + */ +struct scifioctl_fence_mark { + __s32 flags; + __u64 mark; +}; + +/** + * struct scifioctl_fence_signal - used for SCIF_FENCE_SIGNAL IOCTL + * @loff: local offset + * @lval: value to write to loffset + * @roff: remote offset + * @rval: value to write to roffset + * @flags: flags + */ +struct scifioctl_fence_signal { + __s64 loff; + __u64 lval; + __s64 roff; + __u64 rval; + __s32 flags; +}; + /** * struct scifioctl_node_ids - used for SCIF_GET_NODEIDS IOCTL * @nodes: pointer to an array of node_ids @@ -125,6 +201,15 @@ struct scifioctl_node_ids { #define SCIF_ACCEPTREG _IOWR('s', 5, __u64) #define SCIF_SEND _IOWR('s', 6, struct scifioctl_msg) #define SCIF_RECV _IOWR('s', 7, struct scifioctl_msg) +#define SCIF_REG _IOWR('s', 8, struct scifioctl_reg) +#define SCIF_UNREG _IOWR('s', 9, struct scifioctl_unreg) +#define SCIF_READFROM _IOWR('s', 10, struct scifioctl_copy) +#define SCIF_WRITETO _IOWR('s', 11, struct scifioctl_copy) +#define SCIF_VREADFROM _IOWR('s', 12, struct scifioctl_copy) +#define SCIF_VWRITETO _IOWR('s', 13, struct scifioctl_copy) #define SCIF_GET_NODEIDS _IOWR('s', 14, struct scifioctl_node_ids) +#define SCIF_FENCE_MARK _IOWR('s', 15, struct scifioctl_fence_mark) +#define SCIF_FENCE_WAIT _IOWR('s', 16, __s32) +#define SCIF_FENCE_SIGNAL _IOWR('s', 17, struct scifioctl_fence_signal) #endif /* SCIF_IOCTL_H */ -- cgit v1.2.3 From 9af92fbff3b06d75470717361076aa7bd097ff8b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 10 Sep 2015 11:29:03 +0200 Subject: tty/serial: at91: move ATMEL_MAX_UART Move ATMEL_MAX_UART from platform_data/atmel.h to atmel_serial.c as this is the only file using it and it is common practice for tty/serial drivers to define it directly in the driver file. Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 6 ++++++ include/linux/platform_data/atmel.h | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 5ca5cf3e9359..98c84038518e 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -111,6 +111,12 @@ struct atmel_uart_char { #define ATMEL_SERIAL_RINGSIZE 1024 +/* + * at91: 6 USARTs and one DBGU port (SAM9260) + * avr32: 4 + */ +#define ATMEL_MAX_UART 7 + /* * We wrap our port structure around the generic uart_port. */ diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..c4bc90bfebe0 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -19,12 +19,6 @@ #include #include -/* - * at91: 6 USARTs and one DBGU port (SAM9260) - * avr32: 4 - */ -#define ATMEL_MAX_UART 7 - /* USB Device */ struct at91_udc_data { int vbus_pin; /* high == host powering us */ -- cgit v1.2.3 From 7bd1d4093c2fa37d1ecab05da3c9d48ea2af2264 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 22 Sep 2015 15:47:10 +0300 Subject: stm class: Introduce an abstraction for System Trace Module devices A System Trace Module (STM) is a device exporting data in System Trace Protocol (STP) format as defined by MIPI STP standards. Examples of such devices are Intel(R) Trace Hub and Coresight STM.
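A sketch of what the provider side of this abstraction looks like (a hypothetical "fake_stm" device; the field names and callback shape follow their use in the core code added below, so treat this as an assumption-laden outline rather than the canonical API):

#include <linux/module.h>
#include <linux/stm.h>

/* Pretend to emit an STP packet; a real driver would touch hardware. */
static ssize_t fake_packet(struct stm_data *data, unsigned int master,
			   unsigned int channel, unsigned int packet,
			   unsigned int flags, unsigned int size,
			   const unsigned char *payload)
{
	return size;
}

static struct stm_data fake_stm = {
	.name         = "fake_stm",
	.sw_start     = 0x10,	/* masters writable by software */
	.sw_end       = 0x3f,
	.sw_nchannels = 128,	/* channels per master */
	.packet       = fake_packet,
};

static int __init fake_init(void)
{
	return stm_register_device(NULL, &fake_stm, THIS_MODULE);
}
module_init(fake_init);

static void __exit fake_exit(void)
{
	stm_unregister_device(&fake_stm);
}
module_exit(fake_exit);

MODULE_LICENSE("GPL");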
This abstraction provides a unified interface for software trace sources to send their data over an STM device to a debug host. In order to do that, such a trace source needs to be assigned a pair of master/channel identifiers that all the data from this source will be tagged with. The STP decoder on the debug host side will use these master/channel tags to distinguish different trace streams from one another inside one STP stream. This abstraction provides a configfs-based policy management mechanism for dynamic allocation of these master/channel pairs based on a trace source-supplied string identifier. It has the flexibility of being defined at runtime while still remaining consistent, provided that the policy definition is aligned with the decoding end. For userspace trace sources, this abstraction provides write()-based and mmap()-based (if the underlying stm device allows this) output mechanisms. For kernel-side trace sources, we provide an "stm_source" device class that can be connected to an stm device at run time. Cc: linux-api@vger.kernel.org Reviewed-by: Mathieu Poirier Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/configfs-stp-policy | 48 + Documentation/ABI/testing/sysfs-class-stm | 14 + Documentation/ABI/testing/sysfs-class-stm_source | 11 + Documentation/ioctl/ioctl-number.txt | 3 + Documentation/trace/stm.txt | 80 ++ drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/hwtracing/stm/Kconfig | 8 + drivers/hwtracing/stm/Makefile | 3 + drivers/hwtracing/stm/core.c | 1029 ++++++++++++++++++++++ drivers/hwtracing/stm/policy.c | 529 +++++++++++ drivers/hwtracing/stm/stm.h | 87 ++ include/linux/stm.h | 126 +++ include/uapi/linux/stm.h | 50 ++ 14 files changed, 1991 insertions(+) create mode 100644 Documentation/ABI/testing/configfs-stp-policy create mode 100644 Documentation/ABI/testing/sysfs-class-stm create mode 100644 Documentation/ABI/testing/sysfs-class-stm_source create mode 100644 Documentation/trace/stm.txt create mode 100644 drivers/hwtracing/stm/Kconfig create mode 100644 drivers/hwtracing/stm/Makefile create mode 100644 drivers/hwtracing/stm/core.c create mode 100644 drivers/hwtracing/stm/policy.c create mode 100644 drivers/hwtracing/stm/stm.h create mode 100644 include/linux/stm.h create mode 100644 include/uapi/linux/stm.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/configfs-stp-policy b/Documentation/ABI/testing/configfs-stp-policy new file mode 100644 index 000000000000..421ce6825c66 --- /dev/null +++ b/Documentation/ABI/testing/configfs-stp-policy @@ -0,0 +1,48 @@ +What: /config/stp-policy +Date: June 2015 +KernelVersion: 4.3 +Description: + This group contains policies mandating Master/Channel allocation + for software sources wishing to send trace data over an STM + device. + +What: /config/stp-policy/<device>.<policy> +Date: June 2015 +KernelVersion: 4.3 +Description: + This group is the root of a policy; its name is a concatenation + of an stm device name to which this policy applies and an + arbitrary string. If the <device> part doesn't match an existing + stm device, mkdir will fail with ENODEV; if that device already + has a policy assigned to it, mkdir will fail with EBUSY. + +What: /config/stp-policy/<device>.<policy>/device +Date: June 2015 +KernelVersion: 4.3 +Description: + STM device to which this policy applies, read only. Same as the + <device> component of its parent directory.
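Taken together with the <node> entries that follow, creating a policy could look like this shell sketch (the device and rule names are illustrative, and assume an stm device called "dummy_stm" exists):

$ mkdir /config/stp-policy/dummy_stm.my-policy
$ mkdir /config/stp-policy/dummy_stm.my-policy/user
$ echo 48 63 > /config/stp-policy/dummy_stm.my-policy/user/masters
$ echo 0 127 > /config/stp-policy/dummy_stm.my-policy/user/channels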
+ +What: /config/stp-policy/<device>.<policy>/<node> +Date: June 2015 +KernelVersion: 4.3 +Description: + Policy node is a string identifier that software clients will + use to request a master/channel to be allocated and assigned to + them. + +What: /config/stp-policy/<device>.<policy>/<node>/masters +Date: June 2015 +KernelVersion: 4.3 +Description: + Range of masters from which to allocate for users of this node. + Write two numbers: the first master and the last master number. + +What: /config/stp-policy/<device>.<policy>/<node>/channels +Date: June 2015 +KernelVersion: 4.3 +Description: + Range of channels from which to allocate for users of this node. + Write two numbers: the first channel and the last channel + number. + diff --git a/Documentation/ABI/testing/sysfs-class-stm b/Documentation/ABI/testing/sysfs-class-stm new file mode 100644 index 000000000000..c9aa4f3fc9a7 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-stm @@ -0,0 +1,14 @@ +What: /sys/class/stm/<stm>/masters +Date: June 2015 +KernelVersion: 4.3 +Contact: Alexander Shishkin +Description: + Shows the first and last master numbers available to software + on this STM device. + +What: /sys/class/stm/<stm>/channels +Date: June 2015 +KernelVersion: 4.3 +Contact: Alexander Shishkin +Description: + Shows the number of channels per master on this STM device. diff --git a/Documentation/ABI/testing/sysfs-class-stm_source b/Documentation/ABI/testing/sysfs-class-stm_source new file mode 100644 index 000000000000..57b8dd39bbf7 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-stm_source @@ -0,0 +1,11 @@ +What: /sys/class/stm_source/<stm_source>/stm_source_link +Date: June 2015 +KernelVersion: 4.3 +Contact: Alexander Shishkin +Description: + stm_source device linkage to stm device, where its tracing data + is directed. Reads return an existing connection or "<none>" if + this stm_source is not connected to any stm device yet. + Write an existing (registered) stm device's name here to + connect that device. If a device is already connected to this + stm_source, it will first be disconnected. diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index df1b25eb8382..3785b7e131f8 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -81,6 +81,9 @@ Code Seq#(hex) Include File Comments 0x22 all scsi/sg.h '#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem '$' 00-0F linux/perf_counter.h, linux/perf_event.h +'%' 00-0F include/uapi/linux/stm.h + System Trace Module subsystem + '&' 00-07 drivers/firewire/nosy-user.h '1' 00-1F PPS kit from Ulrich Windl diff --git a/Documentation/trace/stm.txt b/Documentation/trace/stm.txt new file mode 100644 index 000000000000..ea035f9dbfd7 --- /dev/null +++ b/Documentation/trace/stm.txt @@ -0,0 +1,80 @@ +System Trace Module +=================== + +System Trace Module (STM) is a device described in MIPI STP specs as an +STP trace stream generator. STP (System Trace Protocol) is a trace +protocol multiplexing data from multiple trace sources, each one of +which is assigned a unique pair of master and channel. While some of +these masters and channels are statically allocated to certain +hardware trace sources, others are available to software. Software +trace sources are usually free to pick for themselves any +master/channel combination from this pool.
+ +On the receiving end of this STP stream (the decoder side), trace +sources can only be identified by master/channel combination, so in +order for the decoder to be able to make sense of the trace that +involves multiple trace sources, it needs to be able to map those +master/channel pairs to the trace sources that it understands. + +For instance, it is helpful to know that syslog messages come on +master 7 channel 15, while arbitrary user applications can use masters +48 to 63 and channels 0 to 127. + +To solve this mapping problem, stm class provides a policy management +mechanism via configfs that allows defining rules that map string +identifiers to ranges of masters and channels. If these rules (policy) +are consistent with what the decoder expects, it will be able to properly +process the trace data. + +This policy is a tree structure containing rules (policy_node) that +have a name (string identifier) and a range of masters and channels +associated with it, located in the "stp-policy" subsystem directory in +configfs. The topmost directory's name (the policy) is formatted as +the STM device name to which this policy applies and an arbitrary +string identifier separated by a dot. From the example above, a rule +may look like this: + +$ ls /config/stp-policy/dummy_stm.my-policy/user +channels masters +$ cat /config/stp-policy/dummy_stm.my-policy/user/masters +48 63 +$ cat /config/stp-policy/dummy_stm.my-policy/user/channels +0 127 + +which means that the master allocation pool for this rule consists of +masters 48 through 63 and the channel allocation pool has channels 0 +through 127 in it. Now, any producer (trace source) identifying itself +with the "user" identification string will be allocated a master and +channel from within these ranges. + +These rules can be nested, for example, one can define a rule "dummy" +under the "user" directory from the example above and this new rule will +be used for trace sources with the id string of "user/dummy". + +Trace sources have to open the stm class device's node and write their +trace data into its file descriptor. In order to identify themselves +to the policy, they need to do an STP_POLICY_ID_SET ioctl on this file +descriptor providing their id string. Otherwise, they will be +automatically allocated a master/channel pair upon first write to this +file descriptor according to the "default" rule of the policy, if such +exists. + +Some STM devices may allow direct mapping of the channel mmio regions +to userspace for zero-copy writing. One mappable page (in terms of +mmu) will usually contain multiple channels' mmios, so the user will +need to allocate that many channels to themselves (via the +aforementioned ioctl() call) to be able to do this. That is, if your +stm device's channel mmio region is 64 bytes and hardware page size is +4096 bytes, after a successful STP_POLICY_ID_SET ioctl() call with +width==64, you should be able to mmap() one page on this file +descriptor and obtain direct access to an mmio region for 64 channels. + +For kernel-based trace sources, there is the "stm_source" device +class. Devices of this class can be connected and disconnected to/from +stm devices at runtime via a sysfs attribute. + +Examples of STM devices are Intel(R) Trace Hub [1] and Coresight STM +[2].
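As a concrete sketch of the write()/ioctl() flow described above (the "/dev/dummy_stm" node name and the "user" policy node are assumptions carried over from the example policy; error handling is minimal):

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/stm.h>

int main(void)
{
	const char *node = "user";	/* policy node to identify with */
	struct stp_policy_id *id;
	size_t sz = sizeof(*id) + strlen(node) + 1;
	int fd = open("/dev/dummy_stm", O_WRONLY);

	if (fd < 0)
		return 1;

	id = calloc(1, sz);
	id->size = sz;
	id->width = 1;			/* request a single channel */
	strcpy(id->id, node);

	/* Get a master/channel pair from the "user" rule of the policy. */
	if (ioctl(fd, STP_POLICY_ID_SET, id) < 0)
		return 1;

	/* Data written now goes out tagged with the assigned pair. */
	write(fd, "hello", 5);

	free(id);
	close(fd);
	return 0;
}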
+ +[1] https://software.intel.com/sites/default/files/managed/d3/3c/intel-th-developer-manual.pdf +[2] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0444b/index.html diff --git a/drivers/Kconfig b/drivers/Kconfig index 46b4a8e0f859..b6e1cea63f81 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -188,4 +188,6 @@ source "drivers/nvdimm/Kconfig" source "drivers/nvmem/Kconfig" +source "drivers/hwtracing/stm/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index b250b36b54f2..e71b5e2a2c71 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -165,5 +165,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf/ obj-$(CONFIG_RAS) += ras/ obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ +obj-$(CONFIG_STM) += hwtracing/stm/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_NVMEM) += nvmem/ diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig new file mode 100644 index 000000000000..e101cb4f8e43 --- /dev/null +++ b/drivers/hwtracing/stm/Kconfig @@ -0,0 +1,8 @@ +config STM + tristate "System Trace Module devices" + help + A System Trace Module (STM) is a device exporting data in System + Trace Protocol (STP) format as defined by MIPI STP standards. + Examples of such devices are Intel(R) Trace Hub and Coresight STM. + + Say Y here to enable System Trace Module device support. diff --git a/drivers/hwtracing/stm/Makefile b/drivers/hwtracing/stm/Makefile new file mode 100644 index 000000000000..adec7016499f --- /dev/null +++ b/drivers/hwtracing/stm/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_STM) += stm_core.o + +stm_core-y := core.o policy.o diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c new file mode 100644 index 000000000000..b79c42c62557 --- /dev/null +++ b/drivers/hwtracing/stm/core.c @@ -0,0 +1,1029 @@ +/* + * System Trace Module (STM) infrastructure + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * STM class implements generic infrastructure for System Trace Module devices + * as defined in MIPI STPv2 specification. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "stm.h" + +#include + +static unsigned int stm_core_up; + +/* + * The SRCU here makes sure that STM device doesn't disappear from under a + * stm_source_write() caller, which may want to have as little overhead as + * possible. 
+ */ +static struct srcu_struct stm_source_srcu; + +static ssize_t masters_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stm_device *stm = to_stm_device(dev); + int ret; + + ret = sprintf(buf, "%u %u\n", stm->data->sw_start, stm->data->sw_end); + + return ret; +} + +static DEVICE_ATTR_RO(masters); + +static ssize_t channels_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stm_device *stm = to_stm_device(dev); + int ret; + + ret = sprintf(buf, "%u\n", stm->data->sw_nchannels); + + return ret; +} + +static DEVICE_ATTR_RO(channels); + +static struct attribute *stm_attrs[] = { + &dev_attr_masters.attr, + &dev_attr_channels.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(stm); + +static struct class stm_class = { + .name = "stm", + .dev_groups = stm_groups, +}; + +static int stm_dev_match(struct device *dev, const void *data) +{ + const char *name = data; + + return sysfs_streq(name, dev_name(dev)); +} + +/** + * stm_find_device() - find stm device by name + * @buf: character buffer containing the name + * + * This is called when either policy gets assigned to an stm device or an + * stm_source device gets linked to an stm device. + * + * This grabs device's reference (get_device()) and module reference, both + * of which the calling path needs to make sure to drop with stm_put_device(). + * + * Return: stm device pointer or null if lookup failed. + */ +struct stm_device *stm_find_device(const char *buf) +{ + struct stm_device *stm; + struct device *dev; + + if (!stm_core_up) + return NULL; + + dev = class_find_device(&stm_class, NULL, buf, stm_dev_match); + if (!dev) + return NULL; + + stm = to_stm_device(dev); + if (!try_module_get(stm->owner)) { + put_device(dev); + return NULL; + } + + return stm; +} + +/** + * stm_put_device() - drop references on the stm device + * @stm: stm device, previously acquired by stm_find_device() + * + * This drops the module reference and device reference taken by + * stm_find_device(). + */ +void stm_put_device(struct stm_device *stm) +{ + module_put(stm->owner); + put_device(&stm->dev); +} + +/* + * Internally we only care about software-writable masters here, that is the + * ones in the range [stm_data->sw_start..stm_data..sw_end], however we need + * original master numbers to be visible externally, since they are the ones + * that will appear in the STP stream. Thus, the internal bookkeeping uses + * $master - stm_data->sw_start to reference master descriptors and such. 
+ */ + +#define __stm_master(_s, _m) \ + ((_s)->masters[(_m) - (_s)->data->sw_start]) + +static inline struct stp_master * +stm_master(struct stm_device *stm, unsigned int idx) +{ + if (idx < stm->data->sw_start || idx > stm->data->sw_end) + return NULL; + + return __stm_master(stm, idx); +} + +static int stp_master_alloc(struct stm_device *stm, unsigned int idx) +{ + struct stp_master *master; + size_t size; + + size = ALIGN(stm->data->sw_nchannels, 8) / 8; + size += sizeof(struct stp_master); + master = kzalloc(size, GFP_ATOMIC); + if (!master) + return -ENOMEM; + + master->nr_free = stm->data->sw_nchannels; + __stm_master(stm, idx) = master; + + return 0; +} + +static void stp_master_free(struct stm_device *stm, unsigned int idx) +{ + struct stp_master *master = stm_master(stm, idx); + + if (!master) + return; + + __stm_master(stm, idx) = NULL; + kfree(master); +} + +static void stm_output_claim(struct stm_device *stm, struct stm_output *output) +{ + struct stp_master *master = stm_master(stm, output->master); + + if (WARN_ON_ONCE(master->nr_free < output->nr_chans)) + return; + + bitmap_allocate_region(&master->chan_map[0], output->channel, + ilog2(output->nr_chans)); + + master->nr_free -= output->nr_chans; +} + +static void +stm_output_disclaim(struct stm_device *stm, struct stm_output *output) +{ + struct stp_master *master = stm_master(stm, output->master); + + bitmap_release_region(&master->chan_map[0], output->channel, + ilog2(output->nr_chans)); + + output->nr_chans = 0; + master->nr_free += output->nr_chans; +} + +/* + * This is like bitmap_find_free_region(), except it can ignore @start bits + * at the beginning. + */ +static int find_free_channels(unsigned long *bitmap, unsigned int start, + unsigned int end, unsigned int width) +{ + unsigned int pos; + int i; + + for (pos = start; pos < end + 1; pos = ALIGN(pos, width)) { + pos = find_next_zero_bit(bitmap, end + 1, pos); + if (pos + width > end + 1) + break; + + if (pos & (width - 1)) + continue; + + for (i = 1; i < width && !test_bit(pos + i, bitmap); i++) + ; + if (i == width) + return pos; + } + + return -1; +} + +static unsigned int +stm_find_master_chan(struct stm_device *stm, unsigned int width, + unsigned int *mstart, unsigned int mend, + unsigned int *cstart, unsigned int cend) +{ + struct stp_master *master; + unsigned int midx; + int pos, err; + + for (midx = *mstart; midx <= mend; midx++) { + if (!stm_master(stm, midx)) { + err = stp_master_alloc(stm, midx); + if (err) + return err; + } + + master = stm_master(stm, midx); + + if (!master->nr_free) + continue; + + pos = find_free_channels(master->chan_map, *cstart, cend, + width); + if (pos < 0) + continue; + + *mstart = midx; + *cstart = pos; + return 0; + } + + return -ENOSPC; +} + +static int stm_output_assign(struct stm_device *stm, unsigned int width, + struct stp_policy_node *policy_node, + struct stm_output *output) +{ + unsigned int midx, cidx, mend, cend; + int ret = -EINVAL; + + if (width > stm->data->sw_nchannels) + return -EINVAL; + + if (policy_node) { + stp_policy_node_get_ranges(policy_node, + &midx, &mend, &cidx, &cend); + } else { + midx = stm->data->sw_start; + cidx = 0; + mend = stm->data->sw_end; + cend = stm->data->sw_nchannels - 1; + } + + spin_lock(&stm->mc_lock); + /* output is already assigned -- shouldn't happen */ + if (WARN_ON_ONCE(output->nr_chans)) + goto unlock; + + ret = stm_find_master_chan(stm, width, &midx, mend, &cidx, cend); + if (ret) + goto unlock; + + output->master = midx; + output->channel = cidx; + output->nr_chans = 
width; + stm_output_claim(stm, output); + dev_dbg(&stm->dev, "assigned %u:%u (+%u)\n", midx, cidx, width); + + ret = 0; +unlock: + spin_unlock(&stm->mc_lock); + + return ret; +} + +static void stm_output_free(struct stm_device *stm, struct stm_output *output) +{ + spin_lock(&stm->mc_lock); + if (output->nr_chans) + stm_output_disclaim(stm, output); + spin_unlock(&stm->mc_lock); +} + +static int major_match(struct device *dev, const void *data) +{ + unsigned int major = *(unsigned int *)data; + + return MAJOR(dev->devt) == major; +} + +static int stm_char_open(struct inode *inode, struct file *file) +{ + struct stm_file *stmf; + struct device *dev; + unsigned int major = imajor(inode); + int err = -ENODEV; + + dev = class_find_device(&stm_class, NULL, &major, major_match); + if (!dev) + return -ENODEV; + + stmf = kzalloc(sizeof(*stmf), GFP_KERNEL); + if (!stmf) + return -ENOMEM; + + stmf->stm = to_stm_device(dev); + + if (!try_module_get(stmf->stm->owner)) + goto err_free; + + file->private_data = stmf; + + return nonseekable_open(inode, file); + +err_free: + kfree(stmf); + + return err; +} + +static int stm_char_release(struct inode *inode, struct file *file) +{ + struct stm_file *stmf = file->private_data; + + stm_output_free(stmf->stm, &stmf->output); + stm_put_device(stmf->stm); + kfree(stmf); + + return 0; +} + +static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width) +{ + struct stm_device *stm = stmf->stm; + int ret; + + stmf->policy_node = stp_policy_node_lookup(stm, id); + + ret = stm_output_assign(stm, width, stmf->policy_node, &stmf->output); + + if (stmf->policy_node) + stp_policy_node_put(stmf->policy_node); + + return ret; +} + +static void stm_write(struct stm_data *data, unsigned int master, + unsigned int channel, const char *buf, size_t count) +{ + unsigned int flags = STP_PACKET_TIMESTAMPED; + const unsigned char *p = buf, nil = 0; + size_t pos; + ssize_t sz; + + for (pos = 0, p = buf; count > pos; pos += sz, p += sz) { + sz = min_t(unsigned int, count - pos, 8); + sz = data->packet(data, master, channel, STP_PACKET_DATA, flags, + sz, p); + flags = 0; + } + + data->packet(data, master, channel, STP_PACKET_FLAG, 0, 0, &nil); +} + +static ssize_t stm_char_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct stm_file *stmf = file->private_data; + struct stm_device *stm = stmf->stm; + char *kbuf; + int err; + + /* + * if no m/c have been assigned to this writer up to this + * point, use "default" policy entry + */ + if (!stmf->output.nr_chans) { + err = stm_file_assign(stmf, "default", 1); + /* + * EBUSY means that somebody else just assigned this + * output, which is just fine for write() + */ + if (err && err != -EBUSY) + return err; + } + + kbuf = kmalloc(count + 1, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + err = copy_from_user(kbuf, buf, count); + if (err) { + kfree(kbuf); + return -EFAULT; + } + + stm_write(stm->data, stmf->output.master, stmf->output.channel, kbuf, + count); + + kfree(kbuf); + + return count; +} + +static int stm_char_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct stm_file *stmf = file->private_data; + struct stm_device *stm = stmf->stm; + unsigned long size, phys; + + if (!stm->data->mmio_addr) + return -EOPNOTSUPP; + + if (vma->vm_pgoff) + return -EINVAL; + + size = vma->vm_end - vma->vm_start; + + if (stmf->output.nr_chans * stm->data->sw_mmiosz != size) + return -EINVAL; + + phys = stm->data->mmio_addr(stm->data, stmf->output.master, + stmf->output.channel, + 
stmf->output.nr_chans); + + if (!phys) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + vm_iomap_memory(vma, phys, size); + + return 0; +} + +static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg) +{ + struct stm_device *stm = stmf->stm; + struct stp_policy_id *id; + int ret = -EINVAL; + u32 size; + + if (stmf->output.nr_chans) + return -EBUSY; + + if (copy_from_user(&size, arg, sizeof(size))) + return -EFAULT; + + if (size >= PATH_MAX + sizeof(*id)) + return -EINVAL; + + /* + * size + 1 to make sure the .id string at the bottom is terminated, + * which is also why memdup_user() is not useful here + */ + id = kzalloc(size + 1, GFP_KERNEL); + if (!id) + return -ENOMEM; + + if (copy_from_user(id, arg, size)) { + ret = -EFAULT; + goto err_free; + } + + if (id->__reserved_0 || id->__reserved_1) + goto err_free; + + if (id->width < 1 || + id->width > PAGE_SIZE / stm->data->sw_mmiosz) + goto err_free; + + ret = stm_file_assign(stmf, id->id, id->width); + if (ret) + goto err_free; + + ret = 0; + + if (stm->data->link) + ret = stm->data->link(stm->data, stmf->output.master, + stmf->output.channel); + + if (ret) { + stm_output_free(stmf->stm, &stmf->output); + stm_put_device(stmf->stm); + } + +err_free: + kfree(id); + + return ret; +} + +static int stm_char_policy_get_ioctl(struct stm_file *stmf, void __user *arg) +{ + struct stp_policy_id id = { + .size = sizeof(id), + .master = stmf->output.master, + .channel = stmf->output.channel, + .width = stmf->output.nr_chans, + .__reserved_0 = 0, + .__reserved_1 = 0, + }; + + return copy_to_user(arg, &id, id.size) ? -EFAULT : 0; +} + +static long +stm_char_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct stm_file *stmf = file->private_data; + struct stm_data *stm_data = stmf->stm->data; + int err = -ENOTTY; + u64 options; + + switch (cmd) { + case STP_POLICY_ID_SET: + err = stm_char_policy_set_ioctl(stmf, (void __user *)arg); + if (err) + return err; + + return stm_char_policy_get_ioctl(stmf, (void __user *)arg); + + case STP_POLICY_ID_GET: + return stm_char_policy_get_ioctl(stmf, (void __user *)arg); + + case STP_SET_OPTIONS: + if (copy_from_user(&options, (u64 __user *)arg, sizeof(u64))) + return -EFAULT; + + if (stm_data->set_options) + err = stm_data->set_options(stm_data, + stmf->output.master, + stmf->output.channel, + stmf->output.nr_chans, + options); + + break; + default: + break; + } + + return err; +} + +#ifdef CONFIG_COMPAT +static long +stm_char_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return stm_char_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define stm_char_compat_ioctl NULL +#endif + +static const struct file_operations stm_fops = { + .open = stm_char_open, + .release = stm_char_release, + .write = stm_char_write, + .mmap = stm_char_mmap, + .unlocked_ioctl = stm_char_ioctl, + .compat_ioctl = stm_char_compat_ioctl, + .llseek = no_llseek, +}; + +static void stm_device_release(struct device *dev) +{ + struct stm_device *stm = to_stm_device(dev); + + kfree(stm); +} + +int stm_register_device(struct device *parent, struct stm_data *stm_data, + struct module *owner) +{ + struct stm_device *stm; + unsigned int nmasters; + int err = -ENOMEM; + + if (!stm_core_up) + return -EPROBE_DEFER; + + if (!stm_data->packet || !stm_data->sw_nchannels) + return -EINVAL; + + nmasters = stm_data->sw_end - stm_data->sw_start; + stm = kzalloc(sizeof(*stm) + nmasters * sizeof(void 
*), GFP_KERNEL); + if (!stm) + return -ENOMEM; + + stm->major = register_chrdev(0, stm_data->name, &stm_fops); + if (stm->major < 0) + goto err_free; + + device_initialize(&stm->dev); + stm->dev.devt = MKDEV(stm->major, 0); + stm->dev.class = &stm_class; + stm->dev.parent = parent; + stm->dev.release = stm_device_release; + + err = kobject_set_name(&stm->dev.kobj, "%s", stm_data->name); + if (err) + goto err_device; + + err = device_add(&stm->dev); + if (err) + goto err_device; + + spin_lock_init(&stm->link_lock); + INIT_LIST_HEAD(&stm->link_list); + + spin_lock_init(&stm->mc_lock); + mutex_init(&stm->policy_mutex); + stm->sw_nmasters = nmasters; + stm->owner = owner; + stm->data = stm_data; + stm_data->stm = stm; + + return 0; + +err_device: + put_device(&stm->dev); +err_free: + kfree(stm); + + return err; +} +EXPORT_SYMBOL_GPL(stm_register_device); + +static void __stm_source_link_drop(struct stm_source_device *src, + struct stm_device *stm); + +void stm_unregister_device(struct stm_data *stm_data) +{ + struct stm_device *stm = stm_data->stm; + struct stm_source_device *src, *iter; + int i; + + spin_lock(&stm->link_lock); + list_for_each_entry_safe(src, iter, &stm->link_list, link_entry) { + __stm_source_link_drop(src, stm); + } + spin_unlock(&stm->link_lock); + + synchronize_srcu(&stm_source_srcu); + + unregister_chrdev(stm->major, stm_data->name); + + mutex_lock(&stm->policy_mutex); + if (stm->policy) + stp_policy_unbind(stm->policy); + mutex_unlock(&stm->policy_mutex); + + for (i = 0; i < stm->sw_nmasters; i++) + stp_master_free(stm, i); + + device_unregister(&stm->dev); + stm_data->stm = NULL; +} +EXPORT_SYMBOL_GPL(stm_unregister_device); + +/** + * stm_source_link_add() - connect an stm_source device to an stm device + * @src: stm_source device + * @stm: stm device + * + * This function establishes a link from stm_source to an stm device so that + * the former can send out trace data to the latter. + * + * Return: 0 on success, -errno otherwise. 
+ */ +static int stm_source_link_add(struct stm_source_device *src, + struct stm_device *stm) +{ + char *id; + int err; + + spin_lock(&stm->link_lock); + spin_lock(&src->link_lock); + + /* src->link is dereferenced under stm_source_srcu but not the list */ + rcu_assign_pointer(src->link, stm); + list_add_tail(&src->link_entry, &stm->link_list); + + spin_unlock(&src->link_lock); + spin_unlock(&stm->link_lock); + + id = kstrdup(src->data->name, GFP_KERNEL); + if (id) { + src->policy_node = + stp_policy_node_lookup(stm, id); + + kfree(id); + } + + err = stm_output_assign(stm, src->data->nr_chans, + src->policy_node, &src->output); + + if (src->policy_node) + stp_policy_node_put(src->policy_node); + + if (err) + goto fail_detach; + + /* this is to notify the STM device that a new link has been made */ + if (stm->data->link) + err = stm->data->link(stm->data, src->output.master, + src->output.channel); + + if (err) + goto fail_free_output; + + /* this is to let the source carry out all necessary preparations */ + if (src->data->link) + src->data->link(src->data); + + return 0; + +fail_free_output: + stm_output_free(stm, &src->output); + stm_put_device(stm); + +fail_detach: + spin_lock(&stm->link_lock); + spin_lock(&src->link_lock); + + rcu_assign_pointer(src->link, NULL); + list_del_init(&src->link_entry); + + spin_unlock(&src->link_lock); + spin_unlock(&stm->link_lock); + + return err; +} + +/** + * __stm_source_link_drop() - detach stm_source from an stm device + * @src: stm_source device + * @stm: stm device + * + * If @stm is @src::link, disconnect them from one another and put the + * reference on the @stm device. + * + * Caller must hold stm::link_lock. + */ +static void __stm_source_link_drop(struct stm_source_device *src, + struct stm_device *stm) +{ + spin_lock(&src->link_lock); + if (WARN_ON_ONCE(src->link != stm)) { + spin_unlock(&src->link_lock); + return; + } + + stm_output_free(src->link, &src->output); + /* caller must hold stm::link_lock */ + list_del_init(&src->link_entry); + /* matches stm_find_device() from stm_source_link_store() */ + stm_put_device(src->link); + rcu_assign_pointer(src->link, NULL); + + spin_unlock(&src->link_lock); +} + +/** + * stm_source_link_drop() - detach stm_source from its stm device + * @src: stm_source device + * + * Unlinking means disconnecting from source's STM device; after this + * writes will be unsuccessful until it is linked to a new STM device. + * + * This will happen on "stm_source_link" sysfs attribute write to undo + * the existing link (if any), or on linked STM device's de-registration. + */ +static void stm_source_link_drop(struct stm_source_device *src) +{ + struct stm_device *stm; + int idx; + + idx = srcu_read_lock(&stm_source_srcu); + stm = srcu_dereference(src->link, &stm_source_srcu); + + if (stm) { + if (src->data->unlink) + src->data->unlink(src->data); + + spin_lock(&stm->link_lock); + __stm_source_link_drop(src, stm); + spin_unlock(&stm->link_lock); + } + + srcu_read_unlock(&stm_source_srcu, idx); +} + +static ssize_t stm_source_link_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stm_source_device *src = to_stm_source_device(dev); + struct stm_device *stm; + int idx, ret; + + idx = srcu_read_lock(&stm_source_srcu); + stm = srcu_dereference(src->link, &stm_source_srcu); + ret = sprintf(buf, "%s\n", + stm ? 
dev_name(&stm->dev) : ""); + srcu_read_unlock(&stm_source_srcu, idx); + + return ret; +} + +static ssize_t stm_source_link_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct stm_source_device *src = to_stm_source_device(dev); + struct stm_device *link; + int err; + + stm_source_link_drop(src); + + link = stm_find_device(buf); + if (!link) + return -EINVAL; + + err = stm_source_link_add(src, link); + if (err) + stm_put_device(link); + + return err ? : count; +} + +static DEVICE_ATTR_RW(stm_source_link); + +static struct attribute *stm_source_attrs[] = { + &dev_attr_stm_source_link.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(stm_source); + +static struct class stm_source_class = { + .name = "stm_source", + .dev_groups = stm_source_groups, +}; + +static void stm_source_device_release(struct device *dev) +{ + struct stm_source_device *src = to_stm_source_device(dev); + + kfree(src); +} + +/** + * stm_source_register_device() - register an stm_source device + * @parent: parent device + * @data: device description structure + * + * This will create a device of stm_source class that can write + * data to an stm device once linked. + * + * Return: 0 on success, -errno otherwise. + */ +int stm_source_register_device(struct device *parent, + struct stm_source_data *data) +{ + struct stm_source_device *src; + int err; + + if (!stm_core_up) + return -EPROBE_DEFER; + + src = kzalloc(sizeof(*src), GFP_KERNEL); + if (!src) + return -ENOMEM; + + device_initialize(&src->dev); + src->dev.class = &stm_source_class; + src->dev.parent = parent; + src->dev.release = stm_source_device_release; + + err = kobject_set_name(&src->dev.kobj, "%s", data->name); + if (err) + goto err; + + err = device_add(&src->dev); + if (err) + goto err; + + spin_lock_init(&src->link_lock); + INIT_LIST_HEAD(&src->link_entry); + src->data = data; + data->src = src; + + return 0; + +err: + put_device(&src->dev); + kfree(src); + + return err; +} +EXPORT_SYMBOL_GPL(stm_source_register_device); + +/** + * stm_source_unregister_device() - unregister an stm_source device + * @data: device description that was used to register the device + * + * This will remove a previously created stm_source device from the system. 
+ */ +void stm_source_unregister_device(struct stm_source_data *data) +{ + struct stm_source_device *src = data->src; + + stm_source_link_drop(src); + + device_destroy(&stm_source_class, src->dev.devt); +} +EXPORT_SYMBOL_GPL(stm_source_unregister_device); + +int stm_source_write(struct stm_source_data *data, unsigned int chan, + const char *buf, size_t count) +{ + struct stm_source_device *src = data->src; + struct stm_device *stm; + int idx; + + if (!src->output.nr_chans) + return -ENODEV; + + if (chan >= src->output.nr_chans) + return -EINVAL; + + idx = srcu_read_lock(&stm_source_srcu); + + stm = srcu_dereference(src->link, &stm_source_srcu); + if (stm) + stm_write(stm->data, src->output.master, + src->output.channel + chan, + buf, count); + else + count = -ENODEV; + + srcu_read_unlock(&stm_source_srcu, idx); + + return count; +} +EXPORT_SYMBOL_GPL(stm_source_write); + +static int __init stm_core_init(void) +{ + int err; + + err = class_register(&stm_class); + if (err) + return err; + + err = class_register(&stm_source_class); + if (err) + goto err_stm; + + err = stp_configfs_init(); + if (err) + goto err_src; + + init_srcu_struct(&stm_source_srcu); + + stm_core_up++; + + return 0; + +err_src: + class_unregister(&stm_source_class); +err_stm: + class_unregister(&stm_class); + + return err; +} + +module_init(stm_core_init); + +static void __exit stm_core_exit(void) +{ + cleanup_srcu_struct(&stm_source_srcu); + class_unregister(&stm_source_class); + class_unregister(&stm_class); + stp_configfs_exit(); +} + +module_exit(stm_core_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("System Trace Module device class"); +MODULE_AUTHOR("Alexander Shishkin "); diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c new file mode 100644 index 000000000000..6498a9dbb7bd --- /dev/null +++ b/drivers/hwtracing/stm/policy.c @@ -0,0 +1,529 @@ +/* + * System Trace Module (STM) master/channel allocation policy management + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * A master/channel allocation policy allows mapping string identifiers to + * master and channel ranges, where allocation can be done. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include "stm.h" + +/* + * STP Master/Channel allocation policy configfs layout. 
+ */ + +struct stp_policy { + struct config_group group; + struct stm_device *stm; +}; + +struct stp_policy_node { + struct config_group group; + struct stp_policy *policy; + unsigned int first_master; + unsigned int last_master; + unsigned int first_channel; + unsigned int last_channel; +}; + +static struct configfs_subsystem stp_policy_subsys; + +void stp_policy_node_get_ranges(struct stp_policy_node *policy_node, + unsigned int *mstart, unsigned int *mend, + unsigned int *cstart, unsigned int *cend) +{ + *mstart = policy_node->first_master; + *mend = policy_node->last_master; + *cstart = policy_node->first_channel; + *cend = policy_node->last_channel; +} + +static inline char *stp_policy_node_name(struct stp_policy_node *policy_node) +{ + return policy_node->group.cg_item.ci_name ? : ""; +} + +static inline struct stp_policy *to_stp_policy(struct config_item *item) +{ + return item ? + container_of(to_config_group(item), struct stp_policy, group) : + NULL; +} + +static inline struct stp_policy_node * +to_stp_policy_node(struct config_item *item) +{ + return item ? + container_of(to_config_group(item), struct stp_policy_node, + group) : + NULL; +} + +static ssize_t stp_policy_node_masters_show(struct stp_policy_node *policy_node, + char *page) +{ + ssize_t count; + + count = sprintf(page, "%u %u\n", policy_node->first_master, + policy_node->last_master); + + return count; +} + +static ssize_t +stp_policy_node_masters_store(struct stp_policy_node *policy_node, + const char *page, size_t count) +{ + unsigned int first, last; + struct stm_device *stm; + char *p = (char *)page; + ssize_t ret = -ENODEV; + + if (sscanf(p, "%u %u", &first, &last) != 2) + return -EINVAL; + + mutex_lock(&stp_policy_subsys.su_mutex); + stm = policy_node->policy->stm; + if (!stm) + goto unlock; + + /* must be within [sw_start..sw_end], which is an inclusive range */ + if (first > INT_MAX || last > INT_MAX || first > last || + first < stm->data->sw_start || + last > stm->data->sw_end) { + ret = -ERANGE; + goto unlock; + } + + ret = count; + policy_node->first_master = first; + policy_node->last_master = last; + +unlock: + mutex_unlock(&stp_policy_subsys.su_mutex); + + return ret; +} + +static ssize_t +stp_policy_node_channels_show(struct stp_policy_node *policy_node, char *page) +{ + ssize_t count; + + count = sprintf(page, "%u %u\n", policy_node->first_channel, + policy_node->last_channel); + + return count; +} + +static ssize_t +stp_policy_node_channels_store(struct stp_policy_node *policy_node, + const char *page, size_t count) +{ + unsigned int first, last; + struct stm_device *stm; + char *p = (char *)page; + ssize_t ret = -ENODEV; + + if (sscanf(p, "%u %u", &first, &last) != 2) + return -EINVAL; + + mutex_lock(&stp_policy_subsys.su_mutex); + stm = policy_node->policy->stm; + if (!stm) + goto unlock; + + if (first > INT_MAX || last > INT_MAX || first > last || + last >= stm->data->sw_nchannels) { + ret = -ERANGE; + goto unlock; + } + + ret = count; + policy_node->first_channel = first; + policy_node->last_channel = last; + +unlock: + mutex_unlock(&stp_policy_subsys.su_mutex); + + return ret; +} + +static void stp_policy_node_release(struct config_item *item) +{ + kfree(to_stp_policy_node(item)); +} + +struct stp_policy_node_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct stp_policy_node *, char *); + ssize_t (*store)(struct stp_policy_node *, const char *, size_t); +}; + +static ssize_t stp_policy_node_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ 
+ struct stp_policy_node *policy_node = to_stp_policy_node(item); + struct stp_policy_node_attribute *pn_attr = + container_of(attr, struct stp_policy_node_attribute, attr); + ssize_t count = 0; + + if (pn_attr->show) + count = pn_attr->show(policy_node, page); + + return count; +} + +static ssize_t stp_policy_node_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t len) +{ + struct stp_policy_node *policy_node = to_stp_policy_node(item); + struct stp_policy_node_attribute *pn_attr = + container_of(attr, struct stp_policy_node_attribute, attr); + ssize_t count = -EINVAL; + + if (pn_attr->store) + count = pn_attr->store(policy_node, page, len); + + return count; +} + +static struct configfs_item_operations stp_policy_node_item_ops = { + .release = stp_policy_node_release, + .show_attribute = stp_policy_node_attr_show, + .store_attribute = stp_policy_node_attr_store, +}; + +static struct stp_policy_node_attribute stp_policy_node_attr_range = { + .attr = { + .ca_owner = THIS_MODULE, + .ca_name = "masters", + .ca_mode = S_IRUGO | S_IWUSR, + }, + .show = stp_policy_node_masters_show, + .store = stp_policy_node_masters_store, +}; + +static struct stp_policy_node_attribute stp_policy_node_attr_channels = { + .attr = { + .ca_owner = THIS_MODULE, + .ca_name = "channels", + .ca_mode = S_IRUGO | S_IWUSR, + }, + .show = stp_policy_node_channels_show, + .store = stp_policy_node_channels_store, +}; + +static struct configfs_attribute *stp_policy_node_attrs[] = { + &stp_policy_node_attr_range.attr, + &stp_policy_node_attr_channels.attr, + NULL, +}; + +static struct config_item_type stp_policy_type; +static struct config_item_type stp_policy_node_type; + +static struct config_group * +stp_policy_node_make(struct config_group *group, const char *name) +{ + struct stp_policy_node *policy_node, *parent_node; + struct stp_policy *policy; + + if (group->cg_item.ci_type == &stp_policy_type) { + policy = container_of(group, struct stp_policy, group); + } else { + parent_node = container_of(group, struct stp_policy_node, + group); + policy = parent_node->policy; + } + + if (!policy->stm) + return ERR_PTR(-ENODEV); + + policy_node = kzalloc(sizeof(struct stp_policy_node), GFP_KERNEL); + if (!policy_node) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&policy_node->group, name, + &stp_policy_node_type); + + policy_node->policy = policy; + + /* default values for the attributes */ + policy_node->first_master = policy->stm->data->sw_start; + policy_node->last_master = policy->stm->data->sw_end; + policy_node->first_channel = 0; + policy_node->last_channel = policy->stm->data->sw_nchannels - 1; + + return &policy_node->group; +} + +static void +stp_policy_node_drop(struct config_group *group, struct config_item *item) +{ + config_item_put(item); +} + +static struct configfs_group_operations stp_policy_node_group_ops = { + .make_group = stp_policy_node_make, + .drop_item = stp_policy_node_drop, +}; + +static struct config_item_type stp_policy_node_type = { + .ct_item_ops = &stp_policy_node_item_ops, + .ct_group_ops = &stp_policy_node_group_ops, + .ct_attrs = stp_policy_node_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Root group: policies. 
+ */
+static struct configfs_attribute stp_policy_attr_device = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "device",
+	.ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *stp_policy_attrs[] = {
+	&stp_policy_attr_device,
+	NULL,
+};
+
+static ssize_t stp_policy_attr_show(struct config_item *item,
+				    struct configfs_attribute *attr,
+				    char *page)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	ssize_t count;
+
+	count = sprintf(page, "%s\n",
+			(policy && policy->stm) ?
+			policy->stm->data->name :
+			"<none>");
+
+	return count;
+}
+
+void stp_policy_unbind(struct stp_policy *policy)
+{
+	struct stm_device *stm = policy->stm;
+
+	if (WARN_ON_ONCE(!policy->stm))
+		return;
+
+	mutex_lock(&stm->policy_mutex);
+	stm->policy = NULL;
+	mutex_unlock(&stm->policy_mutex);
+
+	policy->stm = NULL;
+
+	stm_put_device(stm);
+}
+
+static void stp_policy_release(struct config_item *item)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+
+	stp_policy_unbind(policy);
+	kfree(policy);
+}
+
+static struct configfs_item_operations stp_policy_item_ops = {
+	.release		= stp_policy_release,
+	.show_attribute		= stp_policy_attr_show,
+};
+
+static struct configfs_group_operations stp_policy_group_ops = {
+	.make_group	= stp_policy_node_make,
+};
+
+static struct config_item_type stp_policy_type = {
+	.ct_item_ops	= &stp_policy_item_ops,
+	.ct_group_ops	= &stp_policy_group_ops,
+	.ct_attrs	= stp_policy_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_group *
+stp_policies_make(struct config_group *group, const char *name)
+{
+	struct config_group *ret;
+	struct stm_device *stm;
+	char *devname, *p;
+
+	devname = kasprintf(GFP_KERNEL, "%s", name);
+	if (!devname)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * node must look like <device_name>.<policy_name>, where
+	 * <device_name> is the name of an existing stm device and
+	 * <policy_name> is an arbitrary string
+	 */
+	p = strchr(devname, '.');
+	if (!p) {
+		kfree(devname);
+		return ERR_PTR(-EINVAL);
+	}
+
+	*p++ = '\0';
+
+	stm = stm_find_device(devname);
+	kfree(devname);
+
+	if (!stm)
+		return ERR_PTR(-ENODEV);
+
+	mutex_lock(&stm->policy_mutex);
+	if (stm->policy) {
+		ret = ERR_PTR(-EBUSY);
+		goto unlock_policy;
+	}
+
+	stm->policy = kzalloc(sizeof(*stm->policy), GFP_KERNEL);
+	if (!stm->policy) {
+		ret = ERR_PTR(-ENOMEM);
+		goto unlock_policy;
+	}
+
+	config_group_init_type_name(&stm->policy->group, name,
+				    &stp_policy_type);
+	stm->policy->stm = stm;
+
+	ret = &stm->policy->group;
+
+unlock_policy:
+	mutex_unlock(&stm->policy_mutex);
+
+	if (IS_ERR(ret))
+		stm_put_device(stm);
+
+	return ret;
+}
+
+static struct configfs_group_operations stp_policies_group_ops = {
+	.make_group	= stp_policies_make,
+};
+
+static struct config_item_type stp_policies_type = {
+	.ct_group_ops	= &stp_policies_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem stp_policy_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf	= "stp-policy",
+			.ci_type	= &stp_policies_type,
+		},
+	},
+};
+
+/*
+ * Lock the policy mutex from the outside
+ */
+static struct stp_policy_node *
+__stp_policy_node_lookup(struct stp_policy *policy, char *s)
+{
+	struct stp_policy_node *policy_node, *ret;
+	struct list_head *head = &policy->group.cg_children;
+	struct config_item *item;
+	char *start, *end = s;
+
+	if (list_empty(head))
+		return NULL;
+
+	/* return the first entry if everything else fails */
+	item = list_entry(head->next, struct config_item, ci_entry);
+	ret = to_stp_policy_node(item);
+
+next:
+	for (;;) {
+		start = strsep(&end, "/");
+		if (!start)
+			break;
+
+		if (!*start)
+			continue;
+
+
list_for_each_entry(item, head, ci_entry) { + policy_node = to_stp_policy_node(item); + + if (!strcmp(start, + policy_node->group.cg_item.ci_name)) { + ret = policy_node; + + if (!end) + goto out; + + head = &policy_node->group.cg_children; + goto next; + } + } + break; + } + +out: + return ret; +} + + +struct stp_policy_node * +stp_policy_node_lookup(struct stm_device *stm, char *s) +{ + struct stp_policy_node *policy_node = NULL; + + mutex_lock(&stp_policy_subsys.su_mutex); + + mutex_lock(&stm->policy_mutex); + if (stm->policy) + policy_node = __stp_policy_node_lookup(stm->policy, s); + mutex_unlock(&stm->policy_mutex); + + if (policy_node) + config_item_get(&policy_node->group.cg_item); + mutex_unlock(&stp_policy_subsys.su_mutex); + + return policy_node; +} + +void stp_policy_node_put(struct stp_policy_node *policy_node) +{ + config_item_put(&policy_node->group.cg_item); +} + +int __init stp_configfs_init(void) +{ + int err; + + config_group_init(&stp_policy_subsys.su_group); + mutex_init(&stp_policy_subsys.su_mutex); + err = configfs_register_subsystem(&stp_policy_subsys); + + return err; +} + +void __exit stp_configfs_exit(void) +{ + configfs_unregister_subsystem(&stp_policy_subsys); +} diff --git a/drivers/hwtracing/stm/stm.h b/drivers/hwtracing/stm/stm.h new file mode 100644 index 000000000000..cf33bf976abe --- /dev/null +++ b/drivers/hwtracing/stm/stm.h @@ -0,0 +1,87 @@ +/* + * System Trace Module (STM) infrastructure + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * STM class implements generic infrastructure for System Trace Module devices + * as defined in MIPI STPv2 specification. 
+ */ + +#ifndef _STM_STM_H_ +#define _STM_STM_H_ + +struct stp_policy; +struct stp_policy_node; + +struct stp_policy_node * +stp_policy_node_lookup(struct stm_device *stm, char *s); +void stp_policy_node_put(struct stp_policy_node *policy_node); +void stp_policy_unbind(struct stp_policy *policy); + +void stp_policy_node_get_ranges(struct stp_policy_node *policy_node, + unsigned int *mstart, unsigned int *mend, + unsigned int *cstart, unsigned int *cend); +int stp_configfs_init(void); +void stp_configfs_exit(void); + +struct stp_master { + unsigned int nr_free; + unsigned long chan_map[0]; +}; + +struct stm_device { + struct device dev; + struct module *owner; + struct stp_policy *policy; + struct mutex policy_mutex; + int major; + unsigned int sw_nmasters; + struct stm_data *data; + spinlock_t link_lock; + struct list_head link_list; + /* master allocation */ + spinlock_t mc_lock; + struct stp_master *masters[0]; +}; + +#define to_stm_device(_d) \ + container_of((_d), struct stm_device, dev) + +struct stm_output { + unsigned int master; + unsigned int channel; + unsigned int nr_chans; +}; + +struct stm_file { + struct stm_device *stm; + struct stp_policy_node *policy_node; + struct stm_output output; +}; + +struct stm_device *stm_find_device(const char *name); +void stm_put_device(struct stm_device *stm); + +struct stm_source_device { + struct device dev; + struct stm_source_data *data; + spinlock_t link_lock; + struct stm_device *link; + struct list_head link_entry; + /* one output per stm_source device */ + struct stp_policy_node *policy_node; + struct stm_output output; +}; + +#define to_stm_source_device(_d) \ + container_of((_d), struct stm_source_device, dev) + +#endif /* _STM_STM_H_ */ diff --git a/include/linux/stm.h b/include/linux/stm.h new file mode 100644 index 000000000000..9d0083d364e6 --- /dev/null +++ b/include/linux/stm.h @@ -0,0 +1,126 @@ +/* + * System Trace Module (STM) infrastructure apis + * Copyright (C) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */
+
+#ifndef _STM_H_
+#define _STM_H_
+
+#include <linux/device.h>
+
+/**
+ * enum stp_packet_type - STP packets that an STM driver sends
+ */
+enum stp_packet_type {
+	STP_PACKET_DATA = 0,
+	STP_PACKET_FLAG,
+	STP_PACKET_USER,
+	STP_PACKET_MERR,
+	STP_PACKET_GERR,
+	STP_PACKET_TRIG,
+	STP_PACKET_XSYNC,
+};
+
+/**
+ * enum stp_packet_flags - STP packet modifiers
+ */
+enum stp_packet_flags {
+	STP_PACKET_MARKED	= 0x1,
+	STP_PACKET_TIMESTAMPED	= 0x2,
+};
+
+struct stp_policy;
+
+struct stm_device;
+
+/**
+ * struct stm_data - STM device description and callbacks
+ * @name:		device name
+ * @stm:		internal structure, only used by stm class code
+ * @sw_start:		first STP master available to software
+ * @sw_end:		last STP master available to software
+ * @sw_nchannels:	number of STP channels per master
+ * @sw_mmiosz:		size of one channel's IO space, for mmap, optional
+ * @packet:		callback that sends an STP packet
+ * @mmio_addr:		mmap callback, optional
+ * @link:		called when a new stm_source gets linked to us, optional
+ * @unlink:		likewise for unlinking, again optional
+ * @set_options:	set device-specific options on a channel
+ *
+ * Fill out this structure before calling stm_register_device() to create
+ * an STM device and stm_unregister_device() to destroy it. It will also be
+ * passed back to @packet(), @mmio_addr(), @link(), @unlink() and @set_options()
+ * callbacks.
+ *
+ * Normally, an STM device will have a range of masters available to software
+ * and the rest being statically assigned to various hardware trace sources.
+ * The former is defined by the range [@sw_start..@sw_end] of the device
+ * description. That is, the lowest master that can be allocated to software
+ * writers is @sw_start and data from this writer will appear as @sw_start
+ * master in the STP stream.
+ */
+struct stm_data {
+	const char		*name;
+	struct stm_device	*stm;
+	unsigned int		sw_start;
+	unsigned int		sw_end;
+	unsigned int		sw_nchannels;
+	unsigned int		sw_mmiosz;
+	ssize_t			(*packet)(struct stm_data *, unsigned int,
+					  unsigned int, unsigned int,
+					  unsigned int, unsigned int,
+					  const unsigned char *);
+	phys_addr_t		(*mmio_addr)(struct stm_data *, unsigned int,
+					     unsigned int, unsigned int);
+	int			(*link)(struct stm_data *, unsigned int,
+					unsigned int);
+	void			(*unlink)(struct stm_data *, unsigned int,
+					  unsigned int);
+	long			(*set_options)(struct stm_data *, unsigned int,
+					       unsigned int, unsigned int,
+					       unsigned long);
+};
+
+int stm_register_device(struct device *parent, struct stm_data *stm_data,
+			struct module *owner);
+void stm_unregister_device(struct stm_data *stm_data);
+
+struct stm_source_device;
+
+/**
+ * struct stm_source_data - STM source device description and callbacks
+ * @name:	device name, will be used for policy lookup
+ * @src:	internal structure, only used by stm class code
+ * @nr_chans:	number of channels to allocate
+ * @link:	called when this source gets linked to an STM device
+ * @unlink:	called when this source is about to get unlinked from its STM
+ *
+ * Fill in this structure before calling stm_source_register_device() to
+ * register a source device. Also pass it to unregister and write calls.
+ */
+struct stm_source_data {
+	const char		*name;
+	struct stm_source_device *src;
+	unsigned int		percpu;
+	unsigned int		nr_chans;
+	int			(*link)(struct stm_source_data *data);
+	void			(*unlink)(struct stm_source_data *data);
+};
+
+int stm_source_register_device(struct device *parent,
+			       struct stm_source_data *data);
+void stm_source_unregister_device(struct stm_source_data *data);
+
+int stm_source_write(struct stm_source_data *data, unsigned int chan,
+		     const char *buf, size_t count);
+
+#endif /* _STM_H_ */
diff --git a/include/uapi/linux/stm.h b/include/uapi/linux/stm.h
new file mode 100644
index 000000000000..626a8d3f63b5
--- /dev/null
+++ b/include/uapi/linux/stm.h
@@ -0,0 +1,50 @@
+/*
+ * System Trace Module (STM) userspace interfaces
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * STM class implements generic infrastructure for System Trace Module devices
+ * as defined in MIPI STPv2 specification.
+ */
+
+#ifndef _UAPI_LINUX_STM_H
+#define _UAPI_LINUX_STM_H
+
+#include <linux/types.h>
+
+/**
+ * struct stp_policy_id - identification for the STP policy
+ * @size:	size of the structure including real id[] length
+ * @master:	assigned master
+ * @channel:	first assigned channel
+ * @width:	number of requested channels
+ * @id:		identification string
+ *
+ * User must calculate the total size of the structure and put it into
+ * @size field, fill out the @id and desired @width. In return, kernel
+ * fills out @master, @channel and @width.
+ */
+struct stp_policy_id {
+	__u32		size;
+	__u16		master;
+	__u16		channel;
+	__u16		width;
+	/* padding */
+	__u16		__reserved_0;
+	__u32		__reserved_1;
+	char		id[0];
+};
+
+#define STP_POLICY_ID_SET	_IOWR('%', 0, struct stp_policy_id)
+#define STP_POLICY_ID_GET	_IOR('%', 1, struct stp_policy_id)
+#define STP_SET_OPTIONS		_IOW('%', 2, __u64)
+
+#endif /* _UAPI_LINUX_STM_H */
-- cgit v1.2.3
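To make the provider side of this API concrete, a minimal software-only backend might fill out struct stm_data like this (an illustrative sketch, not part of the patch; the name and master/channel geometry are invented):

#include <linux/module.h>
#include <linux/stm.h>

static ssize_t dummy_stm_packet(struct stm_data *stm_data, unsigned int master,
				unsigned int channel, unsigned int packet,
				unsigned int flags, unsigned int size,
				const unsigned char *payload)
{
	/* a real driver would emit an STP packet to its sink here */
	return size;
}

static struct stm_data dummy_stm_data = {
	.name		= "dummy_stm",
	.sw_start	= 0,
	.sw_end		= 63,		/* masters 0..63 go to software */
	.sw_nchannels	= 0xffff,
	.packet		= dummy_stm_packet,
};

static int __init dummy_stm_init(void)
{
	/* no parent device for this software-only example */
	return stm_register_device(NULL, &dummy_stm_data, THIS_MODULE);
}

static void __exit dummy_stm_exit(void)
{
	stm_unregister_device(&dummy_stm_data);
}

module_init(dummy_stm_init);
module_exit(dummy_stm_exit);
MODULE_LICENSE("GPL v2");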
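Channel allocation is governed by an stp-policy. With configfs mounted at /sys/kernel/config and the sketched device above registered, a policy could be set up from userspace like so (illustrative only; policy and node names are invented):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	/* the policy directory must be named <device_name>.<policy_name> */
	const char *base = "/sys/kernel/config/stp-policy/dummy_stm.demo";
	char path[128];

	if (mkdir(base, 0755) && errno != EEXIST)
		perror(base);

	snprintf(path, sizeof(path), "%s/mytrace", base);
	mkdir(path, 0755);

	/* hand masters 16..31 and channels 0..63 to "mytrace" writers */
	snprintf(path, sizeof(path), "%s/mytrace/masters", base);
	write_str(path, "16 31");
	snprintf(path, sizeof(path), "%s/mytrace/channels", base);
	write_str(path, "0 63");

	return 0;
}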
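A userspace writer then claims channels through the STP_POLICY_ID_SET ioctl from the uapi header above and write()s its payload. Roughly (a sketch; error handling is abridged and the device node name is assumed):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/stm.h>

int main(void)
{
	const char *id = "mytrace";	/* matches the policy node above */
	size_t size = sizeof(struct stp_policy_id) + strlen(id) + 1;
	struct stp_policy_id *pol = calloc(1, size);
	int fd = open("/dev/dummy_stm", O_RDWR);

	if (!pol || fd < 0)
		return 1;

	pol->size = size;		/* total size, including id[] */
	pol->width = 2;			/* ask for two channels */
	strcpy(pol->id, id);

	if (ioctl(fd, STP_POLICY_ID_SET, pol))
		return 1;

	printf("got master %u, channel %u\n", pol->master, pol->channel);

	write(fd, "hello", 5);		/* data goes out on that channel */

	close(fd);
	free(pol);
	return 0;
}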
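Finally, a kernel-side trace producer that wants to be linkable to an STM device at runtime registers as an stm_source; a toy example (invented name, a single channel):

#include <linux/module.h>
#include <linux/stm.h>

static struct stm_source_data demo_source = {
	.name		= "demo_source",
	.nr_chans	= 1,
};

static int __init demo_source_init(void)
{
	/* parent device omitted for brevity */
	return stm_source_register_device(NULL, &demo_source);
}

static void __exit demo_source_exit(void)
{
	stm_source_unregister_device(&demo_source);
}

/* callable once linked via the stm_source_link sysfs attribute */
static void demo_source_emit(const char *msg, size_t len)
{
	stm_source_write(&demo_source, 0, msg, len);
}

module_init(demo_source_init);
module_exit(demo_source_exit);
MODULE_LICENSE("GPL v2");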
From b7bd1809e0784435791657502bc0d8280ad6f7ea Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso
Date: Wed, 30 Sep 2015 22:53:44 +0100
Subject: netfilter: nfnetlink_queue: get rid of nfnetlink_queue_ct.c

The original intention was to avoid dependencies between nfnetlink_queue
and conntrack without ifdef pollution. However, we can achieve this by
moving the conntrack dependent code into ctnetlink and keeping some glue
code to access the nfq_ct indirection from nfqueue.

After this patch, the nfq_ct indirection is always compiled into the
netfilter core to avoid polluting nfqueue with ifdefs. Thus, if
nf_conntrack is not compiled, this results in only 8 bytes of memory
waste on x86_64.

This patch also adds ctnetlink_nfqueue_seqadj() to avoid exposing the
nf_conn structure layout to nf_queue, which would create another
dependency on nf_conntrack at compilation time.
Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 12 ++-- include/net/netfilter/nfnetlink_queue.h | 51 -------------- net/netfilter/Makefile | 1 - net/netfilter/core.c | 9 ++- net/netfilter/nf_conntrack_netlink.c | 52 ++++++++++++++- net/netfilter/nfnetlink_queue_core.c | 52 +++++++++++---- net/netfilter/nfnetlink_queue_ct.c | 113 -------------------------------- 7 files changed, 103 insertions(+), 187 deletions(-) delete mode 100644 include/net/netfilter/nfnetlink_queue.h delete mode 100644 net/netfilter/nfnetlink_queue_ct.c (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 165ab2d14734..3e5e8f2b65f6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -369,14 +369,21 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; +#else +static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +#endif struct nf_conn; enum ip_conntrack_info; struct nlattr; struct nfq_ct_hook { + struct nf_conn *(*get_ct)(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); - int (*build)(struct sk_buff *skb, struct nf_conn *ct); + int (*build)(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u_int16_t ct_attr, u_int16_t ct_info_attr); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report); @@ -384,9 +391,6 @@ struct nfq_ct_hook { enum ip_conntrack_info ctinfo, s32 off); }; extern struct nfq_ct_hook __rcu *nfq_ct_hook; -#else -static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} -#endif /** * nf_skb_duplicated - TEE target has sent a packet diff --git a/include/net/netfilter/nfnetlink_queue.h b/include/net/netfilter/nfnetlink_queue.h deleted file mode 100644 index aff88ba91391..000000000000 --- a/include/net/netfilter/nfnetlink_queue.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _NET_NFNL_QUEUE_H_ -#define _NET_NFNL_QUEUE_H_ - -#include - -struct nf_conn; - -#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT -struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, - enum ip_conntrack_info *ctinfo); -struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, - const struct nlattr *attr, - enum ip_conntrack_info *ctinfo); -int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo); -void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, int diff); -int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr, - u32 portid, u32 report); -#else -inline struct nf_conn * -nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo) -{ - return NULL; -} - -inline struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, - const struct nlattr *attr, - enum ip_conntrack_info *ctinfo) -{ - return NULL; -} - -inline int -nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) -{ - return 0; -} - -inline void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, int diff) -{ -} - -inline int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr, - u32 portid, u32 report) -{ - return 0; -} -#endif /* NF_CONNTRACK */ -#endif 
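The shape of this decoupling is the familiar RCU-protected hook pointer: the always-built core exports a pointer, an optional module fills it in, and consumers tolerate its absence. A self-contained analog, with invented names, looks like this:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/cache.h>

struct demo_hook {
	int (*query)(int arg);
};

/* lives in the always-built core: */
struct demo_hook __rcu *demo_hook_ptr __read_mostly;
EXPORT_SYMBOL_GPL(demo_hook_ptr);

/* lives in the optional provider module: */
static int demo_query(int arg)
{
	return arg * 2;
}

static struct demo_hook demo_impl = {
	.query	= demo_query,
};

static int __init provider_init(void)
{
	RCU_INIT_POINTER(demo_hook_ptr, &demo_impl);
	return 0;
}

static void __exit provider_exit(void)
{
	RCU_INIT_POINTER(demo_hook_ptr, NULL);
	synchronize_rcu();	/* no reader sees &demo_impl after this */
}

/* lives in the consumer: */
static int consumer_query(int arg)
{
	struct demo_hook *hook;
	int ret = -EOPNOTSUPP;

	rcu_read_lock();
	hook = rcu_dereference(demo_hook_ptr);
	if (hook)
		ret = hook->query(arg);
	rcu_read_unlock();

	return ret;
}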
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 70d026d46fe7..4d68e72d59ab 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o nfnetlink_queue-y := nfnetlink_queue_core.o -nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 2e907335ee81..1412e3684c1c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -348,6 +348,12 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) } EXPORT_SYMBOL(skb_make_writable); +/* This needs to be compiled in any case to avoid dependencies between the + * nfnetlink_queue code and nf_conntrack. + */ +struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly; +EXPORT_SYMBOL_GPL(nfq_ct_hook); + #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence @@ -385,9 +391,6 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_conntrack_destroy); -struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly; -EXPORT_SYMBOL_GPL(nfq_ct_hook); - /* Built-in default zone used e.g. by modules. */ const struct nf_conntrack_zone nf_ct_zone_dflt = { .id = NF_CT_DEFAULT_ZONE_ID, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 94a66541e0b7..eb67bf8dd174 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2162,8 +2162,19 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) ; } -static int -ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) +static struct nf_conn *ctnetlink_nfqueue_get_ct(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo) +{ + struct nf_conn *ct; + + ct = nf_ct_get(skb, ctinfo); + if (ct && nf_ct_is_untracked(ct)) + ct = NULL; + + return ct; +} + +static int __ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; @@ -2235,6 +2246,31 @@ nla_put_failure: return -ENOSPC; } +static int +ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u_int16_t ct_attr, u_int16_t ct_info_attr) +{ + struct nlattr *nest_parms; + + nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + if (__ctnetlink_nfqueue_build(skb, ct) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + + if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -ENOSPC; +} + static int ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) { @@ -2350,12 +2386,22 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, return 0; } +static void ctnetlink_nfqueue_seqadj(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ + if (!(ct->status & IPS_NAT_MASK)) + return; + + nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); +} + static struct nfq_ct_hook ctnetlink_nfqueue_hook = { + .get_ct = ctnetlink_nfqueue_get_ct, .build_size = ctnetlink_nfqueue_build_size, .build = ctnetlink_nfqueue_build, .parse = ctnetlink_nfqueue_parse, 
.attach_expect = ctnetlink_nfqueue_attach_expect, - .seq_adjust = nf_ct_tcp_seqadj_set, + .seq_adjust = ctnetlink_nfqueue_seqadj, }; #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 41583e30051b..b1f1c747d518 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -28,12 +28,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include @@ -313,6 +313,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); + struct nfq_ct_hook *nfq_ct; bool csum_verify; char *secdata = NULL; u32 seclen = 0; @@ -364,8 +365,14 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, break; } - if (queue->flags & NFQA_CFG_F_CONNTRACK) - ct = nfqnl_ct_get(entskb, &size, &ctinfo); + if (queue->flags & NFQA_CFG_F_CONNTRACK) { + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct != NULL) { + ct = nfq_ct->get_ct(entskb, &ctinfo); + if (ct != NULL) + size += nfq_ct->build_size(ct); + } + } if (queue->flags & NFQA_CFG_F_UID_GID) { size += (nla_total_size(sizeof(u_int32_t)) /* uid */ @@ -508,7 +515,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) goto nla_put_failure; - if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + if (ct && nfq_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) goto nla_put_failure; if (cap_len > data_len && @@ -1001,6 +1008,28 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, return 0; } +static struct nf_conn *nfqnl_ct_parse(struct nfq_ct_hook *nfq_ct, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[], + struct nf_queue_entry *entry, + enum ip_conntrack_info *ctinfo) +{ + struct nf_conn *ct; + + ct = nfq_ct->get_ct(entry->skb, ctinfo); + if (ct == NULL) + return NULL; + + if (nfq_ct->parse(nfqa[NFQA_CT], ct) < 0) + return NULL; + + if (nfqa[NFQA_EXP]) + nfq_ct->attach_expect(nfqa[NFQA_EXP], ct, + NETLINK_CB(entry->skb).portid, + nlmsg_report(nlh)); + return ct; +} + static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1014,6 +1043,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, unsigned int verdict; struct nf_queue_entry *entry; enum ip_conntrack_info uninitialized_var(ctinfo); + struct nfq_ct_hook *nfq_ct; struct nf_conn *ct = NULL; struct net *net = sock_net(ctnl); @@ -1037,12 +1067,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (nfqa[NFQA_CT]) { - ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); - if (ct && nfqa[NFQA_EXP]) { - nfqnl_attach_expect(ct, nfqa[NFQA_EXP], - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - } + /* rcu lock already held from nfnl->call_rcu. 
*/ + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct != NULL) + ct = nfqnl_ct_parse(nfq_ct, nlh, nfqa, entry, &ctinfo); } if (nfqa[NFQA_PAYLOAD]) { @@ -1053,8 +1081,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, payload_len, entry, diff) < 0) verdict = NF_DROP; - if (ct) - nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff); + if (ct && diff) + nfq_ct->seq_adjust(entry->skb, ct, ctinfo, diff); } if (nfqa[NFQA_MARK]) diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c deleted file mode 100644 index 96cac50e0d12..000000000000 --- a/net/netfilter/nfnetlink_queue_ct.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * (C) 2012 by Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include -#include -#include -#include - -struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, - enum ip_conntrack_info *ctinfo) -{ - struct nfq_ct_hook *nfq_ct; - struct nf_conn *ct; - - /* rcu_read_lock()ed by __nf_queue already. */ - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct == NULL) - return NULL; - - ct = nf_ct_get(entskb, ctinfo); - if (ct) { - if (!nf_ct_is_untracked(ct)) - *size += nfq_ct->build_size(ct); - else - ct = NULL; - } - return ct; -} - -struct nf_conn * -nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr, - enum ip_conntrack_info *ctinfo) -{ - struct nfq_ct_hook *nfq_ct; - struct nf_conn *ct; - - /* rcu_read_lock()ed by __nf_queue already. */ - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct == NULL) - return NULL; - - ct = nf_ct_get(skb, ctinfo); - if (ct && !nf_ct_is_untracked(ct)) - nfq_ct->parse(attr, ct); - - return ct; -} - -int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - struct nfq_ct_hook *nfq_ct; - struct nlattr *nest_parms; - u_int32_t tmp; - - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct == NULL) - return 0; - - nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED); - if (!nest_parms) - goto nla_put_failure; - - if (nfq_ct->build(skb, ct) < 0) - goto nla_put_failure; - - nla_nest_end(skb, nest_parms); - - tmp = ctinfo; - if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp))) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -1; -} - -void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, int diff) -{ - struct nfq_ct_hook *nfq_ct; - - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct == NULL) - return; - - if ((ct->status & IPS_NAT_MASK) && diff) - nfq_ct->seq_adjust(skb, ct, ctinfo, diff); -} - -int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr, - u32 portid, u32 report) -{ - struct nfq_ct_hook *nfq_ct; - - if (nf_ct_is_untracked(ct)) - return 0; - - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct == NULL) - return -EOPNOTSUPP; - - return nfq_ct->attach_expect(attr, ct, portid, report); -} -- cgit v1.2.3 From dbe2256ddd8e8420c254c79f4045c41cb5f4bd6b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 25 Sep 2015 17:29:04 +0200 Subject: driver-core: platform: Provide helpers for multi-driver modules Some modules register several sub-drivers. Provide a helper that makes it easy to register and unregister a list of sub-drivers, as well as unwind properly on error. 
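Usage of the helpers introduced below boils down to the following pattern (a sketch with two made-up sub-drivers):

#include <linux/module.h>
#include <linux/platform_device.h>

static struct platform_driver foo_driver = {
	.driver = { .name = "demo-foo" },
};

static struct platform_driver bar_driver = {
	.driver = { .name = "demo-bar" },
};

static struct platform_driver * const demo_drivers[] = {
	&foo_driver,
	&bar_driver,
};

static int __init demo_init(void)
{
	/* registers both; unwinds foo if bar fails to register */
	return platform_register_drivers(demo_drivers,
					 ARRAY_SIZE(demo_drivers));
}

static void __exit demo_exit(void)
{
	/* unregisters in reverse order: bar, then foo */
	platform_unregister_drivers(demo_drivers, ARRAY_SIZE(demo_drivers));
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");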
Signed-off-by: Thierry Reding
Signed-off-by: Greg Kroah-Hartman
---
 Documentation/driver-model/platform.txt | 14 ++++++++
 drivers/base/platform.c                 | 61 +++++++++++++++++++++++++++++++++
 include/linux/platform_device.h         |  8 +++++
 3 files changed, 83 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt
index 07795ec51cde..e456696cfef2 100644
--- a/Documentation/driver-model/platform.txt
+++ b/Documentation/driver-model/platform.txt
@@ -63,6 +63,20 @@ runtime memory footprint:
 	int platform_driver_probe(struct platform_driver *drv,
 			  int (*probe)(struct platform_device *))
 
+Kernel modules can be composed of several platform drivers. The platform core
+provides helpers to register and unregister an array of drivers:
+
+	int __platform_register_drivers(struct platform_driver * const *drivers,
+					unsigned int count, struct module *owner);
+	void platform_unregister_drivers(struct platform_driver * const *drivers,
+					 unsigned int count);
+
+If one of the drivers fails to register, all drivers registered up to that
+point will be unregistered in reverse order. Note that there is a convenience
+macro that passes THIS_MODULE as owner parameter:
+
+	#define platform_register_drivers(drivers, count)
+
 Device Enumeration
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 07cec9ba5e70..1dd6d3bf1098 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -710,6 +710,67 @@ err_out:
 }
 EXPORT_SYMBOL_GPL(__platform_create_bundle);
 
+/**
+ * __platform_register_drivers - register an array of platform drivers
+ * @drivers: an array of drivers to register
+ * @count: the number of drivers to register
+ * @owner: module owning the drivers
+ *
+ * Registers platform drivers specified by an array. On failure to register a
+ * driver, all previously registered drivers will be unregistered. Callers of
+ * this API should use platform_unregister_drivers() to unregister drivers in
+ * the reverse order.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int __platform_register_drivers(struct platform_driver * const *drivers,
+				unsigned int count, struct module *owner)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < count; i++) {
+		pr_debug("registering platform driver %ps\n", drivers[i]);
+
+		err = __platform_driver_register(drivers[i], owner);
+		if (err < 0) {
+			pr_err("failed to register platform driver %ps: %d\n",
+			       drivers[i], err);
+			goto error;
+		}
+	}
+
+	return 0;
+
+error:
+	while (i--) {
+		pr_debug("unregistering platform driver %ps\n", drivers[i]);
+		platform_driver_unregister(drivers[i]);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(__platform_register_drivers);
+
+/**
+ * platform_unregister_drivers - unregister an array of platform drivers
+ * @drivers: an array of drivers to unregister
+ * @count: the number of drivers to unregister
+ *
+ * Unregisters platform drivers specified by an array. This is typically used
+ * to complement an earlier call to platform_register_drivers(). Drivers are
+ * unregistered in the reverse order in which they were registered.
+ */ +void platform_unregister_drivers(struct platform_driver * const *drivers, + unsigned int count) +{ + while (count--) { + pr_debug("unregistering platform driver %ps\n", drivers[count]); + platform_driver_unregister(drivers[count]); + } +} +EXPORT_SYMBOL_GPL(platform_unregister_drivers); + /* modalias support enables more hands-off userspace setup: * (a) environment variable lets new-style hotplug events work once system is * fully running: "modprobe $MODALIAS" diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bba08f44cc97..dc777be5f2e1 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -270,6 +270,14 @@ extern struct platform_device *__platform_create_bundle( struct resource *res, unsigned int n_res, const void *data, size_t size, struct module *module); +int __platform_register_drivers(struct platform_driver * const *drivers, + unsigned int count, struct module *owner); +void platform_unregister_drivers(struct platform_driver * const *drivers, + unsigned int count); + +#define platform_register_drivers(drivers, count) \ + __platform_register_drivers(drivers, count, THIS_MODULE) + /* early platform driver interface */ struct early_platform_driver { const char *class_str; -- cgit v1.2.3 From 7e50711993552800644a4667daa0f569a7665eca Mon Sep 17 00:00:00 2001 From: Grigoryev Denis Date: Fri, 2 Oct 2015 16:14:41 +0000 Subject: mfd: tps6105x: Use i2c regmap to access registers This patch modifies tps6105x and associated function driver to use regmap instead of operating directly on i2c. Signed-off-by: Denis Grigoryev Signed-off-by: Lee Jones --- drivers/mfd/tps6105x.c | 78 ++++++---------------------------- drivers/regulator/tps6105x-regulator.c | 16 +++---- include/linux/mfd/tps6105x.h | 10 ++--- 3 files changed, 24 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps6105x.c b/drivers/mfd/tps6105x.c index 5de95c265c1a..4a174cdb50b6 100644 --- a/drivers/mfd/tps6105x.c +++ b/drivers/mfd/tps6105x.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,73 +25,18 @@ #include #include -int tps6105x_set(struct tps6105x *tps6105x, u8 reg, u8 value) -{ - int ret; - - ret = mutex_lock_interruptible(&tps6105x->lock); - if (ret) - return ret; - ret = i2c_smbus_write_byte_data(tps6105x->client, reg, value); - mutex_unlock(&tps6105x->lock); - if (ret < 0) - return ret; - - return 0; -} -EXPORT_SYMBOL(tps6105x_set); - -int tps6105x_get(struct tps6105x *tps6105x, u8 reg, u8 *buf) -{ - int ret; - - ret = mutex_lock_interruptible(&tps6105x->lock); - if (ret) - return ret; - ret = i2c_smbus_read_byte_data(tps6105x->client, reg); - mutex_unlock(&tps6105x->lock); - if (ret < 0) - return ret; - - *buf = ret; - return 0; -} -EXPORT_SYMBOL(tps6105x_get); - -/* - * Masks off the bits in the mask and sets the bits in the bitvalues - * parameter in one atomic operation - */ -int tps6105x_mask_and_set(struct tps6105x *tps6105x, u8 reg, - u8 bitmask, u8 bitvalues) -{ - int ret; - u8 regval; - - ret = mutex_lock_interruptible(&tps6105x->lock); - if (ret) - return ret; - ret = i2c_smbus_read_byte_data(tps6105x->client, reg); - if (ret < 0) - goto fail; - regval = ret; - regval = (~bitmask & regval) | (bitmask & bitvalues); - ret = i2c_smbus_write_byte_data(tps6105x->client, reg, regval); -fail: - mutex_unlock(&tps6105x->lock); - if (ret < 0) - return ret; - - return 0; -} -EXPORT_SYMBOL(tps6105x_mask_and_set); +static struct regmap_config tps6105x_regmap_config = { + 
.reg_bits = 8, + .val_bits = 8, + .max_register = TPS6105X_REG_3, +}; static int tps6105x_startup(struct tps6105x *tps6105x) { int ret; - u8 regval; + unsigned int regval; - ret = tps6105x_get(tps6105x, TPS6105X_REG_0, ®val); + ret = regmap_read(tps6105x->regmap, TPS6105X_REG_0, ®val); if (ret) return ret; switch (regval >> TPS6105X_REG0_MODE_SHIFT) { @@ -145,11 +90,14 @@ static int tps6105x_probe(struct i2c_client *client, if (!tps6105x) return -ENOMEM; + tps6105x->regmap = devm_regmap_init_i2c(client, &tps6105x_regmap_config); + if (IS_ERR(tps6105x->regmap)) + return PTR_ERR(tps6105x->regmap); + i2c_set_clientdata(client, tps6105x); tps6105x->client = client; pdata = dev_get_platdata(&client->dev); tps6105x->pdata = pdata; - mutex_init(&tps6105x->lock); ret = tps6105x_startup(tps6105x); if (ret) { @@ -198,7 +146,7 @@ static int tps6105x_remove(struct i2c_client *client) mfd_remove_devices(&client->dev); /* Put chip in shutdown mode */ - tps6105x_mask_and_set(tps6105x, TPS6105X_REG_0, + regmap_update_bits(tps6105x->regmap, TPS6105X_REG_0, TPS6105X_REG0_MODE_MASK, TPS6105X_MODE_SHUTDOWN << TPS6105X_REG0_MODE_SHIFT); diff --git a/drivers/regulator/tps6105x-regulator.c b/drivers/regulator/tps6105x-regulator.c index 3510b3e7330a..ddc4f10e268a 100644 --- a/drivers/regulator/tps6105x-regulator.c +++ b/drivers/regulator/tps6105x-regulator.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -33,7 +33,7 @@ static int tps6105x_regulator_enable(struct regulator_dev *rdev) int ret; /* Activate voltage mode */ - ret = tps6105x_mask_and_set(tps6105x, TPS6105X_REG_0, + ret = regmap_update_bits(tps6105x->regmap, TPS6105X_REG_0, TPS6105X_REG0_MODE_MASK, TPS6105X_REG0_MODE_VOLTAGE << TPS6105X_REG0_MODE_SHIFT); if (ret) @@ -48,7 +48,7 @@ static int tps6105x_regulator_disable(struct regulator_dev *rdev) int ret; /* Set into shutdown mode */ - ret = tps6105x_mask_and_set(tps6105x, TPS6105X_REG_0, + ret = regmap_update_bits(tps6105x->regmap, TPS6105X_REG_0, TPS6105X_REG0_MODE_MASK, TPS6105X_REG0_MODE_SHUTDOWN << TPS6105X_REG0_MODE_SHIFT); if (ret) @@ -60,10 +60,10 @@ static int tps6105x_regulator_disable(struct regulator_dev *rdev) static int tps6105x_regulator_is_enabled(struct regulator_dev *rdev) { struct tps6105x *tps6105x = rdev_get_drvdata(rdev); - u8 regval; + unsigned int regval; int ret; - ret = tps6105x_get(tps6105x, TPS6105X_REG_0, ®val); + ret = regmap_read(tps6105x->regmap, TPS6105X_REG_0, ®val); if (ret) return ret; regval &= TPS6105X_REG0_MODE_MASK; @@ -78,10 +78,10 @@ static int tps6105x_regulator_is_enabled(struct regulator_dev *rdev) static int tps6105x_regulator_get_voltage_sel(struct regulator_dev *rdev) { struct tps6105x *tps6105x = rdev_get_drvdata(rdev); - u8 regval; + unsigned int regval; int ret; - ret = tps6105x_get(tps6105x, TPS6105X_REG_0, ®val); + ret = regmap_read(tps6105x->regmap, TPS6105X_REG_0, ®val); if (ret) return ret; @@ -96,7 +96,7 @@ static int tps6105x_regulator_set_voltage_sel(struct regulator_dev *rdev, struct tps6105x *tps6105x = rdev_get_drvdata(rdev); int ret; - ret = tps6105x_mask_and_set(tps6105x, TPS6105X_REG_0, + ret = regmap_update_bits(tps6105x->regmap, TPS6105X_REG_0, TPS6105X_REG0_VOLTAGE_MASK, selector << TPS6105X_REG0_VOLTAGE_SHIFT); if (ret) diff --git a/include/linux/mfd/tps6105x.h b/include/linux/mfd/tps6105x.h index 386743dd931c..8bc51180800a 100644 --- a/include/linux/mfd/tps6105x.h +++ b/include/linux/mfd/tps6105x.h @@ -10,6 +10,7 @@ #define MFD_TPS6105X_H #include +#include #include /* @@ -82,20 +83,15 @@ 
struct tps6105x_platform_data {
 
 /**
  * struct tps6105x - state holder for the TPS6105x drivers
- * @mutex: mutex to serialize I2C accesses
  * @i2c_client: corresponding I2C client
  * @regulator: regulator device if used in voltage mode
+ * @regmap: used for i2c communication on accessing registers
  */
 struct tps6105x {
 	struct tps6105x_platform_data *pdata;
-	struct mutex lock;
 	struct i2c_client	*client;
 	struct regulator_dev	*regulator;
+	struct regmap		*regmap;
 };
 
-extern int tps6105x_set(struct tps6105x *tps6105x, u8 reg, u8 value);
-extern int tps6105x_get(struct tps6105x *tps6105x, u8 reg, u8 *buf);
-extern int tps6105x_mask_and_set(struct tps6105x *tps6105x, u8 reg,
-				 u8 bitmask, u8 bitvalues);
-
 #endif
-- cgit v1.2.3

From e7eadb4de9e645e1b34539dc4128240b1e5f71dc Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Sun, 4 Oct 2015 21:08:09 -0700
Subject: ipv6: inet6_sk() should use sk_fullsock()

SYN_RECV & TIMEWAIT sockets are not full blown, they do not have a
pinet6 pointer.

Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener")
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/ipv6.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f1f32af6d9b9..0ef2a97ccdb5 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -264,9 +264,9 @@ struct tcp6_timewait_sock {
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
-static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
+static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk)
 {
-	return inet_sk(__sk)->pinet6;
+	return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL;
 }
 
 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
-- cgit v1.2.3

From 7741c373cf3ea1f5383fa97fb7a640a429d3dd7c Mon Sep 17 00:00:00 2001
From: Jon Ringle
Date: Thu, 1 Oct 2015 07:43:20 -0400
Subject: regmap: Allow installing custom reg_update_bits function

This commit allows installing a custom reg_update_bits function for cases
where the hardware provides a mechanism to set or clear register bits
without a read/modify/write cycle. Such is the case with the Microchip
ENCX24J600.

Signed-off-by: Jon Ringle
Signed-off-by: David S.
Miller --- drivers/base/regmap/internal.h | 3 +++ drivers/base/regmap/regmap.c | 25 +++++++++++++++++++++++++ include/linux/regmap.h | 4 ++++ 3 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index cc557886ab23..4036d7a90f63 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -98,6 +98,9 @@ struct regmap { int (*reg_read)(void *context, unsigned int reg, unsigned int *val); int (*reg_write)(void *context, unsigned int reg, unsigned int val); + int (*reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change, bool force_write); bool defer_caching; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index afaf56200674..70387c9f281b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -619,6 +619,7 @@ struct regmap *__regmap_init(struct device *dev, goto skip_format_initialization; } else { map->reg_read = _regmap_bus_read; + map->reg_update_bits = bus->reg_update_bits; } reg_endian = regmap_get_reg_endian(bus, config); @@ -2509,6 +2510,30 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, int ret; unsigned int tmp, orig; + if (map->reg_update_bits) { + ret = map->reg_update_bits(map->bus_context, reg, mask, val, + change, force_write); + if (ret != 0) + return ret; + + /* Fix up the cache by read/modify/write */ + if (!map->cache_bypass && !map->defer_caching) { + ret = regcache_read(map, reg, &orig); + if (ret != 0) + return ret; + + tmp = orig & ~mask; + tmp |= val & mask; + + ret = regcache_write(map, reg, tmp); + if (ret != 0) + return ret; + if (map->cache_only) + map->cache_dirty = true; + } + return ret; + } + ret = _regmap_read(map, reg, &orig); if (ret != 0) return ret; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..4d3a3b1680bb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,6 +296,9 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); +typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change, bool force_write); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -335,6 +338,7 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; + regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From bab18991871545dfbd10c931eb0fe8f7637156a9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 15:17:33 +0200 Subject: bpf, seccomp: prepare for upcoming criu support The current ongoing effort to dump existing cBPF seccomp filters back to user space requires to hold the pre-transformed instructions like we do in case of socket filters from sk_attach_filter() side, so they can be reloaded in original form at a later point in time by utilities such as criu. To prepare for this, simply extend the bpf_prog_create_from_user() API to hold a flag that tells whether we should store the original or not. Also, fanout filters could make use of that in future for things like diag. 
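In-kernel callers then opt in per call site; roughly (an illustrative sketch with invented helper names, error handling abridged):

#include <linux/err.h>
#include <linux/filter.h>

static struct bpf_prog *demo_load(struct sock_fprog *fprog)
{
	struct bpf_prog *prog;
	int err;

	/* save_orig == true keeps the pre-transformed cBPF instructions
	 * around, so they can later be dumped back to user space */
	err = bpf_prog_create_from_user(&prog, fprog, NULL, true);
	if (err)
		return ERR_PTR(err);

	return prog;
}

static void demo_unload(struct bpf_prog *prog)
{
	/* also frees the saved original program, if one was kept */
	bpf_prog_destroy(prog);
}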
While fanout filters already use bpf_prog_destroy(), move seccomp over to them as well to handle original programs when present. Signed-off-by: Daniel Borkmann Cc: Tycho Andersen Cc: Pavel Emelyanov Cc: Kees Cook Cc: Andy Lutomirski Cc: Alexei Starovoitov Tested-by: Tycho Andersen Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- kernel/seccomp.c | 4 ++-- net/core/filter.c | 16 +++++++++++----- net/packet/af_packet.c | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3d5fd24b321b..1bbce14bcf17 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -411,7 +411,7 @@ typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans); + bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5bd4779282df..06858a74bb9c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -370,7 +370,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) return ERR_PTR(-ENOMEM); ret = bpf_prog_create_from_user(&sfilter->prog, fprog, - seccomp_check_filter); + seccomp_check_filter, false); if (ret < 0) { kfree(sfilter); return ERR_PTR(ret); @@ -469,7 +469,7 @@ void get_seccomp_filter(struct task_struct *tsk) static inline void seccomp_filter_free(struct seccomp_filter *filter) { if (filter) { - bpf_prog_free(filter->prog); + bpf_prog_destroy(filter->prog); kfree(filter); } } diff --git a/net/core/filter.c b/net/core/filter.c index 53a5036fb32d..da3e5357f138 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1084,16 +1084,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create); * @pfp: the unattached filter that is created * @fprog: the filter program * @trans: post-classic verifier transformation handler + * @save_orig: save classic BPF program * * This function effectively does the same as bpf_prog_create(), only * that it builds up its insns buffer from user space provided buffer. * It also allows for passing a bpf_aux_classic_check_t handler. */ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans) + bpf_aux_classic_check_t trans, bool save_orig) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *fp; + int err; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -1109,12 +1111,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, } fp->len = fprog->len; - /* Since unattached filters are not copied back to user - * space through sk_get_filter(), we do not need to hold - * a copy here, and can spare us the work. - */ fp->orig_prog = NULL; + if (save_orig) { + err = bpf_prog_store_orig_filter(fp, fprog); + if (err) { + __bpf_prog_free(fp); + return -ENOMEM; + } + } + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. 
*/ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index aa4b15c35884..81c900fbc4a4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1567,7 +1567,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, if (copy_from_user(&fprog, data, len)) return -EFAULT; - ret = bpf_prog_create_from_user(&new, &fprog, NULL); + ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); if (ret) return ret; -- cgit v1.2.3 From 0cdf5640e4f6940bdbbefee4bb0adb7dffb185ec Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 18:42:00 +0200 Subject: ebpf: include perf_event only where really needed Commit ea317b267e9d ("bpf: Add new bpf map type to store the pointer to struct perf_event") added perf_event.h to the main eBPF header, so it gets included for all users. perf_event.h is actually only needed from the array map side, so let's sanitize this a bit. Signed-off-by: Daniel Borkmann Cc: Kaixu Xia Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 - kernel/bpf/arraymap.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f57d7fed9ec3..c915a6b54570 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,7 +10,6 @@ #include #include #include -#include struct bpf_map; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 29ace107f236..2fecc4aed119 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -15,6 +15,7 @@ #include #include #include +#include /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) -- cgit v1.2.3 From a4b4766c3cebb4018167e06b863d8e95b7274757 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Mon, 5 Oct 2015 11:47:13 +0900 Subject: netfilter: nfnetlink_queue: rename related to nfqueue attaching conntrack info The idea of this patch series is to attach conntrack information to nflog, as nfqueue already does. The nfqueue infrastructure for attaching conntrack info is generic, so rename the nfqueue-specific names to a generic one, "glue". Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 4 +-- net/netfilter/core.c | 4 +-- net/netfilter/nf_conntrack_netlink.c | 60 ++++++++++++++++++------------------ net/netfilter/nfnetlink_queue.c | 30 +++++++++--------- 4 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3e5e8f2b65f6..27747deb96ed 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -377,7 +377,7 @@ struct nf_conn; enum ip_conntrack_info; struct nlattr; -struct nfq_ct_hook { +struct nfnl_ct_hook { struct nf_conn *(*get_ct)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); @@ -390,7 +390,7 @@ struct nfq_ct_hook { void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; -extern struct nfq_ct_hook __rcu *nfq_ct_hook; +extern struct nfnl_ct_hook __rcu *nfnl_ct_hook; /** * nf_skb_duplicated - TEE target has sent a packet diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 1412e3684c1c..32a289420caf 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -351,8 +351,8 @@ EXPORT_SYMBOL(skb_make_writable); /* This needs to be compiled in any case to avoid dependencies between the * nfnetlink_queue code and nf_conntrack.
*/ -struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly; -EXPORT_SYMBOL_GPL(nfq_ct_hook); +struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; +EXPORT_SYMBOL_GPL(nfnl_ct_hook); #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* This does not belong here, but locally generated errors need it if connection diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eb67bf8dd174..704f29bd8c4e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2135,7 +2135,7 @@ ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT static size_t -ctnetlink_nfqueue_build_size(const struct nf_conn *ct) +ctnetlink_glue_build_size(const struct nf_conn *ct) { return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ @@ -2162,8 +2162,8 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) ; } -static struct nf_conn *ctnetlink_nfqueue_get_ct(struct sk_buff *skb, - enum ip_conntrack_info *ctinfo) +static struct nf_conn *ctnetlink_glue_get_ct(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo) { struct nf_conn *ct; @@ -2174,7 +2174,7 @@ static struct nf_conn *ctnetlink_nfqueue_get_ct(struct sk_buff *skb, return ct; } -static int __ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) +static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; @@ -2247,9 +2247,9 @@ nla_put_failure: } static int -ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - u_int16_t ct_attr, u_int16_t ct_info_attr) +ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u_int16_t ct_attr, u_int16_t ct_info_attr) { struct nlattr *nest_parms; @@ -2257,7 +2257,7 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct, if (!nest_parms) goto nla_put_failure; - if (__ctnetlink_nfqueue_build(skb, ct) < 0) + if (__ctnetlink_glue_build(skb, ct) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -2272,7 +2272,7 @@ nla_put_failure: } static int -ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) +ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) { int err; @@ -2312,7 +2312,7 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) } static int -ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) +ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) { struct nlattr *cda[CTA_MAX+1]; int ret; @@ -2322,16 +2322,16 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) return ret; spin_lock_bh(&nf_conntrack_expect_lock); - ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct); spin_unlock_bh(&nf_conntrack_expect_lock); return ret; } -static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, - const struct nf_conn *ct, - struct nf_conntrack_tuple *tuple, - struct nf_conntrack_tuple *mask) +static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda, + const struct nf_conn *ct, + struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *mask) { int err; @@ -2345,8 +2345,8 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, } static int -ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, - u32 portid, u32 
report) +ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, + u32 portid, u32 report) { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; @@ -2358,8 +2358,8 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; - err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda, - ct, &tuple, &mask); + err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda, + ct, &tuple, &mask); if (err < 0) return err; @@ -2386,8 +2386,8 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, return 0; } -static void ctnetlink_nfqueue_seqadj(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, int diff) +static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) { if (!(ct->status & IPS_NAT_MASK)) return; @@ -2395,13 +2395,13 @@ static void ctnetlink_nfqueue_seqadj(struct sk_buff *skb, struct nf_conn *ct, nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); } -static struct nfq_ct_hook ctnetlink_nfqueue_hook = { - .get_ct = ctnetlink_nfqueue_get_ct, - .build_size = ctnetlink_nfqueue_build_size, - .build = ctnetlink_nfqueue_build, - .parse = ctnetlink_nfqueue_parse, - .attach_expect = ctnetlink_nfqueue_attach_expect, - .seq_adjust = ctnetlink_nfqueue_seqadj, +static struct nfnl_ct_hook ctnetlink_glue_hook = { + .get_ct = ctnetlink_glue_get_ct, + .build_size = ctnetlink_glue_build_size, + .build = ctnetlink_glue_build, + .parse = ctnetlink_glue_parse, + .attach_expect = ctnetlink_glue_attach_expect, + .seq_adjust = ctnetlink_glue_seqadj, }; #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ @@ -3389,7 +3389,7 @@ static int __init ctnetlink_init(void) } #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT /* setup interaction between nf_queue and nf_conntrack_netlink. 
*/ - RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook); + RCU_INIT_POINTER(nfnl_ct_hook, &ctnetlink_glue_hook); #endif return 0; @@ -3409,7 +3409,7 @@ static void __exit ctnetlink_exit(void) nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT - RCU_INIT_POINTER(nfq_ct_hook, NULL); + RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 0d2e856dbbf2..a659e57aa576 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -313,7 +313,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); - struct nfq_ct_hook *nfq_ct; + struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; u32 seclen = 0; @@ -366,11 +366,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } if (queue->flags & NFQA_CFG_F_CONNTRACK) { - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct != NULL) { - ct = nfq_ct->get_ct(entskb, &ctinfo); + nfnl_ct = rcu_dereference(nfnl_ct_hook); + if (nfnl_ct != NULL) { + ct = nfnl_ct->get_ct(entskb, &ctinfo); if (ct != NULL) - size += nfq_ct->build_size(ct); + size += nfnl_ct->build_size(ct); } } @@ -516,7 +516,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) goto nla_put_failure; - if (ct && nfq_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) + if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) goto nla_put_failure; if (cap_len > data_len && @@ -1009,7 +1009,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, return 0; } -static struct nf_conn *nfqnl_ct_parse(struct nfq_ct_hook *nfq_ct, +static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, const struct nlmsghdr *nlh, const struct nlattr * const nfqa[], struct nf_queue_entry *entry, @@ -1017,15 +1017,15 @@ static struct nf_conn *nfqnl_ct_parse(struct nfq_ct_hook *nfq_ct, { struct nf_conn *ct; - ct = nfq_ct->get_ct(entry->skb, ctinfo); + ct = nfnl_ct->get_ct(entry->skb, ctinfo); if (ct == NULL) return NULL; - if (nfq_ct->parse(nfqa[NFQA_CT], ct) < 0) + if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0) return NULL; if (nfqa[NFQA_EXP]) - nfq_ct->attach_expect(nfqa[NFQA_EXP], ct, + nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct, NETLINK_CB(entry->skb).portid, nlmsg_report(nlh)); return ct; @@ -1044,7 +1044,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, unsigned int verdict; struct nf_queue_entry *entry; enum ip_conntrack_info uninitialized_var(ctinfo); - struct nfq_ct_hook *nfq_ct; + struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; struct net *net = sock_net(ctnl); @@ -1069,9 +1069,9 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, if (nfqa[NFQA_CT]) { /* rcu lock already held from nfnl->call_rcu. 
*/ - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct != NULL) - ct = nfqnl_ct_parse(nfq_ct, nlh, nfqa, entry, &ctinfo); + nfnl_ct = rcu_dereference(nfnl_ct_hook); + if (nfnl_ct != NULL) + ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo); } if (nfqa[NFQA_PAYLOAD]) { @@ -1083,7 +1083,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, verdict = NF_DROP; if (ct && diff) - nfq_ct->seq_adjust(entry->skb, ct, ctinfo, diff); + nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff); } if (nfqa[NFQA_MARK]) -- cgit v1.2.3 From 224a05975ebbbdf507c65043f8aba280ccb39e6e Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Mon, 5 Oct 2015 11:49:56 +0900 Subject: netfilter: ctnetlink: add const qualifier to nfnl_hook.get_ct get_ct does not and will not update its skb argument, and the only current user of nfnl_ct_hook is nfqueue, so we can add the const qualifier. Signed-off-by: Ken-ichirou MATSUZAWA --- include/linux/netfilter.h | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 27747deb96ed..edb3dc32f1da 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -378,7 +378,7 @@ enum ip_conntrack_info; struct nlattr; struct nfnl_ct_hook { - struct nf_conn *(*get_ct)(struct sk_buff *skb, + struct nf_conn *(*get_ct)(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 71d6ada9996e..9f5272968abb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2162,7 +2162,7 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) ; } -static struct nf_conn *ctnetlink_glue_get_ct(struct sk_buff *skb, +static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { struct nf_conn *ct; -- cgit v1.2.3 From 7d8d05d11473a169ab4d53bc7fc23d1fe3f1959f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 16 Aug 2015 11:23:46 +0200 Subject: misc: atmel_tclib: get and use slow clock Commit dca1a4b5ff6e ("clk: at91: keep slow clk enabled to prevent system hang") added a workaround for the slow clock as it is not properly handled by its users. Get and use the slow clock as it is necessary for the timer counters.
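For illustration, the consumer pattern this enables (a sketch; the hunks below show the real call sites in tcb_clksrc and pwm-atmel-tcb):

	err = clk_prepare_enable(tc->slow_clk);
	if (err)
		return err;

	/* no more hardcoded 32768: query the actual slow clock rate */
	rate = clk_get_rate(tc->slow_clk);
	...
	clk_disable_unprepare(tc->slow_clk);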
Signed-off-by: Boris Brezillon Signed-off-by: Alexandre Belloni Acked-by: Greg Kroah-Hartman Acked-by: Daniel Lezcano Acked-by: Thierry Reding --- drivers/clocksource/tcb_clksrc.c | 10 +++++++++- drivers/misc/atmel_tclib.c | 4 ++++ drivers/pwm/pwm-atmel-tcb.c | 26 +++++++++++++++++++------- include/linux/atmel_tc.h | 1 + 4 files changed, 33 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index f8d11fcf80f1..6ee91401918e 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -193,10 +193,17 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) struct clk *t2_clk = tc->clk[2]; int irq = tc->irq[2]; + ret = clk_prepare_enable(tc->slow_clk); + if (ret) + return ret; + /* try to enable t2 clk to avoid future errors in mode change */ ret = clk_prepare_enable(t2_clk); - if (ret) + if (ret) { + clk_disable_unprepare(tc->slow_clk); return ret; + } + clk_disable(t2_clk); clkevt.regs = tc->regs; @@ -209,6 +216,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) ret = request_irq(irq, ch2_irq, IRQF_TIMER, "tc_clkevt", &clkevt); if (ret) { clk_unprepare(t2_clk); + clk_disable_unprepare(tc->slow_clk); return ret; } diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index 0ca05c3ec8d6..ac24a4bd63f7 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -125,6 +125,10 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); + tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk"); + if (IS_ERR(tc->slow_clk)) + return PTR_ERR(tc->slow_clk); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); tc->regs = devm_ioremap_resource(&pdev->dev, r); if (IS_ERR(tc->regs)) diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index 6da01b3bf6f4..75db585a2a94 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -305,7 +305,7 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, */ if (i == 5) { i = slowclk; - rate = 32768; + rate = clk_get_rate(tc->slow_clk); min = div_u64(NSEC_PER_SEC, rate); max = min << tc->tcb_config->counter_width; @@ -387,9 +387,9 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev) tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL); if (tcbpwm == NULL) { - atmel_tc_free(tc); + err = -ENOMEM; dev_err(&pdev->dev, "failed to allocate memory\n"); - return -ENOMEM; + goto err_free_tc; } tcbpwm->chip.dev = &pdev->dev; @@ -400,17 +400,27 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev) tcbpwm->chip.npwm = NPWM; tcbpwm->tc = tc; + err = clk_prepare_enable(tc->slow_clk); + if (err) + goto err_free_tc; + spin_lock_init(&tcbpwm->lock); err = pwmchip_add(&tcbpwm->chip); - if (err < 0) { - atmel_tc_free(tc); - return err; - } + if (err < 0) + goto err_disable_clk; platform_set_drvdata(pdev, tcbpwm); return 0; + +err_disable_clk: + clk_disable_unprepare(tcbpwm->tc->slow_clk); + +err_free_tc: + atmel_tc_free(tc); + + return err; } static int atmel_tcb_pwm_remove(struct platform_device *pdev) @@ -418,6 +428,8 @@ static int atmel_tcb_pwm_remove(struct platform_device *pdev) struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev); int err; + clk_disable_unprepare(tcbpwm->tc->slow_clk); + err = pwmchip_remove(&tcbpwm->chip); if (err < 0) return err; diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index 
b87c1c7c242a..468fdfa643f0 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -67,6 +67,7 @@ struct atmel_tc { const struct atmel_tcb_config *tcb_config; int irq[3]; struct clk *clk[3]; + struct clk *slow_clk; struct list_head node; bool allocated; }; -- cgit v1.2.3 From 21c4c073f14509d685ed219aa3c76362a7bfa0ac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Oct 2015 06:25:43 -0700 Subject: Revert "regmap: Allow installing custom reg_update_bits function" This reverts commit 7741c373cf3ea1f5383fa97fb7a640a429d3dd7c. --- drivers/base/regmap/internal.h | 3 --- drivers/base/regmap/regmap.c | 25 ------------------------- include/linux/regmap.h | 4 ---- 3 files changed, 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index 4036d7a90f63..cc557886ab23 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -98,9 +98,6 @@ struct regmap { int (*reg_read)(void *context, unsigned int reg, unsigned int *val); int (*reg_write)(void *context, unsigned int reg, unsigned int val); - int (*reg_update_bits)(void *context, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change, bool force_write); bool defer_caching; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 70387c9f281b..afaf56200674 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -619,7 +619,6 @@ struct regmap *__regmap_init(struct device *dev, goto skip_format_initialization; } else { map->reg_read = _regmap_bus_read; - map->reg_update_bits = bus->reg_update_bits; } reg_endian = regmap_get_reg_endian(bus, config); @@ -2510,30 +2509,6 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, int ret; unsigned int tmp, orig; - if (map->reg_update_bits) { - ret = map->reg_update_bits(map->bus_context, reg, mask, val, - change, force_write); - if (ret != 0) - return ret; - - /* Fix up the cache by read/modify/write */ - if (!map->cache_bypass && !map->defer_caching) { - ret = regcache_read(map, reg, &orig); - if (ret != 0) - return ret; - - tmp = orig & ~mask; - tmp |= val & mask; - - ret = regcache_write(map, reg, tmp); - if (ret != 0) - return ret; - if (map->cache_only) - map->cache_dirty = true; - } - return ret; - } - ret = _regmap_read(map, reg, &orig); if (ret != 0) return ret; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4d3a3b1680bb..8fc0bfd8edc4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,9 +296,6 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); -typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change, bool force_write); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -338,7 +335,6 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; - regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From 87dcbc0610cb580c8eaf289f52aca3620af825f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 17:45:40 +0200 Subject: sched/core: Simplify INIT_PREEMPT_COUNT As per the following commit: d86ee4809d03 ("sched: optimize cond_resched()") 
we need PREEMPT_ACTIVE to keep cond_resched() from working before the scheduler is set up. However, keeping preemption disabled should do the same thing already, making the PREEMPT_ACTIVE part entirely redundant. The only complication is !PREEMPT_COUNT kernels, where PREEMPT_DISABLED ends up being 0. Instead we use an unconditional PREEMPT_OFFSET to set preempt_count() even on !PREEMPT_COUNT kernels. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt Reviewed-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d086cf0ca2c7..e5b8cbc4b8d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -606,19 +606,18 @@ struct task_cputime_atomic { #endif /* - * Disable preemption until the scheduler is running. - * Reset by start_kernel()->sched_init()->init_idle(). + * Disable preemption until the scheduler is running -- use an unconditional + * value so that it also works on !PREEMPT_COUNT kernels. * - * We include PREEMPT_ACTIVE to avoid cond_resched() from working - * before the scheduler is active -- see should_resched(). + * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). */ -#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE) +#define INIT_PREEMPT_COUNT PREEMPT_OFFSET /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. * @running: non-zero when there are timers running and - * @cputime receives updates. + * @cputime receives updates. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. -- cgit v1.2.3 From 609ca066386b2e64d4c0b0f55da327654962a0c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 17:52:18 +0200 Subject: sched/core: Create preempt_count invariant Assume units of PREEMPT_DISABLE_OFFSET for preempt_count() numbers. Now that TASK_DEAD no longer results in preempt_count() == 3 during scheduling, we will always call context_switch() with preempt_count() == 2. However, we don't always end up with preempt_count() == 2 in finish_task_switch() because new tasks get created with preempt_count() == 1. Create FORK_PREEMPT_COUNT, set it to 2, and use it in the right places. Note that we cannot use INIT_PREEMPT_COUNT as that serves another purpose (boot). After this, preempt_count() is invariant across the context switch, with the exception of PREEMPT_ACTIVE.
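For illustration, the resulting values (a sketch assuming PREEMPT_ENABLED == 0, as in the generic header; x86 additionally folds PREEMPT_NEED_RESCHED into it):

	/* CONFIG_PREEMPT_COUNT=y: PREEMPT_DISABLE_OFFSET == 1 */
	FORK_PREEMPT_COUNT == 2*1 + 0 == 2	/* the two levels held across context_switch() */

	/* CONFIG_PREEMPT_COUNT=n: PREEMPT_DISABLE_OFFSET == 0 */
	FORK_PREEMPT_COUNT == 2*0 + 0 == 0	/* preempt_count() is not maintained anyway */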
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 2 +- include/asm-generic/preempt.h | 2 +- include/linux/sched.h | 17 ++++++++++++----- kernel/sched/core.c | 23 +++++++++++++++++++++-- 4 files changed, 35 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index b12f81022a6b..01e700d392cb 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -31,7 +31,7 @@ static __always_inline void preempt_count_set(int pc) * must be macros to avoid header recursion hell */ #define init_task_preempt_count(p) do { \ - task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ + task_thread_info(p)->saved_preempt_count = FORK_PREEMPT_COUNT; \ } while (0) #define init_idle_preempt_count(p, cpu) do { \ diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 0bec580a4885..5d8ffa3e6f8c 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -24,7 +24,7 @@ static __always_inline void preempt_count_set(int pc) * must be macros to avoid header recursion hell */ #define init_task_preempt_count(p) do { \ - task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ + task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \ } while (0) #define init_idle_preempt_count(p, cpu) do { \ diff --git a/include/linux/sched.h b/include/linux/sched.h index e5b8cbc4b8d6..23ca455d9582 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -599,11 +599,7 @@ struct task_cputime_atomic { .sum_exec_runtime = ATOMIC64_INIT(0), \ } -#ifdef CONFIG_PREEMPT_COUNT -#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) -#else -#define PREEMPT_DISABLED PREEMPT_ENABLED -#endif +#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) /* * Disable preemption until the scheduler is running -- use an unconditional @@ -613,6 +609,17 @@ struct task_cputime_atomic { */ #define INIT_PREEMPT_COUNT PREEMPT_OFFSET +/* + * Initial preempt_count value; reflects the preempt_count schedule invariant + * which states that during context switches: + * + * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * + * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. + * Note: See finish_task_switch(). + */ +#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 530fe8baa645..8d8722b84dee 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2504,6 +2504,18 @@ static struct rq *finish_task_switch(struct task_struct *prev) struct mm_struct *mm = rq->prev_mm; long prev_state; + /* + * The previous task will have left us with a preempt_count of 2 + * because it left us after: + * + * schedule() + * preempt_disable(); // 1 + * __schedule() + * raw_spin_lock_irq(&rq->lock) // 2 + * + * Also, see FORK_PREEMPT_COUNT. + */ + rq->prev_mm = NULL; /* @@ -2588,8 +2600,15 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) { struct rq *rq; - /* finish_task_switch() drops rq->lock and enables preemtion */ - preempt_disable(); + /* + * New tasks start with FORK_PREEMPT_COUNT, see there and + * finish_task_switch() for details. 
+ * + * finish_task_switch() will drop rq->lock() and lower preempt_count + * and the preempt_enable() will end up enabling preemption (on + * PREEMPT_COUNT kernels). + */ + rq = finish_task_switch(prev); balance_callback(rq); preempt_enable(); -- cgit v1.2.3 From 3d8f74dd4ca1da8a1a464bbafcf679e40c2fc10f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 18:09:19 +0200 Subject: sched/core: Stop setting PREEMPT_ACTIVE Now that nothing tests for PREEMPT_ACTIVE anymore, stop setting it. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 12 ------------ kernel/sched/core.c | 19 ++++++------------- 2 files changed, 6 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index bea8dd8ff5e0..448dfd0b2ea6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -146,18 +146,6 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) -#define preempt_active_enter() \ -do { \ - preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ - barrier(); \ -} while (0) - -#define preempt_active_exit() \ -do { \ - barrier(); \ - preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ -} while (0) - #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cfad7f5f74f8..6344d82a84f6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3201,9 +3201,9 @@ void __sched schedule_preempt_disabled(void) static void __sched notrace preempt_schedule_common(void) { do { - preempt_active_enter(); + preempt_disable(); __schedule(true); - preempt_active_exit(); + sched_preempt_enable_no_resched(); /* * Check again in case we missed a preemption opportunity @@ -3254,13 +3254,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) return; do { - /* - * Use raw __prempt_count() ops that don't call function. - * We can't call functions before disabling preemption which - * disarm preemption tracing recursions. - */ - __preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); - barrier(); + preempt_disable_notrace(); /* * Needs preempt disabled in case user_exit() is traced * and the tracer calls preempt_enable_notrace() causing @@ -3270,8 +3264,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) __schedule(true); exception_exit(prev_ctx); - barrier(); - __preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); + preempt_enable_no_resched_notrace(); } while (need_resched()); } EXPORT_SYMBOL_GPL(preempt_schedule_notrace); @@ -3294,11 +3287,11 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) prev_state = exception_enter(); do { - preempt_active_enter(); + preempt_disable(); local_irq_enable(); __schedule(true); local_irq_disable(); - preempt_active_exit(); + sched_preempt_enable_no_resched(); } while (need_resched()); exception_exit(prev_state); -- cgit v1.2.3 From da7142e2ed735e1c1bef5a757dc55de35c65fbd6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 18:11:45 +0200 Subject: sched/core: Simplify preempt_count tests Since we stopped setting PREEMPT_ACTIVE, there is no need to mask it out of preempt_count() tests. 
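For illustration, a sketch of what the simplified test now sees (assuming CONFIG_PREEMPT_COUNT=y, so PREEMPT_DISABLE_OFFSET == 1): when the scheduler runs its debug check with exactly one preempt_disable() held,

	preempt_count() == 1			/* just the disable; no PREEMPT_ACTIVE bits to mask out */
	in_atomic_preempt_off() == false	/* count matches PREEMPT_DISABLE_OFFSET exactly */

Any extra nesting pushes preempt_count() past the expected offset and trips the warning.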
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 3 +-- kernel/sched/core.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 448dfd0b2ea6..b2676a16cfbe 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -126,8 +126,7 @@ * Check whether we were atomic before we did preempt_disable(): * (used by the scheduler) */ -#define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) +#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET) #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d6989f85c641..ca260cc5d881 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7486,7 +7486,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { - int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); + int nested = preempt_count() + rcu_preempt_depth(); return (nested == preempt_offset); } -- cgit v1.2.3 From e61bf1e43b6f8e687ce93e5d0ce85bca7e481600 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 18:11:35 +0200 Subject: sched/core: Kill PREEMPT_ACTIVE It's unused, kill the definition. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index b2676a16cfbe..75e4e30677f1 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -26,7 +26,6 @@ * SOFTIRQ_MASK: 0x0000ff00 * HARDIRQ_MASK: 0x000f0000 * NMI_MASK: 0x00100000 - * PREEMPT_ACTIVE: 0x00200000 * PREEMPT_NEED_RESCHED: 0x80000000 */ #define PREEMPT_BITS 8 @@ -53,10 +52,6 @@ #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) -#define PREEMPT_ACTIVE_BITS 1 -#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) -#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 -- cgit v1.2.3 From 77792b11409c9270d98e604b4314b85ce886ac7d Mon Sep 17 00:00:00 2001 From: Jon Ringle Date: Thu, 1 Oct 2015 12:38:07 -0400 Subject: regmap: Allow installing custom reg_update_bits function This commit allows installing a custom reg_update_bits function for cases where the hardware provides a mechanism to set or clear register bits without a read/modify/write cycle. Such is the case with the Microchip ENCX24J600. If a custom reg_update_bits function is provided, it will only be used against volatile registers.
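For illustration, a minimal sketch of a bus supplying the new callback (encx_cmd(), ENCX_BFS and ENCX_BFC are hypothetical names modelling ENCX24J600-style bit-set/bit-clear commands):

	static int encx_reg_update_bits(void *context, unsigned int reg,
					unsigned int mask, unsigned int val)
	{
		struct encx_ctx *ctx = context;
		int ret;

		/* set the bits that are 1 in both mask and val */
		ret = encx_cmd(ctx, ENCX_BFS, reg, mask & val);
		if (ret)
			return ret;

		/* clear the bits that are 1 in mask but 0 in val */
		return encx_cmd(ctx, ENCX_BFC, reg, mask & ~val);
	}

	static const struct regmap_bus encx_bus = {
		.reg_read = encx_reg_read,
		.reg_write = encx_reg_write,
		.reg_update_bits = encx_reg_update_bits,
	};

Non-volatile registers still take the regular read/modify/write path below, so the register cache stays coherent.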
Signed-off-by: Jon Ringle Signed-off-by: Mark Brown --- drivers/base/regmap/internal.h | 2 ++ drivers/base/regmap/regmap.c | 29 ++++++++++++++++++----------- include/linux/regmap.h | 3 +++ 3 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index cc557886ab23..628ad7ac078b 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -98,6 +98,8 @@ struct regmap { int (*reg_read)(void *context, unsigned int reg, unsigned int *val); int (*reg_write)(void *context, unsigned int reg, unsigned int val); + int (*reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); bool defer_caching; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index afaf56200674..8cd155af3d63 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -619,6 +619,7 @@ struct regmap *__regmap_init(struct device *dev, goto skip_format_initialization; } else { map->reg_read = _regmap_bus_read; + map->reg_update_bits = bus->reg_update_bits; } reg_endian = regmap_get_reg_endian(bus, config); @@ -2509,20 +2510,26 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, int ret; unsigned int tmp, orig; - ret = _regmap_read(map, reg, &orig); - if (ret != 0) - return ret; + if (change) + *change = false; - tmp = orig & ~mask; - tmp |= val & mask; - - if (force_write || (tmp != orig)) { - ret = _regmap_write(map, reg, tmp); - if (change) + if (regmap_volatile(map, reg) && map->reg_update_bits) { + ret = map->reg_update_bits(map->bus_context, reg, mask, val); + if (ret == 0 && change) *change = true; } else { - if (change) - *change = false; + ret = _regmap_read(map, reg, &orig); + if (ret != 0) + return ret; + + tmp = orig & ~mask; + tmp |= val & mask; + + if (force_write || (tmp != orig)) { + ret = _regmap_write(map, reg, tmp); + if (ret == 0 && change) + *change = true; + } } return ret; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..b49d4133750e 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,6 +296,8 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); +typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -335,6 +337,7 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; + regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From 63ab7bd0d450b726b88fa4b932f151b98cee2557 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Sep 2015 13:03:11 -0700 Subject: locking/asm-generic: Add _{relaxed|acquire|release}() variants for inc/dec atomics Similar to what we have for regular add/sub calls. For now, no actual arch implements them, so everyone falls back to the default atomics... iow, nothing changes. These will be used in future primitives. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Paul E.McKenney Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443643395-17016-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 29 +++++++----- include/linux/atomic.h | 97 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index 8942cdc676a7..f91093c2984c 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -159,19 +159,24 @@ static inline int atomic_long_add_negative(long i, atomic_long_t *l) return ATOMIC_LONG_PFX(_add_negative)(i, v); } -static inline long atomic_long_inc_return(atomic_long_t *l) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - - return (long)ATOMIC_LONG_PFX(_inc_return)(v); -} - -static inline long atomic_long_dec_return(atomic_long_t *l) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - - return (long)ATOMIC_LONG_PFX(_dec_return)(v); +#define ATOMIC_LONG_INC_DEC_OP(op, mo) \ +static inline long \ +atomic_long_##op##_return##mo(atomic_long_t *l) \ +{ \ + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ + \ + return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v); \ } +ATOMIC_LONG_INC_DEC_OP(inc,) +ATOMIC_LONG_INC_DEC_OP(inc, _relaxed) +ATOMIC_LONG_INC_DEC_OP(inc, _acquire) +ATOMIC_LONG_INC_DEC_OP(inc, _release) +ATOMIC_LONG_INC_DEC_OP(dec,) +ATOMIC_LONG_INC_DEC_OP(dec, _relaxed) +ATOMIC_LONG_INC_DEC_OP(dec, _acquire) +ATOMIC_LONG_INC_DEC_OP(dec, _release) + +#undef ATOMIC_LONG_INC_DEC_OP static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) { diff --git a/include/linux/atomic.h b/include/linux/atomic.h index e326469bb9d6..27e580d232ca 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -90,6 +90,30 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic_add_return_relaxed */ +/* atomic_inc_return_relaxed */ +#ifndef atomic_inc_return_relaxed +#define atomic_inc_return_relaxed atomic_inc_return +#define atomic_inc_return_acquire atomic_inc_return +#define atomic_inc_return_release atomic_inc_return + +#else /* atomic_inc_return_relaxed */ + +#ifndef atomic_inc_return_acquire +#define atomic_inc_return_acquire(...) \ + __atomic_op_acquire(atomic_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic_inc_return_release +#define atomic_inc_return_release(...) \ + __atomic_op_release(atomic_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic_inc_return +#define atomic_inc_return(...) \ + __atomic_op_fence(atomic_inc_return, __VA_ARGS__) +#endif +#endif /* atomic_inc_return_relaxed */ + /* atomic_sub_return_relaxed */ #ifndef atomic_sub_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return @@ -114,6 +138,30 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic_sub_return_relaxed */ +/* atomic_dec_return_relaxed */ +#ifndef atomic_dec_return_relaxed +#define atomic_dec_return_relaxed atomic_dec_return +#define atomic_dec_return_acquire atomic_dec_return +#define atomic_dec_return_release atomic_dec_return + +#else /* atomic_dec_return_relaxed */ + +#ifndef atomic_dec_return_acquire +#define atomic_dec_return_acquire(...) \ + __atomic_op_acquire(atomic_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic_dec_return_release +#define atomic_dec_return_release(...) 
\ + __atomic_op_release(atomic_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic_dec_return +#define atomic_dec_return(...) \ + __atomic_op_fence(atomic_dec_return, __VA_ARGS__) +#endif +#endif /* atomic_dec_return_relaxed */ + /* atomic_xchg_relaxed */ #ifndef atomic_xchg_relaxed #define atomic_xchg_relaxed atomic_xchg @@ -194,6 +242,31 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic64_add_return_relaxed */ +/* atomic64_inc_return_relaxed */ +#ifndef atomic64_inc_return_relaxed +#define atomic64_inc_return_relaxed atomic64_inc_return +#define atomic64_inc_return_acquire atomic64_inc_return +#define atomic64_inc_return_release atomic64_inc_return + +#else /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_inc_return_acquire +#define atomic64_inc_return_acquire(...) \ + __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic64_inc_return_release +#define atomic64_inc_return_release(...) \ + __atomic_op_release(atomic64_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic64_inc_return +#define atomic64_inc_return(...) \ + __atomic_op_fence(atomic64_inc_return, __VA_ARGS__) +#endif +#endif /* atomic64_inc_return_relaxed */ + + /* atomic64_sub_return_relaxed */ #ifndef atomic64_sub_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return @@ -218,6 +291,30 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic64_sub_return_relaxed */ +/* atomic64_dec_return_relaxed */ +#ifndef atomic64_dec_return_relaxed +#define atomic64_dec_return_relaxed atomic64_dec_return +#define atomic64_dec_return_acquire atomic64_dec_return +#define atomic64_dec_return_release atomic64_dec_return + +#else /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_dec_return_acquire +#define atomic64_dec_return_acquire(...) \ + __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic64_dec_return_release +#define atomic64_dec_return_release(...) \ + __atomic_op_release(atomic64_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic64_dec_return +#define atomic64_dec_return(...) \ + __atomic_op_fence(atomic64_dec_return, __VA_ARGS__) +#endif +#endif /* atomic64_dec_return_relaxed */ + /* atomic64_xchg_relaxed */ #ifndef atomic64_xchg_relaxed #define atomic64_xchg_relaxed atomic64_xchg -- cgit v1.2.3 From b6a4ae766e3133a4db73fabc81e522d1601cb623 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 29 Jul 2015 13:29:38 +0800 Subject: rcu: Use rcu_callback_t in call_rcu*() and friends As we now have the rcu_callback_t typedef as the type of rcu callbacks, we should use it in call_rcu*() and friends as the type of their parameters. This could save us a few lines of code and make it clear which functions require an rcu callback rather than some other callback as their argument. Besides, this can also help cscope to generate a better database for code reading. Signed-off-by: Boqun Feng Signed-off-by: Paul E.
McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 10 +++++----- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/srcu.c | 2 +- kernel/rcu/tiny.c | 8 ++++---- kernel/rcu/tree.c | 8 ++++---- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 2 +- 10 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 581abf848566..d63bb77dab35 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -160,7 +160,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, * more than one CPU). */ void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -191,7 +191,7 @@ void call_rcu(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); /** * call_rcu_sched() - Queue an RCU for invocation after sched grace period. @@ -213,7 +213,7 @@ void call_rcu_bh(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); + rcu_callback_t func); void synchronize_sched(void); @@ -274,7 +274,7 @@ do { \ * See the description of call_rcu() for more detailed information on * memory ordering guarantees. */ -void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); @@ -1065,7 +1065,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ff968b7af3a4..c8a0722f77ea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -83,7 +83,7 @@ static inline void synchronize_sched_expedited(void) } static inline void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { call_rcu(head, func); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5abec82f325e..60d15a080d7c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,7 +48,7 @@ void synchronize_rcu_bh(void); void synchronize_sched_expedited(void); void synchronize_rcu_expedited(void); -void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); /** * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 77192953dee5..51c8e7f02f48 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -448,7 +448,7 @@ static void synchronize_rcu_busted(void) } static void -call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +call_rcu_busted(struct rcu_head *head, rcu_callback_t func) { /* This is a deliberate bug for testing purposes only! 
*/ func(head); @@ -523,7 +523,7 @@ static void srcu_torture_synchronize(void) } static void srcu_torture_call(struct rcu_head *head, - void (*func)(struct rcu_head *head)) + rcu_callback_t func) { call_srcu(srcu_ctlp, head, func); } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index d3fcb2ec8536..9e6122540d28 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -387,7 +387,7 @@ static void srcu_flip(struct srcu_struct *sp) * srcu_struct structure. */ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, - void (*func)(struct rcu_head *head)) + rcu_callback_t func) { unsigned long flags; diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index d0471056d0af..944b1b491ed8 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -44,7 +44,7 @@ struct rcu_ctrlblk; static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void rcu_process_callbacks(struct softirq_action *unused); static void __call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu), + rcu_callback_t func, struct rcu_ctrlblk *rcp); #include "tiny_plugin.h" @@ -203,7 +203,7 @@ EXPORT_SYMBOL_GPL(synchronize_sched); * Helper function for call_rcu() and call_rcu_bh(). */ static void __call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu), + rcu_callback_t func, struct rcu_ctrlblk *rcp) { unsigned long flags; @@ -229,7 +229,7 @@ static void __call_rcu(struct rcu_head *head, * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_sched_ctrlblk); } @@ -239,7 +239,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); * Post an RCU bottom-half callback to be invoked after any subsequent * quiescent state. */ -void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_bh_ctrlblk); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 775d36cc0050..b9d9e0249e2f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3017,7 +3017,7 @@ static void rcu_leak_callback(struct rcu_head *rhp) * is expected to specify a CPU. */ static void -__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), +__call_rcu(struct rcu_head *head, rcu_callback_t func, struct rcu_state *rsp, int cpu, bool lazy) { unsigned long flags; @@ -3088,7 +3088,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), /* * Queue an RCU-sched callback for invocation after a grace period. */ -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_sched_state, -1, 0); } @@ -3097,7 +3097,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); /* * Queue an RCU callback for invocation after a quicker grace period. */ -void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_bh_state, -1, 0); } @@ -3111,7 +3111,7 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); * function may only be called from __kfree_rcu(). 
*/ void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { __call_rcu(head, func, rcu_state_p, -1, 1); } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 2e991f8361e4..ad11529375cc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -584,7 +584,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_preempt_check_callbacks(void); -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void call_rcu(struct rcu_head *head, rcu_callback_t func); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b2bf3963a0ae..06116ae6dfd7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -500,7 +500,7 @@ static void rcu_preempt_do_callbacks(void) /* * Queue a preemptible-RCU callback for invocation after a grace period. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, rcu_state_p, -1, 0); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 7a0b3bc7c5ed..5f748c5a40f0 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -534,7 +534,7 @@ static void rcu_spawn_tasks_kthread(void); * Post an RCU-tasks callback. First call must be from process context * after the scheduler if fully operational. */ -void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) +void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; bool needwake; -- cgit v1.2.3 From bb73c52bad3666997ed2ec83c0c80c3f8ef55008 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 30 Jul 2015 16:55:38 -0700 Subject: rcu: Don't disable preemption for Tiny and Tree RCU readers Because preempt_disable() maps to barrier() for non-debug builds, it forces the compiler to spill and reload registers. Because Tree RCU and Tiny RCU now only appear in CONFIG_PREEMPT=n builds, these barrier() instances generate needless extra code for each instance of rcu_read_lock() and rcu_read_unlock(). This extra code slows down Tree RCU and bloats Tiny RCU. This commit therefore removes the preempt_disable() and preempt_enable() from the non-preemptible implementations of __rcu_read_lock() and __rcu_read_unlock(), respectively. However, for debug purposes, preempt_disable() and preempt_enable() are still invoked if CONFIG_PREEMPT_COUNT=y, because this allows detection of sleeping inside atomic sections in non-preemptible kernels. However, Tiny and Tree RCU operate by coalescing all RCU read-side critical sections on a given CPU that lie between successive quiescent states. It is therefore necessary to compensate for removing barriers from __rcu_read_lock() and __rcu_read_unlock() by adding them to a couple of the RCU functions invoked during quiescent states, namely to rcu_all_qs() and rcu_note_context_switch(). Note, though, that the latter is more paranoia than necessity, at least until link-time optimizations become more aggressive. This is based on an earlier patch by Paul E. McKenney, fixing a bug encountered in kernels built with CONFIG_PREEMPT=n and CONFIG_PREEMPT_COUNT=y. Signed-off-by: Boqun Feng Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 6 ++++-- include/linux/rcutiny.h | 1 + kernel/rcu/tree.c | 9 +++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d63bb77dab35..6c3ceceb6148 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,12 +297,14 @@ void synchronize_rcu(void); static inline void __rcu_read_lock(void) { - preempt_disable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_disable(); } static inline void __rcu_read_unlock(void) { - preempt_enable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_enable(); } static inline void synchronize_rcu(void) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c8a0722f77ea..4c1aaf9cce7b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -216,6 +216,7 @@ static inline bool rcu_is_watching(void) static inline void rcu_all_qs(void) { + barrier(); /* Avoid RCU read-side critical sections leaking across. */ } #endif /* __LINUX_RCUTINY_H */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b9d9e0249e2f..93c0f23c3e45 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -337,12 +337,14 @@ static void rcu_momentary_dyntick_idle(void) */ void rcu_note_context_switch(void) { + barrier(); /* Avoid RCU read-side critical sections leaking down. */ trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(); rcu_preempt_note_context_switch(); if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) rcu_momentary_dyntick_idle(); trace_rcu_utilization(TPS("End context switch")); + barrier(); /* Avoid RCU read-side critical sections leaking up. */ } EXPORT_SYMBOL_GPL(rcu_note_context_switch); @@ -353,12 +355,19 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); * RCU flavors in desperate need of a quiescent state, which will normally * be none of them). Either way, do a lightweight quiescent state for * all RCU flavors. + * + * The barrier() calls are redundant in the common case when this is + * called externally, but just in case this is called from within this + * file. + * */ void rcu_all_qs(void) { + barrier(); /* Avoid RCU read-side critical sections leaking down. */ if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) rcu_momentary_dyntick_idle(); this_cpu_inc(rcu_qs_ctr); + barrier(); /* Avoid RCU read-side critical sections leaking up. */ } EXPORT_SYMBOL_GPL(rcu_all_qs); -- cgit v1.2.3 From 49f5903b473c5f63f3b57856d1bd4593db0a2eef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Sep 2015 00:42:57 -0700 Subject: rcu: Move preemption disabling out of __srcu_read_lock() Currently, __srcu_read_lock() cannot be invoked from restricted environments because it contains calls to preempt_disable() and preempt_enable(), both of which can invoke lockdep, which is a bad idea in some restricted execution modes. This commit therefore moves the preempt_disable() and preempt_enable() from __srcu_read_lock() to srcu_read_lock(). It also inserts the preempt_disable() and preempt_enable() around the call to __srcu_read_lock() in do_exit(). Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/srcu.h | 5 ++++- kernel/exit.c | 2 ++ kernel/rcu/srcu.c | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bdeb4567b71e..f5f80c5643ac 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -215,8 +215,11 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { - int retval = __srcu_read_lock(sp); + int retval; + preempt_disable(); + retval = __srcu_read_lock(sp); + preempt_enable(); rcu_lock_acquire(&(sp)->dep_map); return retval; } diff --git a/kernel/exit.c b/kernel/exit.c index ea95ee1b5ef7..0e93b63bbc59 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -761,7 +761,9 @@ void do_exit(long code) */ flush_ptrace_hw_breakpoint(tsk); + TASKS_RCU(preempt_disable()); TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu)); + TASKS_RCU(preempt_enable()); exit_notify(tsk, group_dead); proc_exit_connector(tsk); #ifdef CONFIG_NUMA diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 9e6122540d28..a63a1ea5a41b 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -298,11 +298,9 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->completed) & 0x1; - preempt_disable(); __this_cpu_inc(sp->per_cpu_ref->c[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ __this_cpu_inc(sp->per_cpu_ref->seq[idx]); - preempt_enable(); return idx; } EXPORT_SYMBOL_GPL(__srcu_read_lock); -- cgit v1.2.3 From c3ac7cf1847a4e68c909984f60d36adef2088e35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Sep 2015 16:29:02 -0700 Subject: rcu: Add rcu_pointer_handoff() This commit adds an rcu_pointer_handoff() that is intended to mark situations where a structure's protection transitions from RCU to some other mechanism (locking, reference counting, whatever). These markings should allow external tools to more easily spot bugs involving leaking pointers out of RCU read-side critical sections. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6c3ceceb6148..587eb057e2fa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -812,6 +812,28 @@ static inline void rcu_preempt_sleep_check(void) */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) +/** + * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism + * @p: The pointer to hand off + * + * This is simply an identity function, but it documents where a pointer + * is handed off from RCU to some other synchronization mechanism, for + * example, reference counting or locking. In C11, it would map to + * kill_dependency(). 
It could be used as follows: + * + * rcu_read_lock(); + * p = rcu_dereference(gp); + * long_lived = is_long_lived(p); + * if (long_lived) { + * if (!atomic_inc_not_zero(&p->refcnt)) + * long_lived = false; + * else + * p = rcu_pointer_handoff(p); + * } + * rcu_read_unlock(); + */ +#define rcu_pointer_handoff(p) (p) + /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * -- cgit v1.2.3 From 8db70b132dd57696cfc7560203a72e90c51bfdda Mon Sep 17 00:00:00 2001 From: Patrick Marlier Date: Fri, 11 Sep 2015 15:50:35 -0700 Subject: rculist: Make list_entry_rcu() use lockless_dereference() The current list_entry_rcu() implementation copies the pointer to a stack variable, then invokes rcu_dereference_raw() on it. This results in an additional store-load pair. Now, most compilers will emit normal store and load instructions, which might seem to be of negligible overhead, but this results in a load-hit-store situation that can cause surprisingly long pipeline stalls, even on modern microprocessors. The problem is that it takes time for the store to update the store buffer, which can delay the load that immediately follows. This commit therefore switches to the lockless_dereference() primitive, which does not expect the __rcu annotations (that are anyway not present in the list_head structure) and which, like rcu_dereference_raw(), does not check for an enclosing RCU read-side critical section. Most importantly, it does not copy the pointer, thus avoiding the load-hit-store overhead. Signed-off-by: Patrick Marlier [ paulmck: Switched to lockless_dereference() to suppress sparse warnings. ] Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rculist.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 17c6b1f84a77..5ed540986019 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -247,10 +247,7 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ -({ \ - typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ - container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ -}) + container_of(lockless_dereference(ptr), type, member) /** * Where are list_empty_rcu() and list_first_entry_rcu()? -- cgit v1.2.3 From e62e3f620ba8d437f4998441fc11cf3dc9d466d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Sep 2015 12:23:01 -0700 Subject: rcu: Remove deprecated rcu_lockdep_assert() The old rcu_lockdep_assert() was retained to ease handling of incoming patches, but any use will result in deprecation warnings. However, its replacement, RCU_LOCKDEP_WARN(), is now upstream. It is therefore time to remove rcu_lockdep_assert(), which this commit does. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 587eb057e2fa..a0189ba67fde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -537,28 +537,8 @@ static inline int rcu_read_lock_sched_held(void) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. */
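Converting a caller is mechanical, but note that the replacement inverts the sense of the condition: rcu_lockdep_assert() splatted when its condition was not met, while RCU_LOCKDEP_WARN() splats when its condition is met. A hypothetical conversion might look like this (illustrative only, not part of the patch):

    /* Before: complain unless an RCU read-side critical section is in effect. */
    rcu_lockdep_assert(rcu_read_lock_held(),
                       "caller needs rcu_read_lock() protection");

    /* After: note the negated condition. */
    RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
                     "caller needs rcu_read_lock() protection");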
-static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void) -{ -} - #ifdef CONFIG_PROVE_RCU -/** - * rcu_lockdep_assert - emit lockdep splat if specified condition not met - * @c: condition to check - * @s: informative message - */ -#define rcu_lockdep_assert(c, s) \ - do { \ - static bool __section(.data.unlikely) __warned; \ - deprecate_rcu_lockdep_assert(); \ - if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ - __warned = true; \ - lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ - } \ - } while (0) - /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check @@ -596,7 +576,6 @@ static inline void rcu_preempt_sleep_check(void) #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert() #define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) -- cgit v1.2.3 From 7f5f873c6a0772970d5fee1f364231207051ecd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 18 Sep 2015 08:45:22 -0700 Subject: rculist: Use WRITE_ONCE() when deleting from reader-visible list The various RCU list-deletion macros (list_del_rcu(), hlist_del_init_rcu(), hlist_del_rcu(), hlist_bl_del_init_rcu(), hlist_bl_del_rcu(), hlist_nulls_del_init_rcu(), and hlist_nulls_del_rcu()) do plain stores into the ->next pointer of the preceding list element. Unfortunately, the compiler is within its rights to (for example) use byte-at-a-time writes to update the pointer, which would fatally confuse concurrent readers. This patch therefore adds the needed WRITE_ONCE() macros. KernelThreadSanitizer (KTSAN) reported the __hlist_del() issue, which is a problem when __hlist_del() is invoked by hlist_del_rcu(). Reported-by: Dmitry Vyukov Signed-off-by: Paul E.
McKenney Reviewed-by: Josh Triplett --- include/linux/list.h | 5 +++-- include/linux/list_bl.h | 5 +++-- include/linux/list_nulls.h | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 3e3e64a61002..993395a2e55c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -87,7 +87,7 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; - prev->next = next; + WRITE_ONCE(prev->next, next); } /** @@ -615,7 +615,8 @@ static inline void __hlist_del(struct hlist_node *n) { struct hlist_node *next = n->next; struct hlist_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (next) next->pprev = pprev; } diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 2eb88556c5c5..8132214e8efd 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -93,9 +93,10 @@ static inline void __hlist_bl_del(struct hlist_bl_node *n) LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); /* pprev may be `first`, so be careful not to lose the lock bit */ - *pprev = (struct hlist_bl_node *) + WRITE_ONCE(*pprev, + (struct hlist_bl_node *) ((unsigned long)next | - ((unsigned long)*pprev & LIST_BL_LOCKMASK)); + ((unsigned long)*pprev & LIST_BL_LOCKMASK))); if (next) next->pprev = pprev; } diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index f266661d2666..444d2b1313bd 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -76,7 +76,8 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) { struct hlist_nulls_node *next = n->next; struct hlist_nulls_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) next->pprev = pprev; } -- cgit v1.2.3 From cc44ca848f5e517aeca9f5eabbe13609a3f71450 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:44 +0200 Subject: rcu: Create rcu_sync infrastructure The rcu_sync infrastructure can be thought of as infrastructure to be used to implement reader-writer primitives having extremely lightweight readers during times when there are no writers. The first use is in the percpu_rwsem used by the VFS subsystem. This infrastructure is functionally equivalent to struct rcu_sync_struct { atomic_t counter; }; /* Check possibility of fast-path read-side operations. */ static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) { return atomic_read(&rss->counter) == 0; } /* Tell readers to use slowpaths. */ static inline void rcu_sync_enter(struct rcu_sync_struct *rss) { atomic_inc(&rss->counter); synchronize_sched(); } /* Allow readers to once again use fastpaths. */ static inline void rcu_sync_exit(struct rcu_sync_struct *rss) { synchronize_sched(); atomic_dec(&rss->counter); } The main difference is that it records the state and only calls synchronize_sched() if required. At least some of the calls to synchronize_sched() will be optimized away when rcu_sync_enter() and rcu_sync_exit() are invoked repeatedly in quick succession. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 94 +++++++++++++++++++++++++ kernel/rcu/Makefile | 2 +- kernel/rcu/sync.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 include/linux/rcu_sync.h create mode 100644 kernel/rcu/sync.c (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h new file mode 100644 index 000000000000..cb044df2e21c --- /dev/null +++ b/include/linux/rcu_sync.h @@ -0,0 +1,94 @@ +/* + * RCU-based infrastructure for lightweight reader-writer locking + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (c) 2015, Red Hat, Inc. + * + * Author: Oleg Nesterov + */ + +#ifndef _LINUX_RCU_SYNC_H_ +#define _LINUX_RCU_SYNC_H_ + +#include +#include + +/* Structure to mediate between updaters and fastpath-using readers. */ +struct rcu_sync { + int gp_state; + int gp_count; + wait_queue_head_t gp_wait; + + int cb_state; + struct rcu_head cb_head; + + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +}; + +#define ___RCU_SYNC_INIT(name) \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0 + +#define __RCU_SCHED_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_sched, \ + .call = call_rcu_sched, \ +} + +#define __RCU_BH_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu_bh, \ + .call = call_rcu_bh, \ +} + +#define __RCU_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu, \ + .call = call_rcu, \ +} + +#define DEFINE_RCU_SCHED_SYNC(name) \ + struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) + +#define DEFINE_RCU_BH_SYNC(name) \ + struct rcu_sync name = __RCU_BH_SYNC_INIT(name) + +#define DEFINE_RCU_SYNC(name) \ + struct rcu_sync name = __RCU_SYNC_INIT(name) + +/** + * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * Returns true if readers are permitted to use their fastpaths. + * Must be invoked within an RCU read-side critical section whose + * flavor matches that of the rcu_sync struture. 
+ */ +static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) +{ + return !rsp->gp_state; /* GP_IDLE */ +} + +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + +extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_enter(struct rcu_sync *); +extern void rcu_sync_exit(struct rcu_sync *); + +#endif /* _LINUX_RCU_SYNC_H_ */ diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 50a808424b06..61a16569ffbf 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -1,4 +1,4 @@ -obj-y += update.o +obj-y += update.o sync.o obj-$(CONFIG_SRCU) += srcu.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += tree.o diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c new file mode 100644 index 000000000000..0a11df43be23 --- /dev/null +++ b/kernel/rcu/sync.c @@ -0,0 +1,175 @@ +/* + * RCU-based infrastructure for lightweight reader-writer locking + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (c) 2015, Red Hat, Inc. + * + * Author: Oleg Nesterov + */ + +#include +#include + +enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; +enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; + +#define rss_lock gp_wait.lock + +/** + * rcu_sync_init() - Initialize an rcu_sync structure + * @rsp: Pointer to rcu_sync structure to be initialized + * @type: Flavor of RCU with which to synchronize rcu_sync structure + */ +void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) +{ + memset(rsp, 0, sizeof(*rsp)); + init_waitqueue_head(&rsp->gp_wait); + + switch (type) { + case RCU_SYNC: + rsp->sync = synchronize_rcu; + rsp->call = call_rcu; + break; + + case RCU_SCHED_SYNC: + rsp->sync = synchronize_sched; + rsp->call = call_rcu_sched; + break; + + case RCU_BH_SYNC: + rsp->sync = synchronize_rcu_bh; + rsp->call = call_rcu_bh; + break; + } +} + +/** + * rcu_sync_enter() - Force readers onto slowpath + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * This function is used by updaters who need readers to make use of + * a slowpath during the update. After this function returns, all + * subsequent calls to rcu_sync_is_idle() will return false, which + * tells readers to stay off their fastpaths. A later call to + * rcu_sync_exit() re-enables reader fastpaths. + * + * When called in isolation, rcu_sync_enter() must wait for a grace + * period; however, closely spaced calls to rcu_sync_enter() can + * optimize away the grace-period wait via a state machine implemented + * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func(). + */
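For orientation, here is a sketch of how an updater and a reader might pair these primitives, assuming a hypothetical subsystem that chose the sched flavor; my_rss, my_update() and my_read() are invented names, and the lock used by the slowpath is elided:

    static struct rcu_sync my_rss; /* rcu_sync_init(&my_rss, RCU_SCHED_SYNC) at init time */

    static void my_update(void)
    {
            rcu_sync_enter(&my_rss);   /* readers now see rcu_sync_is_idle() == false */
            /* ... perform the update under the slowpath's lock ... */
            rcu_sync_exit(&my_rss);    /* fastpaths resume after a grace period */
    }

    static void my_read(void)
    {
            rcu_read_lock_sched();     /* read-side flavor must match RCU_SCHED_SYNC */
            if (rcu_sync_is_idle(&my_rss)) {
                    /* lightweight fastpath: no writer is active */
            } else {
                    /* slowpath: synchronize with the writer, e.g. take its lock */
            }
            rcu_read_unlock_sched();
    }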
+void rcu_sync_enter(struct rcu_sync *rsp) +{ + bool need_wait, need_sync; + + spin_lock_irq(&rsp->rss_lock); + need_wait = rsp->gp_count++; + need_sync = rsp->gp_state == GP_IDLE; + if (need_sync) + rsp->gp_state = GP_PENDING; + spin_unlock_irq(&rsp->rss_lock); + + BUG_ON(need_wait && need_sync); + + if (need_sync) { + rsp->sync(); + rsp->gp_state = GP_PASSED; + wake_up_all(&rsp->gp_wait); + } else if (need_wait) { + wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED); + } else { + /* + * Possible when there's a pending CB from a rcu_sync_exit(). + * Nobody has yet been allowed the 'fast' path and thus we can + * avoid doing any sync(). The callback will get 'dropped'. + */ + BUG_ON(rsp->gp_state != GP_PASSED); + } +} + +/** + * rcu_sync_func() - Callback function managing reader access to fastpath + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * This function is passed to one of the call_rcu() functions by + * rcu_sync_exit(), so that it is invoked after a grace period following + * that invocation of rcu_sync_exit(). It takes action based on events that + * have taken place in the meantime, so that closely spaced rcu_sync_enter() + * and rcu_sync_exit() pairs need not wait for a grace period. + * + * If another rcu_sync_enter() is invoked before the grace period + * ended, reset state to allow the next rcu_sync_exit() to let the + * readers back onto their fastpaths (after a grace period). If both + * another rcu_sync_enter() and its matching rcu_sync_exit() are invoked + * before the grace period ended, re-invoke call_rcu() on behalf of that + * rcu_sync_exit(). Otherwise, set all state back to idle so that readers + * can again use their fastpaths. + */ +static void rcu_sync_func(struct rcu_head *rcu) +{ + struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head); + unsigned long flags; + + BUG_ON(rsp->gp_state != GP_PASSED); + BUG_ON(rsp->cb_state == CB_IDLE); + + spin_lock_irqsave(&rsp->rss_lock, flags); + if (rsp->gp_count) { + /* + * A new rcu_sync_enter() has happened; drop the callback. + */ + rsp->cb_state = CB_IDLE; + } else if (rsp->cb_state == CB_REPLAY) { + /* + * A new rcu_sync_exit() has happened; requeue the callback + * to catch a later GP. + */ + rsp->cb_state = CB_PENDING; + rsp->call(&rsp->cb_head, rcu_sync_func); + } else { + /* + * We're at least a GP after rcu_sync_exit(); everybody will now + * have observed the write side critical section. Let 'em rip! + */ + rsp->cb_state = CB_IDLE; + rsp->gp_state = GP_IDLE; + } + spin_unlock_irqrestore(&rsp->rss_lock, flags); +} + +/** + * rcu_sync_exit() - Allow readers back onto fast path after grace period + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * This function is used by updaters who have completed, and can therefore + * now allow readers to make use of their fastpaths after a grace period + * has elapsed. After this grace period has completed, all subsequent + * calls to rcu_sync_is_idle() will return true, which tells readers that + * they can once again use their fastpaths.
+ */ +void rcu_sync_exit(struct rcu_sync *rsp) +{ + spin_lock_irq(&rsp->rss_lock); + if (!--rsp->gp_count) { + if (rsp->cb_state == CB_IDLE) { + rsp->cb_state = CB_PENDING; + rsp->call(&rsp->cb_head, rcu_sync_func); + } else if (rsp->cb_state == CB_PENDING) { + rsp->cb_state = CB_REPLAY; + } + } + spin_unlock_irq(&rsp->rss_lock); +} -- cgit v1.2.3 From 82e8c565be8a72957570d7da8dd9b441db7bb648 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:47 +0200 Subject: rcu_sync: Simplify rcu_sync using new rcu_sync_ops structure This commit adds the new struct rcu_sync_ops which holds sync/call methods, and turns the function pointers in rcu_sync_struct into an array of struct rcu_sync_ops. This simplifies the "init" helpers by collapsing a switch statement and explicit multiple definitions into a simple assignment and a helper macro, respectively. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 60 +++++++++++++++++++----------------------------- kernel/rcu/sync.c | 42 +++++++++++++++++---------------- 2 files changed, 45 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index cb044df2e21c..c6d2272c4459 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -26,6 +26,8 @@ #include #include +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; @@ -35,43 +37,9 @@ struct rcu_sync { int cb_state; struct rcu_head cb_head; - void (*sync)(void); - void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + enum rcu_sync_type gp_type; }; -#define ___RCU_SYNC_INIT(name) \ - .gp_state = 0, \ - .gp_count = 0, \ - .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ - .cb_state = 0 - -#define __RCU_SCHED_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_sched, \ - .call = call_rcu_sched, \ -} - -#define __RCU_BH_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu_bh, \ - .call = call_rcu_bh, \ -} - -#define __RCU_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu, \ - .call = call_rcu, \ -} - -#define DEFINE_RCU_SCHED_SYNC(name) \ - struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) - -#define DEFINE_RCU_BH_SYNC(name) \ - struct rcu_sync name = __RCU_BH_SYNC_INIT(name) - -#define DEFINE_RCU_SYNC(name) \ - struct rcu_sync name = __RCU_SYNC_INIT(name) - /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? 
* @rsp: Pointer to rcu_sync structure to use for synchronization @@ -85,10 +53,28 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) return !rsp->gp_state; /* GP_IDLE */ } -enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; - extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +#define __RCU_SYNC_INITIALIZER(name, type) { \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0, \ + .gp_type = type, \ + } + +#define __DEFINE_RCU_SYNC(name, type) \ + struct rcu_sync name = __RCU_SYNC_INITIALIZER(name, type) + +#define DEFINE_RCU_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SYNC) + +#define DEFINE_RCU_SCHED_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC) + +#define DEFINE_RCU_BH_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_BH_SYNC) + #endif /* _LINUX_RCU_SYNC_H_ */ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 0a11df43be23..5a9aa4c394f1 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -23,6 +23,24 @@ #include #include +static const struct { + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +} gp_ops[] = { + [RCU_SYNC] = { + .sync = synchronize_rcu, + .call = call_rcu, + }, + [RCU_SCHED_SYNC] = { + .sync = synchronize_sched, + .call = call_rcu_sched, + }, + [RCU_BH_SYNC] = { + .sync = synchronize_rcu_bh, + .call = call_rcu_bh, + }, +}; + enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; @@ -37,23 +55,7 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) { memset(rsp, 0, sizeof(*rsp)); init_waitqueue_head(&rsp->gp_wait); - - switch (type) { - case RCU_SYNC: - rsp->sync = synchronize_rcu; - rsp->call = call_rcu; - break; - - case RCU_SCHED_SYNC: - rsp->sync = synchronize_sched; - rsp->call = call_rcu_sched; - break; - - case RCU_BH_SYNC: - rsp->sync = synchronize_rcu_bh; - rsp->call = call_rcu_bh; - break; - } + rsp->gp_type = type; } /** @@ -85,7 +87,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) BUG_ON(need_wait && need_sync); if (need_sync) { - rsp->sync(); + gp_ops[rsp->gp_type].sync(); rsp->gp_state = GP_PASSED; wake_up_all(&rsp->gp_wait); } else if (need_wait) { @@ -138,7 +140,7 @@ static void rcu_sync_func(struct rcu_head *rcu) * to catch a later GP. */ rsp->cb_state = CB_PENDING; - rsp->call(&rsp->cb_head, rcu_sync_func); + gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); } else { /* * We're at least a GP after rcu_sync_exit(); everybody will now @@ -166,7 +168,7 @@ void rcu_sync_exit(struct rcu_sync *rsp) if (!--rsp->gp_count) { if (rsp->cb_state == CB_IDLE) { rsp->cb_state = CB_PENDING; - rsp->call(&rsp->cb_head, rcu_sync_func); + gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); } else if (rsp->cb_state == CB_PENDING) { rsp->cb_state = CB_REPLAY; } -- cgit v1.2.3 From 3a518b76af7bb411efe6dd090fbf098e29accb2e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:50 +0200 Subject: rcu_sync: Add CONFIG_PROVE_RCU checks This commit validates that the caller of rcu_sync_is_idle() holds the corresponding type of RCU read-side lock, but only in kernels built with CONFIG_PROVE_RCU=y. This validation is carried out via a new rcu_sync_ops->held() method that is checked within rcu_sync_is_idle(). Note that although this does add code to the fast path, it only does so in kernels built with CONFIG_PROVE_RCU=y. Suggested-by: "Paul E.
McKenney" Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 6 ++++++ kernel/rcu/sync.c | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index c6d2272c4459..1f2d4fc30b04 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,6 +40,8 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; +extern bool __rcu_sync_is_idle(struct rcu_sync *); + /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization @@ -50,7 +52,11 @@ struct rcu_sync { */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { +#ifdef CONFIG_PROVE_RCU + return __rcu_sync_is_idle(rsp); +#else return !rsp->gp_state; /* GP_IDLE */ +#endif } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 5a9aa4c394f1..01c9807a7f73 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -23,21 +23,33 @@ #include #include +#ifdef CONFIG_PROVE_RCU +#define __INIT_HELD(func) .held = func, +#else +#define __INIT_HELD(func) +#endif + static const struct { void (*sync)(void); void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +#ifdef CONFIG_PROVE_RCU + int (*held)(void); +#endif } gp_ops[] = { [RCU_SYNC] = { .sync = synchronize_rcu, .call = call_rcu, + __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { .sync = synchronize_sched, .call = call_rcu_sched, + __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { .sync = synchronize_rcu_bh, .call = call_rcu_bh, + __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -46,6 +58,14 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; #define rss_lock gp_wait.lock +#ifdef CONFIG_PROVE_RCU +bool __rcu_sync_is_idle(struct rcu_sync *rsp) +{ + WARN_ON(!gp_ops[rsp->gp_type].held()); + return rsp->gp_state == GP_IDLE; +} +#endif + /** * rcu_sync_init() - Initialize an rcu_sync structure * @rsp: Pointer to rcu_sync structure to be initialized -- cgit v1.2.3 From 07899a6e5f56136028c44a57ad0451e797365ac3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:52 +0200 Subject: rcu_sync: Introduce rcu_sync_dtor() This commit allows rcu_sync structures to be safely deallocated, The trick is to add a new ->wait field to the gp_ops array. This field is a pointer to the rcu_barrier() function corresponding to the flavor of RCU in question. This allows a new rcu_sync_dtor() to wait for any outstanding callbacks before freeing the rcu_sync structure. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 1 + kernel/rcu/sync.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 1f2d4fc30b04..8069d6468bc4 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -62,6 +62,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +extern void rcu_sync_dtor(struct rcu_sync *); #define __RCU_SYNC_INITIALIZER(name, type) { \ .gp_state = 0, \ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 01c9807a7f73..1e353f0a2b66 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -32,6 +32,7 @@ static const struct { void (*sync)(void); void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + void (*wait)(void); #ifdef CONFIG_PROVE_RCU int (*held)(void); #endif @@ -39,16 +40,19 @@ static const struct { [RCU_SYNC] = { .sync = synchronize_rcu, .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { .sync = synchronize_sched, .call = call_rcu_sched, + .wait = rcu_barrier_sched, __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { .sync = synchronize_rcu_bh, .call = call_rcu_bh, + .wait = rcu_barrier_bh, __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -195,3 +199,25 @@ void rcu_sync_exit(struct rcu_sync *rsp) } spin_unlock_irq(&rsp->rss_lock); } + +/** + * rcu_sync_dtor() - Clean up an rcu_sync structure + * @rsp: Pointer to rcu_sync structure to be cleaned up + */ +void rcu_sync_dtor(struct rcu_sync *rsp) +{ + int cb_state; + + BUG_ON(rsp->gp_count); + + spin_lock_irq(&rsp->rss_lock); + if (rsp->cb_state == CB_REPLAY) + rsp->cb_state = CB_PENDING; + cb_state = rsp->cb_state; + spin_unlock_irq(&rsp->rss_lock); + + if (cb_state != CB_IDLE) { + gp_ops[rsp->gp_type].wait(); + BUG_ON(rsp->cb_state != CB_IDLE); + } +} -- cgit v1.2.3 From 001dac627ff37433d5528ffb0d897cd19c2b1e43 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:57 +0200 Subject: locking/percpu-rwsem: Make use of the rcu_sync infrastructure Currently down_write/up_write calls synchronize_sched_expedited() twice, which is evil. Change this code to rely on rcu-sync primitives. This avoids the _expedited "big hammer", and this can be faster in the contended case or even in the case when a single thread does down_write/up_write in a loop. Of course, a single down_write() will take more time, but otoh it will be much more friendly to the whole system. To simplify the review this patch doesn't update the comments, fixed by the next change. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/percpu-rwsem.h | 3 ++- kernel/locking/percpu-rwsem.c | 18 +++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 834c4e52cb2d..c2fa3ecb0dce 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -5,11 +5,12 @@ #include #include #include +#include #include struct percpu_rw_semaphore { + struct rcu_sync rss; unsigned int __percpu *fast_read_ctr; - atomic_t write_ctr; struct rw_semaphore rw_sem; atomic_t slow_read_ctr; wait_queue_head_t write_waitq; diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 9529a30ec57b..183a71151ac0 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -17,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ __init_rwsem(&brw->rw_sem, name, rwsem_key); - atomic_set(&brw->write_ctr, 0); + rcu_sync_init(&brw->rss, RCU_SCHED_SYNC); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); return 0; @@ -33,6 +33,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) if (!brw->fast_read_ctr) return; + rcu_sync_dtor(&brw->rss); free_percpu(brw->fast_read_ctr); brw->fast_read_ctr = NULL; /* catch use after free bugs */ } @@ -62,13 +63,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) */ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) { - bool success = false; + bool success; preempt_disable(); - if (likely(!atomic_read(&brw->write_ctr))) { + success = rcu_sync_is_idle(&brw->rss); + if (likely(success)) __this_cpu_add(*brw->fast_read_ctr, val); - success = true; - } preempt_enable(); return success; @@ -149,8 +149,6 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) */ void percpu_down_write(struct percpu_rw_semaphore *brw) { - /* tell update_fast_ctr() there is a pending writer */ - atomic_inc(&brw->write_ctr); /* * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read * so that update_fast_ctr() can't succeed. @@ -162,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *brw) * fast-path, it executes a full memory barrier before we return. * See R_W case in the comment above update_fast_ctr(). */ - synchronize_sched_expedited(); + rcu_sync_enter(&brw->rss); /* exclude other writers, and block the new readers completely */ down_write(&brw->rw_sem); @@ -183,8 +181,6 @@ void percpu_up_write(struct percpu_rw_semaphore *brw) * Insert the barrier before the next fast-path in down_read, * see W_R case in the comment above update_fast_ctr(). */ - synchronize_sched_expedited(); - /* the last writer unblocks update_fast_ctr() */ - atomic_dec(&brw->write_ctr); + rcu_sync_exit(&brw->rss); } EXPORT_SYMBOL_GPL(percpu_up_write); -- cgit v1.2.3 From 4bace7344d6dbd7a1b0b801abf24ea9878064317 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Sep 2015 17:59:18 +0200 Subject: rcu_sync: Cleanup the CONFIG_PROVE_RCU checks 1. Rename __rcu_sync_is_idle() to rcu_sync_lockdep_assert() and change it to use rcu_lockdep_assert(). 2. Change rcu_sync_is_idle() to return rsp->gp_state == GP_IDLE unconditonally, this way we can remove the same check from rcu_sync_lockdep_assert() and clearly isolate the debugging code. 
Note: rcu_sync_enter()->wait_event(gp_state == GP_PASSED) needs another CONFIG_PROVE_RCU check, the same as is done in ->sync(); but this needs some simple preparations in the core RCU code to avoid the code duplication. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 7 +++---- kernel/rcu/sync.c | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 8069d6468bc4..a63a33e6196e 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,7 +40,7 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; -extern bool __rcu_sync_is_idle(struct rcu_sync *); +extern void rcu_sync_lockdep_assert(struct rcu_sync *); /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? @@ -53,10 +53,9 @@ extern bool __rcu_sync_is_idle(struct rcu_sync *); static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { #ifdef CONFIG_PROVE_RCU - return __rcu_sync_is_idle(rsp); -#else - return !rsp->gp_state; /* GP_IDLE */ + rcu_sync_lockdep_assert(rsp); #endif + return !rsp->gp_state; /* GP_IDLE */ } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 1e353f0a2b66..be922c9f3d37 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -63,10 +63,10 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; #define rss_lock gp_wait.lock #ifdef CONFIG_PROVE_RCU -bool __rcu_sync_is_idle(struct rcu_sync *rsp) +void rcu_sync_lockdep_assert(struct rcu_sync *rsp) { - WARN_ON(!gp_ops[rsp->gp_type].held()); - return rsp->gp_state == GP_IDLE; + RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(), + "suspicious rcu_sync_is_idle() usage"); } #endif -- cgit v1.2.3 From b4eb6cdbbd13698704863f680c643c569909e1c2 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 25 Aug 2015 20:25:37 -0400 Subject: PCI: Add builtin_pci_driver() to avoid registration boilerplate In f309d4443130 ("platform_device: better support builtin boilerplate avoidance"), we introduced the builtin_driver() macro. Here we use that support and extend it to PCI driver registration, so where a driver is clearly non-modular and builtin-only, we can register it in a similar fashion. Existing code that is clearly non-modular can be updated with the simple mapping of module_pci_driver(...) ---> builtin_pci_driver(...) We've essentially cloned the former to make the latter, and taken out the remove/module_exit parts since those never get used in a non-modular build of the code. Signed-off-by: Paul Gortmaker Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e90eb22de628..b54fbf1571b1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1192,6 +1192,17 @@ void pci_unregister_driver(struct pci_driver *dev); module_driver(__pci_driver, pci_register_driver, \ pci_unregister_driver) +/** + * builtin_pci_driver() - Helper macro for registering a PCI driver + * @__pci_driver: pci_driver struct + * + * Helper macro for PCI drivers which do not do anything special in their + * init code. This eliminates a lot of boilerplate. Each driver may only + * use this macro once, and calling it replaces device_initcall(...) 
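The intended conversion can be illustrated with a hypothetical builtin-only driver; foo_driver, foo_ids, and foo_probe are invented names for this sketch:

    static struct pci_driver foo_driver = {
            .name     = "foo",
            .id_table = foo_ids,      /* struct pci_device_id table */
            .probe    = foo_probe,
            /* no .remove: it would never run in a builtin-only configuration */
    };

    /* Before: module_pci_driver(foo_driver); */
    builtin_pci_driver(foo_driver);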
+ */ +#define builtin_pci_driver(__pci_driver) \ + builtin_driver(__pci_driver, pci_register_driver) + struct pci_driver *pci_dev_driver(const struct pci_dev *dev); int pci_add_dynid(struct pci_driver *drv, unsigned int vendor, unsigned int device, -- cgit v1.2.3 From fee6d4c777a125e56de9370db3b2bf359bf958d6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 5 Oct 2015 08:51:24 -0700 Subject: net: Add netif_is_l3_slave IPv6 addrconf keys off of IFF_SLAVE, so that flag cannot be used for L3 slaves. Add a new private flag and a netif_is_l3_slave() helper for checking it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 10 ++++------ include/linux/netdevice.h | 7 +++++++ net/l3mdev/l3mdev.c | 8 ++++---- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4fd5af1acff0..8713317eed86 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -39,8 +39,6 @@ #define DRV_NAME "vrf" #define DRV_VERSION "1.0" -#define vrf_is_slave(dev) ((dev)->flags & IFF_SLAVE) - #define vrf_master_get_rcu(dev) \ ((struct net_device *)rcu_dereference(dev->rx_handler_data)) @@ -433,7 +431,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) if (ret < 0) goto out_unregister; - port_dev->flags |= IFF_SLAVE; + port_dev->priv_flags |= IFF_L3MDEV_SLAVE; __vrf_insert_slave(queue, slave); cycle_netdev(port_dev); @@ -448,7 +446,7 @@ out_fail: static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { - if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev)) + if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev); @@ -462,7 +460,7 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) struct slave *slave; netdev_upper_dev_unlink(port_dev, dev); - port_dev->flags &= ~IFF_SLAVE; + port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; netdev_rx_handler_unregister(port_dev); @@ -672,7 +670,7 @@ static int vrf_device_event(struct notifier_block *unused, if (event == NETDEV_UNREGISTER) { struct net_device *vrf_dev; - if (!vrf_is_slave(dev) || netif_is_l3_master(dev)) + if (!netif_is_l3_slave(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b9450784ae06..b3374402c1ea 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1261,6 +1261,7 @@ struct net_device_ops { * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is an Open vSwitch master + * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1286,6 +1287,7 @@ enum netdev_priv_flags { IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, + IFF_L3MDEV_SLAVE = 1<<23, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -3830,6 +3832,11 @@ static inline bool netif_is_l3_master(const struct net_device *dev) return dev->priv_flags & IFF_L3MDEV_MASTER; } +static inline bool netif_is_l3_slave(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_SLAVE; +} + static inline bool netif_is_bridge_master(const struct net_device *dev) { return dev->priv_flags & IFF_EBRIDGE; } diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ddf75ad41713..8e5ead366e7f 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -26,11 +26,11 @@ int l3mdev_master_ifindex_rcu(struct 
net_device *dev) if (netif_is_l3_master(dev)) { ifindex = dev->ifindex; - } else if (dev->flags & IFF_SLAVE) { + } else if (netif_is_l3_slave(dev)) { struct net_device *master; master = netdev_master_upper_dev_get_rcu(dev); - if (master && netif_is_l3_master(master)) + if (master) ifindex = master->ifindex; } @@ -54,7 +54,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev) if (netif_is_l3_master(dev)) { if (dev->l3mdev_ops->l3mdev_fib_table) tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); - } else if (dev->flags & IFF_SLAVE) { + } else if (netif_is_l3_slave(dev)) { /* Users of netdev_master_upper_dev_get_rcu need non-const, * but current inet_*type functions take a const */ @@ -62,7 +62,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev) const struct net_device *master; master = netdev_master_upper_dev_get_rcu(_dev); - if (master && netif_is_l3_master(master) && + if (master && + master->l3mdev_ops->l3mdev_fib_table) tb_id = master->l3mdev_ops->l3mdev_fib_table(master); } -- cgit v1.2.3 From 1d27ff5aba2d36b152542e9d62158eb6584bcbd3 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 7 Oct 2015 09:26:39 -0600 Subject: coresight: fixing typographical error Tracing gets enabled _for_ a source rather than _from_ a source. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c69e1b932809..a7cabfa23b55 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -207,7 +207,7 @@ struct coresight_ops_link { * Operations available for sources. * @trace_id: returns the value of the component's trace ID as known to the HW. - * @enable: enables tracing from a source. + * @enable: enables tracing for a source. * @disable: disables tracing for a source. */ struct coresight_ops_source { -- cgit v1.2.3 From 6a8c3be7ec8eb3c1197766f9245e0d65a4e5aff8 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Wed, 7 Oct 2015 16:36:28 +0100 Subject: add FPGA manager core API to support programming FPGAs. The following functions are exported as GPL: * fpga_mgr_buf_load Load fpga from image in buffer * fpga_mgr_firmware_load Request firmware and load it to the FPGA. * fpga_mgr_register * fpga_mgr_unregister FPGA device drivers can be added by calling fpga_mgr_register() to register a set of fpga_manager_ops to do device specific stuff. * of_fpga_mgr_get * fpga_mgr_put Get/put a reference to a fpga manager. The following sysfs files are created: * /sys/class/fpga_manager//name Name of low level driver. 
* /sys/class/fpga_manager//state State of fpga manager Signed-off-by: Alan Tull Acked-by: Michal Simek Signed-off-by: Greg Kroah-Hartman --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/fpga/Kconfig | 14 ++ drivers/fpga/Makefile | 8 + drivers/fpga/fpga-mgr.c | 382 ++++++++++++++++++++++++++++++++++++++++++ include/linux/fpga/fpga-mgr.h | 127 ++++++++++++++ 6 files changed, 534 insertions(+) create mode 100644 drivers/fpga/Kconfig create mode 100644 drivers/fpga/Makefile create mode 100644 drivers/fpga/fpga-mgr.c create mode 100644 include/linux/fpga/fpga-mgr.h (limited to 'include/linux') diff --git a/drivers/Kconfig b/drivers/Kconfig index 709488ae882e..5a89e409ad18 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -192,4 +192,6 @@ source "drivers/hwtracing/stm/Kconfig" source "drivers/hwtracing/intel_th/Kconfig" +source "drivers/fpga/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index e63542dd7010..7064bf476c2a 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -169,3 +169,4 @@ obj-y += hwtracing/intel_th/ obj-$(CONFIG_STM) += hwtracing/stm/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_NVMEM) += nvmem/ +obj-$(CONFIG_FPGA) += fpga/ diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig new file mode 100644 index 000000000000..f1f1f6df54d3 --- /dev/null +++ b/drivers/fpga/Kconfig @@ -0,0 +1,14 @@ +# +# FPGA framework configuration +# + +menu "FPGA Configuration Support" + +config FPGA + tristate "FPGA Configuration Framework" + help + Say Y here if you want support for configuring FPGAs from the + kernel. The FPGA framework adds an FPGA manager class and FPGA + manager drivers. + +endmenu diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile new file mode 100644 index 000000000000..3313c5266795 --- /dev/null +++ b/drivers/fpga/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the fpga framework and fpga manager drivers. +# + +# Core FPGA Manager Framework +obj-$(CONFIG_FPGA) += fpga-mgr.o + +# FPGA Manager Drivers diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c new file mode 100644 index 000000000000..25261636687c --- /dev/null +++ b/drivers/fpga/fpga-mgr.c @@ -0,0 +1,382 @@ +/* + * FPGA Manager Core + * + * Copyright (C) 2013-2015 Altera Corporation + * + * With code from the mailing list: + * Copyright (C) 2013 Xilinx, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_IDA(fpga_mgr_ida); +static struct class *fpga_mgr_class; + +/** + * fpga_mgr_buf_load - load fpga from image in buffer + * @mgr: fpga manager + * @flags: flags setting fpga configuration modes + * @buf: buffer containing fpga image + * @count: byte count of buf + * + * Step the low level fpga manager through the device-specific steps of getting + * an FPGA ready to be configured, writing the image to it, then doing whatever + * post-configuration steps are necessary. + * + * Return: 0 on success, negative error code otherwise. 
+ */ +int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf, + size_t count) +{ + struct device *dev; + int ret; + + if (!mgr) + return -ENODEV; + + dev = &mgr->dev; + + /* + * Call the low level driver's write_init function. This will do the + * device-specific things to get the FPGA into the state where it is + * ready to receive an FPGA image. + */ + mgr->state = FPGA_MGR_STATE_WRITE_INIT; + ret = mgr->mops->write_init(mgr, flags, buf, count); + if (ret) { + dev_err(dev, "Error preparing FPGA for writing\n"); + mgr->state = FPGA_MGR_STATE_WRITE_INIT_ERR; + return ret; + } + + /* + * Write the FPGA image to the FPGA. + */ + mgr->state = FPGA_MGR_STATE_WRITE; + ret = mgr->mops->write(mgr, buf, count); + if (ret) { + dev_err(dev, "Error while writing image data to FPGA\n"); + mgr->state = FPGA_MGR_STATE_WRITE_ERR; + return ret; + } + + /* + * After all the FPGA image has been written, do the device specific + * steps to finish and set the FPGA into operating mode. + */ + mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE; + ret = mgr->mops->write_complete(mgr, flags); + if (ret) { + dev_err(dev, "Error after writing image data to FPGA\n"); + mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR; + return ret; + } + mgr->state = FPGA_MGR_STATE_OPERATING; + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_mgr_buf_load); + +/** + * fpga_mgr_firmware_load - request firmware and load to fpga + * @mgr: fpga manager + * @flags: flags setting fpga configuration modes + * @image_name: name of image file on the firmware search path + * + * Request an FPGA image using the firmware class, then write out to the FPGA. + * Update the state before each step to provide info on what step failed if + * there is a failure. + * + * Return: 0 on success, negative error code otherwise. 
+ */ +int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, + const char *image_name) +{ + struct device *dev; + const struct firmware *fw; + int ret; + + if (!mgr) + return -ENODEV; + + dev = &mgr->dev; + + dev_info(dev, "writing %s to %s\n", image_name, mgr->name); + + mgr->state = FPGA_MGR_STATE_FIRMWARE_REQ; + + ret = request_firmware(&fw, image_name, dev); + if (ret) { + mgr->state = FPGA_MGR_STATE_FIRMWARE_REQ_ERR; + dev_err(dev, "Error requesting firmware %s\n", image_name); + return ret; + } + + ret = fpga_mgr_buf_load(mgr, flags, fw->data, fw->size); + + release_firmware(fw); + + return ret; +} +EXPORT_SYMBOL_GPL(fpga_mgr_firmware_load); + +static const char * const state_str[] = { + [FPGA_MGR_STATE_UNKNOWN] = "unknown", + [FPGA_MGR_STATE_POWER_OFF] = "power off", + [FPGA_MGR_STATE_POWER_UP] = "power up", + [FPGA_MGR_STATE_RESET] = "reset", + + /* requesting FPGA image from firmware */ + [FPGA_MGR_STATE_FIRMWARE_REQ] = "firmware request", + [FPGA_MGR_STATE_FIRMWARE_REQ_ERR] = "firmware request error", + + /* Preparing FPGA to receive image */ + [FPGA_MGR_STATE_WRITE_INIT] = "write init", + [FPGA_MGR_STATE_WRITE_INIT_ERR] = "write init error", + + /* Writing image to FPGA */ + [FPGA_MGR_STATE_WRITE] = "write", + [FPGA_MGR_STATE_WRITE_ERR] = "write error", + + /* Finishing configuration after image has been written */ + [FPGA_MGR_STATE_WRITE_COMPLETE] = "write complete", + [FPGA_MGR_STATE_WRITE_COMPLETE_ERR] = "write complete error", + + /* FPGA reports to be in normal operating mode */ + [FPGA_MGR_STATE_OPERATING] = "operating", +}; + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fpga_manager *mgr = to_fpga_manager(dev); + + return sprintf(buf, "%s\n", mgr->name); +} + +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fpga_manager *mgr = to_fpga_manager(dev); + + return sprintf(buf, "%s\n", state_str[mgr->state]); +} + +static DEVICE_ATTR_RO(name); +static DEVICE_ATTR_RO(state); + +static struct attribute *fpga_mgr_attrs[] = { + &dev_attr_name.attr, + &dev_attr_state.attr, + NULL, +}; +ATTRIBUTE_GROUPS(fpga_mgr); + +static int fpga_mgr_of_node_match(struct device *dev, const void *data) +{ + return dev->of_node == data; +} + +/** + * of_fpga_mgr_get - get an exclusive reference to a fpga mgr + * @node: device node + * + * Given a device node, get an exclusive reference to a fpga mgr. + * + * Return: fpga manager struct or IS_ERR() condition containing error code. 
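Taken together, a consumer of this API might program an FPGA along these lines (a sketch; the calling function, device node, and image file name are invented for illustration):

    static int foo_load_image(struct device_node *mgr_node)
    {
            struct fpga_manager *mgr;
            int ret;

            mgr = of_fpga_mgr_get(mgr_node);        /* take the exclusive reference */
            if (IS_ERR(mgr))
                    return PTR_ERR(mgr);

            /* flags = 0: full reconfiguration rather than partial */
            ret = fpga_mgr_firmware_load(mgr, 0, "foo_image.rbf");

            fpga_mgr_put(mgr);                      /* release the reference */
            return ret;
    }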
+ */ +struct fpga_manager *of_fpga_mgr_get(struct device_node *node) +{ + struct fpga_manager *mgr; + struct device *dev; + + if (!node) + return ERR_PTR(-EINVAL); + + dev = class_find_device(fpga_mgr_class, NULL, node, + fpga_mgr_of_node_match); + if (!dev) + return ERR_PTR(-ENODEV); + + mgr = to_fpga_manager(dev); + put_device(dev); + if (!mgr) + return ERR_PTR(-ENODEV); + + /* Get exclusive use of fpga manager */ + if (!mutex_trylock(&mgr->ref_mutex)) + return ERR_PTR(-EBUSY); + + if (!try_module_get(THIS_MODULE)) { + mutex_unlock(&mgr->ref_mutex); + return ERR_PTR(-ENODEV); + } + + return mgr; +} +EXPORT_SYMBOL_GPL(of_fpga_mgr_get); + +/** + * fpga_mgr_put - release a reference to a fpga manager + * @mgr: fpga manager structure + */ +void fpga_mgr_put(struct fpga_manager *mgr) +{ + if (mgr) { + module_put(THIS_MODULE); + mutex_unlock(&mgr->ref_mutex); + } +} +EXPORT_SYMBOL_GPL(fpga_mgr_put); + +/** + * fpga_mgr_register - register a low level fpga manager driver + * @dev: fpga manager device from pdev + * @name: fpga manager name + * @mops: pointer to structure of fpga manager ops + * @priv: fpga manager private data + * + * Return: 0 on success, negative error code otherwise. + */ +int fpga_mgr_register(struct device *dev, const char *name, + const struct fpga_manager_ops *mops, + void *priv) +{ + struct fpga_manager *mgr; + const char *dt_label; + int id, ret; + + if (!mops || !mops->write_init || !mops->write || + !mops->write_complete || !mops->state) { + dev_err(dev, "Attempt to register without fpga_manager_ops\n"); + return -EINVAL; + } + + if (!name || !strlen(name)) { + dev_err(dev, "Attempt to register with no name!\n"); + return -EINVAL; + } + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return -ENOMEM; + + id = ida_simple_get(&fpga_mgr_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + ret = id; + goto error_kfree; + } + + mutex_init(&mgr->ref_mutex); + + mgr->name = name; + mgr->mops = mops; + mgr->priv = priv; + + /* + * Initialize framework state by requesting low level driver read state + * from device. FPGA may be in reset mode or may have been programmed + * by bootloader or EEPROM. + */ + mgr->state = mgr->mops->state(mgr); + + device_initialize(&mgr->dev); + mgr->dev.class = fpga_mgr_class; + mgr->dev.parent = dev; + mgr->dev.of_node = dev->of_node; + mgr->dev.id = id; + dev_set_drvdata(dev, mgr); + + dt_label = of_get_property(mgr->dev.of_node, "label", NULL); + if (dt_label) + ret = dev_set_name(&mgr->dev, "%s", dt_label); + else + ret = dev_set_name(&mgr->dev, "fpga%d", id); + + ret = device_add(&mgr->dev); + if (ret) + goto error_device; + + dev_info(&mgr->dev, "%s registered\n", mgr->name); + + return 0; + +error_device: + ida_simple_remove(&fpga_mgr_ida, id); +error_kfree: + kfree(mgr); + + return ret; +} +EXPORT_SYMBOL_GPL(fpga_mgr_register); + +/** + * fpga_mgr_unregister - unregister a low level fpga manager driver + * @dev: fpga manager device from pdev + */ +void fpga_mgr_unregister(struct device *dev) +{ + struct fpga_manager *mgr = dev_get_drvdata(dev); + + dev_info(&mgr->dev, "%s %s\n", __func__, mgr->name); + + /* + * If the low level driver provides a method for putting fpga into + * a desired state upon unregister, do it. 
+ */ + if (mgr->mops->fpga_remove) + mgr->mops->fpga_remove(mgr); + + device_unregister(&mgr->dev); +} +EXPORT_SYMBOL_GPL(fpga_mgr_unregister); + +static void fpga_mgr_dev_release(struct device *dev) +{ + struct fpga_manager *mgr = to_fpga_manager(dev); + + ida_simple_remove(&fpga_mgr_ida, mgr->dev.id); + kfree(mgr); +} + +static int __init fpga_mgr_class_init(void) +{ + pr_info("FPGA manager framework\n"); + + fpga_mgr_class = class_create(THIS_MODULE, "fpga_manager"); + if (IS_ERR(fpga_mgr_class)) + return PTR_ERR(fpga_mgr_class); + + fpga_mgr_class->dev_groups = fpga_mgr_groups; + fpga_mgr_class->dev_release = fpga_mgr_dev_release; + + return 0; +} + +static void __exit fpga_mgr_class_exit(void) +{ + class_destroy(fpga_mgr_class); + ida_destroy(&fpga_mgr_ida); +} + +MODULE_AUTHOR("Alan Tull "); +MODULE_DESCRIPTION("FPGA manager framework"); +MODULE_LICENSE("GPL v2"); + +subsys_initcall(fpga_mgr_class_init); +module_exit(fpga_mgr_class_exit); diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h new file mode 100644 index 000000000000..0940bf45e2f2 --- /dev/null +++ b/include/linux/fpga/fpga-mgr.h @@ -0,0 +1,127 @@ +/* + * FPGA Framework + * + * Copyright (C) 2013-2015 Altera Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ +#include +#include + +#ifndef _LINUX_FPGA_MGR_H +#define _LINUX_FPGA_MGR_H + +struct fpga_manager; + +/** + * enum fpga_mgr_states - fpga framework states + * @FPGA_MGR_STATE_UNKNOWN: can't determine state + * @FPGA_MGR_STATE_POWER_OFF: FPGA power is off + * @FPGA_MGR_STATE_POWER_UP: FPGA reports power is up + * @FPGA_MGR_STATE_RESET: FPGA in reset state + * @FPGA_MGR_STATE_FIRMWARE_REQ: firmware request in progress + * @FPGA_MGR_STATE_FIRMWARE_REQ_ERR: firmware request failed + * @FPGA_MGR_STATE_WRITE_INIT: preparing FPGA for programming + * @FPGA_MGR_STATE_WRITE_INIT_ERR: Error during WRITE_INIT stage + * @FPGA_MGR_STATE_WRITE: writing image to FPGA + * @FPGA_MGR_STATE_WRITE_ERR: Error while writing FPGA + * @FPGA_MGR_STATE_WRITE_COMPLETE: Doing post programming steps + * @FPGA_MGR_STATE_WRITE_COMPLETE_ERR: Error during WRITE_COMPLETE + * @FPGA_MGR_STATE_OPERATING: FPGA is programmed and operating + */ +enum fpga_mgr_states { + /* default FPGA states */ + FPGA_MGR_STATE_UNKNOWN, + FPGA_MGR_STATE_POWER_OFF, + FPGA_MGR_STATE_POWER_UP, + FPGA_MGR_STATE_RESET, + + /* getting an image for loading */ + FPGA_MGR_STATE_FIRMWARE_REQ, + FPGA_MGR_STATE_FIRMWARE_REQ_ERR, + + /* write sequence: init, write, complete */ + FPGA_MGR_STATE_WRITE_INIT, + FPGA_MGR_STATE_WRITE_INIT_ERR, + FPGA_MGR_STATE_WRITE, + FPGA_MGR_STATE_WRITE_ERR, + FPGA_MGR_STATE_WRITE_COMPLETE, + FPGA_MGR_STATE_WRITE_COMPLETE_ERR, + + /* fpga is programmed and operating */ + FPGA_MGR_STATE_OPERATING, +}; + +/* + * FPGA Manager flags + * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported + */ +#define FPGA_MGR_PARTIAL_RECONFIG BIT(0) + +/** + * struct fpga_manager_ops - ops for low level fpga manager drivers + * @state: returns an enum value of the FPGA's state + * @write_init: prepare the FPGA to receive configuration data + * @write: write count bytes of configuration data to the FPGA + * @write_complete: set FPGA to operating state after writing is done + * @fpga_remove: optional: Set FPGA into a specific state during driver remove + * + * fpga_manager_ops are the low level functions implemented by a specific + * fpga manager driver. The optional ones are tested for NULL before being + * called, so leaving them out is fine. 
+ */ +struct fpga_manager_ops { + enum fpga_mgr_states (*state)(struct fpga_manager *mgr); + int (*write_init)(struct fpga_manager *mgr, u32 flags, + const char *buf, size_t count); + int (*write)(struct fpga_manager *mgr, const char *buf, size_t count); + int (*write_complete)(struct fpga_manager *mgr, u32 flags); + void (*fpga_remove)(struct fpga_manager *mgr); +}; + +/** + * struct fpga_manager - fpga manager structure + * @name: name of low level fpga manager + * @dev: fpga manager device + * @ref_mutex: only allows one reference to fpga manager + * @state: state of fpga manager + * @mops: pointer to struct of fpga manager ops + * @priv: low level driver private data + */ +struct fpga_manager { + const char *name; + struct device dev; + struct mutex ref_mutex; + enum fpga_mgr_states state; + const struct fpga_manager_ops *mops; + void *priv; +}; + +#define to_fpga_manager(d) container_of(d, struct fpga_manager, dev) + +int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, + const char *buf, size_t count); + +int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, + const char *image_name); + +struct fpga_manager *of_fpga_mgr_get(struct device_node *node); + +void fpga_mgr_put(struct fpga_manager *mgr); + +int fpga_mgr_register(struct device *dev, const char *name, + const struct fpga_manager_ops *mops, void *priv); + +void fpga_mgr_unregister(struct device *dev); + +#endif /*_LINUX_FPGA_MGR_H */ -- cgit v1.2.3 From 7f5028cf6190407b7a632b0f30b83187577824cc Mon Sep 17 00:00:00 2001 From: Emilio López Date: Mon, 21 Sep 2015 10:38:20 -0300 Subject: sysfs: Support is_visible() on binary attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the sysfs header file: "The returned value will replace static permissions defined in struct attribute or struct bin_attribute." but this isn't the case, as is_visible is only called on struct attribute. This patch introduces a new is_bin_visible() function to implement the same functionality for binary attributes, and updates documentation accordingly. Note that to keep functionality and code similar to that of normal attributes, the mode is now checked as well to ensure it contains only read/write permissions or SYSFS_PREALLOC. 
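To illustrate the new hook (an illustrative sketch only, not part of this patch; the foo_* names and the foo_has_nvram() helper are hypothetical stand-ins for a real driver), a group could be wired up like this:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t foo_nvram_read(struct file *filp, struct kobject *kobj,
			      struct bin_attribute *attr, char *buf,
			      loff_t pos, size_t count)
{
	/* Fill buf with up to count bytes starting at offset pos. */
	return 0;
}

static ssize_t foo_nvram_write(struct file *filp, struct kobject *kobj,
			       struct bin_attribute *attr, char *buf,
			       loff_t pos, size_t count)
{
	/* Consume count bytes from buf starting at offset pos. */
	return count;
}

static BIN_ATTR_RW(foo_nvram, 16);

static struct bin_attribute *foo_bin_attrs[] = {
	&bin_attr_foo_nvram,
	NULL,
};

static umode_t foo_is_bin_visible(struct kobject *kobj,
				  struct bin_attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);

	/*
	 * Returning 0 hides the file entirely; any other return value
	 * replaces the static mode from struct bin_attribute (only the
	 * read/write bits and SYSFS_PREALLOC survive the masking done
	 * in create_files()). foo_has_nvram() is a hypothetical probe
	 * of the hardware.
	 */
	return foo_has_nvram(dev) ? a->attr.mode : 0;
}

static const struct attribute_group foo_attr_group = {
	.bin_attrs = foo_bin_attrs,
	.is_bin_visible = foo_is_bin_visible,
};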
Reviewed-by: Guenter Roeck Signed-off-by: Emilio López Acked-by: Greg Kroah-Hartman Signed-off-by: Olof Johansson --- fs/sysfs/group.c | 17 +++++++++++++++-- include/linux/sysfs.h | 18 ++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 39a019936768..51b56e6d9537 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -73,13 +73,26 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, } if (grp->bin_attrs) { - for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { + umode_t mode = (*bin_attr)->attr.mode; + if (update) kernfs_remove_by_name(parent, (*bin_attr)->attr.name); + if (grp->is_bin_visible) { + mode = grp->is_bin_visible(kobj, *bin_attr, i); + if (!mode) + continue; + } + + WARN(mode & ~(SYSFS_PREALLOC | 0664), + "Attribute %s: Invalid permissions 0%o\n", + (*bin_attr)->attr.name, mode); + + mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, &(*bin_attr)->attr, true, - (*bin_attr)->attr.mode, NULL); + mode, NULL); if (error) break; } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9f65758311a4..2f66050d073b 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -64,10 +64,18 @@ do { \ * a new subdirectory with this name. * @is_visible: Optional: Function to return permissions associated with an * attribute of the group. Will be called repeatedly for each - * attribute in the group. Only read/write permissions as well as - * SYSFS_PREALLOC are accepted. Must return 0 if an attribute is - * not visible. The returned value will replace static permissions - * defined in struct attribute or struct bin_attribute. + * non-binary attribute in the group. Only read/write + * permissions as well as SYSFS_PREALLOC are accepted. Must + * return 0 if an attribute is not visible. The returned value + * will replace static permissions defined in struct attribute. + * @is_bin_visible: + * Optional: Function to return permissions associated with a + * binary attribute of the group. Will be called repeatedly + * for each binary attribute in the group. Only read/write + * permissions as well as SYSFS_PREALLOC are accepted. Must + * return 0 if a binary attribute is not visible. The returned + * value will replace static permissions defined in + * struct bin_attribute. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -76,6 +84,8 @@ struct attribute_group { const char *name; umode_t (*is_visible)(struct kobject *, struct attribute *, int); + umode_t (*is_bin_visible)(struct kobject *, + struct bin_attribute *, int); struct attribute **attrs; struct bin_attribute **bin_attrs; }; -- cgit v1.2.3 From 18800fc7a04e7df8a345e7ef4fc3064368276f83 Mon Sep 17 00:00:00 2001 From: Emilio López Date: Mon, 21 Sep 2015 10:38:22 -0300 Subject: platform/chrome: Support reading/writing the vboot context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some EC implementations include a small nvram space used to store verified boot context data. This patch offers a way to expose this data to userspace. 
Reviewed-by: Javier Martinez Canillas Signed-off-by: Emilio López Signed-off-by: Olof Johansson --- drivers/platform/chrome/Makefile | 3 +- drivers/platform/chrome/cros_ec_dev.c | 1 + drivers/platform/chrome/cros_ec_vbc.c | 137 ++++++++++++++++++++++++++++++++++ include/linux/mfd/cros_ec.h | 1 + 4 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 drivers/platform/chrome/cros_ec_vbc.c (limited to 'include/linux') diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 4a11b010f5d8..bc498bda8211 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -1,7 +1,8 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o -cros_ec_devs-objs := cros_ec_dev.o cros_ec_sysfs.o cros_ec_lightbar.o +cros_ec_devs-objs := cros_ec_dev.o cros_ec_sysfs.o \ + cros_ec_lightbar.o cros_ec_vbc.o obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_devs.o obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpc.o obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c index 2f4099820480..d45cd254ed1c 100644 --- a/drivers/platform/chrome/cros_ec_dev.c +++ b/drivers/platform/chrome/cros_ec_dev.c @@ -32,6 +32,7 @@ static int ec_major; static const struct attribute_group *cros_ec_groups[] = { &cros_ec_attr_group, &cros_ec_lightbar_attr_group, + &cros_ec_vbc_attr_group, NULL, }; diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c new file mode 100644 index 000000000000..564a0d08c8bf --- /dev/null +++ b/drivers/platform/chrome/cros_ec_vbc.c @@ -0,0 +1,137 @@ +/* + * cros_ec_vbc - Expose the vboot context nvram to userspace + * + * Copyright (C) 2015 Collabora Ltd. + * + * based on vendor driver, + * + * Copyright (C) 2012 The Chromium OS Authors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include + +static ssize_t vboot_context_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *att, char *buf, + loff_t pos, size_t count) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev, + class_dev); + struct cros_ec_device *ecdev = ec->ec_dev; + struct ec_params_vbnvcontext *params; + struct cros_ec_command *msg; + int err; + const size_t para_sz = sizeof(params->op); + const size_t resp_sz = sizeof(struct ec_response_vbnvcontext); + const size_t payload = max(para_sz, resp_sz); + + msg = kmalloc(sizeof(*msg) + payload, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + /* NB: we only kmalloc()ated enough space for the op field */ + params = (struct ec_params_vbnvcontext *)msg->data; + params->op = EC_VBNV_CONTEXT_OP_READ; + + msg->version = EC_VER_VBNV_CONTEXT; + msg->command = EC_CMD_VBNV_CONTEXT; + msg->outsize = para_sz; + msg->insize = resp_sz; + + err = cros_ec_cmd_xfer(ecdev, msg); + if (err < 0) { + dev_err(dev, "Error sending read request: %d\n", err); + kfree(msg); + return err; + } + + memcpy(buf, msg->data, resp_sz); + + kfree(msg); + return resp_sz; +} + +static ssize_t vboot_context_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t pos, size_t count) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev, + class_dev); + struct cros_ec_device *ecdev = ec->ec_dev; + struct ec_params_vbnvcontext *params; + struct cros_ec_command *msg; + int err; + const size_t para_sz = sizeof(*params); + const size_t data_sz = sizeof(params->block); + + /* Only write full values */ + if (count != data_sz) + return -EINVAL; + + msg = kmalloc(sizeof(*msg) + para_sz, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + params = (struct ec_params_vbnvcontext *)msg->data; + params->op = EC_VBNV_CONTEXT_OP_WRITE; + memcpy(params->block, buf, data_sz); + + msg->version = EC_VER_VBNV_CONTEXT; + msg->command = EC_CMD_VBNV_CONTEXT; + msg->outsize = para_sz; + msg->insize = 0; + + err = cros_ec_cmd_xfer(ecdev, msg); + if (err < 0) { + dev_err(dev, "Error sending write request: %d\n", err); + kfree(msg); + return err; + } + + kfree(msg); + return data_sz; +} + +static umode_t cros_ec_vbc_is_visible(struct kobject *kobj, + struct bin_attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev, + class_dev); + struct device_node *np = ec->ec_dev->dev->of_node; + + if (IS_ENABLED(CONFIG_OF) && np) { + if (of_property_read_bool(np, "google,has-vbc-nvram")) + return a->attr.mode; + } + + return 0; +} + +static BIN_ATTR_RW(vboot_context, 16); + +static struct bin_attribute *cros_ec_vbc_bin_attrs[] = { + &bin_attr_vboot_context, + NULL +}; + +struct attribute_group cros_ec_vbc_attr_group = { + .name = "vbc", + .bin_attrs = cros_ec_vbc_bin_attrs, + .is_bin_visible = cros_ec_vbc_is_visible, +}; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index da72671a42fa..494682ce4bf3 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -255,5 +255,6 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; +extern struct attribute_group cros_ec_vbc_attr_group; #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit 
v1.2.3 From 02ef3c4a2aae65a1632b27770bfea3f83ca06772 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Aug 2015 15:14:30 -0700 Subject: cpu: Remove try_get_online_cpus() Now that synchronize_sched_expedited() no longer uses it, there are no users of try_get_online_cpus() in mainline. This commit therefore removes it. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner --- include/linux/cpu.h | 2 -- kernel/cpu.c | 13 ------------- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 23c30bdcca86..d2ca8c38f9c4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -228,7 +228,6 @@ extern struct bus_type cpu_subsys; extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); -extern bool try_get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); @@ -246,7 +245,6 @@ int cpu_down(unsigned int cpu); static inline void cpu_hotplug_begin(void) {} static inline void cpu_hotplug_done(void) {} #define get_online_cpus() do { } while (0) -#define try_get_online_cpus() true #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) diff --git a/kernel/cpu.c b/kernel/cpu.c index 82cf9dff4295..14a9cdf8abe9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -102,19 +102,6 @@ void get_online_cpus(void) } EXPORT_SYMBOL_GPL(get_online_cpus); -bool try_get_online_cpus(void) -{ - if (cpu_hotplug.active_writer == current) - return true; - if (!mutex_trylock(&cpu_hotplug.lock)) - return false; - cpuhp_lock_acquire_tryread(); - atomic_inc(&cpu_hotplug.refcount); - mutex_unlock(&cpu_hotplug.lock); - return true; -} -EXPORT_SYMBOL_GPL(try_get_online_cpus); - void put_online_cpus(void) { int refcount; -- cgit v1.2.3 From a1cba5613edf50c2a213fa90c30aa10500b241b7 Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Tue, 6 Oct 2015 12:25:48 -0700 Subject: net: phy: Add Broadcom phy library for common interfaces This patch adds the Broadcom PHY library to consolidate common interfaces shared by Broadcom PHYs. Moved the common interfaces to 'bcm-phy-lib.c' and updated the Broadcom PHY drivers to use the new APIs. Signed-off-by: Arun Parameswaran Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 6 ++ drivers/net/phy/Makefile | 1 + drivers/net/phy/bcm-phy-lib.c | 208 ++++++++++++++++++++++++++++++++++++++++++ drivers/net/phy/bcm-phy-lib.h | 37 ++++++++ drivers/net/phy/bcm63xx.c | 38 +------- drivers/net/phy/bcm7xxx.c | 127 ++++++-------------------- drivers/net/phy/broadcom.c | 149 +++++++++--------------------- include/linux/brcmphy.h | 22 +---- 8 files changed, 332 insertions(+), 256 deletions(-) create mode 100644 drivers/net/phy/bcm-phy-lib.c create mode 100644 drivers/net/phy/bcm-phy-lib.h (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index b57f6c280cad..606fdc924768 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -69,8 +69,12 @@ config SMSC_PHY ---help--- Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs +config BCM_NET_PHYLIB + tristate + config BROADCOM_PHY tristate "Drivers for Broadcom PHYs" + select BCM_NET_PHYLIB ---help--- Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, BCM5481 and BCM5482 PHYs. 
@@ -78,11 +82,13 @@ config BROADCOM_PHY config BCM63XX_PHY tristate "Drivers for Broadcom 63xx SOCs internal PHY" depends on BCM63XX + select BCM_NET_PHYLIB ---help--- Currently supports the 6348 and 6358 PHYs. config BCM7XXX_PHY tristate "Drivers for Broadcom 7xxx SOCs internal PHYs" + select BCM_NET_PHYLIB ---help--- Currently supports the BCM7366, BCM7439, BCM7445, and 40nm and 65nm generation of BCM7xxx Set Top Box SoCs. diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index f4e6eb9b2363..6932475a1a8a 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_QSEMI_PHY) += qsemi.o obj-$(CONFIG_SMSC_PHY) += smsc.o obj-$(CONFIG_TERANETICS_PHY) += teranetics.o obj-$(CONFIG_VITESSE_PHY) += vitesse.o +obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o obj-$(CONFIG_BROADCOM_PHY) += broadcom.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c new file mode 100644 index 000000000000..dd79ea6ba023 --- /dev/null +++ b/drivers/net/phy/bcm-phy-lib.c @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2015 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "bcm-phy-lib.h" +#include +#include +#include +#include + +#define MII_BCM_CHANNEL_WIDTH 0x2000 +#define BCM_CL45VEN_EEE_ADV 0x3c + +int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val) +{ + int rc; + + rc = phy_write(phydev, MII_BCM54XX_EXP_SEL, reg); + if (rc < 0) + return rc; + + return phy_write(phydev, MII_BCM54XX_EXP_DATA, val); +} +EXPORT_SYMBOL_GPL(bcm_phy_write_exp); + +int bcm_phy_read_exp(struct phy_device *phydev, u16 reg) +{ + int val; + + val = phy_write(phydev, MII_BCM54XX_EXP_SEL, reg); + if (val < 0) + return val; + + val = phy_read(phydev, MII_BCM54XX_EXP_DATA); + + /* Restore default value. It's O.K. if this write fails. 
*/ + phy_write(phydev, MII_BCM54XX_EXP_SEL, 0); + + return val; +} +EXPORT_SYMBOL_GPL(bcm_phy_read_exp); + +int bcm_phy_write_misc(struct phy_device *phydev, + u16 reg, u16 chl, u16 val) +{ + int rc; + int tmp; + + rc = phy_write(phydev, MII_BCM54XX_AUX_CTL, + MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + if (rc < 0) + return rc; + + tmp = phy_read(phydev, MII_BCM54XX_AUX_CTL); + tmp |= MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA; + rc = phy_write(phydev, MII_BCM54XX_AUX_CTL, tmp); + if (rc < 0) + return rc; + + tmp = (chl * MII_BCM_CHANNEL_WIDTH) | reg; + rc = bcm_phy_write_exp(phydev, tmp, val); + + return rc; +} +EXPORT_SYMBOL_GPL(bcm_phy_write_misc); + +int bcm_phy_read_misc(struct phy_device *phydev, + u16 reg, u16 chl) +{ + int rc; + int tmp; + + rc = phy_write(phydev, MII_BCM54XX_AUX_CTL, + MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + if (rc < 0) + return rc; + + tmp = phy_read(phydev, MII_BCM54XX_AUX_CTL); + tmp |= MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA; + rc = phy_write(phydev, MII_BCM54XX_AUX_CTL, tmp); + if (rc < 0) + return rc; + + tmp = (chl * MII_BCM_CHANNEL_WIDTH) | reg; + rc = bcm_phy_read_exp(phydev, tmp); + + return rc; +} +EXPORT_SYMBOL_GPL(bcm_phy_read_misc); + +int bcm_phy_ack_intr(struct phy_device *phydev) +{ + int reg; + + /* Clear pending interrupts. */ + reg = phy_read(phydev, MII_BCM54XX_ISR); + if (reg < 0) + return reg; + + return 0; +} +EXPORT_SYMBOL_GPL(bcm_phy_ack_intr); + +int bcm_phy_config_intr(struct phy_device *phydev) +{ + int reg; + + reg = phy_read(phydev, MII_BCM54XX_ECR); + if (reg < 0) + return reg; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + reg &= ~MII_BCM54XX_ECR_IM; + else + reg |= MII_BCM54XX_ECR_IM; + + return phy_write(phydev, MII_BCM54XX_ECR, reg); +} +EXPORT_SYMBOL_GPL(bcm_phy_config_intr); + +int bcm_phy_read_shadow(struct phy_device *phydev, u16 shadow) +{ + phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); + return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); +} +EXPORT_SYMBOL_GPL(bcm_phy_read_shadow); + +int bcm_phy_write_shadow(struct phy_device *phydev, u16 shadow, + u16 val) +{ + return phy_write(phydev, MII_BCM54XX_SHD, + MII_BCM54XX_SHD_WRITE | + MII_BCM54XX_SHD_VAL(shadow) | + MII_BCM54XX_SHD_DATA(val)); +} +EXPORT_SYMBOL_GPL(bcm_phy_write_shadow); + +int bcm_phy_enable_apd(struct phy_device *phydev, bool dll_pwr_down) +{ + int val; + + if (dll_pwr_down) { + val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR3); + if (val < 0) + return val; + + val |= BCM54XX_SHD_SCR3_DLLAPD_DIS; + bcm_phy_write_shadow(phydev, BCM54XX_SHD_SCR3, val); + } + + val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_APD); + if (val < 0) + return val; + + /* Clear APD bits */ + val &= BCM_APD_CLR_MASK; + + if (phydev->autoneg == AUTONEG_ENABLE) + val |= BCM54XX_SHD_APD_EN; + else + val |= BCM_NO_ANEG_APD_EN; + + /* Enable energy detect single link pulse for easy wakeup */ + val |= BCM_APD_SINGLELP_EN; + + /* Enable Auto Power-Down (APD) for the PHY */ + return bcm_phy_write_shadow(phydev, BCM54XX_SHD_APD, val); +} +EXPORT_SYMBOL_GPL(bcm_phy_enable_apd); + +int bcm_phy_enable_eee(struct phy_device *phydev) +{ + int val; + + /* Enable EEE at PHY level */ + val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr); + if (val < 0) + return val; + + val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X; + + phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr, (u32)val); + + /* Advertise EEE */ + val = phy_read_mmd_indirect(phydev, BCM_CL45VEN_EEE_ADV, + MDIO_MMD_AN, phydev->addr); + if (val < 0) + 
return val; + + val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T); + + phy_write_mmd_indirect(phydev, BCM_CL45VEN_EEE_ADV, + MDIO_MMD_AN, phydev->addr, (u32)val); + + return 0; +} +EXPORT_SYMBOL_GPL(bcm_phy_enable_eee); diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h new file mode 100644 index 000000000000..b2091c88b44d --- /dev/null +++ b/drivers/net/phy/bcm-phy-lib.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_BCM_PHY_LIB_H +#define _LINUX_BCM_PHY_LIB_H + +#include + +int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val); +int bcm_phy_read_exp(struct phy_device *phydev, u16 reg); + +int bcm_phy_write_misc(struct phy_device *phydev, + u16 reg, u16 chl, u16 value); +int bcm_phy_read_misc(struct phy_device *phydev, + u16 reg, u16 chl); + +int bcm_phy_write_shadow(struct phy_device *phydev, u16 shadow, + u16 val); +int bcm_phy_read_shadow(struct phy_device *phydev, u16 shadow); + +int bcm_phy_ack_intr(struct phy_device *phydev); +int bcm_phy_config_intr(struct phy_device *phydev); + +int bcm_phy_enable_apd(struct phy_device *phydev, bool dll_pwr_down); + +int bcm_phy_enable_eee(struct phy_device *phydev); +#endif /* _LINUX_BCM_PHY_LIB_H */ diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index 830ec31f952f..86b28052bf06 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -6,6 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include "bcm-phy-lib.h" #include #include @@ -42,35 +43,6 @@ static int bcm63xx_config_init(struct phy_device *phydev) return phy_write(phydev, MII_BCM63XX_IR, reg); } -static int bcm63xx_ack_interrupt(struct phy_device *phydev) -{ - int reg; - - /* Clear pending interrupts. 
*/ - reg = phy_read(phydev, MII_BCM63XX_IR); - if (reg < 0) - return reg; - - return 0; -} - -static int bcm63xx_config_intr(struct phy_device *phydev) -{ - int reg, err; - - reg = phy_read(phydev, MII_BCM63XX_IR); - if (reg < 0) - return reg; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - reg &= ~MII_BCM63XX_IR_GMASK; - else - reg |= MII_BCM63XX_IR_GMASK; - - err = phy_write(phydev, MII_BCM63XX_IR, reg); - return err; -} - static struct phy_driver bcm63xx_driver[] = { { .phy_id = 0x00406000, @@ -82,8 +54,8 @@ static struct phy_driver bcm63xx_driver[] = { .config_init = bcm63xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm63xx_ack_interrupt, - .config_intr = bcm63xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { /* same phy as above, with just a different OUI */ @@ -95,8 +67,8 @@ static struct phy_driver bcm63xx_driver[] = { .config_init = bcm63xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm63xx_ack_interrupt, - .config_intr = bcm63xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, } }; diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 6b701b3ded74..efa31a655eb1 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -12,12 +12,12 @@ #include #include #include +#include "bcm-phy-lib.h" #include #include #include /* Broadcom BCM7xxx internal PHY registers */ -#define MII_BCM7XXX_CHANNEL_WIDTH 0x2000 /* 40nm only register definitions */ #define MII_BCM7XXX_100TX_AUX_CTL 0x10 @@ -48,37 +48,13 @@ #define CORE_EXPB0 0xb0 -static void phy_write_exp(struct phy_device *phydev, - u16 reg, u16 value) -{ - phy_write(phydev, MII_BCM54XX_EXP_SEL, MII_BCM54XX_EXP_SEL_ER | reg); - phy_write(phydev, MII_BCM54XX_EXP_DATA, value); -} - -static void phy_write_misc(struct phy_device *phydev, - u16 reg, u16 chl, u16 value) -{ - int tmp; - - phy_write(phydev, MII_BCM54XX_AUX_CTL, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); - - tmp = phy_read(phydev, MII_BCM54XX_AUX_CTL); - tmp |= MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA; - phy_write(phydev, MII_BCM54XX_AUX_CTL, tmp); - - tmp = (chl * MII_BCM7XXX_CHANNEL_WIDTH) | reg; - phy_write(phydev, MII_BCM54XX_EXP_SEL, tmp); - - phy_write(phydev, MII_BCM54XX_EXP_DATA, value); -} - static void r_rc_cal_reset(struct phy_device *phydev) { /* Reset R_CAL/RC_CAL Engine */ - phy_write_exp(phydev, 0x00b0, 0x0010); + bcm_phy_write_exp(phydev, 0x00b0, 0x0010); /* Disable Reset R_AL/RC_CAL Engine */ - phy_write_exp(phydev, 0x00b0, 0x0000); + bcm_phy_write_exp(phydev, 0x00b0, 0x0000); } static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) @@ -86,18 +62,18 @@ static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) /* Increase VCO range to prevent unlocking problem of PLL at low * temp */ - phy_write_misc(phydev, PLL_PLLCTRL_1, 0x0048); + bcm_phy_write_misc(phydev, PLL_PLLCTRL_1, 0x0048); /* Change Ki to 011 */ - phy_write_misc(phydev, PLL_PLLCTRL_2, 0x021b); + bcm_phy_write_misc(phydev, PLL_PLLCTRL_2, 0x021b); /* Disable loading of TVCO buffer to bandgap, set bandgap trim * to 111 */ - phy_write_misc(phydev, PLL_PLLCTRL_4, 0x0e20); + bcm_phy_write_misc(phydev, PLL_PLLCTRL_4, 0x0e20); /* Adjust bias current trim by -3 */ - phy_write_misc(phydev, DSP_TAP10, 0x690b); + bcm_phy_write_misc(phydev, DSP_TAP10, 0x690b); /* Switch to CORE_BASE1E */ 
phy_write(phydev, MII_BCM7XXX_CORE_BASE1E, 0xd); @@ -105,19 +81,19 @@ static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) r_rc_cal_reset(phydev); /* write AFE_RXCONFIG_0 */ - phy_write_misc(phydev, AFE_RXCONFIG_0, 0xeb19); + bcm_phy_write_misc(phydev, AFE_RXCONFIG_0, 0xeb19); /* write AFE_RXCONFIG_1 */ - phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9a3f); + bcm_phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9a3f); /* write AFE_RX_LP_COUNTER */ - phy_write_misc(phydev, AFE_RX_LP_COUNTER, 0x7fc0); + bcm_phy_write_misc(phydev, AFE_RX_LP_COUNTER, 0x7fc0); /* write AFE_HPF_TRIM_OTHERS */ - phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x000b); + bcm_phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x000b); /* write AFE_TX_CONFIG */ - phy_write_misc(phydev, AFE_TX_CONFIG, 0x0800); + bcm_phy_write_misc(phydev, AFE_TX_CONFIG, 0x0800); return 0; } @@ -125,36 +101,36 @@ static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) static int bcm7xxx_28nm_d0_afe_config_init(struct phy_device *phydev) { /* AFE_RXCONFIG_0 */ - phy_write_misc(phydev, AFE_RXCONFIG_0, 0xeb15); + bcm_phy_write_misc(phydev, AFE_RXCONFIG_0, 0xeb15); /* AFE_RXCONFIG_1 */ - phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9b2f); + bcm_phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9b2f); /* AFE_RXCONFIG_2, set rCal offset for HT=0 code and LT=-2 code */ - phy_write_misc(phydev, AFE_RXCONFIG_2, 0x2003); + bcm_phy_write_misc(phydev, AFE_RXCONFIG_2, 0x2003); /* AFE_RX_LP_COUNTER, set RX bandwidth to maximum */ - phy_write_misc(phydev, AFE_RX_LP_COUNTER, 0x7fc0); + bcm_phy_write_misc(phydev, AFE_RX_LP_COUNTER, 0x7fc0); /* AFE_TX_CONFIG, set 100BT Cfeed=011 to improve rise/fall time */ - phy_write_misc(phydev, AFE_TX_CONFIG, 0x431); + bcm_phy_write_misc(phydev, AFE_TX_CONFIG, 0x431); /* AFE_VDCA_ICTRL_0, set Iq=1101 instead of 0111 for AB symmetry */ - phy_write_misc(phydev, AFE_VDCA_ICTRL_0, 0xa7da); + bcm_phy_write_misc(phydev, AFE_VDCA_ICTRL_0, 0xa7da); /* AFE_VDAC_OTHERS_0, set 1000BT Cidac=010 for all ports */ - phy_write_misc(phydev, AFE_VDAC_OTHERS_0, 0xa020); + bcm_phy_write_misc(phydev, AFE_VDAC_OTHERS_0, 0xa020); /* AFE_HPF_TRIM_OTHERS, set 100Tx/10BT to -4.5% swing and set rCal * offset for HT=0 code */ - phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x00e3); + bcm_phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x00e3); /* CORE_BASE1E, force trim to overwrite and set I_ext trim to 0000 */ phy_write(phydev, MII_BCM7XXX_CORE_BASE1E, 0x0010); /* DSP_TAP10, adjust bias current trim (+0% swing, +0 tick) */ - phy_write_misc(phydev, DSP_TAP10, 0x011b); + bcm_phy_write_misc(phydev, DSP_TAP10, 0x011b); /* Reset R_CAL/RC_CAL engine */ r_rc_cal_reset(phydev); @@ -165,24 +141,24 @@ static int bcm7xxx_28nm_d0_afe_config_init(struct phy_device *phydev) static int bcm7xxx_28nm_e0_plus_afe_config_init(struct phy_device *phydev) { /* AFE_RXCONFIG_1, provide more margin for INL/DNL measurement */ - phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9b2f); + bcm_phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9b2f); /* AFE_TX_CONFIG, set 100BT Cfeed=011 to improve rise/fall time */ - phy_write_misc(phydev, AFE_TX_CONFIG, 0x431); + bcm_phy_write_misc(phydev, AFE_TX_CONFIG, 0x431); /* AFE_VDCA_ICTRL_0, set Iq=1101 instead of 0111 for AB symmetry */ - phy_write_misc(phydev, AFE_VDCA_ICTRL_0, 0xa7da); + bcm_phy_write_misc(phydev, AFE_VDCA_ICTRL_0, 0xa7da); /* AFE_HPF_TRIM_OTHERS, set 100Tx/10BT to -4.5% swing and set rCal * offset for HT=0 code */ - phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x00e3); + bcm_phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x00e3); /* 
CORE_BASE1E, force trim to overwrite and set I_ext trim to 0000 */ phy_write(phydev, MII_BCM7XXX_CORE_BASE1E, 0x0010); /* DSP_TAP10, adjust bias current trim (+0% swing, +0 tick) */ - phy_write_misc(phydev, DSP_TAP10, 0x011b); + bcm_phy_write_misc(phydev, DSP_TAP10, 0x011b); /* Reset R_CAL/RC_CAL engine */ r_rc_cal_reset(phydev); @@ -190,53 +166,6 @@ static int bcm7xxx_28nm_e0_plus_afe_config_init(struct phy_device *phydev) return 0; } -static int bcm7xxx_apd_enable(struct phy_device *phydev) -{ - int val; - - /* Enable powering down of the DLL during auto-power down */ - val = bcm54xx_shadow_read(phydev, BCM54XX_SHD_SCR3); - if (val < 0) - return val; - - val |= BCM54XX_SHD_SCR3_DLLAPD_DIS; - bcm54xx_shadow_write(phydev, BCM54XX_SHD_SCR3, val); - - /* Enable auto-power down */ - val = bcm54xx_shadow_read(phydev, BCM54XX_SHD_APD); - if (val < 0) - return val; - - val |= BCM54XX_SHD_APD_EN; - return bcm54xx_shadow_write(phydev, BCM54XX_SHD_APD, val); -} - -static int bcm7xxx_eee_enable(struct phy_device *phydev) -{ - int val; - - val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, - MDIO_MMD_AN, phydev->addr); - if (val < 0) - return val; - - /* Enable general EEE feature at the PHY level */ - val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X; - - phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, - MDIO_MMD_AN, phydev->addr, val); - - /* Advertise supported modes */ - val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, - MDIO_MMD_AN, phydev->addr); - - val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T); - phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, - MDIO_MMD_AN, phydev->addr, val); - - return 0; -} - static int bcm7xxx_28nm_config_init(struct phy_device *phydev) { u8 rev = PHY_BRCM_7XXX_REV(phydev->dev_flags); @@ -273,11 +202,11 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) if (ret) return ret; - ret = bcm7xxx_eee_enable(phydev); + ret = bcm_phy_enable_eee(phydev); if (ret) return ret; - return bcm7xxx_apd_enable(phydev); + return bcm_phy_enable_apd(phydev, true); } static int bcm7xxx_28nm_resume(struct phy_device *phydev) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9c71295f2fef..07a6119121c3 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -14,6 +14,7 @@ * 2 of the License, or (at your option) any later version. */ +#include "bcm-phy-lib.h" #include #include #include @@ -29,39 +30,6 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); -/* Indirect register access functions for the Expansion Registers */ -static int bcm54xx_exp_read(struct phy_device *phydev, u16 regnum) -{ - int val; - - val = phy_write(phydev, MII_BCM54XX_EXP_SEL, regnum); - if (val < 0) - return val; - - val = phy_read(phydev, MII_BCM54XX_EXP_DATA); - - /* Restore default value. It's O.K. if this write fails. */ - phy_write(phydev, MII_BCM54XX_EXP_SEL, 0); - - return val; -} - -static int bcm54xx_exp_write(struct phy_device *phydev, u16 regnum, u16 val) -{ - int ret; - - ret = phy_write(phydev, MII_BCM54XX_EXP_SEL, regnum); - if (ret < 0) - return ret; - - ret = phy_write(phydev, MII_BCM54XX_EXP_DATA, val); - - /* Restore default value. It's O.K. if this write fails. 
*/ - phy_write(phydev, MII_BCM54XX_EXP_SEL, 0); - - return ret; -} - static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val) { return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val); @@ -72,28 +40,28 @@ static int bcm50610_a0_workaround(struct phy_device *phydev) { int err; - err = bcm54xx_exp_write(phydev, MII_BCM54XX_EXP_AADJ1CH0, + err = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_AADJ1CH0, MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN | MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF); if (err < 0) return err; - err = bcm54xx_exp_write(phydev, MII_BCM54XX_EXP_AADJ1CH3, - MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ); + err = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_AADJ1CH3, + MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ); if (err < 0) return err; - err = bcm54xx_exp_write(phydev, MII_BCM54XX_EXP_EXP75, + err = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_EXP75, MII_BCM54XX_EXP_EXP75_VDACCTRL); if (err < 0) return err; - err = bcm54xx_exp_write(phydev, MII_BCM54XX_EXP_EXP96, + err = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_EXP96, MII_BCM54XX_EXP_EXP96_MYST); if (err < 0) return err; - err = bcm54xx_exp_write(phydev, MII_BCM54XX_EXP_EXP97, + err = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_EXP97, MII_BCM54XX_EXP_EXP97_MYST); return err; @@ -114,7 +82,7 @@ static int bcm54xx_phydsp_config(struct phy_device *phydev) if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610 || BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610M) { /* Clear bit 9 to fix a phy interop issue. */ - err = bcm54xx_exp_write(phydev, MII_BCM54XX_EXP_EXP08, + err = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_EXP08, MII_BCM54XX_EXP_EXP08_RJCT_2MHZ); if (err < 0) goto error; @@ -129,12 +97,12 @@ static int bcm54xx_phydsp_config(struct phy_device *phydev) if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM57780) { int val; - val = bcm54xx_exp_read(phydev, MII_BCM54XX_EXP_EXP75); + val = bcm_phy_read_exp(phydev, MII_BCM54XX_EXP_EXP75); if (val < 0) goto error; val |= MII_BCM54XX_EXP_EXP75_CM_OSC; - err = bcm54xx_exp_write(phydev, MII_BCM54XX_EXP_EXP75, val); + err = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_EXP75, val); } error: @@ -159,7 +127,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) BRCM_PHY_MODEL(phydev) != PHY_ID_BCM50610M) return; - val = bcm54xx_shadow_read(phydev, BCM54XX_SHD_SCR3); + val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR3); if (val < 0) return; @@ -190,9 +158,9 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) val |= BCM54XX_SHD_SCR3_TRDDAPD; if (orig != val) - bcm54xx_shadow_write(phydev, BCM54XX_SHD_SCR3, val); + bcm_phy_write_shadow(phydev, BCM54XX_SHD_SCR3, val); - val = bcm54xx_shadow_read(phydev, BCM54XX_SHD_APD); + val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_APD); if (val < 0) return; @@ -204,7 +172,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) val &= ~BCM54XX_SHD_APD_EN; if (orig != val) - bcm54xx_shadow_write(phydev, BCM54XX_SHD_APD, val); + bcm_phy_write_shadow(phydev, BCM54XX_SHD_APD, val); } static int bcm54xx_config_init(struct phy_device *phydev) @@ -232,7 +200,7 @@ static int bcm54xx_config_init(struct phy_device *phydev) if ((BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610 || BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610M) && (phydev->dev_flags & PHY_BRCM_CLEAR_RGMII_MODE)) - bcm54xx_shadow_write(phydev, BCM54XX_SHD_RGMII_MODE, 0); + bcm_phy_write_shadow(phydev, BCM54XX_SHD_RGMII_MODE, 0); if ((phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED) || (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) || @@ -254,8 +222,8 @@ static int bcm5482_config_init(struct phy_device *phydev) /* * 
Enable secondary SerDes and its use as an LED source */ - reg = bcm54xx_shadow_read(phydev, BCM5482_SHD_SSD); - bcm54xx_shadow_write(phydev, BCM5482_SHD_SSD, + reg = bcm_phy_read_shadow(phydev, BCM5482_SHD_SSD); + bcm_phy_write_shadow(phydev, BCM5482_SHD_SSD, reg | BCM5482_SHD_SSD_LEDM | BCM5482_SHD_SSD_EN); @@ -264,10 +232,10 @@ static int bcm5482_config_init(struct phy_device *phydev) * Enable SGMII slave mode and auto-detection */ reg = BCM5482_SSD_SGMII_SLAVE | MII_BCM54XX_EXP_SEL_SSD; - err = bcm54xx_exp_read(phydev, reg); + err = bcm_phy_read_exp(phydev, reg); if (err < 0) return err; - err = bcm54xx_exp_write(phydev, reg, err | + err = bcm_phy_write_exp(phydev, reg, err | BCM5482_SSD_SGMII_SLAVE_EN | BCM5482_SSD_SGMII_SLAVE_AD); if (err < 0) @@ -277,10 +245,10 @@ static int bcm5482_config_init(struct phy_device *phydev) * Disable secondary SerDes powerdown */ reg = BCM5482_SSD_1000BX_CTL | MII_BCM54XX_EXP_SEL_SSD; - err = bcm54xx_exp_read(phydev, reg); + err = bcm_phy_read_exp(phydev, reg); if (err < 0) return err; - err = bcm54xx_exp_write(phydev, reg, + err = bcm_phy_write_exp(phydev, reg, err & ~BCM5482_SSD_1000BX_CTL_PWRDOWN); if (err < 0) return err; @@ -288,15 +256,15 @@ static int bcm5482_config_init(struct phy_device *phydev) /* * Select 1000BASE-X register set (primary SerDes) */ - reg = bcm54xx_shadow_read(phydev, BCM5482_SHD_MODE); - bcm54xx_shadow_write(phydev, BCM5482_SHD_MODE, + reg = bcm_phy_read_shadow(phydev, BCM5482_SHD_MODE); + bcm_phy_write_shadow(phydev, BCM5482_SHD_MODE, reg | BCM5482_SHD_MODE_1000BX); /* * LED1=ACTIVITYLED, LED3=LINKSPD[2] * (Use LED1 as secondary SerDes ACTIVITY LED) */ - bcm54xx_shadow_write(phydev, BCM5482_SHD_LEDS1, + bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1, BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_ACTIVITYLED) | BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_LINKSPD2)); @@ -334,35 +302,6 @@ static int bcm5482_read_status(struct phy_device *phydev) return err; } -static int bcm54xx_ack_interrupt(struct phy_device *phydev) -{ - int reg; - - /* Clear pending interrupts. 
*/ - reg = phy_read(phydev, MII_BCM54XX_ISR); - if (reg < 0) - return reg; - - return 0; -} - -static int bcm54xx_config_intr(struct phy_device *phydev) -{ - int reg, err; - - reg = phy_read(phydev, MII_BCM54XX_ECR); - if (reg < 0) - return reg; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - reg &= ~MII_BCM54XX_ECR_IM; - else - reg |= MII_BCM54XX_ECR_IM; - - err = phy_write(phydev, MII_BCM54XX_ECR, reg); - return err; -} - static int bcm5481_config_aneg(struct phy_device *phydev) { int ret; @@ -519,8 +458,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5421, @@ -532,8 +471,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5461, @@ -545,8 +484,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM54616S, @@ -558,8 +497,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5464, @@ -571,8 +510,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5481, @@ -584,8 +523,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5482, @@ -597,8 +536,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm5482_config_init, .config_aneg = genphy_config_aneg, .read_status = bcm5482_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM50610, @@ -610,8 +549,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = 
bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM50610M, @@ -623,8 +562,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM57780, @@ -636,8 +575,8 @@ static struct phy_driver broadcom_drivers[] = { .config_init = bcm54xx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = bcm54xx_ack_interrupt, - .config_intr = bcm54xx_config_intr, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCMAC131, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 697ca7795bd9..6a53ab91407c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -138,7 +138,10 @@ /* 01010: Auto Power-Down */ #define BCM54XX_SHD_APD 0x0a +#define BCM_APD_CLR_MASK 0xFE9F /* clear bits 5, 6 & 8 */ #define BCM54XX_SHD_APD_EN 0x0020 +#define BCM_NO_ANEG_APD_EN 0x0060 /* bits 5 & 6 */ +#define BCM_APD_SINGLELP_EN 0x0100 /* Bit 8 */ #define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ /* LED3 / ~LINKSPD[2] selector */ @@ -209,25 +212,6 @@ #define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ #define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ -/* - * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T - * 0x1c shadow registers. - */ -static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) -{ - phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); - return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); -} - -static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, - u16 val) -{ - return phy_write(phydev, MII_BCM54XX_SHD, - MII_BCM54XX_SHD_WRITE | - MII_BCM54XX_SHD_VAL(shadow) | - MII_BCM54XX_SHD_DATA(val)); -} - #define BRCM_CL45VEN_EEE_CONTROL 0x803d #define LPI_FEATURE_EN 0x8000 #define LPI_FEATURE_EN_DIG1000X 0x4000 -- cgit v1.2.3 From 8e185d6997bb67068f0ca8f062a50caa2608cf1b Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Tue, 6 Oct 2015 12:25:49 -0700 Subject: net: phy: Broadcom Cygnus internal Ethernet PHY driver Add support for the Broadcom Cygnus SoCs' internal PHYs. The PHYs are 1000M/100M/10M capable with support for 'EEE' and 'APD' (Auto Power Down). This driver supports the following Broadcom Cygnus SoCs: - BCM583XX (BCM58300, BCM58302, BCM58303, BCM58305) - BCM113XX (BCM11300, BCM11320, BCM11350, BCM11360) The PHYs on these SoCs require some workarounds for stable operation, both during configuration time and during suspend/resume. This driver handles the application of the workarounds. Signed-off-by: Arun Parameswaran Signed-off-by: David S. 
Miller --- drivers/net/phy/Kconfig | 13 ++++ drivers/net/phy/Makefile | 1 + drivers/net/phy/bcm-cygnus.c | 158 +++++++++++++++++++++++++++++++++++++++++++ include/linux/brcmphy.h | 7 ++ 4 files changed, 179 insertions(+) create mode 100644 drivers/net/phy/bcm-cygnus.c (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 606fdc924768..9d097ae54fb2 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -79,6 +79,19 @@ config BROADCOM_PHY Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, BCM5481 and BCM5482 PHYs. +config BCM_CYGNUS_PHY + tristate "Drivers for Broadcom Cygnus SoC internal PHY" + depends on ARCH_BCM_CYGNUS || COMPILE_TEST + depends on MDIO_BCM_IPROC + select BCM_NET_PHYLIB + ---help--- + This PHY driver is for the 1G internal PHYs of the Broadcom + Cygnus family of SoCs. + + Currently supports internal PHYs used in the BCM11300, + BCM11320, BCM11350, BCM11360, BCM58300, BCM58302, + BCM58303 & BCM58305 Broadcom Cygnus SoCs. + config BCM63XX_PHY tristate "Drivers for Broadcom 63xx SOCs internal PHY" depends on BCM63XX diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 6932475a1a8a..7655d47ad8d8 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_BROADCOM_PHY) += broadcom.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o +obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o obj-$(CONFIG_ICPLUS_PHY) += icplus.o obj-$(CONFIG_REALTEK_PHY) += realtek.o obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c new file mode 100644 index 000000000000..49bbc6826883 --- /dev/null +++ b/drivers/net/phy/bcm-cygnus.c @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2015 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Broadcom Cygnus SoC internal transceivers support. 
 */ +#include "bcm-phy-lib.h" +#include +#include +#include +#include + +/* Broadcom Cygnus Phy specific registers */ +#define MII_BCM_CYGNUS_AFE_VDAC_ICTRL_0 0x91E5 /* VDAC Control register */ + +static int bcm_cygnus_afe_config(struct phy_device *phydev) +{ + int rc; + + /* ensure smdspclk is enabled */ + rc = phy_write(phydev, MII_BCM54XX_AUX_CTL, 0x0c30); + if (rc < 0) + return rc; + + /* AFE_VDAC_ICTRL_0 bit 7:4 Iq=1100 for 1g 10bt, normal modes */ + rc = bcm_phy_write_misc(phydev, 0x39, 0x01, 0xA7C8); + if (rc < 0) + return rc; + + /* AFE_HPF_TRIM_OTHERS bit11=1, short cascode enable for all modes */ + rc = bcm_phy_write_misc(phydev, 0x3A, 0x00, 0x0803); + if (rc < 0) + return rc; + + /* AFE_TX_CONFIG_1 bit 7:4 Iq=1100 for test modes */ + rc = bcm_phy_write_misc(phydev, 0x3A, 0x01, 0xA740); + if (rc < 0) + return rc; + + /* AFE TEMPSEN_OTHERS rcal_HT, rcal_LT 10000 */ + rc = bcm_phy_write_misc(phydev, 0x3A, 0x03, 0x8400); + if (rc < 0) + return rc; + + /* AFE_FUTURE_RSV bit 2:0 rccal <2:0>=100 */ + rc = bcm_phy_write_misc(phydev, 0x3B, 0x00, 0x0004); + if (rc < 0) + return rc; + + /* Adjust bias current trim to overcome digital offset */ + rc = phy_write(phydev, MII_BRCM_CORE_BASE1E, 0x02); + if (rc < 0) + return rc; + + /* make rcal=100, since rdb default is 000 */ + rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB1, 0x10); + if (rc < 0) + return rc; + + /* CORE_EXPB0, Reset R_CAL/RC_CAL Engine */ + rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x10); + if (rc < 0) + return rc; + + /* CORE_EXPB0, Disable Reset R_CAL/RC_CAL Engine */ + rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x00); + + return 0; +} + +static int bcm_cygnus_config_init(struct phy_device *phydev) +{ + int reg, rc; + + reg = phy_read(phydev, MII_BCM54XX_ECR); + if (reg < 0) + return reg; + + /* Mask interrupts globally. */ + reg |= MII_BCM54XX_ECR_IM; + rc = phy_write(phydev, MII_BCM54XX_ECR, reg); + if (rc) + return rc; + + /* Unmask events of interest */ + reg = ~(MII_BCM54XX_INT_DUPLEX | + MII_BCM54XX_INT_SPEED | + MII_BCM54XX_INT_LINK); + rc = phy_write(phydev, MII_BCM54XX_IMR, reg); + if (rc) + return rc; + + /* Apply AFE settings for the PHY */ + rc = bcm_cygnus_afe_config(phydev); + if (rc) + return rc; + + /* Advertise EEE */ + rc = bcm_phy_enable_eee(phydev); + if (rc) + return rc; + + /* Enable APD */ + return bcm_phy_enable_apd(phydev, false); +} + +static int bcm_cygnus_resume(struct phy_device *phydev) +{ + int rc; + + genphy_resume(phydev); + + /* Re-initialize the PHY to apply AFE work-arounds and + * configurations when coming out of suspend. 
+ */ + rc = bcm_cygnus_config_init(phydev); + if (rc) + return rc; + + /* restart auto negotiation with the new settings */ + return genphy_config_aneg(phydev); +} + +static struct phy_driver bcm_cygnus_phy_driver[] = { +{ + .phy_id = PHY_ID_BCM_CYGNUS, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom Cygnus PHY", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .config_init = bcm_cygnus_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, + .suspend = genphy_suspend, + .resume = bcm_cygnus_resume, +} }; + +static struct mdio_device_id __maybe_unused bcm_cygnus_phy_tbl[] = { + { PHY_ID_BCM_CYGNUS, 0xfffffff0, }, + { } +}; +MODULE_DEVICE_TABLE(mdio, bcm_cygnus_phy_tbl); + +module_phy_driver(bcm_cygnus_phy_driver); + +MODULE_DESCRIPTION("Broadcom Cygnus internal PHY driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Broadcom Corporation"); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6a53ab91407c..59f4a7304419 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -30,6 +30,8 @@ #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 +#define PHY_ID_BCM_CYGNUS 0xae025200 + #define PHY_BCM_OUI_MASK 0xfffffc00 #define PHY_BCM_OUI_1 0x00206000 #define PHY_BCM_OUI_2 0x0143bc00 @@ -216,4 +218,9 @@ #define LPI_FEATURE_EN 0x8000 #define LPI_FEATURE_EN_DIG1000X 0x4000 +/* Core register definitions */ +#define MII_BRCM_CORE_BASE1E 0x1E +#define MII_BRCM_CORE_EXPB0 0xB0 +#define MII_BRCM_CORE_EXPB1 0xB1 + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 46234253b9363894a254844a6550b4cc5f3edfe8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 01:20:35 +0200 Subject: net: move net_get_random_once to lib There's no good reason why users outside of networking should not be using this facility, e.g. for initializing their seeds. Therefore, move it to lib and make it accessible as get_random_once(). Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/net.h | 21 ++++---------------- include/linux/once.h | 24 +++++++++++++++++++++++ lib/Makefile | 3 ++- lib/once.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/core/utils.c | 49 ----------------------------------------------- 5 files changed, 84 insertions(+), 67 deletions(-) create mode 100644 include/linux/once.h create mode 100644 lib/once.c (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 049d4b03c4c4..70ac5e28e6b7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -24,7 +24,8 @@ #include /* For O_CLOEXEC and O_NONBLOCK */ #include #include -#include +#include + #include struct poll_table_struct; @@ -250,22 +251,8 @@ do { \ } while (0) #endif -bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key); - -#define net_get_random_once(buf, nbytes) \ - ({ \ - bool ___ret = false; \ - static bool ___done = false; \ - static struct static_key ___once_key = \ - STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) \ - ___ret = __net_get_random_once(buf, \ - nbytes, \ - &___done, \ - &___once_key); \ - ___ret; \ - }) +#define net_get_random_once(buf, nbytes) \ + get_random_once((buf), (nbytes)) int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/include/linux/once.h b/include/linux/once.h new file mode 100644 index 000000000000..2a83b538dd6a --- /dev/null +++ b/include/linux/once.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_ONCE_H +#define _LINUX_ONCE_H + +#include +#include + +bool __get_random_once(void *buf, int nbytes, bool *done, + struct static_key *once_key); + +#define get_random_once(buf, nbytes) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___once_key = \ + STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) \ + ___ret = __get_random_once((buf), \ + (nbytes), \ + &___done, \ + &___once_key); \ + ___ret; \ + }) + +#endif /* _LINUX_ONCE_H */ diff --git a/lib/Makefile b/lib/Makefile index 13a7c6ae3fec..8de3b012eac7 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,7 +26,8 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o + percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ + once.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o diff --git a/lib/once.c b/lib/once.c new file mode 100644 index 000000000000..2d5a7de17aba --- /dev/null +++ b/lib/once.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include + +struct __random_once_work { + struct work_struct work; + struct static_key *key; +}; + +static void __random_once_deferred(struct work_struct *w) +{ + struct __random_once_work *work; + + work = container_of(w, struct __random_once_work, work); + BUG_ON(!static_key_enabled(work->key)); + static_key_slow_dec(work->key); + kfree(work); +} + +static void __random_once_disable_jump(struct static_key *key) +{ + struct __random_once_work *w; + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return; + + INIT_WORK(&w->work, __random_once_deferred); + w->key = key; + schedule_work(&w->work); +} + +bool __get_random_once(void *buf, int nbytes, bool *done, + struct static_key *once_key) +{ + static DEFINE_SPINLOCK(lock); 
+ unsigned long flags; + + spin_lock_irqsave(&lock, flags); + if (*done) { + spin_unlock_irqrestore(&lock, flags); + return false; + } + + get_random_bytes(buf, nbytes); + *done = true; + spin_unlock_irqrestore(&lock, flags); + + __random_once_disable_jump(once_key); + + return true; +} +EXPORT_SYMBOL(__get_random_once); diff --git a/net/core/utils.c b/net/core/utils.c index 3dffce953c39..3d17ca8b4744 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,52 +348,3 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, } } EXPORT_SYMBOL(inet_proto_csum_replace_by_diff); - -struct __net_random_once_work { - struct work_struct work; - struct static_key *key; -}; - -static void __net_random_once_deferred(struct work_struct *w) -{ - struct __net_random_once_work *work = - container_of(w, struct __net_random_once_work, work); - BUG_ON(!static_key_enabled(work->key)); - static_key_slow_dec(work->key); - kfree(work); -} - -static void __net_random_once_disable_jump(struct static_key *key) -{ - struct __net_random_once_work *w; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (!w) - return; - - INIT_WORK(&w->work, __net_random_once_deferred); - w->key = key; - schedule_work(&w->work); -} - -bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key) -{ - static DEFINE_SPINLOCK(lock); - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - if (*done) { - spin_unlock_irqrestore(&lock, flags); - return false; - } - - get_random_bytes(buf, nbytes); - *done = true; - spin_unlock_irqrestore(&lock, flags); - - __net_random_once_disable_jump(once_key); - - return true; -} -EXPORT_SYMBOL(__net_get_random_once); -- cgit v1.2.3 From c90aeb948222a7b3d3391d232ec4f50fd8322ad3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 01:20:36 +0200 Subject: once: make helper generic for calling functions once Make the get_random_once() helper generic enough that arbitrary functions can be called exactly once; net_get_random_once() then becomes one user of it. The only implementation-specific call is to get_random_bytes(); all the rest of this *_once() facility would otherwise be duplicated among different subsystems. The new DO_ONCE() helper will be used by prandom() later on, but might also be useful for other subsystems where a one-time initialization needs to happen in often-called, possibly fast-path code. Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/once.h | 61 ++++++++++++++++++++++++++++++++++++++++------------ lib/once.c | 50 ++++++++++++++++++++++++------------------ 2 files changed, 76 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/once.h b/include/linux/once.h index 2a83b538dd6a..285f12cb40e6 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -4,21 +4,54 @@ #include #include -bool __get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key); +bool __do_once_start(bool *done, unsigned long *flags); +void __do_once_done(bool *done, struct static_key *once_key, + unsigned long *flags); -#define get_random_once(buf, nbytes) \ - ({ \ - bool ___ret = false; \ - static bool ___done = false; \ - static struct static_key ___once_key = \ - STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) \ - ___ret = __get_random_once((buf), \ - (nbytes), \ - &___done, \ - &___once_key); \ - ___ret; \ +/* Call a function exactly once. 
Note that the idea of DO_ONCE() is to perform + * a function call such as initialization of random seeds, etc, only + * once, where DO_ONCE() can live in the fast-path. After @func has + * been called with the passed arguments, the static key will patch + * out the condition into a nop. DO_ONCE() guarantees type safety of + * arguments! + * + * Note that the following is not equivalent ... + * + * DO_ONCE(func, arg); + * DO_ONCE(func, arg); + * + * ... to this version: + * + * void foo(void) + * { + * DO_ONCE(func, arg); + * } + * + * foo(); + * foo(); + * + * In case the one-time invocation could be triggered from multiple + * places, then a common helper function must be defined, so that only + * a single static key will be placed there! + */ +#define DO_ONCE(func, ...) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___once_key = STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) { \ + unsigned long ___flags; \ + ___ret = __do_once_start(&___done, &___flags); \ + if (unlikely(___ret)) { \ + func(__VA_ARGS__); \ + __do_once_done(&___done, &___once_key, \ + &___flags); \ + } \ + } \ + ___ret; \ }) +#define get_random_once(buf, nbytes) \ + DO_ONCE(get_random_bytes, (buf), (nbytes)) + #endif /* _LINUX_ONCE_H */ diff --git a/lib/once.c b/lib/once.c index 2d5a7de17aba..05c8604627eb 100644 --- a/lib/once.c +++ b/lib/once.c @@ -3,52 +3,60 @@ #include #include -struct __random_once_work { +struct once_work { struct work_struct work; struct static_key *key; }; -static void __random_once_deferred(struct work_struct *w) +static void once_deferred(struct work_struct *w) { - struct __random_once_work *work; + struct once_work *work; - work = container_of(w, struct __random_once_work, work); + work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); static_key_slow_dec(work->key); kfree(work); } -static void __random_once_disable_jump(struct static_key *key) +static void once_disable_jump(struct static_key *key) { - struct __random_once_work *w; + struct once_work *w; w = kmalloc(sizeof(*w), GFP_ATOMIC); if (!w) return; - INIT_WORK(&w->work, __random_once_deferred); + INIT_WORK(&w->work, once_deferred); w->key = key; schedule_work(&w->work); } -bool __get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key) -{ - static DEFINE_SPINLOCK(lock); - unsigned long flags; +static DEFINE_SPINLOCK(once_lock); - spin_lock_irqsave(&lock, flags); +bool __do_once_start(bool *done, unsigned long *flags) + __acquires(once_lock) +{ + spin_lock_irqsave(&once_lock, *flags); if (*done) { - spin_unlock_irqrestore(&lock, flags); + spin_unlock_irqrestore(&once_lock, *flags); + /* Keep sparse happy by restoring an even lock count on + * this lock. In case we return here, we don't call into + * __do_once_done but return early in the DO_ONCE() macro. 
+ */ + __acquire(once_lock); return false; } - get_random_bytes(buf, nbytes); - *done = true; - spin_unlock_irqrestore(&lock, flags); - - __random_once_disable_jump(once_key); - return true; } -EXPORT_SYMBOL(__get_random_once); +EXPORT_SYMBOL(__do_once_start); + +void __do_once_done(bool *done, struct static_key *once_key, + unsigned long *flags) + __releases(once_lock) +{ + *done = true; + spin_unlock_irqrestore(&once_lock, *flags); + once_disable_jump(once_key); +} +EXPORT_SYMBOL(__do_once_done); -- cgit v1.2.3 From 897ece56e714a2cc64e6914cb89a362d7021b36e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Oct 2015 01:20:38 +0200 Subject: random32: add prandom_init_once helper for own rngs Add a prandom_init_once() facility that works on the rnd_state, so that users that are keeping their own state independent from prandom_u32() can initialize their taus113 per cpu states. The motivation here is similar to net_get_random_once(): initialize the state as late as possible in the hope that enough entropy has been collected for the seeding. prandom_init_once() makes use of the recently introduced prandom_seed_full_state() helper and is generic enough so that it could also be used on fast-paths due to the DO_ONCE(). Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/random.h | 6 ++++++ lib/random32.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index e651874df2c9..a75840c1aa71 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -7,6 +7,8 @@ #define _LINUX_RANDOM_H #include +#include + #include struct random_ready_callback { @@ -45,6 +47,10 @@ struct rnd_state { u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); +void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); + +#define prandom_init_once(pcpu_state) \ + DO_ONCE(prandom_seed_full_state, (pcpu_state)) /** * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) diff --git a/lib/random32.c b/lib/random32.c index 36c09fb3fec9..12111910ccd0 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -238,7 +238,7 @@ static void __init __prandom_start_seed_timer(void) add_timer(&seed_timer); } -static void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) +void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) { int i; -- cgit v1.2.3 From 3ad0040573b0c00f88488bc31958acd07a55ee2e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Oct 2015 01:20:39 +0200 Subject: bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs While recently arguing on a seccomp discussion that raw prandom_u32() access shouldn't be exposed to unprivileged user space, I forgot the fact that SKF_AD_RANDOM extension actually already does it for some time in cBPF via commit 4cd3675ebf74 ("filter: added BPF random opcode"). Since prandom_u32() is being used in a lot of critical networking code, let's be more conservative and split their states. Furthermore, consolidate eBPF and cBPF prandom handlers to use the new internal PRNG. For eBPF, bpf_get_prandom_u32() was only accessible for privileged users, but should that change one day, we also don't want to leak raw sequences through things like eBPF maps. One thought was also to have separate per-bpf_prog states, but due to ABI reasons this is not easily possible, i.e. 
the program code currently cannot access bpf_prog itself, and copying the rnd_state to/from the stack scratch space whenever a program uses the prng is not really worth the trouble and seems too hacky. If needed, taus113 could in such cases be implemented within eBPF using a map entry to keep the state space, or get_random_bytes() could become a second helper in cases where performance would not be critical. Both sides can trigger a one-time late init via prandom_init_once() on the shared state. Performance-wise, there should even be a tiny gain as bpf_user_rnd_u32() saves one function call. The PRNG needs to live inside the BPF core since kernels could have a NET-less config as well. Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Cc: Chema Gonzalez Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++++ kernel/bpf/core.c | 26 ++++++++++++++++++++++++++ kernel/bpf/helpers.c | 7 +------ kernel/bpf/syscall.c | 2 ++ net/core/filter.c | 9 ++------- 5 files changed, 35 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c915a6b54570..3697ad563899 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -200,4 +200,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +/* Shared helpers among cBPF and eBPF. */ +void bpf_user_rnd_init_once(void); +u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + #endif /* _LINUX_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c8855c2a7a48..80864712d2c4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -731,6 +731,32 @@ void bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_free); +/* RNG for unprivileged user space with separated state from prandom_u32(). */ +static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state); + +void bpf_user_rnd_init_once(void) +{ + prandom_init_once(&bpf_user_rnd_state); +} + +u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* Should someone ever have the rather unwise idea to use some + * of the registers passed into this function, then note that + * this function is called from native eBPF and classic-to-eBPF + * transformations. Register assignments from both sides are + * different, f.e. classic always sets fn(ctx, A, X) here. + */ + struct rnd_state *state; + u32 res; + + state = &get_cpu_var(bpf_user_rnd_state); + res = prandom_u32_state(state); + put_cpu_var(state); + + return res; +} + /* Weak definitions of helper functions in case we don't have bpf syscall. 
*/ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; const struct bpf_func_proto bpf_map_update_elem_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1447ec09421e..4504ca66118d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -93,13 +93,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = { .arg2_type = ARG_PTR_TO_MAP_KEY, }; -static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) -{ - return prandom_u32(); -} - const struct bpf_func_proto bpf_get_prandom_u32_proto = { - .func = bpf_get_prandom_u32, + .func = bpf_user_rnd_u32, .gpl_only = false, .ret_type = RET_INTEGER, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5f35f420c12f..c868cafbc00c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -404,6 +404,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog) if (insn->imm == BPF_FUNC_get_route_realm) prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); if (insn->imm == BPF_FUNC_tail_call) { /* mark bpf_tail_call as different opcode * to avoid conditional branch in diff --git a/net/core/filter.c b/net/core/filter.c index 8f4603c712cd..342e6c8fc415 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -149,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return raw_smp_processor_id(); } -/* note that this only generates 32-bit random numbers */ -static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) -{ - return prandom_u32(); -} - static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -313,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, *insn = BPF_EMIT_CALL(__get_raw_cpu_id); break; case SKF_AD_OFF + SKF_AD_RANDOM: - *insn = BPF_EMIT_CALL(__get_random_u32); + *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); + bpf_user_rnd_init_once(); break; } break; -- cgit v1.2.3 From edc1b01cd3b20a5fff049e98f82a2b0d24a34c89 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Oct 2015 10:53:49 -0400 Subject: SUNRPC: Move TCP receive data path into a workqueue context Stream protocols such as TCP can often build up a backlog of data to be read due to ordering. Combine this with the fact that some workloads such as NFS read()-intensive workloads need to receive a lot of data per RPC call, and it turns out that receiving the data from inside a softirq context can cause starvation. The following patch moves the TCP data receive into a workqueue context. We still end up calling tcp_read_sock(), but we do so from a process context, meaning that softirqs are enabled for most of the time. With this patch, I see a doubling of read bandwidth when running a multi-threaded iozone workload between a virtual client and server setup. 
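In outline, the receive path changes from "read the socket directly in the data_ready callback" to "queue work, read later in process context". A minimal sketch of that pattern follows, for illustration only (the xs_example_* names are hypothetical; the real diff below additionally serializes readers on recv_mutex and drives the RPC stream parser from tcp_read_sock()):

static void xs_example_data_ready(struct sock *sk)
{
	/* softirq context: do no real work here, just punt to rpciod */
	struct rpc_xprt *xprt = xprt_from_sock(sk);
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);

	queue_work(rpciod_workqueue, &transport->recv_worker);
}

static void xs_example_receive_workfn(struct work_struct *work)
{
	/* process context: softirqs stay enabled while the socket is drained */
	struct sock_xprt *transport =
		container_of(work, struct sock_xprt, recv_worker);

	xs_tcp_data_receive(transport);
}
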
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 2 ++ net/sunrpc/xprtsock.c | 51 +++++++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 357e44c1a46b..0ece4ba06f06 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -44,6 +44,8 @@ struct sock_xprt { */ unsigned long sock_state; struct delayed_work connect_worker; + struct work_struct recv_worker; + struct mutex recv_mutex; struct sockaddr_storage srcaddr; unsigned short srcport; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fa8d0c15c8cd..58dc90ccebb6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -823,6 +823,7 @@ static void xs_reset_transport(struct sock_xprt *transport) kernel_sock_shutdown(sock, SHUT_RDWR); + mutex_lock(&transport->recv_mutex); write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; @@ -833,6 +834,7 @@ static void xs_reset_transport(struct sock_xprt *transport) xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); + mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); sock_release(sock); @@ -886,6 +888,7 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&transport->connect_worker); xs_close(xprt); + cancel_work_sync(&transport->recv_worker); xs_xprt_free(xprt); module_put(THIS_MODULE); } @@ -1243,12 +1246,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); req = xprt_lookup_rqst(xprt, transport->tcp_xid); if (!req) { dprintk("RPC: XID %08x request not found!\n", ntohl(transport->tcp_xid)); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return -1; } @@ -1257,7 +1260,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return 0; } @@ -1277,10 +1280,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, struct rpc_rqst *req; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); req = xprt_lookup_bc_request(xprt, transport->tcp_xid); if (req == NULL) { - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); printk(KERN_WARNING "Callback slot table overflowed\n"); xprt_force_disconnect(xprt); return -1; @@ -1291,7 +1294,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_bc_request(req, transport->tcp_copied); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return 0; } @@ -1402,19 +1405,33 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) unsigned long total = 0; int read = 0; + mutex_lock(&transport->recv_mutex); sk = transport->inet; + if (sk == NULL) + goto out; /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ for (;;) { + lock_sock(sk); read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + release_sock(sk); if (read <= 0) break; total += read; rd_desc.count = 65536; } +out: + 
mutex_unlock(&transport->recv_mutex); trace_xs_tcp_data_ready(xprt, read, total); } +static void xs_tcp_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_tcp_data_receive(transport); +} + /** * xs_tcp_data_ready - "data ready" callback for TCP sockets * @sk: socket with data to read @@ -1437,8 +1454,8 @@ static void xs_tcp_data_ready(struct sock *sk) */ if (xprt->reestablish_timeout) xprt->reestablish_timeout = 0; + queue_work(rpciod_workqueue, &transport->recv_worker); - xs_tcp_data_receive(transport); out: read_unlock_bh(&sk->sk_callback_lock); } @@ -1840,6 +1857,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } #endif +static void xs_dummy_data_receive_workfn(struct work_struct *work) +{ +} + static void xs_dummy_setup_socket(struct work_struct *work) { } @@ -2664,6 +2685,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, } new = container_of(xprt, struct sock_xprt, xprt); + mutex_init(&new->recv_mutex); memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) @@ -2717,6 +2739,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->ops = &xs_local_ops; xprt->timeout = &xs_local_default_timeout; + INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn); INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); @@ -2788,21 +2811,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->timeout = &xs_udp_default_timeout; + INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn); + INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket); + switch (addr->sa_family) { case AF_INET: if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: @@ -2867,21 +2889,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->ops = &xs_tcp_ops; xprt->timeout = &xs_tcp_default_timeout; + INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); + switch (addr->sa_family) { case AF_INET: if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: -- cgit v1.2.3 From 516285ebe0efadc40b914a0e61a913a390604810 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 16:15:24 -0400 Subject: NFSv4: nfs4_async_handle_error should take a non-const nfs_server For symmetry with the synchronous handler, and so that we can potentially handle errors such as NFS4ERR_BADNAME. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- include/linux/nfs_xdr.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d044c7b11ff7..ae5cde621954 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -78,7 +78,7 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *, struct nfs4_state *, long *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); @@ -4982,7 +4982,7 @@ out: static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, struct nfs4_state *state, long *timeout) { struct nfs_client *clp = server->nfs_client; @@ -5559,7 +5559,7 @@ struct nfs4_unlockdata { struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; struct file_lock fl; - const struct nfs_server *server; + struct nfs_server *server; unsigned long timestamp; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..53f2acc68baf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -528,7 +528,7 @@ struct nfs4_delegreturnargs { struct nfs4_delegreturnres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - const struct nfs_server *server; + struct nfs_server *server; }; /* @@ -601,7 +601,7 @@ struct nfs_removeargs { struct nfs_removeres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs_fattr *dir_attr; struct nfs4_change_info cinfo; }; @@ -619,7 +619,7 @@ struct nfs_renameargs { struct nfs_renameres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs4_change_info old_cinfo; struct nfs_fattr *old_fattr; struct nfs4_change_info new_cinfo; -- cgit v1.2.3 From 38a1bdc9ff9f6c8cfad228eac5c1ce31ce038b25 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 19 Jun 2015 15:31:46 +0100 Subject: firmware: arm_scpi: Extend to support sensors ARM System Control Processor (SCP) provides an API to query and use the sensors available in the system. Extend the SCPI driver to support sensor messages. 
Signed-off-by: Punit Agrawal Acked-by: Sudeep Holla --- drivers/firmware/arm_scpi.c | 60 +++++++++++++++++++++++++++++++++++++++++++ include/linux/scpi_protocol.h | 17 ++++++++++++ 2 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index cb75c750ca54..6174db80c663 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -219,6 +219,21 @@ struct dvfs_set { u8 index; } __packed; +struct sensor_capabilities { + __le16 sensors; +} __packed; + +struct _scpi_sensor_info { + __le16 sensor_id; + u8 class; + u8 trigger_type; + char name[20]; +}; + +struct sensor_value { + __le32 val; +} __packed; + static struct scpi_drvinfo *scpi_info; static int scpi_linux_errmap[SCPI_ERR_MAX] = { @@ -481,6 +496,48 @@ static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain) return info; } +static int scpi_sensor_get_capability(u16 *sensors) +{ + struct sensor_capabilities cap_buf; + int ret; + + ret = scpi_send_message(SCPI_CMD_SENSOR_CAPABILITIES, NULL, 0, &cap_buf, + sizeof(cap_buf)); + if (!ret) + *sensors = le16_to_cpu(cap_buf.sensors); + + return ret; +} + +static int scpi_sensor_get_info(u16 sensor_id, struct scpi_sensor_info *info) +{ + __le16 id = cpu_to_le16(sensor_id); + struct _scpi_sensor_info _info; + int ret; + + ret = scpi_send_message(SCPI_CMD_SENSOR_INFO, &id, sizeof(id), + &_info, sizeof(_info)); + if (!ret) { + memcpy(info, &_info, sizeof(*info)); + info->sensor_id = le16_to_cpu(_info.sensor_id); + } + + return ret; +} + +int scpi_sensor_get_value(u16 sensor, u32 *val) +{ + struct sensor_value buf; + int ret; + + ret = scpi_send_message(SCPI_CMD_SENSOR_VALUE, &sensor, sizeof(sensor), + &buf, sizeof(buf)); + if (!ret) + *val = le32_to_cpu(buf.val); + + return ret; +} + static struct scpi_ops scpi_ops = { .get_version = scpi_get_version, .clk_get_range = scpi_clk_get_range, @@ -489,6 +546,9 @@ static struct scpi_ops scpi_ops = { .dvfs_get_idx = scpi_dvfs_get_idx, .dvfs_set_idx = scpi_dvfs_set_idx, .dvfs_get_info = scpi_dvfs_get_info, + .sensor_get_capability = scpi_sensor_get_capability, + .sensor_get_info = scpi_sensor_get_info, + .sensor_get_value = scpi_sensor_get_value, }; struct scpi_ops *get_scpi_ops(void) diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index e7169cd54e19..80af3cd35ae4 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -28,6 +28,20 @@ struct scpi_dvfs_info { struct scpi_opp *opps; }; +enum scpi_sensor_class { + TEMPERATURE, + VOLTAGE, + CURRENT, + POWER, +}; + +struct scpi_sensor_info { + u16 sensor_id; + u8 class; + u8 trigger_type; + char name[20]; +} __packed; + /** * struct scpi_ops - represents the various operations provided * by SCP through SCPI message protocol @@ -52,6 +66,9 @@ struct scpi_ops { int (*dvfs_get_idx)(u8); int (*dvfs_set_idx)(u8, u8); struct scpi_dvfs_info *(*dvfs_get_info)(u8); + int (*sensor_get_capability)(u16 *sensors); + int (*sensor_get_info)(u16 sensor_id, struct scpi_sensor_info *); + int (*sensor_get_value)(u16, u32 *); }; #if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) -- cgit v1.2.3 From 020446e01eebc9dbe7eda038e570ab9c7ab13586 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Oct 2015 17:13:58 +0300 Subject: net/mlx5_core: Prepare cmd interface to system errors handling In preparation to handling system errors at the mlx5_core level, change the interface of cmd_work_handler to accept a 64 bit argument for the vector. 
This allows to encode a flag that signifies when the handler is called as a result of a driver logic that wishes to terminate commands that the hardware may not be able to terminate. Such command completions are detected at the handler and proper return status is encoded. To be able to terminate page handler commands, we make sure to set the corresponding bit in the bitmask. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 20 ++++++++++++++++++-- include/linux/mlx5/driver.h | 6 +++++- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 84838c2f528f..c3e54b7e8780 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -254,6 +254,10 @@ static void dump_buf(void *buf, int size, int data_only, int offset) pr_debug("\n"); } +enum { + MLX5_DRIVER_STATUS_ABORTED = 0xfe, +}; + const char *mlx5_command_str(int command) { switch (command) { @@ -473,6 +477,7 @@ static void cmd_work_handler(struct work_struct *work) struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); struct mlx5_cmd_layout *lay; struct semaphore *sem; + unsigned long flags; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); @@ -485,6 +490,9 @@ static void cmd_work_handler(struct work_struct *work) } } else { ent->idx = cmd->max_reg_cmds; + spin_lock_irqsave(&cmd->alloc_lock, flags); + clear_bit(ent->idx, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); } ent->token = alloc_token(cmd); @@ -1081,7 +1089,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1092,7 +1100,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) s64 ds; struct mlx5_cmd_stats *stats; unsigned long flags; + unsigned long vector; + /* there can be at most 32 command queues */ + vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { struct semaphore *sem; @@ -1110,11 +1121,16 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) ent->ret = verify_signature(ent); else ent->ret = 0; - ent->status = ent->lay->status_own >> 1; + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->status = MLX5_DRIVER_STATUS_ABORTED; + else + ent->status = ent->lay->status_own >> 1; + mlx5_core_dbg(dev, "command completed. 
ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } free_ent(cmd, ent->idx); + if (ent->callback) { ds = ent->ts2 - ent->ts1; if (ent->op < ARRAY_SIZE(cmd->stats)) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8b6d6f2154a4..aa899559eec0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -731,7 +731,7 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); #endif void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar); @@ -865,4 +865,8 @@ static inline int mlx5_get_gid_table_len(u16 param) return 8 * (1 << param); } +enum { + MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, +}; + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From ac6ea6e81a80172612e0c9ef93720f371b198918 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Oct 2015 17:14:00 +0300 Subject: net/mlx5_core: Use private health thread for each device Use a single threaded work queue for each device in the system instead of using one thread for any device. This is required so we can concurrently process system error handling for all the devices that need that. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 63 +++++++++++------------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 37 +++++++------- include/linux/mlx5/driver.h | 7 +-- 3 files changed, 52 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8770968fff35..9b81e1ceb8de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -57,31 +57,16 @@ enum { MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10 }; -static DEFINE_SPINLOCK(health_lock); -static LIST_HEAD(health_list); -static struct work_struct health_work; - static void health_care(struct work_struct *work) { - struct mlx5_core_health *health, *n; + struct mlx5_core_health *health; struct mlx5_core_dev *dev; struct mlx5_priv *priv; - LIST_HEAD(tlist); - - spin_lock_irq(&health_lock); - list_splice_init(&health_list, &tlist); - - spin_unlock_irq(&health_lock); - list_for_each_entry_safe(health, n, &tlist, list) { - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - mlx5_core_warn(dev, "handling bad device here\n"); - /* nothing yet */ - spin_lock_irq(&health_lock); - list_del_init(&health->list); - spin_unlock_irq(&health_lock); - } + health = container_of(work, struct mlx5_core_health, work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + mlx5_core_warn(dev, "handling bad device here\n"); } static const char *hsynd_str(u8 synd) @@ -168,11 +153,7 @@ static void poll_health(unsigned long data) if (health->miss_counter == MAX_MISSES) { mlx5_core_err(dev, "device's health compromised\n"); print_health_info(dev); - spin_lock_irq(&health_lock); - list_add_tail(&health->list, 
&health_list); - spin_unlock_irq(&health_lock); - - queue_work(mlx5_core_wq, &health_work); + queue_work(health->wq, &health->work); } else { get_random_bytes(&next, sizeof(next)); next %= HZ; @@ -185,7 +166,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - INIT_LIST_HEAD(&health->list); init_timer(&health->timer); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -201,18 +181,33 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; del_timer_sync(&health->timer); - - spin_lock_irq(&health_lock); - if (!list_empty(&health->list)) - list_del_init(&health->list); - spin_unlock_irq(&health_lock); } -void mlx5_health_cleanup(void) +void mlx5_health_cleanup(struct mlx5_core_dev *dev) { + struct mlx5_core_health *health = &dev->priv.health; + + destroy_workqueue(health->wq); } -void __init mlx5_health_init(void) +int mlx5_health_init(struct mlx5_core_dev *dev) { - INIT_WORK(&health_work, health_care); + struct mlx5_core_health *health; + char *name; + + health = &dev->priv.health; + name = kmalloc(64, GFP_KERNEL); + if (!name) + return -ENOMEM; + + strcpy(name, "mlx5_health"); + strcat(name, dev_name(&dev->pdev->dev)); + health->wq = create_singlethread_workqueue(name); + kfree(name); + if (!health->wq) + return -ENOMEM; + + INIT_WORK(&health->work, health_care); + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7718f6ac6214..b6edc58766ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -62,7 +62,6 @@ static int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, int, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. 
Valid range 0 - 2"); -struct workqueue_struct *mlx5_core_wq; static LIST_HEAD(intf_list); static LIST_HEAD(dev_list); static DEFINE_MUTEX(intf_mutex); @@ -1046,6 +1045,7 @@ err_pagealloc_cleanup: static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { + int err; mlx5_unregister_device(dev); mlx5_cleanup_mr_table(dev); @@ -1060,9 +1060,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); mlx5_stop_health_poll(dev); - if (mlx5_cmd_teardown_hca(dev)) { + err = mlx5_cmd_teardown_hca(dev); + if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); - return 1; + goto out; } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); @@ -1070,11 +1071,12 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); - return 0; +out: + return err; } static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) + unsigned long param) { struct mlx5_priv *priv = &dev->priv; struct mlx5_device_context *dev_ctx; @@ -1129,14 +1131,22 @@ static int init_one(struct pci_dev *pdev, goto clean_dev; } + err = mlx5_health_init(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err); + goto close_pci; + } + err = mlx5_load_one(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); - goto close_pci; + goto clean_health; } return 0; +clean_health: + mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: @@ -1153,8 +1163,10 @@ static void remove_one(struct pci_dev *pdev) if (mlx5_unload_one(dev, priv)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); + mlx5_health_cleanup(dev); return; } + mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); kfree(dev); @@ -1184,16 +1196,10 @@ static int __init init(void) int err; mlx5_register_debugfs(); - mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq"); - if (!mlx5_core_wq) { - err = -ENOMEM; - goto err_debug; - } - mlx5_health_init(); err = pci_register_driver(&mlx5_core_driver); if (err) - goto err_health; + goto err_debug; #ifdef CONFIG_MLX5_CORE_EN mlx5e_init(); @@ -1201,9 +1207,6 @@ static int __init init(void) return 0; -err_health: - mlx5_health_cleanup(); - destroy_workqueue(mlx5_core_wq); err_debug: mlx5_unregister_debugfs(); return err; @@ -1215,8 +1218,6 @@ static void __exit cleanup(void) mlx5e_cleanup(); #endif pci_unregister_driver(&mlx5_core_driver); - mlx5_health_cleanup(); - destroy_workqueue(mlx5_core_wq); mlx5_unregister_debugfs(); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aa899559eec0..41a32873f608 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -391,9 +391,10 @@ struct mlx5_core_health { struct health_buffer __iomem *health; __be32 __iomem *health_counter; struct timer_list timer; - struct list_head list; u32 prev; int miss_counter; + struct workqueue_struct *wq; + struct work_struct work; }; struct mlx5_cq_table { @@ -676,8 +677,8 @@ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); -void mlx5_health_cleanup(void); -void __init mlx5_health_init(void); +void 
mlx5_health_cleanup(struct mlx5_core_dev *dev); +int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, -- cgit v1.2.3 From 61d03535e4be3a46c1e171a25458237e343195e3 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:54 +0800 Subject: net/netlink: lockdep_genl_is_held can be boolean This patch makes lockdep_genl_is_held return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/genetlink.h | 2 +- net/netlink/genetlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 09460d6d6682..a4c61cbce777 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -8,7 +8,7 @@ extern void genl_lock(void); extern void genl_unlock(void); #ifdef CONFIG_LOCKDEP -extern int lockdep_genl_is_held(void); +extern bool lockdep_genl_is_held(void); #endif /* for synchronisation between af_netlink and genetlink */ diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 75724a96aef2..bc0e504f33a6 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -39,7 +39,7 @@ void genl_unlock(void) EXPORT_SYMBOL(genl_unlock); #ifdef CONFIG_LOCKDEP -int lockdep_genl_is_held(void) +bool lockdep_genl_is_held(void) { return lockdep_is_held(&genl_mutex); } -- cgit v1.2.3 From 35498edc6481d588feadee7e76220884d5bbca48 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:55 +0800 Subject: net/ieee80211: ieee80211_is_* can be boolean This patch makes ieee80211_is_* return bool to improve readability due to these particular functions only using either one or zero as their return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. 
Miller --- include/linux/ieee80211.h | 76 +++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f79a02a69d26..dcfb2f43d316 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -121,7 +121,7 @@ #define IEEE80211_MAX_SN IEEE80211_SN_MASK #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) -static inline int ieee80211_sn_less(u16 sn1, u16 sn2) +static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) { return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } @@ -250,7 +250,7 @@ struct ieee80211_qos_hdr { * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_tods(__le16 fc) +static inline bool ieee80211_has_tods(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_TODS)) != 0; } @@ -259,7 +259,7 @@ static inline int ieee80211_has_tods(__le16 fc) * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_fromds(__le16 fc) +static inline bool ieee80211_has_fromds(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FROMDS)) != 0; } @@ -268,7 +268,7 @@ static inline int ieee80211_has_fromds(__le16 fc) * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_a4(__le16 fc) +static inline bool ieee80211_has_a4(__le16 fc) { __le16 tmp = cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); return (fc & tmp) == tmp; @@ -278,7 +278,7 @@ static inline int ieee80211_has_a4(__le16 fc) * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_morefrags(__le16 fc) +static inline bool ieee80211_has_morefrags(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREFRAGS)) != 0; } @@ -287,7 +287,7 @@ static inline int ieee80211_has_morefrags(__le16 fc) * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_retry(__le16 fc) +static inline bool ieee80211_has_retry(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_RETRY)) != 0; } @@ -296,7 +296,7 @@ static inline int ieee80211_has_retry(__le16 fc) * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_pm(__le16 fc) +static inline bool ieee80211_has_pm(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PM)) != 0; } @@ -305,7 +305,7 @@ static inline int ieee80211_has_pm(__le16 fc) * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_moredata(__le16 fc) +static inline bool ieee80211_has_moredata(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREDATA)) != 0; } @@ -314,7 +314,7 @@ static inline int ieee80211_has_moredata(__le16 fc) * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_protected(__le16 fc) +static inline bool ieee80211_has_protected(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PROTECTED)) != 0; } @@ -323,7 +323,7 @@ static inline int ieee80211_has_protected(__le16 
fc) * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_order(__le16 fc) +static inline bool ieee80211_has_order(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_ORDER)) != 0; } @@ -332,7 +332,7 @@ static inline int ieee80211_has_order(__le16 fc) * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_mgmt(__le16 fc) +static inline bool ieee80211_is_mgmt(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT); @@ -342,7 +342,7 @@ static inline int ieee80211_is_mgmt(__le16 fc) * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_ctl(__le16 fc) +static inline bool ieee80211_is_ctl(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL); @@ -352,7 +352,7 @@ static inline int ieee80211_is_ctl(__le16 fc) * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data(__le16 fc) +static inline bool ieee80211_is_data(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA); @@ -362,7 +362,7 @@ static inline int ieee80211_is_data(__le16 fc) * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data_qos(__le16 fc) +static inline bool ieee80211_is_data_qos(__le16 fc) { /* * mask with QOS_DATA rather than IEEE80211_FCTL_STYPE as we just need @@ -376,7 +376,7 @@ static inline int ieee80211_is_data_qos(__le16 fc) * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data_present(__le16 fc) +static inline bool ieee80211_is_data_present(__le16 fc) { /* * mask with 0x40 and test that that bit is clear to only return true @@ -390,7 +390,7 @@ static inline int ieee80211_is_data_present(__le16 fc) * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_assoc_req(__le16 fc) +static inline bool ieee80211_is_assoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_REQ); @@ -400,7 +400,7 @@ static inline int ieee80211_is_assoc_req(__le16 fc) * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_assoc_resp(__le16 fc) +static inline bool ieee80211_is_assoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_RESP); @@ -410,7 +410,7 @@ static inline int ieee80211_is_assoc_resp(__le16 fc) * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_reassoc_req(__le16 fc) +static inline bool ieee80211_is_reassoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT 
| IEEE80211_STYPE_REASSOC_REQ); @@ -420,7 +420,7 @@ static inline int ieee80211_is_reassoc_req(__le16 fc) * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_reassoc_resp(__le16 fc) +static inline bool ieee80211_is_reassoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_RESP); @@ -430,7 +430,7 @@ static inline int ieee80211_is_reassoc_resp(__le16 fc) * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_probe_req(__le16 fc) +static inline bool ieee80211_is_probe_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); @@ -440,7 +440,7 @@ static inline int ieee80211_is_probe_req(__le16 fc) * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_probe_resp(__le16 fc) +static inline bool ieee80211_is_probe_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); @@ -450,7 +450,7 @@ static inline int ieee80211_is_probe_resp(__le16 fc) * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_beacon(__le16 fc) +static inline bool ieee80211_is_beacon(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); @@ -460,7 +460,7 @@ static inline int ieee80211_is_beacon(__le16 fc) * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_atim(__le16 fc) +static inline bool ieee80211_is_atim(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ATIM); @@ -470,7 +470,7 @@ static inline int ieee80211_is_atim(__le16 fc) * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_disassoc(__le16 fc) +static inline bool ieee80211_is_disassoc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DISASSOC); @@ -480,7 +480,7 @@ static inline int ieee80211_is_disassoc(__le16 fc) * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_auth(__le16 fc) +static inline bool ieee80211_is_auth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); @@ -490,7 +490,7 @@ static inline int ieee80211_is_auth(__le16 fc) * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_deauth(__le16 fc) +static inline bool ieee80211_is_deauth(__le16 fc) { return (fc & 
cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DEAUTH); @@ -500,7 +500,7 @@ static inline int ieee80211_is_deauth(__le16 fc) * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_action(__le16 fc) +static inline bool ieee80211_is_action(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -510,7 +510,7 @@ static inline int ieee80211_is_action(__le16 fc) * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_back_req(__le16 fc) +static inline bool ieee80211_is_back_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); @@ -520,7 +520,7 @@ static inline int ieee80211_is_back_req(__le16 fc) * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_back(__le16 fc) +static inline bool ieee80211_is_back(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK); @@ -530,7 +530,7 @@ static inline int ieee80211_is_back(__le16 fc) * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_pspoll(__le16 fc) +static inline bool ieee80211_is_pspoll(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL); @@ -540,7 +540,7 @@ static inline int ieee80211_is_pspoll(__le16 fc) * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_rts(__le16 fc) +static inline bool ieee80211_is_rts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); @@ -550,7 +550,7 @@ static inline int ieee80211_is_rts(__le16 fc) * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cts(__le16 fc) +static inline bool ieee80211_is_cts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS); @@ -560,7 +560,7 @@ static inline int ieee80211_is_cts(__le16 fc) * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_ack(__le16 fc) +static inline bool ieee80211_is_ack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_ACK); @@ -570,7 +570,7 @@ static inline int ieee80211_is_ack(__le16 fc) * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cfend(__le16 fc) +static inline bool ieee80211_is_cfend(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == 
cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFEND); @@ -580,7 +580,7 @@ static inline int ieee80211_is_cfend(__le16 fc) * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cfendack(__le16 fc) +static inline bool ieee80211_is_cfendack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFENDACK); @@ -590,7 +590,7 @@ static inline int ieee80211_is_cfendack(__le16 fc) * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_nullfunc(__le16 fc) +static inline bool ieee80211_is_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC); @@ -600,7 +600,7 @@ static inline int ieee80211_is_nullfunc(__le16 fc) * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_qos_nullfunc(__le16 fc) +static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); @@ -624,7 +624,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(__le16 fc) * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder */ -static inline int ieee80211_is_first_frag(__le16 seq_ctrl) +static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) { return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; } -- cgit v1.2.3 From 875e08294911b3cb8c60416d64d990809421de29 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:56 +0800 Subject: net/nfnetlink: lockdep_nfnl_is_held can be boolean This patch makes lockdep_nfnl_is_held return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink.h | 6 +++--- net/netfilter/nfnetlink.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index e955d4730625..249d1bb01e03 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -45,11 +45,11 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING -int lockdep_nfnl_is_held(__u8 subsys_id); +bool lockdep_nfnl_is_held(__u8 subsys_id); #else -static inline int lockdep_nfnl_is_held(__u8 subsys_id) +static inline bool lockdep_nfnl_is_held(__u8 subsys_id) { - return 1; + return true; } #endif /* CONFIG_PROVE_LOCKING */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 70277b11f742..f1d9e887f5b1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -64,7 +64,7 @@ void nfnl_unlock(__u8 subsys_id) EXPORT_SYMBOL_GPL(nfnl_unlock); #ifdef CONFIG_PROVE_LOCKING -int lockdep_nfnl_is_held(u8 subsys_id) +bool lockdep_nfnl_is_held(u8 subsys_id) { return lockdep_is_held(&table[subsys_id].mutex); } -- cgit v1.2.3 From d6fbaea5f635216c9861587c4e658086cf3b1b6b Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:57 +0800 Subject: net/can: can_dropped_invalid_skb can be boolean This patch makes can_dropped_invalid_skb return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- include/linux/can/dev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 56dcadd83716..735f9f8c4e43 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -78,7 +78,7 @@ struct can_priv { #define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC)) /* Drop a given socketbuffer if it does not contain a valid CAN frame. */ -static inline int can_dropped_invalid_skb(struct net_device *dev, +static inline bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) { const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; @@ -94,12 +94,12 @@ static inline int can_dropped_invalid_skb(struct net_device *dev, } else goto inval_skb; - return 0; + return false; inval_skb: kfree_skb(skb); dev->stats.tx_dropped++; - return 1; + return true; } static inline bool can_is_canfd_skb(const struct sk_buff *skb) -- cgit v1.2.3 From 0c6119d99bf5df9403a688d267537284e9cc8bcb Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:58 +0800 Subject: net/dccp: dccp_list_has_service can be boolean This patch makes dccp_list_has_service return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 221025423e6c..61d042bbbf60 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -202,16 +202,16 @@ struct dccp_service_list { #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) #define DCCP_SERVICE_CODE_IS_ABSENT 0 -static inline int dccp_list_has_service(const struct dccp_service_list *sl, +static inline bool dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) { if (likely(sl != NULL)) { u32 i = sl->dccpsl_nr; while (i--) if (sl->dccpsl_list[i] == service) - return 1; + return true; } - return 0; + return false; } struct dccp_ackvec; -- cgit v1.2.3 From c3225164cf60ccecce2459dcb5813dd798233f2d Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:00 +0800 Subject: net/inetdevice: inet_ifa_match can be boolean This patch makes inet_ifa_match return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a4328cea376a..3b0999e0260f 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -171,7 +171,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); -static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) +static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } -- cgit v1.2.3 From f06cc7b284f3dfb2c5decbf9fde711b50a530050 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:01 +0800 Subject: net/inetdevice: bad_mask can be boolean This patch makes bad_mask return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3b0999e0260f..ee971f335a8b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -180,15 +180,15 @@ static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) * Check if a mask is acceptable. */ -static __inline__ int bad_mask(__be32 mask, __be32 addr) +static __inline__ bool bad_mask(__be32 mask, __be32 addr) { __u32 hmask; if (addr & (mask = ~mask)) - return 1; + return true; hmask = ntohl(mask); if (hmask & (hmask+1)) - return 1; - return 0; + return true; + return false; } #define for_primary_ifa(in_dev) { struct in_ifaddr *ifa; \ -- cgit v1.2.3 From 0cbf334376d5e82d7a2f5cd234ca4f5d0843f3ea Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:02 +0800 Subject: net/core: lockdep_rtnl_is_held can be boolean This patch makes lockdep_rtnl_is_held return bool due to this particular function only using either one or zero as its return value. In another patch lockdep_is_held is also made return bool. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 6 +++--- net/core/rtnetlink.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 39adaa9529eb..4be5048b1fbe 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -33,11 +33,11 @@ extern wait_queue_head_t netdev_unregistering_wq; extern struct mutex net_mutex; #ifdef CONFIG_PROVE_LOCKING -extern int lockdep_rtnl_is_held(void); +extern bool lockdep_rtnl_is_held(void); #else -static inline int lockdep_rtnl_is_held(void) +static inline bool lockdep_rtnl_is_held(void) { - return 1; + return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b2258a36d894..24775953fa68 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -96,7 +96,7 @@ int rtnl_is_locked(void) EXPORT_SYMBOL(rtnl_is_locked); #ifdef CONFIG_PROVE_LOCKING -int lockdep_rtnl_is_held(void) +bool lockdep_rtnl_is_held(void) { return lockdep_is_held(&rtnl_mutex); } -- cgit v1.2.3 From 10abc7df9277a81971924a6c03f74e86d799daf1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Oct 2015 15:50:11 +0100 Subject: irqdomain: Add an accessor for the of_node field As we're about to remove the of_node field from the irqdomain structure, introduce an accessor for it. Subsequent patches will take care of the actual repainting. Signed-off-by: Marc Zyngier Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1444402211-1141-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d3ca79236fb0..f644fdb06dd6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -161,6 +161,11 @@ enum { IRQ_DOMAIN_FLAG_NONCORE = (1 << 16), }; +static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) +{ + return d->of_node; +} + #ifdef CONFIG_IRQ_DOMAIN struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, irq_hw_number_t hwirq_max, int direct_max, -- cgit v1.2.3 From 188c3568f814fea965947ed24739987ba9c5a87e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 1 Oct 2015 17:14:10 -0600 Subject: NVMe: Reference count open namespaces Dynamic namespace attachment means the namespace may be removed at any time, so the namespace reference count can not be tied to the device reference count. This fixes a NULL dereference if an opened namespace is detached from a controller. 
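For background, the fix follows the standard two-stage kref lifetime pattern: openers take a reference with kref_get_unless_zero(), and the release callback frees the object only once the last reference is dropped. A minimal sketch of that pattern, using a hypothetical struct foo rather than the driver's own nvme_ns:

struct foo {
	struct kref kref;
	/* ... payload ... */
};

static void foo_release(struct kref *kref)
{
	struct foo *f = container_of(kref, struct foo, kref);

	kfree(f);
}

static int foo_open(struct foo *f)
{
	/* Fails once the final put has run, so no use-after-free. */
	if (!kref_get_unless_zero(&f->kref))
		return -ENXIO;
	return 0;
}

static void foo_close(struct foo *f)
{
	kref_put(&f->kref, foo_release);
}

Object creation pairs this with kref_init(), exactly as the patch does in nvme_alloc_ns() below.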
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 29 ++++++++++++++++++++--------- include/linux/nvme.h | 1 + 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 6f04771f1019..b02ae3d759d7 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1943,6 +1943,18 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, #define nvme_compat_ioctl NULL #endif +static void nvme_free_ns(struct kref *kref) +{ + struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + put_disk(ns->disk); + kfree(ns); +} + static int nvme_open(struct block_device *bdev, fmode_t mode) { int ret = 0; @@ -1952,21 +1964,25 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) ns = bdev->bd_disk->private_data; if (!ns) ret = -ENXIO; - else if (!kref_get_unless_zero(&ns->dev->kref)) + else if (!kref_get_unless_zero(&ns->kref)) ret = -ENXIO; + else if (!kref_get_unless_zero(&ns->dev->kref)) { + kref_put(&ns->kref, nvme_free_ns); + ret = -ENXIO; + } spin_unlock(&dev_list_lock); return ret; } static void nvme_free_dev(struct kref *kref); - static void nvme_release(struct gendisk *disk, fmode_t mode) { struct nvme_ns *ns = disk->private_data; struct nvme_dev *dev = ns->dev; kref_put(&dev->kref, nvme_free_dev); + kref_put(&ns->kref, nvme_free_ns); } static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) @@ -2126,6 +2142,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (!disk) goto out_free_queue; + kref_init(&ns->kref); ns->ns_id = nsid; ns->disk = disk; ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ @@ -2360,13 +2377,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) static void nvme_free_namespace(struct nvme_ns *ns) { list_del(&ns->list); - - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - put_disk(ns->disk); - kfree(ns); + kref_put(&ns->kref, nvme_free_ns); } static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b5812c395351..992b9c118678 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -135,6 +135,7 @@ struct nvme_ns { struct nvme_dev *dev; struct request_queue *queue; struct gendisk *disk; + struct kref kref; unsigned ns_id; int lba_shift; -- cgit v1.2.3 From 0a7385ad69f0f210c5cfbfd334b42423a6e05e5a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 2 Oct 2015 10:37:29 -0600 Subject: NVMe: Simplify device resume on io queue failure Releasing IO queues and disks was done in a work queue outside the controller resume context to delete namespaces if the controller failed after a resume from suspend. This is unnecessary since we can resume a device asynchronously. This patch makes resume use probe_work so it can directly remove namespaces if the device is manageable but not IO capable. Since the deleting disks was the only reason we had the convoluted "reset_workfn", this patch removes that unnecessary indirection. 
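The removed indirection is easy to see in miniature. A hedged sketch with hypothetical names follows: the "before" shape routes every work item through a trampoline that chases a function pointer chosen at queue time, while the "after" shape binds a fixed handler once at INIT_WORK() time.

/* Before: handler selected at queue time through a function pointer. */
struct mydev {
	work_func_t workfn;
	struct work_struct work;
};

static void mydev_work_trampoline(struct work_struct *work)
{
	struct mydev *d = container_of(work, struct mydev, work);

	d->workfn(work);	/* the extra hop being deleted */
}

/*
 * After: the handler is fixed when the work item is initialized,
 * INIT_WORK(&d->work, mydev_reset_work), then queue_work() as needed.
 */
static void mydev_reset_work(struct work_struct *work)
{
	struct mydev *d = container_of(work, struct mydev, work);

	/* ... reset the device ... */
}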
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 34 ++++++---------------------------- include/linux/nvme.h | 1 - 2 files changed, 6 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 904b54fcbbcd..bf35846558c8 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1285,7 +1285,6 @@ static void nvme_abort_req(struct request *req) list_del_init(&dev->node); dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); - dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); out: spin_unlock_irqrestore(&dev_list_lock, flags); @@ -2089,7 +2088,6 @@ static int nvme_kthread(void *data) dev_warn(dev->dev, "Failed status: %x, reset controller\n", readl(&dev->bar->csts)); - dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); continue; } @@ -3025,14 +3023,6 @@ static int nvme_remove_dead_ctrl(void *arg) return 0; } -static void nvme_remove_disks(struct work_struct *ws) -{ - struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); - - nvme_free_queues(dev, 1); - nvme_dev_remove(dev); -} - static int nvme_dev_resume(struct nvme_dev *dev) { int ret; @@ -3041,10 +3031,9 @@ static int nvme_dev_resume(struct nvme_dev *dev) if (ret) return ret; if (dev->online_queues < 2) { - spin_lock(&dev_list_lock); - dev->reset_workfn = nvme_remove_disks; - queue_work(nvme_workq, &dev->reset_work); - spin_unlock(&dev_list_lock); + dev_warn(dev->dev, "IO queues not created\n"); + nvme_free_queues(dev, 1); + nvme_dev_remove(dev); } else { nvme_unfreeze_queues(dev); nvme_dev_add(dev); @@ -3091,12 +3080,6 @@ static void nvme_reset_failed_dev(struct work_struct *ws) nvme_dev_reset(dev); } -static void nvme_reset_workfn(struct work_struct *work) -{ - struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); - dev->reset_workfn(work); -} - static int nvme_reset(struct nvme_dev *dev) { int ret = -EBUSY; @@ -3106,7 +3089,6 @@ static int nvme_reset(struct nvme_dev *dev) spin_lock(&dev_list_lock); if (!work_pending(&dev->reset_work)) { - dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); ret = 0; } @@ -3159,8 +3141,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto free; INIT_LIST_HEAD(&dev->namespaces); - dev->reset_workfn = nvme_reset_failed_dev; - INIT_WORK(&dev->reset_work, nvme_reset_workfn); + INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -3223,7 +3204,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) if (prepare) nvme_dev_shutdown(dev); else - nvme_dev_resume(dev); + schedule_work(&dev->probe_work); } static void nvme_shutdown(struct pci_dev *pdev) @@ -3277,10 +3258,7 @@ static int nvme_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { - ndev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &ndev->reset_work); - } + schedule_work(&ndev->probe_work); return 0; } #endif diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 992b9c118678..7725b4c8b718 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -104,7 +104,6 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct 
device *device; - work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; struct work_struct scan_work; -- cgit v1.2.3 From f11bb3e244c4b14e2d0a3b9d7e41895752997170 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:46:41 +0200 Subject: nvme: add a local nvme.h header Add a new drivers/block/nvme.h which contains all the driver internal interface. Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 3 +- drivers/block/nvme-scsi.c | 2 +- drivers/block/nvme.h | 133 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 114 --------------------------------------- 4 files changed, 136 insertions(+), 116 deletions(-) create mode 100644 drivers/block/nvme.h (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 01a6d1b2d7e5..a20f66a44b96 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -12,7 +12,6 @@ * more details. */ -#include #include #include #include @@ -43,6 +42,8 @@ #include #include +#include "nvme.h" + #define NVME_MINORS (1U << MINORBITS) #define NVME_Q_DEPTH 1024 #define NVME_AQ_DEPTH 256 diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index e5a63f06fb0f..c3d8d3887a31 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -17,7 +17,6 @@ * each command is translated. */ -#include #include #include #include @@ -45,6 +44,7 @@ #include #include +#include "nvme.h" static int sg_version_num = 30534; /* 2 digits for each component */ diff --git a/drivers/block/nvme.h b/drivers/block/nvme.h new file mode 100644 index 000000000000..c1f41bf3c0f2 --- /dev/null +++ b/drivers/block/nvme.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include + +extern unsigned char nvme_io_timeout; +#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) + +/* + * Represents an NVM Express device. Each nvme_dev is a PCI function. 
+ */ +struct nvme_dev { + struct list_head node; + struct nvme_queue **queues; + struct request_queue *admin_q; + struct blk_mq_tag_set tagset; + struct blk_mq_tag_set admin_tagset; + u32 __iomem *dbs; + struct device *dev; + struct dma_pool *prp_page_pool; + struct dma_pool *prp_small_pool; + int instance; + unsigned queue_count; + unsigned online_queues; + unsigned max_qid; + int q_depth; + u32 db_stride; + u32 ctrl_config; + struct msix_entry *entry; + struct nvme_bar __iomem *bar; + struct list_head namespaces; + struct kref kref; + struct device *device; + struct work_struct reset_work; + struct work_struct probe_work; + struct work_struct scan_work; + char name[12]; + char serial[20]; + char model[40]; + char firmware_rev[8]; + bool subsystem; + u32 max_hw_sectors; + u32 stripe_size; + u32 page_size; + void __iomem *cmb; + dma_addr_t cmb_dma_addr; + u64 cmb_size; + u32 cmbsz; + u16 oncs; + u16 abort_limit; + u8 event_limit; + u8 vwc; +}; + +/* + * An NVM Express namespace is equivalent to a SCSI LUN + */ +struct nvme_ns { + struct list_head list; + + struct nvme_dev *dev; + struct request_queue *queue; + struct gendisk *disk; + struct kref kref; + + unsigned ns_id; + int lba_shift; + u16 ms; + bool ext; + u8 pi_type; + u64 mode_select_num_blocks; + u32 mode_select_block_len; +}; + +/* + * The nvme_iod describes the data in an I/O, including the list of PRP + * entries. You can't see it in this data structure because C doesn't let + * me express that. Use nvme_alloc_iod to ensure there's enough space + * allocated to store the PRP list. + */ +struct nvme_iod { + unsigned long private; /* For the use of the submitter of the I/O */ + int npages; /* In the PRP list. 0 means small pool in use */ + int offset; /* Of PRP list */ + int nents; /* Used in scatterlist */ + int length; /* Of data, in bytes */ + dma_addr_t first_dma; + struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ + struct scatterlist sg[0]; +}; + +static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) +{ + return (sector >> (ns->lba_shift - 9)); +} + +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result); +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result); + +struct sg_io_hdr; + +int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); +int nvme_sg_get_version_num(int __user *ip); + +#endif /* _NVME_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7725b4c8b718..364cb9adbbbc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,9 +16,6 @@ #define _LINUX_NVME_H #include -#include -#include -#include struct nvme_bar { __u64 cap; /* Controller Capabilities */ @@ -76,115 +73,4 @@ enum { NVME_CSTS_SHST_MASK = 3 << 2, }; -extern unsigned char nvme_io_timeout; -#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) - -/* - * Represents an NVM Express device. 
Each nvme_dev is a PCI function. - */ -struct nvme_dev { - struct list_head node; - struct nvme_queue **queues; - struct request_queue *admin_q; - struct blk_mq_tag_set tagset; - struct blk_mq_tag_set admin_tagset; - u32 __iomem *dbs; - struct device *dev; - struct dma_pool *prp_page_pool; - struct dma_pool *prp_small_pool; - int instance; - unsigned queue_count; - unsigned online_queues; - unsigned max_qid; - int q_depth; - u32 db_stride; - u32 ctrl_config; - struct msix_entry *entry; - struct nvme_bar __iomem *bar; - struct list_head namespaces; - struct kref kref; - struct device *device; - struct work_struct reset_work; - struct work_struct probe_work; - struct work_struct scan_work; - char name[12]; - char serial[20]; - char model[40]; - char firmware_rev[8]; - bool subsystem; - u32 max_hw_sectors; - u32 stripe_size; - u32 page_size; - void __iomem *cmb; - dma_addr_t cmb_dma_addr; - u64 cmb_size; - u32 cmbsz; - u16 oncs; - u16 abort_limit; - u8 event_limit; - u8 vwc; -}; - -/* - * An NVM Express namespace is equivalent to a SCSI LUN - */ -struct nvme_ns { - struct list_head list; - - struct nvme_dev *dev; - struct request_queue *queue; - struct gendisk *disk; - struct kref kref; - - unsigned ns_id; - int lba_shift; - u16 ms; - bool ext; - u8 pi_type; - u64 mode_select_num_blocks; - u32 mode_select_block_len; -}; - -/* - * The nvme_iod describes the data in an I/O, including the list of PRP - * entries. You can't see it in this data structure because C doesn't let - * me express that. Use nvme_alloc_iod to ensure there's enough space - * allocated to store the PRP list. - */ -struct nvme_iod { - unsigned long private; /* For the use of the submitter of the I/O */ - int npages; /* In the PRP list. 0 means small pool in use */ - int offset; /* Of PRP list */ - int nents; /* Used in scatterlist */ - int length; /* Of data, in bytes */ - dma_addr_t first_dma; - struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ - struct scatterlist sg[0]; -}; - -static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) -{ - return (sector >> (ns->lba_shift - 9)); -} - -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buf, unsigned bufflen); -int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buffer, void __user *ubuffer, unsigned bufflen, - u32 *result, unsigned timeout); -int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); -int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, - struct nvme_id_ns **id); -int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); -int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result); -int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result); - -struct sg_io_hdr; - -int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); -int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); -int nvme_sg_get_version_num(int __user *ip); - #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From 9d99a8dda154f38307d43d9c9aa504bd3703d596 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 15:25:49 +0200 Subject: nvme: move hardware structures out of the uapi version of nvme.h Currently all NVMe command and completion structures are exposed to userspace through the uapi version of nvme.h. They are not an ABI between the kernel and userspace, and will change in C-incompatible way for future versions of the spec. 
Move them to the kernel version of the file and rename the uapi header to nvme_ioctl.h so that userspace can easily detect the presence of the new clean header. Nvme-cli already carries a local copy of the header, so it won't be affected by this move. Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 1 + include/linux/nvme.h | 526 ++++++++++++++++++++++++++++++++++- include/uapi/linux/nvme.h | 589 ---------------------------------------- include/uapi/linux/nvme_ioctl.h | 65 +++++ 4 files changed, 590 insertions(+), 591 deletions(-) delete mode 100644 include/uapi/linux/nvme.h create mode 100644 include/uapi/linux/nvme_ioctl.h (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index a20f66a44b96..a526696d684d 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -42,6 +42,7 @@ #include #include +#include #include "nvme.h" #define NVME_MINORS (1U << MINORBITS) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 364cb9adbbbc..91a805437876 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -15,8 +15,6 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H -#include - struct nvme_bar { __u64 cap; /* Controller Capabilities */ __u32 vs; /* Version */ @@ -73,4 +71,528 @@ enum { NVME_CSTS_SHST_MASK = 3 << 2, }; +struct nvme_id_power_state { + __le16 max_power; /* centiwatts */ + __u8 rsvd2; + __u8 flags; + __le32 entry_lat; /* microseconds */ + __le32 exit_lat; /* microseconds */ + __u8 read_tput; + __u8 read_lat; + __u8 write_tput; + __u8 write_lat; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; +}; + +enum { + NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, + NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, +}; + +struct nvme_id_ctrl { + __le16 vid; + __le16 ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + __u8 rab; + __u8 ieee[3]; + __u8 mic; + __u8 mdts; + __u16 cntlid; + __u32 ver; + __u8 rsvd84[172]; + __le16 oacs; + __u8 acl; + __u8 aerl; + __u8 frmw; + __u8 lpa; + __u8 elpe; + __u8 npss; + __u8 avscc; + __u8 apsta; + __le16 wctemp; + __le16 cctemp; + __u8 rsvd270[242]; + __u8 sqes; + __u8 cqes; + __u8 rsvd514[2]; + __le32 nn; + __le16 oncs; + __le16 fuses; + __u8 fna; + __u8 vwc; + __le16 awun; + __le16 awupf; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; + struct nvme_id_power_state psd[32]; + __u8 vs[1024]; +}; + +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, +}; + +struct nvme_lbaf { + __le16 ms; + __u8 ds; + __u8 rp; +}; + +struct nvme_id_ns { + __le64 nsze; + __le64 ncap; + __le64 nuse; + __u8 nsfeat; + __u8 nlbaf; + __u8 flbas; + __u8 mc; + __u8 dpc; + __u8 dps; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __le16 nabsn; + __le16 nabo; + __le16 nabspf; + __u16 rsvd46; + __le64 nvmcap[2]; + __u8 rsvd64[40]; + __u8 nguid[16]; + __u8 eui64[8]; + struct nvme_lbaf lbaf[16]; + __u8 rsvd192[192]; + __u8 vs[3712]; +}; + +enum { + NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_META_EXT = 0x10, + NVME_LBAF_RP_BEST = 0, + NVME_LBAF_RP_BETTER = 1, + NVME_LBAF_RP_GOOD = 2, + NVME_LBAF_RP_DEGRADED = 3, + NVME_NS_DPC_PI_LAST = 1 << 4, + NVME_NS_DPC_PI_FIRST = 1 << 3, + NVME_NS_DPC_PI_TYPE3 = 1 << 2, + NVME_NS_DPC_PI_TYPE2 = 1 << 1, + 
NVME_NS_DPC_PI_TYPE1 = 1 << 0, + NVME_NS_DPS_PI_FIRST = 1 << 3, + NVME_NS_DPS_PI_MASK = 0x7, + NVME_NS_DPS_PI_TYPE1 = 1, + NVME_NS_DPS_PI_TYPE2 = 2, + NVME_NS_DPS_PI_TYPE3 = 3, +}; + +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + +struct nvme_lba_range_type { + __u8 type; + __u8 attributes; + __u8 rsvd2[14]; + __u64 slba; + __u64 nlb; + __u8 guid[16]; + __u8 rsvd48[16]; +}; + +enum { + NVME_LBART_TYPE_FS = 0x01, + NVME_LBART_TYPE_RAID = 0x02, + NVME_LBART_TYPE_CACHE = 0x03, + NVME_LBART_TYPE_SWAP = 0x04, + + NVME_LBART_ATTRIB_TEMP = 1 << 0, + NVME_LBART_ATTRIB_HIDE = 1 << 1, +}; + +struct nvme_reservation_status { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[13]; + struct { + __le16 cntlid; + __u8 rcsts; + __u8 resv3[5]; + __le64 hostid; + __le64 rkey; + } regctl_ds[]; +}; + +/* I/O commands */ + +enum nvme_opcode { + nvme_cmd_flush = 0x00, + nvme_cmd_write = 0x01, + nvme_cmd_read = 0x02, + nvme_cmd_write_uncor = 0x04, + nvme_cmd_compare = 0x05, + nvme_cmd_write_zeroes = 0x08, + nvme_cmd_dsm = 0x09, + nvme_cmd_resv_register = 0x0d, + nvme_cmd_resv_report = 0x0e, + nvme_cmd_resv_acquire = 0x11, + nvme_cmd_resv_release = 0x15, +}; + +struct nvme_common_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le32 cdw2[2]; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le32 cdw10[6]; +}; + +struct nvme_rw_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + NVME_RW_PRINFO_PRACT = 1 << 13, +}; + +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + +/* Admin commands */ + +enum nvme_admin_opcode { + nvme_admin_delete_sq = 0x00, + 
nvme_admin_create_sq = 0x01, + nvme_admin_get_log_page = 0x02, + nvme_admin_delete_cq = 0x04, + nvme_admin_create_cq = 0x05, + nvme_admin_identify = 0x06, + nvme_admin_abort_cmd = 0x08, + nvme_admin_set_features = 0x09, + nvme_admin_get_features = 0x0a, + nvme_admin_async_event = 0x0c, + nvme_admin_activate_fw = 0x10, + nvme_admin_download_fw = 0x11, + nvme_admin_format_nvm = 0x80, + nvme_admin_security_send = 0x81, + nvme_admin_security_recv = 0x82, +}; + +enum { + NVME_QUEUE_PHYS_CONTIG = (1 << 0), + NVME_CQ_IRQ_ENABLED = (1 << 1), + NVME_SQ_PRIO_URGENT = (0 << 1), + NVME_SQ_PRIO_HIGH = (1 << 1), + NVME_SQ_PRIO_MEDIUM = (2 << 1), + NVME_SQ_PRIO_LOW = (3 << 1), + NVME_FEAT_ARBITRATION = 0x01, + NVME_FEAT_POWER_MGMT = 0x02, + NVME_FEAT_LBA_RANGE = 0x03, + NVME_FEAT_TEMP_THRESH = 0x04, + NVME_FEAT_ERR_RECOVERY = 0x05, + NVME_FEAT_VOLATILE_WC = 0x06, + NVME_FEAT_NUM_QUEUES = 0x07, + NVME_FEAT_IRQ_COALESCE = 0x08, + NVME_FEAT_IRQ_CONFIG = 0x09, + NVME_FEAT_WRITE_ATOMIC = 0x0a, + NVME_FEAT_ASYNC_EVENT = 0x0b, + NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_SW_PROGRESS = 0x80, + NVME_FEAT_HOST_ID = 0x81, + NVME_FEAT_RESV_MASK = 0x82, + NVME_FEAT_RESV_PERSIST = 0x83, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_RESERVATION = 0x80, + NVME_FWACT_REPL = (0 << 3), + NVME_FWACT_REPL_ACTV = (1 << 3), + NVME_FWACT_ACTV = (2 << 3), +}; + +struct nvme_identify { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 cns; + __u32 rsvd11[5]; +}; + +struct nvme_features { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 fid; + __le32 dword11; + __u32 rsvd12[4]; +}; + +struct nvme_create_cq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 cqid; + __le16 qsize; + __le16 cq_flags; + __le16 irq_vector; + __u32 rsvd12[4]; +}; + +struct nvme_create_sq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 sqid; + __le16 qsize; + __le16 sq_flags; + __le16 cqid; + __u32 rsvd12[4]; +}; + +struct nvme_delete_queue { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 qid; + __u16 rsvd10; + __u32 rsvd11[5]; +}; + +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + +struct nvme_download_firmware { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __le32 numd; + __le32 offset; + __u32 rsvd12[4]; +}; + +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + +struct nvme_command { + union { + struct nvme_common_command common; + struct nvme_rw_command rw; + struct nvme_identify identify; + struct nvme_features features; + struct nvme_create_cq create_cq; + struct nvme_create_sq create_sq; + struct nvme_delete_queue delete_queue; + struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; + struct nvme_dsm_cmd dsm; + struct nvme_abort_cmd abort; + }; +}; + +enum { + NVME_SC_SUCCESS = 0x0, + NVME_SC_INVALID_OPCODE = 0x1, + NVME_SC_INVALID_FIELD = 0x2, + NVME_SC_CMDID_CONFLICT = 0x3, + NVME_SC_DATA_XFER_ERROR = 0x4, + NVME_SC_POWER_LOSS = 0x5, + NVME_SC_INTERNAL = 0x6, + NVME_SC_ABORT_REQ = 0x7, + NVME_SC_ABORT_QUEUE = 0x8, + NVME_SC_FUSED_FAIL = 0x9, + NVME_SC_FUSED_MISSING = 
0xa, + NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, + NVME_SC_SGL_INVALID_LAST = 0xd, + NVME_SC_SGL_INVALID_COUNT = 0xe, + NVME_SC_SGL_INVALID_DATA = 0xf, + NVME_SC_SGL_INVALID_METADATA = 0x10, + NVME_SC_SGL_INVALID_TYPE = 0x11, + NVME_SC_LBA_RANGE = 0x80, + NVME_SC_CAP_EXCEEDED = 0x81, + NVME_SC_NS_NOT_READY = 0x82, + NVME_SC_RESERVATION_CONFLICT = 0x83, + NVME_SC_CQ_INVALID = 0x100, + NVME_SC_QID_INVALID = 0x101, + NVME_SC_QUEUE_SIZE = 0x102, + NVME_SC_ABORT_LIMIT = 0x103, + NVME_SC_ABORT_MISSING = 0x104, + NVME_SC_ASYNC_LIMIT = 0x105, + NVME_SC_FIRMWARE_SLOT = 0x106, + NVME_SC_FIRMWARE_IMAGE = 0x107, + NVME_SC_INVALID_VECTOR = 0x108, + NVME_SC_INVALID_LOG_PAGE = 0x109, + NVME_SC_INVALID_FORMAT = 0x10a, + NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, + NVME_SC_INVALID_QUEUE = 0x10c, + NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, + NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, + NVME_SC_FEATURE_NOT_PER_NS = 0x10f, + NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + NVME_SC_BAD_ATTRIBUTES = 0x180, + NVME_SC_INVALID_PI = 0x181, + NVME_SC_READ_ONLY = 0x182, + NVME_SC_WRITE_FAULT = 0x280, + NVME_SC_READ_ERROR = 0x281, + NVME_SC_GUARD_CHECK = 0x282, + NVME_SC_APPTAG_CHECK = 0x283, + NVME_SC_REFTAG_CHECK = 0x284, + NVME_SC_COMPARE_FAILED = 0x285, + NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_DNR = 0x4000, +}; + +struct nvme_completion { + __le32 result; /* Used by admin commands to return data */ + __u32 rsvd; + __le16 sq_head; /* how much of this queue may be reclaimed */ + __le16 sq_id; /* submission queue that generated this entry */ + __u16 command_id; /* of the command which completed */ + __le16 status; /* did the command fail, and if so, why? */ +}; + +#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) + #endif /* _LINUX_NVME_H */ diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h deleted file mode 100644 index 8864194a4151..000000000000 --- a/include/uapi/linux/nvme.h +++ /dev/null @@ -1,589 +0,0 @@ -/* - * Definitions for the NVM Express interface - * Copyright (c) 2011-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#ifndef _UAPI_LINUX_NVME_H -#define _UAPI_LINUX_NVME_H - -#include - -struct nvme_id_power_state { - __le16 max_power; /* centiwatts */ - __u8 rsvd2; - __u8 flags; - __le32 entry_lat; /* microseconds */ - __le32 exit_lat; /* microseconds */ - __u8 read_tput; - __u8 read_lat; - __u8 write_tput; - __u8 write_lat; - __le16 idle_power; - __u8 idle_scale; - __u8 rsvd19; - __le16 active_power; - __u8 active_work_scale; - __u8 rsvd23[9]; -}; - -enum { - NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, - NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, -}; - -struct nvme_id_ctrl { - __le16 vid; - __le16 ssvid; - char sn[20]; - char mn[40]; - char fr[8]; - __u8 rab; - __u8 ieee[3]; - __u8 mic; - __u8 mdts; - __u16 cntlid; - __u32 ver; - __u8 rsvd84[172]; - __le16 oacs; - __u8 acl; - __u8 aerl; - __u8 frmw; - __u8 lpa; - __u8 elpe; - __u8 npss; - __u8 avscc; - __u8 apsta; - __le16 wctemp; - __le16 cctemp; - __u8 rsvd270[242]; - __u8 sqes; - __u8 cqes; - __u8 rsvd514[2]; - __le32 nn; - __le16 oncs; - __le16 fuses; - __u8 fna; - __u8 vwc; - __le16 awun; - __le16 awupf; - __u8 nvscc; - __u8 rsvd531; - __le16 acwu; - __u8 rsvd534[2]; - __le32 sgls; - __u8 rsvd540[1508]; - struct nvme_id_power_state psd[32]; - __u8 vs[1024]; -}; - -enum { - NVME_CTRL_ONCS_COMPARE = 1 << 0, - NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, - NVME_CTRL_ONCS_DSM = 1 << 2, - NVME_CTRL_VWC_PRESENT = 1 << 0, -}; - -struct nvme_lbaf { - __le16 ms; - __u8 ds; - __u8 rp; -}; - -struct nvme_id_ns { - __le64 nsze; - __le64 ncap; - __le64 nuse; - __u8 nsfeat; - __u8 nlbaf; - __u8 flbas; - __u8 mc; - __u8 dpc; - __u8 dps; - __u8 nmic; - __u8 rescap; - __u8 fpi; - __u8 rsvd33; - __le16 nawun; - __le16 nawupf; - __le16 nacwu; - __le16 nabsn; - __le16 nabo; - __le16 nabspf; - __u16 rsvd46; - __le64 nvmcap[2]; - __u8 rsvd64[40]; - __u8 nguid[16]; - __u8 eui64[8]; - struct nvme_lbaf lbaf[16]; - __u8 rsvd192[192]; - __u8 vs[3712]; -}; - -enum { - NVME_NS_FEAT_THIN = 1 << 0, - NVME_NS_FLBAS_LBA_MASK = 0xf, - NVME_NS_FLBAS_META_EXT = 0x10, - NVME_LBAF_RP_BEST = 0, - NVME_LBAF_RP_BETTER = 1, - NVME_LBAF_RP_GOOD = 2, - NVME_LBAF_RP_DEGRADED = 3, - NVME_NS_DPC_PI_LAST = 1 << 4, - NVME_NS_DPC_PI_FIRST = 1 << 3, - NVME_NS_DPC_PI_TYPE3 = 1 << 2, - NVME_NS_DPC_PI_TYPE2 = 1 << 1, - NVME_NS_DPC_PI_TYPE1 = 1 << 0, - NVME_NS_DPS_PI_FIRST = 1 << 3, - NVME_NS_DPS_PI_MASK = 0x7, - NVME_NS_DPS_PI_TYPE1 = 1, - NVME_NS_DPS_PI_TYPE2 = 2, - NVME_NS_DPS_PI_TYPE3 = 3, -}; - -struct nvme_smart_log { - __u8 critical_warning; - __u8 temperature[2]; - __u8 avail_spare; - __u8 spare_thresh; - __u8 percent_used; - __u8 rsvd6[26]; - __u8 data_units_read[16]; - __u8 data_units_written[16]; - __u8 host_reads[16]; - __u8 host_writes[16]; - __u8 ctrl_busy_time[16]; - __u8 power_cycles[16]; - __u8 power_on_hours[16]; - __u8 unsafe_shutdowns[16]; - __u8 media_errors[16]; - __u8 num_err_log_entries[16]; - __le32 warning_temp_time; - __le32 critical_comp_time; - __le16 temp_sensor[8]; - __u8 rsvd216[296]; -}; - -enum { - NVME_SMART_CRIT_SPARE = 1 << 0, - NVME_SMART_CRIT_TEMPERATURE = 1 << 1, - NVME_SMART_CRIT_RELIABILITY = 1 << 2, - NVME_SMART_CRIT_MEDIA = 1 << 3, - NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, -}; - -enum { - NVME_AER_NOTICE_NS_CHANGED = 0x0002, -}; - -struct nvme_lba_range_type { - __u8 type; - __u8 attributes; - __u8 rsvd2[14]; - __u64 slba; - __u64 nlb; - __u8 guid[16]; - __u8 rsvd48[16]; -}; - -enum { - NVME_LBART_TYPE_FS = 0x01, - NVME_LBART_TYPE_RAID = 0x02, - NVME_LBART_TYPE_CACHE = 0x03, - NVME_LBART_TYPE_SWAP = 0x04, - - NVME_LBART_ATTRIB_TEMP = 1 << 0, - 
NVME_LBART_ATTRIB_HIDE = 1 << 1, -}; - -struct nvme_reservation_status { - __le32 gen; - __u8 rtype; - __u8 regctl[2]; - __u8 resv5[2]; - __u8 ptpls; - __u8 resv10[13]; - struct { - __le16 cntlid; - __u8 rcsts; - __u8 resv3[5]; - __le64 hostid; - __le64 rkey; - } regctl_ds[]; -}; - -/* I/O commands */ - -enum nvme_opcode { - nvme_cmd_flush = 0x00, - nvme_cmd_write = 0x01, - nvme_cmd_read = 0x02, - nvme_cmd_write_uncor = 0x04, - nvme_cmd_compare = 0x05, - nvme_cmd_write_zeroes = 0x08, - nvme_cmd_dsm = 0x09, - nvme_cmd_resv_register = 0x0d, - nvme_cmd_resv_report = 0x0e, - nvme_cmd_resv_acquire = 0x11, - nvme_cmd_resv_release = 0x15, -}; - -struct nvme_common_command { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le32 cdw2[2]; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le32 cdw10[6]; -}; - -struct nvme_rw_command { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 slba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le32 reftag; - __le16 apptag; - __le16 appmask; -}; - -enum { - NVME_RW_LR = 1 << 15, - NVME_RW_FUA = 1 << 14, - NVME_RW_DSM_FREQ_UNSPEC = 0, - NVME_RW_DSM_FREQ_TYPICAL = 1, - NVME_RW_DSM_FREQ_RARE = 2, - NVME_RW_DSM_FREQ_READS = 3, - NVME_RW_DSM_FREQ_WRITES = 4, - NVME_RW_DSM_FREQ_RW = 5, - NVME_RW_DSM_FREQ_ONCE = 6, - NVME_RW_DSM_FREQ_PREFETCH = 7, - NVME_RW_DSM_FREQ_TEMP = 8, - NVME_RW_DSM_LATENCY_NONE = 0 << 4, - NVME_RW_DSM_LATENCY_IDLE = 1 << 4, - NVME_RW_DSM_LATENCY_NORM = 2 << 4, - NVME_RW_DSM_LATENCY_LOW = 3 << 4, - NVME_RW_DSM_SEQ_REQ = 1 << 6, - NVME_RW_DSM_COMPRESSED = 1 << 7, - NVME_RW_PRINFO_PRCHK_REF = 1 << 10, - NVME_RW_PRINFO_PRCHK_APP = 1 << 11, - NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, - NVME_RW_PRINFO_PRACT = 1 << 13, -}; - -struct nvme_dsm_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 nr; - __le32 attributes; - __u32 rsvd12[4]; -}; - -enum { - NVME_DSMGMT_IDR = 1 << 0, - NVME_DSMGMT_IDW = 1 << 1, - NVME_DSMGMT_AD = 1 << 2, -}; - -struct nvme_dsm_range { - __le32 cattr; - __le32 nlb; - __le64 slba; -}; - -/* Admin commands */ - -enum nvme_admin_opcode { - nvme_admin_delete_sq = 0x00, - nvme_admin_create_sq = 0x01, - nvme_admin_get_log_page = 0x02, - nvme_admin_delete_cq = 0x04, - nvme_admin_create_cq = 0x05, - nvme_admin_identify = 0x06, - nvme_admin_abort_cmd = 0x08, - nvme_admin_set_features = 0x09, - nvme_admin_get_features = 0x0a, - nvme_admin_async_event = 0x0c, - nvme_admin_activate_fw = 0x10, - nvme_admin_download_fw = 0x11, - nvme_admin_format_nvm = 0x80, - nvme_admin_security_send = 0x81, - nvme_admin_security_recv = 0x82, -}; - -enum { - NVME_QUEUE_PHYS_CONTIG = (1 << 0), - NVME_CQ_IRQ_ENABLED = (1 << 1), - NVME_SQ_PRIO_URGENT = (0 << 1), - NVME_SQ_PRIO_HIGH = (1 << 1), - NVME_SQ_PRIO_MEDIUM = (2 << 1), - NVME_SQ_PRIO_LOW = (3 << 1), - NVME_FEAT_ARBITRATION = 0x01, - NVME_FEAT_POWER_MGMT = 0x02, - NVME_FEAT_LBA_RANGE = 0x03, - NVME_FEAT_TEMP_THRESH = 0x04, - NVME_FEAT_ERR_RECOVERY = 0x05, - NVME_FEAT_VOLATILE_WC = 0x06, - NVME_FEAT_NUM_QUEUES = 0x07, - NVME_FEAT_IRQ_COALESCE = 0x08, - NVME_FEAT_IRQ_CONFIG = 0x09, - NVME_FEAT_WRITE_ATOMIC = 0x0a, - NVME_FEAT_ASYNC_EVENT = 0x0b, - NVME_FEAT_AUTO_PST = 0x0c, - NVME_FEAT_SW_PROGRESS = 0x80, - NVME_FEAT_HOST_ID = 0x81, - NVME_FEAT_RESV_MASK = 0x82, - NVME_FEAT_RESV_PERSIST = 0x83, - NVME_LOG_ERROR = 0x01, - NVME_LOG_SMART = 0x02, - NVME_LOG_FW_SLOT = 0x03, - NVME_LOG_RESERVATION = 0x80, - 
NVME_FWACT_REPL = (0 << 3), - NVME_FWACT_REPL_ACTV = (1 << 3), - NVME_FWACT_ACTV = (2 << 3), -}; - -struct nvme_identify { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 cns; - __u32 rsvd11[5]; -}; - -struct nvme_features { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 fid; - __le32 dword11; - __u32 rsvd12[4]; -}; - -struct nvme_create_cq { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __u64 rsvd8; - __le16 cqid; - __le16 qsize; - __le16 cq_flags; - __le16 irq_vector; - __u32 rsvd12[4]; -}; - -struct nvme_create_sq { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __u64 rsvd8; - __le16 sqid; - __le16 qsize; - __le16 sq_flags; - __le16 cqid; - __u32 rsvd12[4]; -}; - -struct nvme_delete_queue { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[9]; - __le16 qid; - __u16 rsvd10; - __u32 rsvd11[5]; -}; - -struct nvme_abort_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[9]; - __le16 sqid; - __u16 cid; - __u32 rsvd11[5]; -}; - -struct nvme_download_firmware { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __le64 prp2; - __le32 numd; - __le32 offset; - __u32 rsvd12[4]; -}; - -struct nvme_format_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[4]; - __le32 cdw10; - __u32 rsvd11[5]; -}; - -struct nvme_command { - union { - struct nvme_common_command common; - struct nvme_rw_command rw; - struct nvme_identify identify; - struct nvme_features features; - struct nvme_create_cq create_cq; - struct nvme_create_sq create_sq; - struct nvme_delete_queue delete_queue; - struct nvme_download_firmware dlfw; - struct nvme_format_cmd format; - struct nvme_dsm_cmd dsm; - struct nvme_abort_cmd abort; - }; -}; - -enum { - NVME_SC_SUCCESS = 0x0, - NVME_SC_INVALID_OPCODE = 0x1, - NVME_SC_INVALID_FIELD = 0x2, - NVME_SC_CMDID_CONFLICT = 0x3, - NVME_SC_DATA_XFER_ERROR = 0x4, - NVME_SC_POWER_LOSS = 0x5, - NVME_SC_INTERNAL = 0x6, - NVME_SC_ABORT_REQ = 0x7, - NVME_SC_ABORT_QUEUE = 0x8, - NVME_SC_FUSED_FAIL = 0x9, - NVME_SC_FUSED_MISSING = 0xa, - NVME_SC_INVALID_NS = 0xb, - NVME_SC_CMD_SEQ_ERROR = 0xc, - NVME_SC_SGL_INVALID_LAST = 0xd, - NVME_SC_SGL_INVALID_COUNT = 0xe, - NVME_SC_SGL_INVALID_DATA = 0xf, - NVME_SC_SGL_INVALID_METADATA = 0x10, - NVME_SC_SGL_INVALID_TYPE = 0x11, - NVME_SC_LBA_RANGE = 0x80, - NVME_SC_CAP_EXCEEDED = 0x81, - NVME_SC_NS_NOT_READY = 0x82, - NVME_SC_RESERVATION_CONFLICT = 0x83, - NVME_SC_CQ_INVALID = 0x100, - NVME_SC_QID_INVALID = 0x101, - NVME_SC_QUEUE_SIZE = 0x102, - NVME_SC_ABORT_LIMIT = 0x103, - NVME_SC_ABORT_MISSING = 0x104, - NVME_SC_ASYNC_LIMIT = 0x105, - NVME_SC_FIRMWARE_SLOT = 0x106, - NVME_SC_FIRMWARE_IMAGE = 0x107, - NVME_SC_INVALID_VECTOR = 0x108, - NVME_SC_INVALID_LOG_PAGE = 0x109, - NVME_SC_INVALID_FORMAT = 0x10a, - NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, - NVME_SC_INVALID_QUEUE = 0x10c, - NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, - NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, - NVME_SC_FEATURE_NOT_PER_NS = 0x10f, - NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, - NVME_SC_BAD_ATTRIBUTES = 0x180, - NVME_SC_INVALID_PI = 0x181, - NVME_SC_READ_ONLY = 0x182, - NVME_SC_WRITE_FAULT = 0x280, - NVME_SC_READ_ERROR = 0x281, - NVME_SC_GUARD_CHECK = 0x282, - NVME_SC_APPTAG_CHECK = 0x283, - NVME_SC_REFTAG_CHECK = 0x284, - NVME_SC_COMPARE_FAILED = 0x285, - NVME_SC_ACCESS_DENIED = 0x286, - 
NVME_SC_DNR = 0x4000, -}; - -struct nvme_completion { - __le32 result; /* Used by admin commands to return data */ - __u32 rsvd; - __le16 sq_head; /* how much of this queue may be reclaimed */ - __le16 sq_id; /* submission queue that generated this entry */ - __u16 command_id; /* of the command which completed */ - __le16 status; /* did the command fail, and if so, why? */ -}; - -struct nvme_user_io { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nblocks; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 slba; - __u32 dsmgmt; - __u32 reftag; - __u16 apptag; - __u16 appmask; -}; - -struct nvme_passthru_cmd { - __u8 opcode; - __u8 flags; - __u16 rsvd1; - __u32 nsid; - __u32 cdw2; - __u32 cdw3; - __u64 metadata; - __u64 addr; - __u32 metadata_len; - __u32 data_len; - __u32 cdw10; - __u32 cdw11; - __u32 cdw12; - __u32 cdw13; - __u32 cdw14; - __u32 cdw15; - __u32 timeout_ms; - __u32 result; -}; - -#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) - -#define nvme_admin_cmd nvme_passthru_cmd - -#define NVME_IOCTL_ID _IO('N', 0x40) -#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) -#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) -#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) -#define NVME_IOCTL_RESET _IO('N', 0x44) -#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) - -#endif /* _UAPI_LINUX_NVME_H */ diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h new file mode 100644 index 000000000000..c4b2a3f90829 --- /dev/null +++ b/include/uapi/linux/nvme_ioctl.h @@ -0,0 +1,65 @@ +/* + * Definitions for the NVM Express ioctl interface + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _UAPI_LINUX_NVME_IOCTL_H +#define _UAPI_LINUX_NVME_IOCTL_H + +#include <linux/types.h> + +struct nvme_user_io { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +}; + +struct nvme_passthru_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 result; +}; + +#define nvme_admin_cmd nvme_passthru_cmd + +#define NVME_IOCTL_ID _IO('N', 0x40) +#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) +#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) +#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) + +#endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- cgit v1.2.3 From 08c69640cfcbdcc7aaed31c05bbfaf03bb60611c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 15:27:16 +0200 Subject: nvme.h: add missing nvme_id_ctrl endianness annotations Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 91a805437876..9668d3571497 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -104,8 +104,8 @@ struct nvme_id_ctrl { __u8 ieee[3]; __u8 mic; __u8 mdts; - __u16 cntlid; - __u32 ver; + __le16 cntlid; + __le32 ver; __u8 rsvd84[172]; __le16 oacs; __u8 acl; -- cgit v1.2.3 From 2812dfe370516ef958b5c9e2eca1b2f002236d2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Oct 2015 18:19:20 +0200 Subject: nvme: include <linux/types.h> in <linux/nvme.h> The buildbot complains about this even if it doesn't generate a build warning. But it's an easy fix, so here we go: Reported-by: kbuild test robot Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9668d3571497..3af5f454c04a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -15,6 +15,8 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H +#include <linux/types.h> + struct nvme_bar { __u64 cap; /* Controller Capabilities */ __u32 vs; /* Version */ -- cgit v1.2.3 From 9a764234eee689ea800424ab99b08ff07a8bdbcd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 14 Sep 2015 12:09:34 -0700 Subject: soc: brcmstb: Add Bus Interface Unit control setup Broadcom STB SoCs (brcmstb) require an early setup of their Bus Interface Unit control register; this needs to happen before SMP is brought up because it affects how the CPU complex will be interfaced to the memory controller. Add support code which properly initializes the BIU registers based on whether "brcm,write-pairing" is present in Device Tree, and take care of saving and restoring credit register settings during system-wide suspend/resume operations.
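Callers are expected to invoke brcmstb_biuctrl_init() from the machine entry point, before SMP bring-up. As a hedged sketch only, not part of this patch, the wiring in an ARM machine descriptor could look roughly like this (machine name and compatible string are illustrative assumptions):

#include <linux/soc/brcmstb/brcmstb.h>
#include <asm/mach/arch.h>

static void __init brcmstb_init_early(void)
{
	/* BIU setup must precede secondary CPU boot. */
	brcmstb_biuctrl_init();
}

static const char *const brcmstb_match[] __initconst = {
	"brcm,brcmstb",	/* assumed board compatible */
	NULL,
};

DT_MACHINE_START(BRCMSTB, "Broadcom STB (Flattened Device Tree)")
	.dt_compat	= brcmstb_match,
	.init_early	= brcmstb_init_early,
MACHINE_END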
Acked-by: Gregory Fong Signed-off-by: Florian Fainelli --- drivers/soc/brcmstb/Makefile | 2 +- drivers/soc/brcmstb/biuctrl.c | 116 ++++++++++++++++++++++++++++++++++++ include/linux/soc/brcmstb/brcmstb.h | 10 ++++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 drivers/soc/brcmstb/biuctrl.c create mode 100644 include/linux/soc/brcmstb/brcmstb.h (limited to 'include/linux') diff --git a/drivers/soc/brcmstb/Makefile b/drivers/soc/brcmstb/Makefile index 183280e39f80..9120b2715d3e 100644 --- a/drivers/soc/brcmstb/Makefile +++ b/drivers/soc/brcmstb/Makefile @@ -1 +1 @@ -obj-y += common.o +obj-y += common.o biuctrl.o diff --git a/drivers/soc/brcmstb/biuctrl.c b/drivers/soc/brcmstb/biuctrl.c new file mode 100644 index 000000000000..9049c076f9a1 --- /dev/null +++ b/drivers/soc/brcmstb/biuctrl.c @@ -0,0 +1,116 @@ +/* + * Broadcom STB SoCs Bus Unit Interface controls + * + * Copyright (C) 2015, Broadcom Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "brcmstb: " KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#define CPU_CREDIT_REG_OFFSET 0x184 +#define CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK 0x70000000 + +static void __iomem *cpubiuctrl_base; +static bool mcp_wr_pairing_en; + +static int __init mcp_write_pairing_set(void) +{ + u32 creds = 0; + + if (!cpubiuctrl_base) + return -1; + + creds = readl_relaxed(cpubiuctrl_base + CPU_CREDIT_REG_OFFSET); + if (mcp_wr_pairing_en) { + pr_info("MCP: Enabling write pairing\n"); + writel_relaxed(creds | CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK, + cpubiuctrl_base + CPU_CREDIT_REG_OFFSET); + } else if (creds & CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK) { + pr_info("MCP: Disabling write pairing\n"); + writel_relaxed(creds & ~CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK, + cpubiuctrl_base + CPU_CREDIT_REG_OFFSET); + } else { + pr_info("MCP: Write pairing already disabled\n"); + } + + return 0; +} + +static int __init setup_hifcpubiuctrl_regs(void) +{ + struct device_node *np; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl"); + if (!np) { + pr_err("missing BIU control node\n"); + return -ENODEV; + } + + cpubiuctrl_base = of_iomap(np, 0); + if (!cpubiuctrl_base) { + pr_err("failed to remap BIU control base\n"); + ret = -ENOMEM; + goto out; + } + + mcp_wr_pairing_en = of_property_read_bool(np, "brcm,write-pairing"); +out: + of_node_put(np); + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static u32 cpu_credit_reg_dump; /* for save/restore */ + +static int brcmstb_cpu_credit_reg_suspend(void) +{ + if (cpubiuctrl_base) + cpu_credit_reg_dump = + readl_relaxed(cpubiuctrl_base + CPU_CREDIT_REG_OFFSET); + return 0; +} + +static void brcmstb_cpu_credit_reg_resume(void) +{ + if (cpubiuctrl_base) + writel_relaxed(cpu_credit_reg_dump, + cpubiuctrl_base + CPU_CREDIT_REG_OFFSET); +} + +static struct syscore_ops brcmstb_cpu_credit_syscore_ops = { + .suspend = brcmstb_cpu_credit_reg_suspend, + .resume = brcmstb_cpu_credit_reg_resume, +}; +#endif + + +void __init brcmstb_biuctrl_init(void) +{ + int ret; + + setup_hifcpubiuctrl_regs(); + + ret = mcp_write_pairing_set(); + if (ret) { + 
pr_err("MCP: Unable to disable write pairing!\n"); + return; + } + +#ifdef CONFIG_PM_SLEEP + register_syscore_ops(&brcmstb_cpu_credit_syscore_ops); +#endif +} diff --git a/include/linux/soc/brcmstb/brcmstb.h b/include/linux/soc/brcmstb/brcmstb.h new file mode 100644 index 000000000000..337ce414e898 --- /dev/null +++ b/include/linux/soc/brcmstb/brcmstb.h @@ -0,0 +1,10 @@ +#ifndef __BRCMSTB_SOC_H +#define __BRCMSTB_SOC_H + +/* + * Bus Interface Unit control register setup, must happen early during boot, + * before SMP is brought up, called by machine entry point. + */ +void brcmstb_biuctrl_init(void); + +#endif /* __BRCMSTB_SOC_H */ -- cgit v1.2.3 From a639315d6c536c806724c9328941a2517507e3e3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Sep 2015 02:14:03 -0400 Subject: pmem: kill memremap_pmem() Now that the pmem-api is defined as "a set of apis that enables access to WB mapped pmem", the mapping type is implied. Remove the wrapper and push the functionality down into the pmem driver in preparation for adding support for direct-mapped pmem. Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 9 +++++---- include/linux/pmem.h | 26 +------------------------- 2 files changed, 6 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0ba6a978f227..0680affae04a 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -157,8 +157,9 @@ static struct pmem_device *pmem_alloc(struct device *dev, return addr; pmem->virt_addr = (void __pmem *) addr; } else { - pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, - pmem->size); + pmem->virt_addr = (void __pmem *) devm_memremap(dev, + pmem->phys_addr, pmem->size, + ARCH_MEMREMAP_PMEM); if (!pmem->virt_addr) return ERR_PTR(-ENXIO); } @@ -363,8 +364,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) /* establish pfn range for lookup, and switch to direct map */ pmem = dev_get_drvdata(dev); - memunmap_pmem(dev, pmem->virt_addr); - pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res); + devm_memunmap(dev, (void __force *) pmem->virt_addr); + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res); if (IS_ERR(pmem->virt_addr)) { rc = PTR_ERR(pmem->virt_addr); goto err; diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 85f810b33917..acfea8ce4a07 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -65,11 +65,6 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si memcpy(dst, (void __force const *) src, size); } -static inline void memunmap_pmem(struct device *dev, void __pmem *addr) -{ - devm_memunmap(dev, (void __force *) addr); -} - static inline bool arch_has_pmem_api(void) { return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); @@ -93,7 +88,7 @@ static inline bool arch_has_wmb_pmem(void) * These defaults seek to offer decent performance and minimize the * window between i/o completion and writes being durable on media. * However, it is undefined / architecture specific whether - * default_memremap_pmem + default_memcpy_to_pmem is sufficient for + * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for * making data durable relative to i/o completion. 
*/ static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, @@ -116,25 +111,6 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size) memset((void __force *)addr, 0, size); } -/** - * memremap_pmem - map physical persistent memory for pmem api - * @offset: physical address of persistent memory - * @size: size of the mapping - * - * Establish a mapping of the architecture specific memory type expected - * by memcpy_to_pmem() and wmb_pmem(). For example, it may be - * the case that an uncacheable or writethrough mapping is sufficient, - * or a writeback mapping provided memcpy_to_pmem() and - * wmb_pmem() arrange for the data to be written through the - * cache to persistent media. - */ -static inline void __pmem *memremap_pmem(struct device *dev, - resource_size_t offset, unsigned long size) -{ - return (void __pmem *) devm_memremap(dev, offset, size, - ARCH_MEMREMAP_PMEM); -} - /** * memcpy_to_pmem - copy data to persistent memory * @dst: destination buffer for the copy -- cgit v1.2.3 From 7c683941f30a977c10ec6be174ec5f16939c7ce5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2015 20:35:55 -0400 Subject: devm: make allocations numa aware by default Given we already have a device just use dev_to_node() to provide hint allocations for devres. However, current devres_alloc() users will need to explicitly opt-in with devres_alloc_node(). Reviewed-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/base/devres.c | 19 ++++++++++--------- include/linux/device.h | 16 ++++++++++++---- 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 875464690117..8fc654f0807b 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -82,12 +82,12 @@ static struct devres_group * node_to_group(struct devres_node *node) } static __always_inline struct devres * alloc_dr(dr_release_t release, - size_t size, gfp_t gfp) + size_t size, gfp_t gfp, int nid) { size_t tot_size = sizeof(struct devres) + size; struct devres *dr; - dr = kmalloc_track_caller(tot_size, gfp); + dr = kmalloc_node_track_caller(tot_size, gfp, nid); if (unlikely(!dr)) return NULL; @@ -106,24 +106,25 @@ static void add_dr(struct device *dev, struct devres_node *node) } #ifdef CONFIG_DEBUG_DEVRES -void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp, +void * __devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, const char *name) { struct devres *dr; - dr = alloc_dr(release, size, gfp | __GFP_ZERO); + dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid); if (unlikely(!dr)) return NULL; set_node_dbginfo(&dr->node, name, size); return dr->data; } -EXPORT_SYMBOL_GPL(__devres_alloc); +EXPORT_SYMBOL_GPL(__devres_alloc_node); #else /** * devres_alloc - Allocate device resource data * @release: Release function devres will be associated with * @size: Allocation size * @gfp: Allocation flags + * @nid: NUMA node * * Allocate devres of @size bytes. The allocated area is zeroed, then * associated with @release. The returned pointer can be passed to @@ -132,16 +133,16 @@ EXPORT_SYMBOL_GPL(__devres_alloc); * RETURNS: * Pointer to allocated devres on success, NULL on failure. 
*/ -void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp) +void * devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid) { struct devres *dr; - dr = alloc_dr(release, size, gfp | __GFP_ZERO); + dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid); if (unlikely(!dr)) return NULL; return dr->data; } -EXPORT_SYMBOL_GPL(devres_alloc); +EXPORT_SYMBOL_GPL(devres_alloc_node); #endif /** @@ -776,7 +777,7 @@ void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) struct devres *dr; /* use raw alloc_dr for kmalloc caller tracing */ - dr = alloc_dr(devm_kmalloc_release, size, gfp); + dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev)); if (unlikely(!dr)) return NULL; diff --git a/include/linux/device.h b/include/linux/device.h index 5d7bc6349930..b8f411b57dcb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -604,13 +604,21 @@ typedef void (*dr_release_t)(struct device *dev, void *res); typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); #ifdef CONFIG_DEBUG_DEVRES -extern void *__devres_alloc(dr_release_t release, size_t size, gfp_t gfp, - const char *name); +extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid, const char *name); #define devres_alloc(release, size, gfp) \ - __devres_alloc(release, size, gfp, #release) + __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) +#define devres_alloc_node(release, size, gfp, nid) \ + __devres_alloc_node(release, size, gfp, nid, #release) #else -extern void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp); +extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid); +static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp) +{ + return devres_alloc_node(release, size, gfp, NUMA_NO_NODE); +} #endif + extern void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), -- cgit v1.2.3 From 7cabd0086acd8f204d9b11a9b0aca90d6a9fcc5b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Sep 2015 11:48:01 +0100 Subject: irqchip/gic-v3: Make gic_enable_sre an inline function In order for gic_enable_sre to be used by the arm64 core code, move it to arm-gic-v3.h. As a bonus, we now also check if system registers have been already enabled, and return early if they have. In all cases, the function now returns a boolean indicating if the enabling has been successful. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 32 +++++++++----------------------- include/linux/irqchip/arm-gic-v3.h | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 149e3c6b3618..936da87c1070 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -171,27 +171,6 @@ static void __maybe_unused gic_write_sgi1r(u64 val) asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); } -static void gic_enable_sre(void) -{ - u64 val; - - asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); - val |= ICC_SRE_EL1_SRE; - asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); - isb(); - - /* - * Need to check that the SRE bit has actually been set. If - * not, it means that SRE is disabled at EL2. We're going to - * die painfully, and there is nothing we can do about it. 
- * - * Kindly inform the luser. - */ - asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); - if (!(val & ICC_SRE_EL1_SRE)) - pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); -} - static void gic_enable_redist(bool enable) { void __iomem *rbase; @@ -525,8 +504,15 @@ static int gic_populate_rdist(void) static void gic_cpu_sys_reg_init(void) { - /* Enable system registers */ - gic_enable_sre(); + /* + * Need to check that the SRE bit has actually been set. If + * not, it means that SRE is disabled at EL2. We're going to + * die painfully, and there is nothing we can do about it. + * + * Kindly inform the luser. + */ + if (!gic_enable_sre()) + pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); /* Set priority mask register */ gic_write_pmr(DEFAULT_PMR_VALUE); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c0c8a2ef9d90..9001b0bbe878 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -398,6 +398,22 @@ static inline void gic_write_dir(u64 irq) isb(); } +static inline bool gic_enable_sre(void) +{ + u64 val; + + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + if (val & ICC_SRE_EL1_SRE) + return true; + + val |= ICC_SRE_EL1_SRE; + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + isb(); + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + + return !!(val & ICC_SRE_EL1_SRE); +} + struct irq_domain; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, -- cgit v1.2.3 From 7936e914f7b0827c2dcfe63fbefdc21de2d61dcb Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Oct 2015 13:47:14 +0100 Subject: irqchip/gic-v3: Refactor the arm64 specific parts This patch moves the GICv3 system register access helpers to arch/arm64/. Their 32bit counterparts will need to use mrc/mcr accesses instead of mrs_s/msr_s. [maz: fixed conflict with Cavium erratum handling] Reviewed-by: Marc Zyngier Signed-off-by: Jean-Philippe Brucker Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/arch_gicv3.h | 162 ++++++++++++++++++++++++++++++++++++ drivers/irqchip/irq-gic-v3.c | 57 +------------ include/linux/irqchip/arm-gic-v3.h | 88 ++------------------ 3 files changed, 175 insertions(+), 132 deletions(-) create mode 100644 arch/arm64/include/asm/arch_gicv3.h (limited to 'include/linux') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h new file mode 100644 index 000000000000..e695a931728c --- /dev/null +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -0,0 +1,162 @@ +/* + * arch/arm64/include/asm/arch_gicv3.h + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+#ifndef __ASM_ARCH_GICV3_H
+#define __ASM_ARCH_GICV3_H
+
+#include <asm/sysreg.h>
+
+#define ICC_EOIR1_EL1			sys_reg(3, 0, 12, 12, 1)
+#define ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
+#define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
+#define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
+#define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
+#define ICC_CTLR_EL1			sys_reg(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
+#define ICC_GRPEN1_EL1			sys_reg(3, 0, 12, 12, 7)
+
+#define ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
+
+/*
+ * System register definitions
+ */
+#define ICH_VSEIR_EL2			sys_reg(3, 4, 12, 9, 4)
+#define ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
+#define ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
+#define ICH_MISR_EL2			sys_reg(3, 4, 12, 11, 2)
+#define ICH_EISR_EL2			sys_reg(3, 4, 12, 11, 3)
+#define ICH_ELSR_EL2			sys_reg(3, 4, 12, 11, 5)
+#define ICH_VMCR_EL2			sys_reg(3, 4, 12, 11, 7)
+
+#define __LR0_EL2(x)			sys_reg(3, 4, 12, 12, x)
+#define __LR8_EL2(x)			sys_reg(3, 4, 12, 13, x)
+
+#define ICH_LR0_EL2			__LR0_EL2(0)
+#define ICH_LR1_EL2			__LR0_EL2(1)
+#define ICH_LR2_EL2			__LR0_EL2(2)
+#define ICH_LR3_EL2			__LR0_EL2(3)
+#define ICH_LR4_EL2			__LR0_EL2(4)
+#define ICH_LR5_EL2			__LR0_EL2(5)
+#define ICH_LR6_EL2			__LR0_EL2(6)
+#define ICH_LR7_EL2			__LR0_EL2(7)
+#define ICH_LR8_EL2			__LR8_EL2(0)
+#define ICH_LR9_EL2			__LR8_EL2(1)
+#define ICH_LR10_EL2			__LR8_EL2(2)
+#define ICH_LR11_EL2			__LR8_EL2(3)
+#define ICH_LR12_EL2			__LR8_EL2(4)
+#define ICH_LR13_EL2			__LR8_EL2(5)
+#define ICH_LR14_EL2			__LR8_EL2(6)
+#define ICH_LR15_EL2			__LR8_EL2(7)
+
+#define __AP0Rx_EL2(x)			sys_reg(3, 4, 12, 8, x)
+#define ICH_AP0R0_EL2			__AP0Rx_EL2(0)
+#define ICH_AP0R1_EL2			__AP0Rx_EL2(1)
+#define ICH_AP0R2_EL2			__AP0Rx_EL2(2)
+#define ICH_AP0R3_EL2			__AP0Rx_EL2(3)
+
+#define __AP1Rx_EL2(x)			sys_reg(3, 4, 12, 9, x)
+#define ICH_AP1R0_EL2			__AP1Rx_EL2(0)
+#define ICH_AP1R1_EL2			__AP1Rx_EL2(1)
+#define ICH_AP1R2_EL2			__AP1Rx_EL2(2)
+#define ICH_AP1R3_EL2			__AP1Rx_EL2(3)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+/* Low level accessors */
+
+static inline void gic_write_eoir(u64 irq)
+{
+	asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq));
+	isb();
+}
+
+static inline void gic_write_dir(u64 irq)
+{
+	asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq));
+	isb();
+}
+
+static inline u64 gic_read_iar_common(void)
+{
+	u64 irqstat;
+
+	asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+	return irqstat;
+}
+
+/*
+ * Cavium ThunderX erratum 23154
+ *
+ * The gicv3 of ThunderX requires a modified version for reading the
+ * IAR status to ensure data synchronization (access to icc_iar1_el1
+ * is not sync'ed before and after).
+ */ +static inline u64 gic_read_iar_cavium_thunderx(void) +{ + u64 irqstat; + + asm volatile( + "nop;nop;nop;nop\n\t" + "nop;nop;nop;nop\n\t" + "mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t" + "nop;nop;nop;nop" + : "=r" (irqstat)); + mb(); + + return irqstat; +} + +static inline void gic_write_pmr(u64 val) +{ + asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); +} + +static inline void gic_write_ctlr(u64 val) +{ + asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); + isb(); +} + +static inline void gic_write_grpen1(u64 val) +{ + asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); + isb(); +} + +static inline void gic_write_sgi1r(u64 val) +{ + asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); +} + +static inline u64 gic_read_sre(void) +{ + u64 val; + + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + return val; +} + +static inline void gic_write_sre(u64 val) +{ + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + isb(); +} + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_ARCH_GICV3_H */ diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 936da87c1070..bf3df7961a5b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -108,37 +108,7 @@ static void gic_redist_wait_for_rwp(void) gic_do_wait_for_rwp(gic_data_rdist_rd_base()); } -/* Low level accessors */ -static u64 gic_read_iar_common(void) -{ - u64 irqstat; - - asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); - return irqstat; -} - -/* - * Cavium ThunderX erratum 23154 - * - * The gicv3 of ThunderX requires a modified version for reading the - * IAR status to ensure data synchronization (access to icc_iar1_el1 - * is not sync'ed before and after). 
- */
-static u64 gic_read_iar_cavium_thunderx(void)
-{
-	u64 irqstat;
-
-	asm volatile(
-		"nop;nop;nop;nop\n\t"
-		"nop;nop;nop;nop\n\t"
-		"mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t"
-		"nop;nop;nop;nop"
-		: "=r" (irqstat));
-	mb();
-
-	return irqstat;
-}
-
+#ifdef CONFIG_ARM64
 static DEFINE_STATIC_KEY_FALSE(is_cavium_thunderx);
 
 static u64 __maybe_unused gic_read_iar(void)
@@ -148,28 +118,7 @@ static u64 __maybe_unused gic_read_iar(void)
 	else
 		return gic_read_iar_common();
 }
-
-static void __maybe_unused gic_write_pmr(u64 val)
-{
-	asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
-}
-
-static void __maybe_unused gic_write_ctlr(u64 val)
-{
-	asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val));
-	isb();
-}
-
-static void __maybe_unused gic_write_grpen1(u64 val)
-{
-	asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val));
-	isb();
-}
-
-static void __maybe_unused gic_write_sgi1r(u64 val)
-{
-	asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val));
-}
+#endif
 
 static void gic_enable_redist(bool enable)
 {
@@ -856,8 +805,10 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
 
 static void gicv3_enable_quirks(void)
 {
+#ifdef CONFIG_ARM64
 	if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_23154))
 		static_branch_enable(&is_cavium_thunderx);
+#endif
 }
 
 static int __init gic_of_init(struct device_node *node, struct device_node *parent)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 9001b0bbe878..b4ee60076ff8 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -18,8 +18,6 @@
 #ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H
 #define __LINUX_IRQCHIP_ARM_GIC_V3_H
 
-#include <asm/sysreg.h>
-
 /*
  * Distributor registers. We assume we're running non-secure, with ARE
  * being set. Secure-only and non-ARE registers are not described.
@@ -293,19 +291,8 @@
 #define ICH_VMCR_PMR_SHIFT		24
 #define ICH_VMCR_PMR_MASK		(0xffUL << ICH_VMCR_PMR_SHIFT)
 
-#define ICC_EOIR1_EL1			sys_reg(3, 0, 12, 12, 1)
-#define ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
-#define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
-#define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
-#define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
-#define ICC_CTLR_EL1			sys_reg(3, 0, 12, 12, 4)
-#define ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
-#define ICC_GRPEN1_EL1			sys_reg(3, 0, 12, 12, 7)
-
 #define ICC_IAR1_EL1_SPURIOUS		0x3ff
 
-#define ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
-
 #define ICC_SRE_EL2_SRE			(1 << 0)
 #define ICC_SRE_EL2_ENABLE		(1 << 3)
 
@@ -321,54 +308,10 @@
 #define ICC_SGI1R_AFFINITY_3_SHIFT	48
 #define ICC_SGI1R_AFFINITY_3_MASK	(0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT)
 
-/*
- * System register definitions
- */
-#define ICH_VSEIR_EL2			sys_reg(3, 4, 12, 9, 4)
-#define ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
-#define ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
-#define ICH_MISR_EL2			sys_reg(3, 4, 12, 11, 2)
-#define ICH_EISR_EL2			sys_reg(3, 4, 12, 11, 3)
-#define ICH_ELSR_EL2			sys_reg(3, 4, 12, 11, 5)
-#define ICH_VMCR_EL2			sys_reg(3, 4, 12, 11, 7)
-
-#define __LR0_EL2(x)			sys_reg(3, 4, 12, 12, x)
-#define __LR8_EL2(x)			sys_reg(3, 4, 12, 13, x)
-
-#define ICH_LR0_EL2			__LR0_EL2(0)
-#define ICH_LR1_EL2			__LR0_EL2(1)
-#define ICH_LR2_EL2			__LR0_EL2(2)
-#define ICH_LR3_EL2			__LR0_EL2(3)
-#define ICH_LR4_EL2			__LR0_EL2(4)
-#define ICH_LR5_EL2			__LR0_EL2(5)
-#define ICH_LR6_EL2			__LR0_EL2(6)
-#define ICH_LR7_EL2			__LR0_EL2(7)
-#define ICH_LR8_EL2			__LR8_EL2(0)
-#define ICH_LR9_EL2			__LR8_EL2(1)
-#define ICH_LR10_EL2			__LR8_EL2(2)
-#define ICH_LR11_EL2			__LR8_EL2(3)
-#define ICH_LR12_EL2			__LR8_EL2(4)
-#define ICH_LR13_EL2			__LR8_EL2(5)
-#define ICH_LR14_EL2			__LR8_EL2(6)
-#define ICH_LR15_EL2			__LR8_EL2(7)
-
-#define __AP0Rx_EL2(x)			sys_reg(3, 4, 12, 8, x)
-#define ICH_AP0R0_EL2			__AP0Rx_EL2(0)
-#define ICH_AP0R1_EL2			__AP0Rx_EL2(1)
-#define ICH_AP0R2_EL2			__AP0Rx_EL2(2)
-#define ICH_AP0R3_EL2			__AP0Rx_EL2(3)
-
-#define __AP1Rx_EL2(x)			sys_reg(3, 4, 12, 9, x)
-#define ICH_AP1R0_EL2			__AP1Rx_EL2(0)
-#define ICH_AP1R1_EL2			__AP1Rx_EL2(1)
-#define ICH_AP1R2_EL2			__AP1Rx_EL2(2)
-#define ICH_AP1R3_EL2			__AP1Rx_EL2(3)
+#include <asm/arch_gicv3.h>
 
 #ifndef __ASSEMBLY__
 
-#include <linux/stringify.h>
-#include <asm/barrier.h>
-
 /*
  * We need a value to serve as a irq-type for LPIs. Choose one that will
  * hopefully pique the interest of the reviewer.
@@ -386,39 +329,26 @@ struct rdists { u64 flags; }; -static inline void gic_write_eoir(u64 irq) -{ - asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); - isb(); -} - -static inline void gic_write_dir(u64 irq) -{ - asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq)); - isb(); -} +struct irq_domain; +int its_cpu_init(void); +int its_init(struct device_node *node, struct rdists *rdists, + struct irq_domain *domain); static inline bool gic_enable_sre(void) { - u64 val; + u32 val; - asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + val = gic_read_sre(); if (val & ICC_SRE_EL1_SRE) return true; val |= ICC_SRE_EL1_SRE; - asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); - isb(); - asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + gic_write_sre(val); + val = gic_read_sre(); return !!(val & ICC_SRE_EL1_SRE); } -struct irq_domain; -int its_cpu_init(void); -int its_init(struct device_node *node, struct rdists *rdists, - struct irq_domain *domain); - #endif #endif -- cgit v1.2.3 From f6c86a41e1dc2214363b00cc0eadb8a5401c892d Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Oct 2015 13:47:15 +0100 Subject: irqchip/gic-v3: Change unsigned types for AArch32 compatibility This patch does a few simple compatibility-related changes: - change the system register access prototypes to their actual size, - homogenise mpidr accesses with unsigned long, - force the 64bit register values to unsigned long long. Note: the list registers are 64bit on GICv3, but the AArch32 vGIC driver will need to split their values into two 32bit registers: LRn and LRCn. Reviewed-by: Marc Zyngier Signed-off-by: Jean-Philippe Brucker Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/arch_gicv3.h | 33 +++++++++++++++++++-------------- drivers/irqchip/irq-gic-v3.c | 25 ++++++++++++------------- include/linux/irqchip/arm-gic-v3.h | 18 +++++++++--------- 3 files changed, 40 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index e695a931728c..1aaa63551365 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -78,17 +78,22 @@ #include -/* Low level accessors */ +/* + * Low-level accessors + * + * These system registers are 32 bits, but we make sure that the compiler + * sets the GP register's most significant bits to 0 with an explicit cast. 
+ */ -static inline void gic_write_eoir(u64 irq) +static inline void gic_write_eoir(u32 irq) { - asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); + asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" ((u64)irq)); isb(); } -static inline void gic_write_dir(u64 irq) +static inline void gic_write_dir(u32 irq) { - asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq)); + asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" ((u64)irq)); isb(); } @@ -122,20 +127,20 @@ static inline u64 gic_read_iar_cavium_thunderx(void) return irqstat; } -static inline void gic_write_pmr(u64 val) +static inline void gic_write_pmr(u32 val) { - asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" ((u64)val)); } -static inline void gic_write_ctlr(u64 val) +static inline void gic_write_ctlr(u32 val) { - asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" ((u64)val)); isb(); } -static inline void gic_write_grpen1(u64 val) +static inline void gic_write_grpen1(u32 val) { - asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" ((u64)val)); isb(); } @@ -144,7 +149,7 @@ static inline void gic_write_sgi1r(u64 val) asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); } -static inline u64 gic_read_sre(void) +static inline u32 gic_read_sre(void) { u64 val; @@ -152,9 +157,9 @@ static inline u64 gic_read_sre(void) return val; } -static inline void gic_write_sre(u64 val) +static inline void gic_write_sre(u32 val) { - asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" ((u64)val)); isb(); } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index bf3df7961a5b..6125bbd777e7 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -319,11 +319,11 @@ static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) return 0; } -static u64 gic_mpidr_to_affinity(u64 mpidr) +static u64 gic_mpidr_to_affinity(unsigned long mpidr) { u64 aff; - aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | + aff = ((u64)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | MPIDR_AFFINITY_LEVEL(mpidr, 0)); @@ -333,7 +333,7 @@ static u64 gic_mpidr_to_affinity(u64 mpidr) static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { - u64 irqnr; + u32 irqnr; do { irqnr = gic_read_iar(); @@ -397,7 +397,7 @@ static void __init gic_dist_init(void) static int gic_populate_rdist(void) { - u64 mpidr = cpu_logical_map(smp_processor_id()); + unsigned long mpidr = cpu_logical_map(smp_processor_id()); u64 typer; u32 aff; int i; @@ -428,10 +428,9 @@ static int gic_populate_rdist(void) u64 offset = ptr - gic_data.redist_regions[i].redist_base; gic_data_rdist_rd_base() = ptr; gic_data_rdist()->phys_base = gic_data.redist_regions[i].phys_base + offset; - pr_info("CPU%d: found redistributor %llx region %d:%pa\n", - smp_processor_id(), - (unsigned long long)mpidr, - i, &gic_data_rdist()->phys_base); + pr_info("CPU%d: found redistributor %lx region %d:%pa\n", + smp_processor_id(), mpidr, i, + &gic_data_rdist()->phys_base); return 0; } @@ -446,8 +445,8 @@ static int gic_populate_rdist(void) } /* We couldn't even deal with ourselves... 
*/ - WARN(true, "CPU%d: mpidr %llx has no re-distributor!\n", - smp_processor_id(), (unsigned long long)mpidr); + WARN(true, "CPU%d: mpidr %lx has no re-distributor!\n", + smp_processor_id(), mpidr); return -ENODEV; } @@ -524,10 +523,10 @@ static struct notifier_block gic_cpu_notifier = { }; static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, - u64 cluster_id) + unsigned long cluster_id) { int cpu = *base_cpu; - u64 mpidr = cpu_logical_map(cpu); + unsigned long mpidr = cpu_logical_map(cpu); u16 tlist = 0; while (cpu < nr_cpu_ids) { @@ -588,7 +587,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) smp_wmb(); for_each_cpu(cpu, mask) { - u64 cluster_id = cpu_logical_map(cpu) & ~0xffUL; + unsigned long cluster_id = cpu_logical_map(cpu) & ~0xffUL; u16 tlist; tlist = gic_compute_target_list(&cpu, mask, cluster_id); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index b4ee60076ff8..c9ae0c6ec050 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -265,16 +265,16 @@ /* * Hypervisor interface registers (SRE only) */ -#define ICH_LR_VIRTUAL_ID_MASK ((1UL << 32) - 1) - -#define ICH_LR_EOI (1UL << 41) -#define ICH_LR_GROUP (1UL << 60) -#define ICH_LR_HW (1UL << 61) -#define ICH_LR_STATE (3UL << 62) -#define ICH_LR_PENDING_BIT (1UL << 62) -#define ICH_LR_ACTIVE_BIT (1UL << 63) +#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) + +#define ICH_LR_EOI (1ULL << 41) +#define ICH_LR_GROUP (1ULL << 60) +#define ICH_LR_HW (1ULL << 61) +#define ICH_LR_STATE (3ULL << 62) +#define ICH_LR_PENDING_BIT (1ULL << 62) +#define ICH_LR_ACTIVE_BIT (1ULL << 63) #define ICH_LR_PHYS_ID_SHIFT 32 -#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) #define ICH_MISR_EOI (1 << 0) #define ICH_MISR_U (1 << 1) -- cgit v1.2.3 From e866a2e3950fe2f708d5cc67d641b1725ef7a708 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 1 Oct 2015 23:45:31 +0200 Subject: linux/thermal.h: rename KELVIN_TO_CELSIUS to DECI_KELVIN_TO_CELSIUS The macros KELVIN_TO_CELSIUS and CELSIUS_TO_KELVIN actually convert between deciKelvins and Celsius, so rename them to reflect that. While at it, use a statement expression in DECI_KELVIN_TO_CELSIUS to prevent expanding the argument multiple times and get rid of a few casts. 
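As an illustration (a hypothetical caller, not part of this patch), the
statement expression guarantees single evaluation and keeps the old
rounding behaviour:

	/* get_temp() is evaluated exactly once here; the old macro body
	 * would have expanded it up to three times */
	long c = DECI_KELVIN_TO_CELSIUS(get_temp());

	/* 2981 deciKelvin -> (2981 - 2732 + 5) / 10 = 25 degrees Celsius */
	long t = DECI_KELVIN_TO_CELSIUS(2981);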
Signed-off-by: Rasmus Villemoes Acked-by: Darren Hart Signed-off-by: Zhang Rui --- drivers/acpi/thermal.c | 12 ++++++------ drivers/platform/x86/asus-wmi.c | 2 +- drivers/platform/x86/intel_menlow.c | 8 ++++---- include/linux/thermal.h | 8 +++++--- 4 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 30d8518b25fb..82707f9824ca 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -315,7 +315,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if (crt == -1) { tz->trips.critical.flags.valid = 0; } else if (crt > 0) { - unsigned long crt_k = CELSIUS_TO_KELVIN(crt); + unsigned long crt_k = CELSIUS_TO_DECI_KELVIN(crt); /* * Allow override critical threshold */ @@ -351,7 +351,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if (psv == -1) { status = AE_SUPPORT; } else if (psv > 0) { - tmp = CELSIUS_TO_KELVIN(psv); + tmp = CELSIUS_TO_DECI_KELVIN(psv); status = AE_OK; } else { status = acpi_evaluate_integer(tz->device->handle, @@ -431,7 +431,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) break; if (i == 1) tz->trips.active[0].temperature = - CELSIUS_TO_KELVIN(act); + CELSIUS_TO_DECI_KELVIN(act); else /* * Don't allow override higher than @@ -439,9 +439,9 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) */ tz->trips.active[i - 1].temperature = (tz->trips.active[i - 2].temperature < - CELSIUS_TO_KELVIN(act) ? + CELSIUS_TO_DECI_KELVIN(act) ? tz->trips.active[i - 2].temperature : - CELSIUS_TO_KELVIN(act)); + CELSIUS_TO_DECI_KELVIN(act)); break; } else { tz->trips.active[i].temperature = tmp; @@ -1105,7 +1105,7 @@ static int acpi_thermal_add(struct acpi_device *device) INIT_WORK(&tz->thermal_check_work, acpi_thermal_check_fn); pr_info(PREFIX "%s [%s] (%ld C)\n", acpi_device_name(device), - acpi_device_bid(device), KELVIN_TO_CELSIUS(tz->temperature)); + acpi_device_bid(device), DECI_KELVIN_TO_CELSIUS(tz->temperature)); goto end; free_memory: diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index efbc3f0c592b..bb80f7a29496 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1318,7 +1318,7 @@ static ssize_t asus_hwmon_temp1(struct device *dev, if (err < 0) return err; - value = KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000; + value = DECI_KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000; return sprintf(buf, "%d\n", value); } diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c index e8b46d2c468c..0a919d81662c 100644 --- a/drivers/platform/x86/intel_menlow.c +++ b/drivers/platform/x86/intel_menlow.c @@ -315,7 +315,7 @@ static ssize_t aux0_show(struct device *dev, result = sensor_get_auxtrip(attr->handle, 0, &value); - return result ? result : sprintf(buf, "%lu", KELVIN_TO_CELSIUS(value)); + return result ? result : sprintf(buf, "%lu", DECI_KELVIN_TO_CELSIUS(value)); } static ssize_t aux1_show(struct device *dev, @@ -327,7 +327,7 @@ static ssize_t aux1_show(struct device *dev, result = sensor_get_auxtrip(attr->handle, 1, &value); - return result ? result : sprintf(buf, "%lu", KELVIN_TO_CELSIUS(value)); + return result ? 
result : sprintf(buf, "%lu", DECI_KELVIN_TO_CELSIUS(value));
 }
 
 static ssize_t aux0_store(struct device *dev,
@@ -345,7 +345,7 @@ static ssize_t aux0_store(struct device *dev,
 	if (value < 0)
 		return -EINVAL;
 
-	result = sensor_set_auxtrip(attr->handle, 0, CELSIUS_TO_KELVIN(value));
+	result = sensor_set_auxtrip(attr->handle, 0, CELSIUS_TO_DECI_KELVIN(value));
 	return result ? result : count;
 }
 
@@ -364,7 +364,7 @@ static ssize_t aux1_store(struct device *dev,
 	if (value < 0)
 		return -EINVAL;
 
-	result = sensor_set_auxtrip(attr->handle, 1, CELSIUS_TO_KELVIN(value));
+	result = sensor_set_auxtrip(attr->handle, 1, CELSIUS_TO_DECI_KELVIN(value));
 	return result ? result : count;
 }
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 157d366e761b..4014a59828fc 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -44,9 +44,11 @@
 #define THERMAL_WEIGHT_DEFAULT 0
 
 /* Unit conversion macros */
-#define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
-				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
-#define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
+#define DECI_KELVIN_TO_CELSIUS(t)	({			\
+	long _t = (t);						\
+	((_t-2732 >= 0) ? (_t-2732+5)/10 : (_t-2732-5)/10);	\
+})
+#define CELSIUS_TO_DECI_KELVIN(t)	((t)*10+2732)
 #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100)
 #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732)
 #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off))
-- cgit v1.2.3

From e9849777d0e27cdd2902805be51da73e7c79578c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 9 Oct 2015 23:28:58 +0200
Subject: genirq: Add flag to force mask in disable_irq[_nosync]()

If an irq chip does not implement the irq_disable callback, then we use
a lazy approach for disabling the interrupt. That means that the
interrupt is marked disabled, but the interrupt line is not immediately
masked in the interrupt chip. It only becomes masked if the interrupt
is raised while it's marked disabled. We use this to avoid possibly
expensive mask/unmask operations for common case operations.

Unfortunately there are devices which do not allow the interrupt to be
disabled easily at the device level. They are forced to use
disable_irq_nosync(). This can result in taking each interrupt twice.

Instead of enforcing the non-lazy mode on all interrupts of an irq chip,
provide a settings flag, which can be set by the driver for that
particular interrupt line.

Reported-and-tested-by: Duc Dang
Signed-off-by: Thomas Gleixner
Cc: Marc Zyngier
Cc: Jason Cooper
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1510092348370.6097@nanos
---
 include/linux/irq.h   |  4 +++-
 kernel/irq/chip.c     |  9 +++++++++
 kernel/irq/manage.c   |  1 +
 kernel/irq/settings.h | 12 ++++++++++++
 4 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index ba72b60b57b1..3c1c96786248 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -72,6 +72,7 @@ enum irqchip_irq_state;
  * IRQ_IS_POLLED		- Always polled by another interrupt. Exclude
  *				  it from the spurious interrupt detection
  *				  mechanism and from core side polling.
+ * IRQ_DISABLE_UNLAZY - Disable lazy irq disable */ enum { IRQ_TYPE_NONE = 0x00000000, @@ -97,13 +98,14 @@ enum { IRQ_NOTHREAD = (1 << 16), IRQ_PER_CPU_DEVID = (1 << 17), IRQ_IS_POLLED = (1 << 18), + IRQ_DISABLE_UNLAZY = (1 << 19), }; #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ - IRQ_IS_POLLED) + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 4aa00d325b8c..15206453b12a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -241,6 +241,13 @@ void irq_enable(struct irq_desc *desc) * disabled. If an interrupt happens, then the interrupt flow * handler masks the line at the hardware level and marks it * pending. + * + * If the interrupt chip does not implement the irq_disable callback, + * a driver can disable the lazy approach for a particular irq line by + * calling 'irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY)'. This can + * be used for devices which cannot disable the interrupt at the + * device level under certain circumstances and have to use + * disable_irq[_nosync] instead. */ void irq_disable(struct irq_desc *desc) { @@ -248,6 +255,8 @@ void irq_disable(struct irq_desc *desc) if (desc->irq_data.chip->irq_disable) { desc->irq_data.chip->irq_disable(&desc->irq_data); irq_state_set_masked(desc); + } else if (irq_settings_disable_unlazy(desc)) { + mask_irq(desc); } } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 312f9cb12805..a71175ff98d5 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1463,6 +1463,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) /* If this was the last handler, shut down the IRQ line: */ if (!desc->action) { + irq_settings_clr_disable_unlazy(desc); irq_shutdown(desc); irq_release_resources(desc); } diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 3320b84cc60f..320579d89091 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h @@ -15,6 +15,7 @@ enum { _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, + _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; @@ -28,6 +29,7 @@ enum { #define IRQ_NESTED_THREAD GOT_YOU_MORON #define IRQ_PER_CPU_DEVID GOT_YOU_MORON #define IRQ_IS_POLLED GOT_YOU_MORON +#define IRQ_DISABLE_UNLAZY GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON @@ -154,3 +156,13 @@ static inline bool irq_settings_is_polled(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_IS_POLLED; } + +static inline bool irq_settings_disable_unlazy(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_DISABLE_UNLAZY; +} + +static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY; +} -- cgit v1.2.3 From ff936a04e5f28b7e0455be0e7fa91334f89e4b44 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 10:55:41 -0700 Subject: bpf: fix cb access in socket filter programs eBPF socket filter programs may see junk in 'u32 cb[5]' area, since it could have been used by protocol layers earlier. For socket filter programs used in af_packet we need to clean 20 bytes of skb->cb area if it could be used by the program. 
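As a sketch (a hypothetical eBPF socket filter, not part of this patch),
a program that reads cb[] must be able to rely on it starting out
zeroed:

	struct bpf_insn prog[] = {
		/* r0 = skb->cb[0]; without the clearing described above,
		 * this load could observe stale qdisc data */
		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
			    offsetof(struct __sk_buff, cb[0])),
		BPF_EXIT_INSN(),
	};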
For programs attached to TCP/UDP sockets we need to save/restore these
20 bytes, since they're used by protocol layers.

Remove the SK_RUN_FILTER macro, since it's no longer used.

Long term we may move this bpf cb area to per-cpu scratch, but that
requires addition of new 'per-cpu load/store' instructions, so it's not
suitable as a short-term fix.

Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields")
Reported-by: Eric Dumazet
Signed-off-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 include/linux/bpf.h    |  6 +++---
 include/linux/filter.h | 39 +++++++++++++++++++++++++++++++++++----
 kernel/bpf/verifier.c  |  2 +-
 net/core/filter.c      | 12 +++++++-----
 net/packet/af_packet.c | 10 +++++-----
 5 files changed, 51 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3697ad563899..b4fdee6cb686 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -100,6 +100,8 @@ enum bpf_access_type {
 	BPF_WRITE = 2
 };
 
+struct bpf_prog;
+
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -111,7 +113,7 @@ struct bpf_verifier_ops {
 
 	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
 				  int src_reg, int ctx_off,
-				  struct bpf_insn *insn);
+				  struct bpf_insn *insn, struct bpf_prog *prog);
 };
 
 struct bpf_prog_type_list {
@@ -120,8 +122,6 @@ struct bpf_prog_type_list {
 	enum bpf_prog_type type;
 };
 
-struct bpf_prog;
-
 struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1bbce14bcf17..4165e9ac9e36 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -13,6 +13,7 @@
 #include <linux/printk.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
+#include <net/sch_generic.h>
 
 #include <asm/cacheflush.h>
 
@@ -302,10 +303,6 @@ struct bpf_prog_aux;
 		bpf_size;						\
 	})
 
-/* Macro to invoke filter function. */
-#define SK_RUN_FILTER(filter, ctx) \
-	(*filter->prog->bpf_func)(ctx, filter->prog->insnsi)
-
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
@@ -329,6 +326,7 @@ struct bpf_prog {
 	kmemcheck_bitfield_begin(meta);
 	u16			jited:1,	/* Is our filter JIT'ed? */
 				gpl_compatible:1, /* Is filter GPL compatible? */
+				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1;	/* Do we need dst entry?
*/ kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ @@ -352,6 +350,39 @@ struct sk_filter { #define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u8 *cb_data = qdisc_skb_cb(skb)->data; + u8 saved_cb[QDISC_CB_PRIV_LEN]; + u32 res; + + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != + QDISC_CB_PRIV_LEN); + + if (unlikely(prog->cb_access)) { + memcpy(saved_cb, cb_data, sizeof(saved_cb)); + memset(cb_data, 0, sizeof(saved_cb)); + } + + res = BPF_PROG_RUN(prog, skb); + + if (unlikely(prog->cb_access)) + memcpy(cb_data, saved_cb, sizeof(saved_cb)); + + return res; +} + +static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u8 *cb_data = qdisc_skb_cb(skb)->data; + + if (unlikely(prog->cb_access)) + memset(cb_data, 0, QDISC_CB_PRIV_LEN); + return BPF_PROG_RUN(prog, skb); +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b074b23000d6..f8da034c2258 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2024,7 +2024,7 @@ static int convert_ctx_accesses(struct verifier_env *env) cnt = env->prog->aux->ops-> convert_ctx_access(type, insn->dst_reg, insn->src_reg, - insn->off, insn_buf); + insn->off, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index 342e6c8fc415..5f4cf1cffed3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -56,10 +56,10 @@ * @sk: sock associated with &sk_buff * @skb: buffer to filter * - * Run the filter code and then cut skb->data to correct size returned by - * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller + * Run the eBPF program and then cut skb->data to correct size returned by + * the program. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to SK_RUN_FILTER. It returns 0 if the packet should + * wrapper to BPF_PROG_RUN. It returns 0 if the packet should * be accepted or -EPERM if the packet should be tossed. * */ @@ -83,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = SK_RUN_FILTER(filter, skb); + unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); err = pkt_len ? 
pskb_trim(skb, pkt_len) : -EPERM; } @@ -1736,7 +1736,8 @@ static bool tc_cls_act_is_valid_access(int off, int size, static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -1827,6 +1828,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + prog->cb_access = 1; ctx_off -= offsetof(struct __sk_buff, cb[0]); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct qdisc_skb_cb, data); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 81c900fbc4a4..104910f7d1fb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1423,7 +1423,7 @@ static unsigned int fanout_demux_bpf(struct packet_fanout *f, rcu_read_lock(); prog = rcu_dereference(f->bpf_prog); if (prog) - ret = BPF_PROG_RUN(prog, skb) % num; + ret = bpf_prog_run_clear_cb(prog, skb) % num; rcu_read_unlock(); return ret; @@ -1939,16 +1939,16 @@ out_free: return err; } -static unsigned int run_filter(const struct sk_buff *skb, - const struct sock *sk, - unsigned int res) +static unsigned int run_filter(struct sk_buff *skb, + const struct sock *sk, + unsigned int res) { struct sk_filter *filter; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) - res = SK_RUN_FILTER(filter, skb); + res = bpf_prog_run_clear_cb(filter->prog, skb); rcu_read_unlock(); return res; -- cgit v1.2.3 From 7968c0e338085eba0ee2f0e0b0d833057a966679 Mon Sep 17 00:00:00 2001 From: Leif Lindholm Date: Wed, 26 Aug 2015 14:24:58 +0100 Subject: efi/arm64: Clean up efi_get_fdt_params() interface As we now have a common debug infrastructure between core and arm64 efi, drop the bit of the interface passing verbose output flags around. 
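A minimal sketch of the resulting call site (assuming EFI_DBG is set
from the common 'efi=debug' command line handling):

	struct efi_fdt_params params;

	/* verbosity is now derived from efi_enabled(EFI_DBG) internally */
	if (!efi_get_fdt_params(&params))
		return;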
Signed-off-by: Leif Lindholm Acked-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Cc: Mark Salter Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Matt Fleming --- arch/arm64/kernel/efi.c | 2 +- drivers/firmware/efi/efi.c | 6 ++---- include/linux/efi.h | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 612ad5ec1d2e..ab5eeb63e2ca 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -201,7 +201,7 @@ void __init efi_init(void) struct efi_fdt_params params; /* Grab UEFI information placed in FDT by stub */ - if (!efi_get_fdt_params(¶ms, efi_enabled(EFI_DBG))) + if (!efi_get_fdt_params(¶ms)) return; efi_system_table = params.system_table; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f0372a022c86..a0a0469e2869 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -492,7 +492,6 @@ static __initdata struct { }; struct param_info { - int verbose; int found; void *params; }; @@ -523,21 +522,20 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, else *(u64 *)dest = val; - if (info->verbose) + if (efi_enabled(EFI_DBG)) pr_info(" %s: 0x%0*llx\n", dt_params[i].name, dt_params[i].size * 2, val); } return 1; } -int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose) +int __init efi_get_fdt_params(struct efi_fdt_params *params) { struct param_info info; int ret; pr_info("Getting EFI parameters from FDT:\n"); - info.verbose = verbose; info.found = 0; info.params = params; diff --git a/include/linux/efi.h b/include/linux/efi.h index 26ca9e2fd30e..4677d8a1bfd0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -902,7 +902,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); -extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); +extern int efi_get_fdt_params(struct efi_fdt_params *params); extern struct efi_memory_map memmap; extern struct kobject *efi_kobj; -- cgit v1.2.3 From bf924863c9445174c6e118f723dc477e2b6ccc7e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Sep 2015 10:08:15 +0200 Subject: efi: Add support for UEFIv2.5 Properties table Version 2.5 of the UEFI spec introduces a new configuration table called the 'EFI Properties table'. Currently, it is only used to convey whether the Memory Protection feature is enabled, which splits PE/COFF images into separate code and data memory regions. 
Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Acked-by: Dave Young Signed-off-by: Matt Fleming --- drivers/firmware/efi/efi.c | 30 ++++++++++++++++-------------- include/linux/efi.h | 13 +++++++++++++ 2 files changed, 29 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 278d4d88d250..c297d78f50fd 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -26,20 +26,21 @@ #include struct efi __read_mostly efi = { - .mps = EFI_INVALID_TABLE_ADDR, - .acpi = EFI_INVALID_TABLE_ADDR, - .acpi20 = EFI_INVALID_TABLE_ADDR, - .smbios = EFI_INVALID_TABLE_ADDR, - .smbios3 = EFI_INVALID_TABLE_ADDR, - .sal_systab = EFI_INVALID_TABLE_ADDR, - .boot_info = EFI_INVALID_TABLE_ADDR, - .hcdp = EFI_INVALID_TABLE_ADDR, - .uga = EFI_INVALID_TABLE_ADDR, - .uv_systab = EFI_INVALID_TABLE_ADDR, - .fw_vendor = EFI_INVALID_TABLE_ADDR, - .runtime = EFI_INVALID_TABLE_ADDR, - .config_table = EFI_INVALID_TABLE_ADDR, - .esrt = EFI_INVALID_TABLE_ADDR, + .mps = EFI_INVALID_TABLE_ADDR, + .acpi = EFI_INVALID_TABLE_ADDR, + .acpi20 = EFI_INVALID_TABLE_ADDR, + .smbios = EFI_INVALID_TABLE_ADDR, + .smbios3 = EFI_INVALID_TABLE_ADDR, + .sal_systab = EFI_INVALID_TABLE_ADDR, + .boot_info = EFI_INVALID_TABLE_ADDR, + .hcdp = EFI_INVALID_TABLE_ADDR, + .uga = EFI_INVALID_TABLE_ADDR, + .uv_systab = EFI_INVALID_TABLE_ADDR, + .fw_vendor = EFI_INVALID_TABLE_ADDR, + .runtime = EFI_INVALID_TABLE_ADDR, + .config_table = EFI_INVALID_TABLE_ADDR, + .esrt = EFI_INVALID_TABLE_ADDR, + .properties_table = EFI_INVALID_TABLE_ADDR, }; EXPORT_SYMBOL(efi); @@ -365,6 +366,7 @@ static __initdata efi_config_table_type_t common_tables[] = { {SMBIOS3_TABLE_GUID, "SMBIOS 3.0", &efi.smbios3}, {UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga}, {EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt}, + {EFI_PROPERTIES_TABLE_GUID, "PROP", &efi.properties_table}, {NULL_GUID, NULL, NULL}, }; diff --git a/include/linux/efi.h b/include/linux/efi.h index 4677d8a1bfd0..d6a9bee755f2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -596,6 +596,9 @@ void efi_native_runtime_setup(void); #define DEVICE_TREE_GUID \ EFI_GUID( 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 ) +#define EFI_PROPERTIES_TABLE_GUID \ + EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) + typedef struct { efi_guid_t guid; u64 table; @@ -809,6 +812,15 @@ typedef struct _efi_file_io_interface { #define EFI_FILE_MODE_WRITE 0x0000000000000002 #define EFI_FILE_MODE_CREATE 0x8000000000000000 +typedef struct { + u32 version; + u32 length; + u64 memory_protection_attribute; +} efi_properties_table_t; + +#define EFI_PROPERTIES_TABLE_VERSION 0x00010000 +#define EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA 0x1 + #define EFI_INVALID_TABLE_ADDR (~0UL) /* @@ -831,6 +843,7 @@ extern struct efi { unsigned long runtime; /* runtime table */ unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ + unsigned long properties_table; /* properties table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From a1041713349d0b823b492d7b4ea4325d0b5666db Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Sep 2015 07:29:34 -0700 Subject: efi: Introduce EFI_NX_PE_DATA bit and set it from properties table UEFI v2.5 introduces a runtime memory protection feature that splits PE/COFF runtime images into separate code and data regions. 
Since this may require special handling by the OS, allocate a EFI_xxx bit to keep track of whether this feature is currently active or not. Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Signed-off-by: Matt Fleming --- drivers/firmware/efi/efi.c | 18 ++++++++++++++++++ include/linux/efi.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index c297d78f50fd..31fc864eb037 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -426,6 +426,24 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, } pr_cont("\n"); set_bit(EFI_CONFIG_TABLES, &efi.flags); + + /* Parse the EFI Properties table if it exists */ + if (efi.properties_table != EFI_INVALID_TABLE_ADDR) { + efi_properties_table_t *tbl; + + tbl = early_memremap(efi.properties_table, sizeof(*tbl)); + if (tbl == NULL) { + pr_err("Could not map Properties table!\n"); + return -ENOMEM; + } + + if (tbl->memory_protection_attribute & + EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA) + set_bit(EFI_NX_PE_DATA, &efi.flags); + + early_memunmap(tbl, sizeof(*tbl)); + } + return 0; } diff --git a/include/linux/efi.h b/include/linux/efi.h index d6a9bee755f2..fa5106c2f9f5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -973,6 +973,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_PARAVIRT 6 /* Access is via a paravirt interface */ #define EFI_ARCH_1 7 /* First arch-specific bit */ #define EFI_DBG 8 /* Print additional debug info at runtime */ +#define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ #ifdef CONFIG_EFI /* -- cgit v1.2.3 From 0f96a99dab366333439e110d6ad253bc7c557c09 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Wed, 30 Sep 2015 23:01:56 +0900 Subject: efi: Add "efi_fake_mem" boot option This patch introduces new boot option named "efi_fake_mem". By specifying this parameter, you can add arbitrary attribute to specific memory range. This is useful for debugging of Address Range Mirroring feature. 
For example, if "efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000" is specified, the original (firmware provided) EFI memmap will be updated so that the specified memory regions have EFI_MEMORY_MORE_RELIABLE attribute (0x10000): efi: mem36: [Conventional Memory| | | | | | |WB|WT|WC|UC] range=[0x0000000100000000-0x00000020a0000000) (129536MB) efi: mem36: [Conventional Memory| |MR| | | | |WB|WT|WC|UC] range=[0x0000000100000000-0x0000000180000000) (2048MB) efi: mem37: [Conventional Memory| | | | | | |WB|WT|WC|UC] range=[0x0000000180000000-0x00000010a0000000) (61952MB) efi: mem38: [Conventional Memory| |MR| | | | |WB|WT|WC|UC] range=[0x00000010a0000000-0x0000001120000000) (2048MB) efi: mem39: [Conventional Memory| | | | | | |WB|WT|WC|UC] range=[0x0000001120000000-0x00000020a0000000) (63488MB) And you will find that the following message is output: efi: Memory: 4096M/131455M mirrored memory Signed-off-by: Taku Izumi Cc: Tony Luck Cc: Xishi Qiu Cc: Kamezawa Hiroyuki Cc: Ard Biesheuvel Signed-off-by: Matt Fleming --- Documentation/kernel-parameters.txt | 15 +++ arch/x86/kernel/setup.c | 4 +- drivers/firmware/efi/Kconfig | 22 ++++ drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/fake_mem.c | 238 ++++++++++++++++++++++++++++++++++++ include/linux/efi.h | 6 + 6 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 drivers/firmware/efi/fake_mem.c (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1d6f0459cd7b..cd5312f24981 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1092,6 +1092,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted. you are really sure that your UEFI does sane gc and fulfills the spec otherwise your board may brick. + efi_fake_mem= nn[KMG]@ss[KMG]:aa[,nn[KMG]@ss[KMG]:aa,..] [EFI; X86] + Add arbitrary attribute to specific memory range by + updating original EFI memory map. + Region of memory which aa attribute is added to is + from ss to ss+nn. + If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000 + is specified, EFI_MEMORY_MORE_RELIABLE(0x10000) + attribute is added to range 0x100000000-0x180000000 and + 0x10a0000000-0x1120000000. + + Using this parameter you can do debugging of EFI memmap + related feature. For example, you can do debugging of + Address Range Mirroring feature even if your box + doesn't support it. + eisa_irq_edge= [PARISC,HW] See header of drivers/parisc/eisa.c. diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 80f874bf999e..e3ed628f7db4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1104,8 +1104,10 @@ void __init setup_arch(char **cmdline_p) memblock_set_current_limit(ISA_END_ADDRESS); memblock_x86_fill(); - if (efi_enabled(EFI_BOOT)) + if (efi_enabled(EFI_BOOT)) { + efi_fake_memmap(); efi_find_mirror(); + } /* * The EFI specification says that boot service code won't be called diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 54071c148340..1de6f0ed5077 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -52,6 +52,28 @@ config EFI_RUNTIME_MAP See also Documentation/ABI/testing/sysfs-firmware-efi-runtime-map. +config EFI_FAKE_MEMMAP + bool "Enable EFI fake memory map" + depends on EFI && X86 + default n + help + Saying Y here will enable "efi_fake_mem" boot option. 
+ By specifying this parameter, you can add arbitrary attribute + to specific memory range by updating original (firmware provided) + EFI memmap. + This is useful for debugging of EFI memmap related feature. + e.g. Address Range Mirroring feature. + +config EFI_MAX_FAKE_MEM + int "maximum allowable number of ranges in efi_fake_mem boot option" + depends on EFI_FAKE_MEMMAP + range 1 128 + default 8 + help + Maximum allowable number of ranges in efi_fake_mem boot option. + Ranges can be set up to this value using comma-separated list. + The default value is 8. + config EFI_PARAMS_FROM_FDT bool help diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 6fd3da938717..c24f00569acb 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_UEFI_CPER) += cper.o obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o obj-$(CONFIG_EFI_STUB) += libstub/ +obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c new file mode 100644 index 000000000000..32bcb14df2c8 --- /dev/null +++ b/drivers/firmware/efi/fake_mem.c @@ -0,0 +1,238 @@ +/* + * fake_mem.c + * + * Copyright (C) 2015 FUJITSU LIMITED + * Author: Taku Izumi + * + * This code introduces new boot option named "efi_fake_mem" + * By specifying this parameter, you can add arbitrary attribute to + * specific memory range by updating original (firmware provided) EFI + * memmap. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM + +struct fake_mem { + struct range range; + u64 attribute; +}; +static struct fake_mem fake_mems[EFI_MAX_FAKEMEM]; +static int nr_fake_mem; + +static int __init cmp_fake_mem(const void *x1, const void *x2) +{ + const struct fake_mem *m1 = x1; + const struct fake_mem *m2 = x2; + + if (m1->range.start < m2->range.start) + return -1; + if (m1->range.start > m2->range.start) + return 1; + return 0; +} + +void __init efi_fake_memmap(void) +{ + u64 start, end, m_start, m_end, m_attr; + int new_nr_map = memmap.nr_map; + efi_memory_desc_t *md; + u64 new_memmap_phy; + void *new_memmap; + void *old, *new; + int i; + + if (!nr_fake_mem || !efi_enabled(EFI_MEMMAP)) + return; + + /* count up the number of EFI memory descriptor */ + for (old = memmap.map; old < memmap.map_end; old += memmap.desc_size) { + md = old; + start = md->phys_addr; + end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + for (i = 0; i < nr_fake_mem; i++) { + /* modifying range */ + m_start = fake_mems[i].range.start; + m_end = fake_mems[i].range.end; + + if (m_start <= start) { + /* split into 2 parts */ + if (start < m_end && m_end < end) + new_nr_map++; + } + if (start < m_start && m_start < end) { + /* split into 3 parts */ + if (m_end < end) + new_nr_map += 2; + /* split into 2 parts */ + if (end <= m_end) + new_nr_map++; + } + } + } + + /* allocate memory for new EFI memmap */ + new_memmap_phy = memblock_alloc(memmap.desc_size * new_nr_map, + PAGE_SIZE); + if (!new_memmap_phy) + return; + + /* create new EFI memmap */ + new_memmap = early_memremap(new_memmap_phy, + memmap.desc_size * new_nr_map); + if (!new_memmap) { + memblock_free(new_memmap_phy, memmap.desc_size * new_nr_map); + return; + } + + for (old = memmap.map, new = new_memmap; + old < memmap.map_end; + old += memmap.desc_size, new += memmap.desc_size) { + + /* copy original EFI memory descriptor */ + memcpy(new, old, memmap.desc_size); + md = new; + start = md->phys_addr; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + for (i = 0; i < nr_fake_mem; i++) { + /* modifying range */ + m_start = fake_mems[i].range.start; + m_end = fake_mems[i].range.end; + m_attr = fake_mems[i].attribute; + + if (m_start <= start && end <= m_end) + md->attribute |= m_attr; + + if (m_start <= start && + (start < m_end && m_end < end)) { + /* first part */ + md->attribute |= m_attr; + md->num_pages = (m_end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += memmap.desc_size; + memcpy(new, old, memmap.desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && m_end < end) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* middle part */ + new += memmap.desc_size; + memcpy(new, old, memmap.desc_size); + md = new; + md->attribute |= m_attr; + md->phys_addr = m_start; + md->num_pages = (m_end - m_start + 1) >> + EFI_PAGE_SHIFT; + /* last part */ + new += memmap.desc_size; + memcpy(new, old, memmap.desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - m_end) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && + (end <= m_end)) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += memmap.desc_size; + memcpy(new, old, memmap.desc_size); + md = new; + md->phys_addr = m_start; + 
md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + md->attribute |= m_attr; + } + } + } + + /* swap into new EFI memmap */ + efi_unmap_memmap(); + memmap.map = new_memmap; + memmap.phys_map = (void *)new_memmap_phy; + memmap.nr_map = new_nr_map; + memmap.map_end = memmap.map + memmap.nr_map * memmap.desc_size; + set_bit(EFI_MEMMAP, &efi.flags); + + /* print new EFI memmap */ + efi_print_memmap(); +} + +static int __init setup_fake_mem(char *p) +{ + u64 start = 0, mem_size = 0, attribute = 0; + int i; + + if (!p) + return -EINVAL; + + while (*p != '\0') { + mem_size = memparse(p, &p); + if (*p == '@') + start = memparse(p+1, &p); + else + break; + + if (*p == ':') + attribute = simple_strtoull(p+1, &p, 0); + else + break; + + if (nr_fake_mem >= EFI_MAX_FAKEMEM) + break; + + fake_mems[nr_fake_mem].range.start = start; + fake_mems[nr_fake_mem].range.end = start + mem_size - 1; + fake_mems[nr_fake_mem].attribute = attribute; + nr_fake_mem++; + + if (*p == ',') + p++; + } + + sort(fake_mems, nr_fake_mem, sizeof(struct fake_mem), + cmp_fake_mem, NULL); + + for (i = 0; i < nr_fake_mem; i++) + pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]", + fake_mems[i].attribute, fake_mems[i].range.start, + fake_mems[i].range.end); + + return *p == '\0' ? 0 : -EINVAL; +} + +early_param("efi_fake_mem", setup_fake_mem); diff --git a/include/linux/efi.h b/include/linux/efi.h index fa5106c2f9f5..4d01c1033fce 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -922,6 +922,12 @@ extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; extern bool efi_poweroff_required(void); +#ifdef CONFIG_EFI_FAKE_MEMMAP +extern void __init efi_fake_memmap(void); +#else +static inline void efi_fake_memmap(void) { } +#endif + /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc(m, md) \ for ((md) = (m)->map; \ -- cgit v1.2.3 From b817525a4a80c04e4ca44192d97a1ffa9f2be572 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 2 Oct 2015 14:47:05 -0400 Subject: writeback: bdi_writeback iteration must not skip dying ones bdi_for_each_wb() is used in several places to wake up or issue writeback work items to all wb's (bdi_writeback's) on a given bdi. The iteration is performed by walking bdi->cgwb_tree; however, the tree only indexes wb's which are currently active. For example, when a memcg gets associated with a different blkcg, the old wb is removed from the tree so that the new one can be indexed. The old wb starts dying from then on but will linger till all its inodes are drained. As these dying wb's may still host dirty inodes, writeback operations which affect all wb's must include them. bdi_for_each_wb() skipping dying wb's led to sync(2) missing and failing to sync the inodes belonging to those wb's. This patch adds an RCU-protected @bdi->wb_list which lists all wb's belonging to that bdi. wb's are added on creation and removed on release rather than on the start of destruction. bdi_for_each_wb() usages are replaced with list_for_each[_continue]_rcu() iterations over @bdi->wb_list and bdi_for_each_wb() and its helpers are removed. v2: Updated as per Jan. last_wb ref leak in bdi_split_work_to_wbs() fixed and unnecessary list head severing in cgwb_bdi_destroy() removed.
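To make the new iteration pattern concrete, here is a minimal sketch of the pin-and-continue loop the patch introduces in bdi_split_work_to_wbs() (simplified, not the actual kernel code; needs_work() and do_blocking_work() are hypothetical stand-ins for the real work-issuing logic):

static void walk_all_wbs(struct backing_dev_info *bdi)
{
	struct bdi_writeback *last_wb = NULL;
	/* start from the list head so _continue_rcu() visits every entry */
	struct bdi_writeback *wb = list_entry_rcu(&bdi->wb_list,
						  struct bdi_writeback, bdi_node);

restart:
	rcu_read_lock();
	list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
		if (last_wb) {
			/* drop the pin taken on the previous pass */
			wb_put(last_wb);
			last_wb = NULL;
		}

		if (!needs_work(wb))	/* hypothetical predicate */
			continue;

		/*
		 * Pin @wb so it stays on @bdi->wb_list (wb's are only
		 * unlinked on release, not when they start dying), then
		 * drop the RCU lock to do work that may sleep and resume
		 * the walk from the pinned cursor.
		 */
		wb_get(wb);
		last_wb = wb;
		rcu_read_unlock();
		do_blocking_work(wb);	/* hypothetical, may sleep */
		goto restart;
	}
	rcu_read_unlock();

	if (last_wb)
		wb_put(last_wb);
}

Because a dying wb remains on @bdi->wb_list until its last reference is dropped, resuming from the pinned cursor is safe even after sleeping.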
Signed-off-by: Tejun Heo Reported-and-tested-by: Artem Bityutskiy Fixes: ebe41ab0c79d ("writeback: implement bdi_for_each_wb()") Link: http://lkml.kernel.org/g/1443012552.19983.209.camel@gmail.com Cc: Jan Kara Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 31 ++++++++++++++------ include/linux/backing-dev-defs.h | 3 ++ include/linux/backing-dev.h | 63 ---------------------------------------- mm/backing-dev.c | 14 ++++++++- mm/page-writeback.c | 3 +- 5 files changed, 39 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d0da30668e98..29e4599f6fc1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -778,19 +778,24 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, struct wb_writeback_work *base_work, bool skip_if_busy) { - int next_memcg_id = 0; - struct bdi_writeback *wb; - struct wb_iter iter; + struct bdi_writeback *last_wb = NULL; + struct bdi_writeback *wb = list_entry_rcu(&bdi->wb_list, + struct bdi_writeback, bdi_node); might_sleep(); restart: rcu_read_lock(); - bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) { + list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) { DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done); struct wb_writeback_work fallback_work; struct wb_writeback_work *work; long nr_pages; + if (last_wb) { + wb_put(last_wb); + last_wb = NULL; + } + /* SYNC_ALL writes out I_DIRTY_TIME too */ if (!wb_has_dirty_io(wb) && (base_work->sync_mode == WB_SYNC_NONE || @@ -819,12 +824,22 @@ restart: wb_queue_work(wb, work); - next_memcg_id = wb->memcg_css->id + 1; + /* + * Pin @wb so that it stays on @bdi->wb_list. This allows + * continuing iteration from @wb after dropping and + * regrabbing rcu read lock. + */ + wb_get(wb); + last_wb = wb; + rcu_read_unlock(); wb_wait_for_completion(bdi, &fallback_work_done); goto restart; } rcu_read_unlock(); + + if (last_wb) + wb_put(last_wb); } #else /* CONFIG_CGROUP_WRITEBACK */ @@ -1857,12 +1872,11 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { struct bdi_writeback *wb; - struct wb_iter iter; if (!bdi_has_dirty_io(bdi)) continue; - bdi_for_each_wb(wb, bdi, &iter, 0) + list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) wb_start_writeback(wb, wb_split_bdi_pages(wb, nr_pages), false, reason); } @@ -1894,9 +1908,8 @@ static void wakeup_dirtytime_writeback(struct work_struct *w) rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { struct bdi_writeback *wb; - struct wb_iter iter; - bdi_for_each_wb(wb, bdi, &iter, 0) + list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) if (!list_empty(&wb->b_dirty_time)) wb_wakeup(wb); } diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a23209b43842..1b4d69f68c33 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,8 @@ struct bdi_writeback { struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + struct list_head bdi_node; /* anchored at bdi->wb_list */ + #ifdef CONFIG_CGROUP_WRITEBACK struct percpu_ref refcnt; /* used only for !root wb's */ struct fprop_local_percpu memcg_completions; @@ -150,6 +152,7 @@ struct backing_dev_info { atomic_long_t tot_write_bandwidth; struct bdi_writeback wb; /* the root writeback info for this bdi */ + struct list_head wb_list; /* list of all wbs */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct 
rb_root cgwb_congested_tree; /* their congested states */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d5eb4ad1c534..78677e5a65bf 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -408,61 +408,6 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) rcu_read_unlock(); } -struct wb_iter { - int start_memcg_id; - struct radix_tree_iter tree_iter; - void **slot; -}; - -static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, - struct backing_dev_info *bdi) -{ - struct radix_tree_iter *titer = &iter->tree_iter; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - if (iter->start_memcg_id >= 0) { - iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id); - iter->start_memcg_id = -1; - } else { - iter->slot = radix_tree_next_slot(iter->slot, titer, 0); - } - - if (!iter->slot) - iter->slot = radix_tree_next_chunk(&bdi->cgwb_tree, titer, 0); - if (iter->slot) - return *iter->slot; - return NULL; -} - -static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, - struct backing_dev_info *bdi, - int start_memcg_id) -{ - iter->start_memcg_id = start_memcg_id; - - if (start_memcg_id) - return __wb_iter_next(iter, bdi); - else - return &bdi->wb; -} - -/** - * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order - * @wb_cur: cursor struct bdi_writeback pointer - * @bdi: bdi to walk wb's of - * @iter: pointer to struct wb_iter to be used as iteration buffer - * @start_memcg_id: memcg ID to start iteration from - * - * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending - * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter - * to be used as temp storage during iteration. rcu_read_lock() must be - * held throughout iteration. - */ -#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \ - for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \ - (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) - #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool inode_cgwb_enabled(struct inode *inode) @@ -522,14 +467,6 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } -struct wb_iter { - int next_id; -}; - -#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ - for ((iter)->next_id = (start_blkcg_id); \ - ({ (wb_cur) = !(iter)->next_id++ ? 
&(bdi)->wb : NULL; }); ) - static inline int inode_congested(struct inode *inode, int cong_bits) { return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2df8ddcb0ca0..e92d77937fd3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -480,6 +480,10 @@ static void cgwb_release_workfn(struct work_struct *work) release_work); struct backing_dev_info *bdi = wb->bdi; + spin_lock_irq(&cgwb_lock); + list_del_rcu(&wb->bdi_node); + spin_unlock_irq(&cgwb_lock); + wb_shutdown(wb); css_put(wb->memcg_css); @@ -575,6 +579,7 @@ static int cgwb_create(struct backing_dev_info *bdi, ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb); if (!ret) { atomic_inc(&bdi->usage_cnt); + list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); css_get(memcg_css); @@ -764,15 +769,22 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } int bdi_init(struct backing_dev_info *bdi) { + int ret; + bdi->dev = NULL; bdi->min_ratio = 0; bdi->max_ratio = 100; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); + INIT_LIST_HEAD(&bdi->wb_list); init_waitqueue_head(&bdi->wb_waitq); - return cgwb_bdi_init(bdi); + ret = cgwb_bdi_init(bdi); + + list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); + + return ret; } EXPORT_SYMBOL(bdi_init); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 902e5f215e57..0f1a94e9f351 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1956,7 +1956,6 @@ void laptop_mode_timer_fn(unsigned long data) int nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); struct bdi_writeback *wb; - struct wb_iter iter; /* * We want to write everything out, not just down to the dirty @@ -1966,7 +1965,7 @@ void laptop_mode_timer_fn(unsigned long data) return; rcu_read_lock(); - bdi_for_each_wb(wb, &q->backing_dev_info, &iter, 0) + list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node) if (wb_has_dirty_io(wb)) wb_start_writeback(wb, nr_pages, true, WB_REASON_LAPTOP_TIMER); -- cgit v1.2.3 From c5edf9cdc4c483b9a94c03fc0b9f769bd090bf3e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Sep 2015 13:04:26 -0400 Subject: writeback: fix incorrect calculation of available memory for memcg domains For memcg domains, the amount of available memory was calculated as min(the amount currently in use + headroom according to memcg, total clean memory) This isn't quite correct as what should be capped by the amount of clean memory is the headroom, not the sum of memory in use and headroom. For example, if a memcg domain has a significant amount of dirty memory, the above can lead to a value which is lower than the current amount in use which doesn't make much sense. In most circumstances, the above leads to a number which is somewhat but not drastically lower. As the amount of memory which can be readily allocated to the memcg domain is capped by the amount of system-wide clean memory which is not already assigned to the memcg itself, the number we want is the amount currently in use + min(headroom according to memcg, clean memory elsewhere in the system) This patch updates mem_cgroup_wb_stats() to return the number of filepages and headroom instead of the calculated available pages. mdtc_cap_avail() is renamed to mdtc_calc_avail() and performs the above calculation from file, headroom, dirty and globally clean pages. 
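To make the difference concrete, here is a small standalone userspace sketch with invented numbers (all values in pages; min_ul() stands in for the kernel's min()):

/* Worked example of the old vs. new "available memory" calculation for
 * a memcg writeback domain. All numbers are made up for illustration. */
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* memcg domain: 1000 file pages in use, 900 of them dirty,
	 * 400 pages of headroom according to the memcg limits */
	unsigned long filepages = 1000, dirty = 900, headroom = 400;
	/* global domain: 5000 pages available, 4800 of them dirty */
	unsigned long g_avail = 5000, g_dirty = 4800;

	unsigned long g_clean = g_avail - min_ul(g_avail, g_dirty);	/* 200 */

	/* old: min(in use + headroom, total clean memory) */
	unsigned long avail_old = min_ul(filepages + headroom, g_clean);

	/* new: in use + min(headroom, clean memory elsewhere) */
	unsigned long clean = filepages - min_ul(filepages, dirty);	/* 100 */
	unsigned long other_clean = g_clean - min_ul(g_clean, clean);	/* 100 */
	unsigned long avail_new = filepages + min_ul(headroom, other_clean);

	/* old = 200 pages, below the 1000 pages already in use;
	 * new = 1100 pages, which is the sensible answer */
	printf("old=%lu new=%lu\n", avail_old, avail_new);
	return 0;
}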
v2: Dummy mem_cgroup_wb_stats() implementation wasn't updated leading to build failure when !CGROUP_WRITEBACK. Fixed. Signed-off-by: Tejun Heo Fixes: c2aa723a6093 ("writeback: implement memcg writeback domain based throttling") Signed-off-by: Jens Axboe --- include/linux/memcontrol.h | 8 +++++--- mm/memcontrol.c | 35 +++++++++++++++++------------------ mm/page-writeback.c | 29 ++++++++++++++++++----------- 3 files changed, 40 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6452ff4c463f..3e3318ddfc0e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -676,8 +676,9 @@ enum { struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); -void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, - unsigned long *pdirty, unsigned long *pwriteback); +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, + unsigned long *pwriteback); #else /* CONFIG_CGROUP_WRITEBACK */ @@ -687,7 +688,8 @@ static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) } static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, - unsigned long *pavail, + unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1fedbde68f59..882c10cfd0ba 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3740,44 +3740,43 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) /** * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg * @wb: bdi_writeback in question - * @pavail: out parameter for number of available pages + * @pfilepages: out parameter for number of file pages + * @pheadroom: out parameter for number of allocatable pages according to memcg * @pdirty: out parameter for number of dirty pages * @pwriteback: out parameter for number of pages under writeback * - * Determine the numbers of available, dirty, and writeback pages in @wb's - * memcg. Dirty and writeback are self-explanatory. Available is a bit - * more involved. + * Determine the numbers of file, headroom, dirty, and writeback pages in + * @wb's memcg. File, dirty and writeback are self-explanatory. Headroom + * is a bit more involved. * - * A memcg's headroom is "min(max, high) - used". The available memory is - * calculated as the lowest headroom of itself and the ancestors plus the - * number of pages already being used for file pages. Note that this - * doesn't consider the actual amount of available memory in the system. - * The caller should further cap *@pavail accordingly. + * A memcg's headroom is "min(max, high) - used". In the hierarchy, the + * headroom is calculated as the lowest headroom of itself and the + * ancestors. Note that this doesn't consider the actual amount of + * available memory in the system. The caller should further cap + * *@pheadroom accordingly. 
*/ -void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, - unsigned long *pdirty, unsigned long *pwriteback) +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, + unsigned long *pwriteback) { struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - unsigned long head_room = PAGE_COUNTER_MAX; - unsigned long file_pages; *pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ *pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | + (1 << LRU_ACTIVE_FILE)); + *pheadroom = PAGE_COUNTER_MAX; - file_pages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | - (1 << LRU_ACTIVE_FILE)); while ((parent = parent_mem_cgroup(memcg))) { unsigned long ceiling = min(memcg->memory.limit, memcg->high); unsigned long used = page_counter_read(&memcg->memory); - head_room = min(head_room, ceiling - min(ceiling, used)); + *pheadroom = min(*pheadroom, ceiling - min(ceiling, used)); memcg = parent; } - - *pavail = file_pages + head_room; } #else /* CONFIG_CGROUP_WRITEBACK */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 56c0bffa9f49..2c90357c34ea 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -684,13 +684,19 @@ static unsigned long hard_dirty_limit(struct wb_domain *dom, return max(thresh, dom->dirty_limit); } -/* memory available to a memcg domain is capped by system-wide clean memory */ -static void mdtc_cap_avail(struct dirty_throttle_control *mdtc) +/* + * Memory which can be further allocated to a memcg domain is capped by + * system-wide clean memory excluding the amount being used in the domain. + */ +static void mdtc_calc_avail(struct dirty_throttle_control *mdtc, + unsigned long filepages, unsigned long headroom) { struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc); - unsigned long clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); + unsigned long clean = filepages - min(filepages, mdtc->dirty); + unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); + unsigned long other_clean = global_clean - min(global_clean, clean); - mdtc->avail = min(mdtc->avail, clean); + mdtc->avail = filepages + min(headroom, other_clean); } /** @@ -1564,16 +1570,16 @@ static void balance_dirty_pages(struct address_space *mapping, } if (mdtc) { - unsigned long writeback; + unsigned long filepages, headroom, writeback; /* * If @wb belongs to !root memcg, repeat the same * basic calculations for the memcg domain. 
*/ - mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, - &writeback); - mdtc_cap_avail(mdtc); + mem_cgroup_wb_stats(wb, &filepages, &headroom, + &mdtc->dirty, &writeback); mdtc->dirty += writeback; + mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); @@ -1895,10 +1901,11 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) return true; if (mdtc) { - unsigned long writeback; + unsigned long filepages, headroom, writeback; - mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, &writeback); - mdtc_cap_avail(mdtc); + mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty, + &writeback); + mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); /* ditto, ignore writeback */ if (mdtc->dirty > mdtc->bg_thresh) -- cgit v1.2.3 From f96b3c4f34b294a2293a5aa1d55e12e66aee055d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 29 Sep 2015 15:12:37 +0200 Subject: PM / Domains: Remove in_progress counter from struct generic_pm_domain Commit ba2bbfbf6307 ("PM / Domains: Remove intermediate states..") changed the power off sequence (pm_genpd_poweroff()), which from a locking point of view means the genpd mutex is held throughout the sequence. The above change means the in_progress counter can't be updated while pm_genpd_poweroff() is executing, which allows us to remove the counter. Instead we inform pm_genpd_poweroff() via a bool parameter, to indicate whether we call it from the scheduled work or from the ->runtime_suspend() callback, since that is all that matters. Signed-off-by: Ulf Hansson Reviewed-by: Lina Iyer Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 12 +++++------- include/linux/pm_domain.h | 1 - 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index cf4950d92937..c785f2e398b9 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -315,11 +315,12 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, /** * pm_genpd_poweroff - Remove power from a given PM domain. * @genpd: PM domain to power down. + * @is_async: PM domain is powered down from a scheduled work * * If all of the @genpd's devices have been suspended and all of its subdomains * have been powered down, remove power from @genpd.
*/ -static int pm_genpd_poweroff(struct generic_pm_domain *genpd) +static int pm_genpd_poweroff(struct generic_pm_domain *genpd, bool is_async) { struct pm_domain_data *pdd; struct gpd_link *link; @@ -351,7 +352,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) not_suspended++; } - if (not_suspended > genpd->in_progress) + if (not_suspended > 1 || (not_suspended == 1 && is_async)) return -EBUSY; if (genpd->gov && genpd->gov->power_down_ok) { @@ -399,7 +400,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); mutex_lock(&genpd->lock); - pm_genpd_poweroff(genpd); + pm_genpd_poweroff(genpd, true); mutex_unlock(&genpd->lock); } @@ -445,9 +446,7 @@ static int pm_genpd_runtime_suspend(struct device *dev) return 0; mutex_lock(&genpd->lock); - genpd->in_progress++; - pm_genpd_poweroff(genpd); - genpd->in_progress--; + pm_genpd_poweroff(genpd, false); mutex_unlock(&genpd->lock); return 0; @@ -1462,7 +1461,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd, mutex_init(&genpd->lock); genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); - genpd->in_progress = 0; atomic_set(&genpd->sd_count, 0); genpd->status = is_off ? GPD_STATE_POWER_OFF : GPD_STATE_ACTIVE; genpd->device_count = 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 384b1d193707..0eaa730c890d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -47,7 +47,6 @@ struct generic_pm_domain { struct dev_power_governor *gov; struct work_struct power_off_work; const char *name; - unsigned int in_progress; /* Number of devices being suspended now */ atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ unsigned int device_count; /* Number of devices */ -- cgit v1.2.3 From bb4b72fc63d4c1c2ccd5e5af95e48b77d6cad80c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 6 Oct 2015 14:27:42 +0200 Subject: PM / Domains: Remove pm_genpd_poweroff_unused() API As the last user of the pm_genpd_poweroff_unused() API has moved into relying on genpd to deal with this internally from a late_initcall, let's remove the API. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 10 +++------- include/linux/pm_domain.h | 3 --- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c785f2e398b9..dcd451c85c99 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -501,15 +501,15 @@ static int __init pd_ignore_unused_setup(char *__unused) __setup("pd_ignore_unused", pd_ignore_unused_setup); /** - * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. + * genpd_poweroff_unused - Power off all PM domains with no devices in use. 
*/ -void pm_genpd_poweroff_unused(void) +static int __init genpd_poweroff_unused(void) { struct generic_pm_domain *genpd; if (pd_ignore_unused) { pr_warn("genpd: Not disabling unused power domains\n"); - return; + return 0; } mutex_lock(&gpd_list_lock); @@ -518,11 +518,7 @@ void pm_genpd_poweroff_unused(void) genpd_queue_power_off_work(genpd); mutex_unlock(&gpd_list_lock); -} -static int __init genpd_poweroff_unused(void) -{ - pm_genpd_poweroff_unused(); return 0; } late_initcall(genpd_poweroff_unused); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 0eaa730c890d..a736f0f0ca66 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -125,9 +125,7 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); - extern int pm_genpd_poweron(struct generic_pm_domain *genpd); -extern void pm_genpd_poweroff_unused(void); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -170,7 +168,6 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline void pm_genpd_poweroff_unused(void) {} #endif static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, -- cgit v1.2.3 From ea823c7cbffa7bae311d78761866ac4db344c89b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 6 Oct 2015 14:27:51 +0200 Subject: PM / Domains: Remove pm_genpd_poweron() API When genpd could still be built with CONFIG_PM_RUNTIME (nowadays CONFIG_PM) unset, the pm_genpd_poweron() API served a purpose, since it allowed users to power on a PM domain. As such a configuration is no longer supported, users shall rely solely on the runtime PM APIs to power on a PM domain. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 +++- include/linux/pm_domain.h | 5 ----- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index dcd451c85c99..8724b4732e64 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -192,6 +192,8 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) queue_work(pm_wq, &genpd->power_off_work); } +static int pm_genpd_poweron(struct generic_pm_domain *genpd); + /** * __pm_genpd_poweron - Restore power to a given PM domain and its masters. * @genpd: PM domain to power up. @@ -245,7 +247,7 @@ static int __pm_genpd_poweron(struct generic_pm_domain *genpd) * pm_genpd_poweron - Restore power to a given PM domain and its masters. * @genpd: PM domain to power up.
*/ -int pm_genpd_poweron(struct generic_pm_domain *genpd) +static int pm_genpd_poweron(struct generic_pm_domain *genpd) { int ret; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a736f0f0ca66..f4dd8105b024 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -125,7 +125,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); -extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -164,10 +163,6 @@ static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { } -static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) -{ - return -ENOSYS; -} #endif static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, -- cgit v1.2.3 From 0ad95472bf169a3501991f8f33f5147f792a8116 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 23 Sep 2015 15:49:29 +0300 Subject: lockd: create NSM handles per net namespace Commit cb7323fffa85 ("lockd: create and use per-net NSM RPC clients on MON/UNMON requests") introduced per-net NSM RPC clients. Unfortunately this doesn't make any sense without a per-net nsm_handle. E.g. the following scenario could happen: two hosts (X and Y) in different namespaces (A and B) share the same nsm struct. 1. nsm_monitor(host_X) called => NSM rpc client created, nsm->sm_monitored bit set. 2. nsm_monitor(host_Y) called => nsm->sm_monitored already set, we just exit. Thus in namespace B ln->nsm_clnt == NULL. 3. host X destroyed => nsm->sm_count decremented to 1 4. host Y destroyed => nsm_unmonitor() => nsm_mon_unmon() => NULL-ptr dereference of *ln->nsm_clnt So this can be fixed by making the nsm_handles list per-net instead of global. Thus different net namespaces will not be able to share the same nsm_handle. Signed-off-by: Andrey Ryabinin Cc: Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 7 ++++--- fs/lockd/mon.c | 36 ++++++++++++++++++++++-------------- fs/lockd/netns.h | 1 + fs/lockd/svc.c | 1 + fs/lockd/svc4proc.c | 2 +- fs/lockd/svcproc.c | 2 +- include/linux/lockd/lockd.h | 9 ++++++--- 7 files changed, 36 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 969d589c848d..b5f3c3ab0d5f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -116,7 +116,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, atomic_inc(&nsm->sm_count); else { host = NULL; - nsm = nsm_get_handle(ni->sap, ni->salen, + nsm = nsm_get_handle(ni->net, ni->sap, ni->salen, ni->hostname, ni->hostname_len); if (unlikely(nsm == NULL)) { dprintk("lockd: %s failed; no nsm handle\n", @@ -534,17 +534,18 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, /** * nlm_host_rebooted - Release all resources held by rebooted host + * @net: network namespace * @info: pointer to decoded results of NLM_SM_NOTIFY call * * We were notified that the specified host has rebooted. Release * all resources held by that peer.
*/ -void nlm_host_rebooted(const struct nlm_reboot *info) +void nlm_host_rebooted(const struct net *net, const struct nlm_reboot *info) { struct nsm_handle *nsm; struct nlm_host *host; - nsm = nsm_reboot_lookup(info); + nsm = nsm_reboot_lookup(net, info); if (unlikely(nsm == NULL)) return; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 47a32b6d9b90..6c05cd17e520 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -51,7 +51,6 @@ struct nsm_res { }; static const struct rpc_program nsm_program; -static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); /* @@ -264,33 +263,35 @@ void nsm_unmonitor(const struct nlm_host *host) } } -static struct nsm_handle *nsm_lookup_hostname(const char *hostname, - const size_t len) +static struct nsm_handle *nsm_lookup_hostname(const struct list_head *nsm_handles, + const char *hostname, const size_t len) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (strlen(nsm->sm_name) == len && memcmp(nsm->sm_name, hostname, len) == 0) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) +static struct nsm_handle *nsm_lookup_addr(const struct list_head *nsm_handles, + const struct sockaddr *sap) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv) +static struct nsm_handle *nsm_lookup_priv(const struct list_head *nsm_handles, + const struct nsm_private *priv) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (memcmp(nsm->sm_priv.data, priv->data, sizeof(priv->data)) == 0) return nsm; @@ -353,6 +354,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, /** * nsm_get_handle - Find or create a cached nsm_handle + * @net: network namespace * @sap: pointer to socket address of handle to find * @salen: length of socket address * @hostname: pointer to C string containing hostname to find @@ -365,11 +367,13 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, * @hostname cannot be found in the handle cache. Returns NULL if * an error occurs. 
*/ -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len) { struct nsm_handle *cached, *new = NULL; + struct lockd_net *ln = net_generic(net, lockd_net_id); if (hostname && memchr(hostname, '/', hostname_len) != NULL) { if (printk_ratelimit()) { @@ -384,9 +388,10 @@ retry: spin_lock(&nsm_lock); if (nsm_use_hostnames && hostname != NULL) - cached = nsm_lookup_hostname(hostname, hostname_len); + cached = nsm_lookup_hostname(&ln->nsm_handles, + hostname, hostname_len); else - cached = nsm_lookup_addr(sap); + cached = nsm_lookup_addr(&ln->nsm_handles, sap); if (cached != NULL) { atomic_inc(&cached->sm_count); @@ -400,7 +405,7 @@ retry: } if (new != NULL) { - list_add(&new->sm_link, &nsm_handles); + list_add(&new->sm_link, &ln->nsm_handles); spin_unlock(&nsm_lock); dprintk("lockd: created nsm_handle for %s (%s)\n", new->sm_name, new->sm_addrbuf); @@ -417,19 +422,22 @@ retry: /** * nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle + * @net: network namespace * @info: pointer to NLMPROC_SM_NOTIFY arguments * * Returns a matching nsm_handle if found in the nsm cache. The returned * nsm_handle's reference count is bumped. Otherwise returns NULL if some * error occurred. */ -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info) +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info) { struct nsm_handle *cached; + struct lockd_net *ln = net_generic(net, lockd_net_id); spin_lock(&nsm_lock); - cached = nsm_lookup_priv(&info->priv); + cached = nsm_lookup_priv(&ln->nsm_handles, &info->priv); if (unlikely(cached == NULL)) { spin_unlock(&nsm_lock); dprintk("lockd: never saw rebooted peer '%.*s' before\n", diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 097bfa3adb1c..89fe011b1335 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -15,6 +15,7 @@ struct lockd_net { spinlock_t nsm_clnt_lock; unsigned int nsm_users; struct rpc_clnt *nsm_clnt; + struct list_head nsm_handles; }; extern int lockd_net_id; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index d678bcc3cbcb..0dff13f41808 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -593,6 +593,7 @@ static int lockd_init_net(struct net *net) INIT_LIST_HEAD(&ln->lockd_manager.list); ln->lockd_manager.block_opens = false; spin_lock_init(&ln->nsm_clnt_lock); + INIT_LIST_HEAD(&ln->nsm_handles); return 0; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index b147d1ae71fd..09c576f26c7b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -421,7 +421,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 21171f0c6477..fb26b9f522e7 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -464,7 +464,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ff82a32871b5..fd3b65bf51b5 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -235,7 +235,8 @@ void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_shutdown_hosts(void); void 
nlm_shutdown_hosts_net(struct net *net); -void nlm_host_rebooted(const struct nlm_reboot *); +void nlm_host_rebooted(const struct net *net, + const struct nlm_reboot *); /* * Host monitoring @@ -243,11 +244,13 @@ void nlm_host_rebooted(const struct nlm_reboot *); int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len); -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); /* -- cgit v1.2.3 From 1be7f75d1668d6296b80bf35dcf6762393530afc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 22:23:21 -0700 Subject: bpf: enable non-root eBPF programs In order to let unprivileged users load and execute eBPF programs teach verifier to prevent pointer leaks. Verifier will prevent - any arithmetic on pointers (except R10+Imm which is used to compute stack addresses) - comparison of pointers (except if (map_value_ptr == 0) ... ) - passing pointers to helper functions - indirectly passing pointers in stack to helper functions - returning pointer from bpf program - storing pointers into ctx or maps Spill/fill of pointers into stack is allowed, but mangling of pointers stored in the stack or reading them byte by byte is not. Within bpf programs the pointers do exist, since programs need to be able to access maps, pass skb pointer to LD_ABS insns, etc but programs cannot pass such pointer values to the outside or obfuscate them. Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs, so that socket filters (tcpdump), af_packet (quic acceleration) and future kcm can use it. tracing and tc cls/act program types still require root permissions, since tracing actually needs to be able to see all kernel pointers and tc is for root only. For example, the following unprivileged socket filter program is allowed: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += skb->len; return 0; } but the following program is not: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += (u64) skb; return 0; } since it would leak the kernel address into the map. Unprivileged socket filter bpf programs have access to the following helper functions: - map lookup/update/delete (but they cannot store kernel pointers into them) - get_random (it's already exposed to unprivileged user space) - get_smp_processor_id - tail_call into another socket filter program - ktime_get_ns The feature is controlled by sysctl kernel.unprivileged_bpf_disabled. This toggle defaults to off (0), but can be set true (1). Once true, bpf programs and maps cannot be accessed from unprivileged process, and the toggle cannot be set back to false. Signed-off-by: Alexei Starovoitov Reviewed-by: Kees Cook Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 2 + kernel/bpf/syscall.c | 11 +++--- kernel/bpf/verifier.c | 106 +++++++++++++++++++++++++++++++++++++++++++++----- kernel/sysctl.c | 13 +++++++ net/core/filter.c | 3 +- 5 files changed, 120 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4fdee6cb686..02fa3db3c1ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,6 +167,8 @@ void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); +extern int sysctl_unprivileged_bpf_disabled; + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c868cafbc00c..83697bc8e574 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -18,6 +18,8 @@ #include #include +int sysctl_unprivileged_bpf_disabled __read_mostly; + static LIST_HEAD(bpf_map_types); static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) @@ -544,6 +546,9 @@ static int bpf_prog_load(union bpf_attr *attr) attr->kern_version != LINUX_VERSION_CODE) return -EINVAL; + if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* plain bpf_prog allocation */ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); if (!prog) @@ -599,11 +604,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz union bpf_attr attr = {}; int err; - /* the syscall is limited to root temporarily. This restriction will be - * lifted when security audit is clean. Note that eBPF+tracing must have - * this restriction, since it may pass kernel data to user space - */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) return -EPERM; if (!access_ok(VERIFY_READ, uattr, 1)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f8da034c2258..1d6b97be79e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -199,6 +199,7 @@ struct verifier_env { struct verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ + bool allow_ptr_leaks; }; /* verbose verifier prints what it's seeing @@ -538,6 +539,21 @@ static int bpf_size_to_bytes(int bpf_size) return -EINVAL; } +static bool is_spillable_regtype(enum bpf_reg_type type) +{ + switch (type) { + case PTR_TO_MAP_VALUE: + case PTR_TO_MAP_VALUE_OR_NULL: + case PTR_TO_STACK: + case PTR_TO_CTX: + case FRAME_PTR: + case CONST_PTR_TO_MAP: + return true; + default: + return false; + } +} + /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ @@ -550,9 +566,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, */ if (value_regno >= 0 && - (state->regs[value_regno].type == PTR_TO_MAP_VALUE || - state->regs[value_regno].type == PTR_TO_STACK || - state->regs[value_regno].type == PTR_TO_CTX)) { + is_spillable_regtype(state->regs[value_regno].type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -643,6 +657,20 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, return -EACCES; } +static bool is_pointer_value(struct verifier_env *env, int regno) +{ + if (env->allow_ptr_leaks) + return false; + + switch 
(env->cur_state.regs[regno].type) { + case UNKNOWN_VALUE: + case CONST_IMM: + return false; + default: + return true; + } +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -669,11 +697,21 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, } if (state->regs[regno].type == PTR_TO_MAP_VALUE) { + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose("R%d leaks addr into map\n", value_regno); + return -EACCES; + } err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); } else if (state->regs[regno].type == PTR_TO_CTX) { + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose("R%d leaks addr into ctx\n", value_regno); + return -EACCES; + } err = check_ctx_access(env, off, size, t); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); @@ -684,10 +722,17 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, verbose("invalid stack off=%d size=%d\n", off, size); return -EACCES; } - if (t == BPF_WRITE) + if (t == BPF_WRITE) { + if (!env->allow_ptr_leaks && + state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && + size != BPF_REG_SIZE) { + verbose("attempt to corrupt spilled pointer on stack\n"); + return -EACCES; + } err = check_stack_write(state, off, size, value_regno); - else + } else { err = check_stack_read(state, off, size, value_regno); + } } else { verbose("R%d invalid mem access '%s'\n", regno, reg_type_str[state->regs[regno].type]); @@ -775,8 +820,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } - if (arg_type == ARG_ANYTHING) + if (arg_type == ARG_ANYTHING) { + if (is_pointer_value(env, regno)) { + verbose("R%d leaks addr into helper function\n", regno); + return -EACCES; + } return 0; + } if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { @@ -950,8 +1000,9 @@ static int check_call(struct verifier_env *env, int func_id) } /* check validity of 32-bit and 64-bit arithmetic operations */ -static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) +static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) { + struct reg_state *regs = env->cur_state.regs; u8 opcode = BPF_OP(insn->code); int err; @@ -976,6 +1027,12 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) if (err) return err; + if (is_pointer_value(env, insn->dst_reg)) { + verbose("R%d pointer arithmetic prohibited\n", + insn->dst_reg); + return -EACCES; + } + /* check dest operand */ err = check_reg_arg(regs, insn->dst_reg, DST_OP); if (err) @@ -1012,6 +1069,11 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) */ regs[insn->dst_reg] = regs[insn->src_reg]; } else { + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d partial copy of pointer\n", + insn->src_reg); + return -EACCES; + } regs[insn->dst_reg].type = UNKNOWN_VALUE; regs[insn->dst_reg].map_ptr = NULL; } @@ -1061,8 +1123,18 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && regs[insn->dst_reg].type == FRAME_PTR && - 
BPF_SRC(insn->code) == BPF_K) + BPF_SRC(insn->code) == BPF_K) { stack_relative = true; + } else if (is_pointer_value(env, insn->dst_reg)) { + verbose("R%d pointer arithmetic prohibited\n", + insn->dst_reg); + return -EACCES; + } else if (BPF_SRC(insn->code) == BPF_X && + is_pointer_value(env, insn->src_reg)) { + verbose("R%d pointer arithmetic prohibited\n", + insn->src_reg); + return -EACCES; + } /* check dest operand */ err = check_reg_arg(regs, insn->dst_reg, DST_OP); @@ -1101,6 +1173,12 @@ static int check_cond_jmp_op(struct verifier_env *env, err = check_reg_arg(regs, insn->src_reg, SRC_OP); if (err) return err; + + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d pointer comparison prohibited\n", + insn->src_reg); + return -EACCES; + } } else { if (insn->src_reg != BPF_REG_0) { verbose("BPF_JMP uses reserved fields\n"); @@ -1155,6 +1233,9 @@ static int check_cond_jmp_op(struct verifier_env *env, regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = 0; } + } else if (is_pointer_value(env, insn->dst_reg)) { + verbose("R%d pointer comparison prohibited\n", insn->dst_reg); + return -EACCES; } else if (BPF_SRC(insn->code) == BPF_K && (opcode == BPF_JEQ || opcode == BPF_JNE)) { @@ -1658,7 +1739,7 @@ static int do_check(struct verifier_env *env) } if (class == BPF_ALU || class == BPF_ALU64) { - err = check_alu_op(regs, insn); + err = check_alu_op(env, insn); if (err) return err; @@ -1816,6 +1897,11 @@ static int do_check(struct verifier_env *env) if (err) return err; + if (is_pointer_value(env, BPF_REG_0)) { + verbose("R0 leaks addr as return value\n"); + return -EACCES; + } + process_bpf_exit: insn_idx = pop_stack(env, &prev_insn_idx); if (insn_idx < 0) { @@ -2144,6 +2230,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (ret < 0) goto skip_full_check; + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + ret = do_check(env); skip_full_check: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e69201d8094e..96c856b04081 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -1138,6 +1139,18 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = timer_migration_handler, }, +#endif +#ifdef CONFIG_BPF_SYSCALL + { + .procname = "unprivileged_bpf_disabled", + .data = &sysctl_unprivileged_bpf_disabled, + .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, #endif { } }; diff --git a/net/core/filter.c b/net/core/filter.c index 5f4cf1cffed3..0b00094932ab 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1640,7 +1640,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); default: return NULL; } -- cgit v1.2.3 From aaac3ba95e4c8b496d22f68bd1bc01cfbf525eca Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 22:23:22 -0700 Subject: bpf: charge user for creation of BPF maps and programs since eBPF programs and maps use kernel memory consider it 'locked' memory from user accounting point of view and charge it against RLIMIT_MEMLOCK limit. 
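As a back-of-the-envelope sketch of this charging arithmetic (userspace C for illustration only; the 48-byte per-element header and 8-byte bucket pointer are assumptions, since struct htab_elem and the exact sizing live in kernel/bpf/hashtab.c):

/* Rough model of the hash-map memlock charge computed at map creation. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

static unsigned long round_up_to(unsigned long x, unsigned long a)
{
	return (x + a - 1) / a * a;
}

int main(void)
{
	unsigned long max_entries = 1024, n_buckets = 1024;
	unsigned long key_size = 4, value_size = 8;
	unsigned long elem_hdr = 48;	/* assumed per-element header size */
	unsigned long bucket = 8;	/* one list-head pointer on 64-bit */

	unsigned long elem_size = elem_hdr + round_up_to(key_size, 8) + value_size;
	unsigned long bytes = n_buckets * bucket + elem_size * max_entries;
	unsigned long pages = round_up_to(bytes, PAGE_SIZE) / PAGE_SIZE;

	/* prints 18 pages (72 KB) with these assumed sizes */
	printf("charge = %lu pages (%lu KB)\n", pages, pages * PAGE_SIZE / 1024);
	return 0;
}

With these assumed sizes a 1024-element map comes out at 18 pages (~72KB), the same order as the 64Kbyte figure cited below.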
This limit is typically set to 64Kbytes by distros, so almost all bpf+tracing programs would need to increase it, since they use maps, but kernel charges maximum map size upfront. For example the hash map of 1024 elements will be charged as 64Kbyte. It's inconvenient for current users and changes current behavior for root, but probably worth doing to be consistent root vs non-root. Similar accounting logic is done by mmap of perf_event. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ include/linux/sched.h | 2 +- kernel/bpf/arraymap.c | 2 +- kernel/bpf/hashtab.c | 4 ++++ kernel/bpf/syscall.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 02fa3db3c1ec..e3a51b74e275 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,6 +36,8 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; + u32 pages; + struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; }; @@ -128,6 +130,7 @@ struct bpf_prog_aux { const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; + struct user_struct *user; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..4817df5fffae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -840,7 +840,7 @@ struct user_struct { struct hlist_node uidhash_node; kuid_t uid; -#ifdef CONFIG_PERF_EVENTS +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) atomic_long_t locked_vm; #endif }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 2fecc4aed119..f2d9e698c753 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -49,7 +49,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; - + array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; array->elem_size = elem_size; return &array->map; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 83c209d9b17a..28592d79502b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -88,6 +88,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) htab->elem_size = sizeof(struct htab_elem) + round_up(htab->map.key_size, 8) + htab->map.value_size; + + htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + + htab->elem_size * htab->map.max_entries, + PAGE_SIZE) >> PAGE_SHIFT; return &htab->map; free_htab: diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 83697bc8e574..f640e5f7afbd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -46,11 +46,38 @@ void bpf_register_map_type(struct bpf_map_type_list *tl) list_add(&tl->list_node, &bpf_map_types); } +static int bpf_map_charge_memlock(struct bpf_map *map) +{ + struct user_struct *user = get_current_user(); + unsigned long memlock_limit; + + memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + atomic_long_add(map->pages, &user->locked_vm); + + if (atomic_long_read(&user->locked_vm) > memlock_limit) { + atomic_long_sub(map->pages, &user->locked_vm); + free_uid(user); + return -EPERM; + } + map->user = user; + return 0; +} + +static void bpf_map_uncharge_memlock(struct bpf_map *map) +{ + struct user_struct *user = map->user; + + atomic_long_sub(map->pages, &user->locked_vm); + free_uid(user); 
+} + /* called from workqueue */ static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); + bpf_map_uncharge_memlock(map); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -110,6 +137,10 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->refcnt, 1); + err = bpf_map_charge_memlock(map); + if (err) + goto free_map; + err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); if (err < 0) @@ -442,11 +473,37 @@ static void free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } +static int bpf_prog_charge_memlock(struct bpf_prog *prog) +{ + struct user_struct *user = get_current_user(); + unsigned long memlock_limit; + + memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + atomic_long_add(prog->pages, &user->locked_vm); + if (atomic_long_read(&user->locked_vm) > memlock_limit) { + atomic_long_sub(prog->pages, &user->locked_vm); + free_uid(user); + return -EPERM; + } + prog->aux->user = user; + return 0; +} + +static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) +{ + struct user_struct *user = prog->aux->user; + + atomic_long_sub(prog->pages, &user->locked_vm); + free_uid(user); +} + static void __prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); free_used_maps(aux); + bpf_prog_uncharge_memlock(aux->prog); bpf_prog_free(aux->prog); } @@ -554,6 +611,10 @@ static int bpf_prog_load(union bpf_attr *attr) if (!prog) return -ENOMEM; + err = bpf_prog_charge_memlock(prog); + if (err) + goto free_prog_nouncharge; + prog->len = attr->insn_cnt; err = -EFAULT; @@ -595,6 +656,8 @@ static int bpf_prog_load(union bpf_attr *attr) free_used_maps: free_used_maps(prog->aux); free_prog: + bpf_prog_uncharge_memlock(prog); +free_prog_nouncharge: bpf_prog_free(prog); return err; } -- cgit v1.2.3 From d475f090bf1c0dc2999e98bbf2e7cb2243358849 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Oct 2015 19:33:24 -0700 Subject: tcp: shrink tcp_timewait_sock by 8 bytes Reducing tcp_timewait_sock from 280 bytes to 272 bytes allows SLAB to pack 15 objects per page instead of 14 (on x86) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 4 ++-- include/net/sock.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e442e6e9a365..86a7edaa6797 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -356,8 +356,8 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) struct tcp_timewait_sock { struct inet_timewait_sock tw_sk; - u32 tw_rcv_nxt; - u32 tw_snd_nxt; +#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt +#define tw_snd_nxt tw_sk.__tw_common.skc_tw_snd_nxt u32 tw_rcv_wnd; u32 tw_ts_offset; u32 tw_ts_recent; diff --git a/include/net/sock.h b/include/net/sock.h index 19cfe1fc911c..64a75458d22c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -229,6 +229,7 @@ struct sock_common { union { int skc_incoming_cpu; u32 skc_rcv_wnd; + u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */ }; atomic_t skc_refcnt; @@ -237,6 +238,7 @@ struct sock_common { union { u32 skc_rxhash; u32 skc_window_clamp; + u32 skc_tw_snd_nxt; /* struct tcp_timewait_sock */ }; /* public: */ }; -- cgit v1.2.3 From a4288289f585d42a19145f266e214acb165fe9b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Oct 2015 15:48:25 +0200 Subject: wireless: update robust action frame list Unprotected DMG and VHT action frames are not protected; reflect that in the list. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dcfb2f43d316..0109f3847e9a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2397,6 +2397,8 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && *category != WLAN_CATEGORY_SELF_PROTECTED && + *category != WLAN_CATEGORY_UNPROT_DMG && + *category != WLAN_CATEGORY_VHT && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } -- cgit v1.2.3 From af61426187cd854bffe013ca8547bd8fa3c4dfbf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Oct 2015 15:48:26 +0200 Subject: wireless: add WNM action frame categories Add the WNM and unprotected WNM categories and mark the latter as not robust.
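For illustration, the category check once this WNM patch (diff below) lands on top of the previous one, condensed into a standalone helper -- a sketch, not the kernel's exact _ieee80211_is_robust_mgmt_frame(), which first extracts the category byte from the frame:

	#include <linux/ieee80211.h>

	/* An action frame is treated as robust unless its category is one of
	 * the exclusions below; the two wireless patches add
	 * WLAN_CATEGORY_UNPROT_DMG, WLAN_CATEGORY_VHT and
	 * WLAN_CATEGORY_WNM_UNPROTECTED to the list. */
	static bool category_is_robust(u8 category)
	{
		return category != WLAN_CATEGORY_PUBLIC &&
		       category != WLAN_CATEGORY_HT &&
		       category != WLAN_CATEGORY_WNM_UNPROTECTED &&
		       category != WLAN_CATEGORY_SELF_PROTECTED &&
		       category != WLAN_CATEGORY_UNPROT_DMG &&
		       category != WLAN_CATEGORY_VHT &&
		       category != WLAN_CATEGORY_VENDOR_SPECIFIC;
	}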
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0109f3847e9a..452c0b0d2f32 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1932,6 +1932,8 @@ enum ieee80211_category { WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, + WLAN_CATEGORY_WNM = 10, + WLAN_CATEGORY_WNM_UNPROTECTED = 11, WLAN_CATEGORY_TDLS = 12, WLAN_CATEGORY_MESH_ACTION = 13, WLAN_CATEGORY_MULTIHOP_ACTION = 14, @@ -2396,6 +2398,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) category = ((u8 *) hdr) + 24; return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && + *category != WLAN_CATEGORY_WNM_UNPROTECTED && *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_UNPROT_DMG && *category != WLAN_CATEGORY_VHT && -- cgit v1.2.3 From 1ac710e08a86e4723286873db73edb2a6e99f591 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Wed, 7 Oct 2015 14:54:08 +0100 Subject: mfd: da9150: Add support for Fuel-Gauge Signed-off-by: Adam Thomson Signed-off-by: Lee Jones --- drivers/mfd/da9150-core.c | 156 ++++++++++++++++++++++++++++++++++++++-- include/linux/mfd/da9150/core.h | 19 ++++- 2 files changed, 167 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/da9150-core.c b/drivers/mfd/da9150-core.c index 94b9bbd1a69b..85ca4b5d0678 100644 --- a/drivers/mfd/da9150-core.c +++ b/drivers/mfd/da9150-core.c @@ -23,6 +23,77 @@ #include #include +/* Raw device access, used for QIF */ +static int da9150_i2c_read_device(struct i2c_client *client, u8 addr, int count, + u8 *buf) +{ + struct i2c_msg xfer; + int ret; + + /* + * Read is split into two transfers as device expects STOP/START rather + * than repeated start to carry out this kind of access. 
+ */ + + /* Write address */ + xfer.addr = client->addr; + xfer.flags = 0; + xfer.len = 1; + xfer.buf = &addr; + + ret = i2c_transfer(client->adapter, &xfer, 1); + if (ret != 1) { + if (ret < 0) + return ret; + else + return -EIO; + } + + /* Read data */ + xfer.addr = client->addr; + xfer.flags = I2C_M_RD; + xfer.len = count; + xfer.buf = buf; + + ret = i2c_transfer(client->adapter, &xfer, 1); + if (ret == 1) + return 0; + else if (ret < 0) + return ret; + else + return -EIO; +} + +static int da9150_i2c_write_device(struct i2c_client *client, u8 addr, + int count, const u8 *buf) +{ + struct i2c_msg xfer; + u8 *reg_data; + int ret; + + reg_data = kzalloc(1 + count, GFP_KERNEL); + if (!reg_data) + return -ENOMEM; + + reg_data[0] = addr; + memcpy(®_data[1], buf, count); + + /* Write address & data */ + xfer.addr = client->addr; + xfer.flags = 0; + xfer.len = 1 + count; + xfer.buf = reg_data; + + ret = i2c_transfer(client->adapter, &xfer, 1); + kfree(reg_data); + if (ret == 1) + return 0; + else if (ret < 0) + return ret; + else + return -EIO; +} + static bool da9150_volatile_reg(struct device *dev, unsigned int reg) { switch (reg) { @@ -107,6 +178,28 @@ static const struct regmap_config da9150_regmap_config = { .volatile_reg = da9150_volatile_reg, }; +void da9150_read_qif(struct da9150 *da9150, u8 addr, int count, u8 *buf) +{ + int ret; + + ret = da9150_i2c_read_device(da9150->core_qif, addr, count, buf); + if (ret < 0) + dev_err(da9150->dev, "Failed to read from QIF 0x%x: %d\n", + addr, ret); +} +EXPORT_SYMBOL_GPL(da9150_read_qif); + +void da9150_write_qif(struct da9150 *da9150, u8 addr, int count, const u8 *buf) +{ + int ret; + + ret = da9150_i2c_write_device(da9150->core_qif, addr, count, buf); + if (ret < 0) + dev_err(da9150->dev, "Failed to write to QIF 0x%x: %d\n", + addr, ret); +} +EXPORT_SYMBOL_GPL(da9150_write_qif); + u8 da9150_reg_read(struct da9150 *da9150, u16 reg) { int val, ret; @@ -297,19 +390,35 @@ static struct resource da9150_charger_resources[] = { }, }; +static struct resource da9150_fg_resources[] = { + DEFINE_RES_IRQ_NAMED(DA9150_IRQ_FG, "FG"), +}; + +enum da9150_dev_idx { + DA9150_GPADC_IDX = 0, + DA9150_CHARGER_IDX, + DA9150_FG_IDX, +}; + static struct mfd_cell da9150_devs[] = { - { + [DA9150_GPADC_IDX] = { .name = "da9150-gpadc", .of_compatible = "dlg,da9150-gpadc", .resources = da9150_gpadc_resources, .num_resources = ARRAY_SIZE(da9150_gpadc_resources), }, - { + [DA9150_CHARGER_IDX] = { .name = "da9150-charger", .of_compatible = "dlg,da9150-charger", .resources = da9150_charger_resources, .num_resources = ARRAY_SIZE(da9150_charger_resources), }, + [DA9150_FG_IDX] = { + .name = "da9150-fuel-gauge", + .of_compatible = "dlg,da9150-fuel-gauge", + .resources = da9150_fg_resources, + .num_resources = ARRAY_SIZE(da9150_fg_resources), + }, }; static int da9150_probe(struct i2c_client *client, @@ -317,6 +426,7 @@ static int da9150_probe(struct i2c_client *client, { struct da9150 *da9150; struct da9150_pdata *pdata = dev_get_platdata(&client->dev); + int qif_addr; int ret; da9150 = devm_kzalloc(&client->dev, sizeof(*da9150), GFP_KERNEL); @@ -335,16 +445,41 @@ static int da9150_probe(struct i2c_client *client, return ret; } - da9150->irq_base = pdata ? 
pdata->irq_base : -1; + /* Setup secondary I2C interface for QIF access */ + qif_addr = da9150_reg_read(da9150, DA9150_CORE2WIRE_CTRL_A); + qif_addr = (qif_addr & DA9150_CORE_BASE_ADDR_MASK) >> 1; + qif_addr |= DA9150_QIF_I2C_ADDR_LSB; + da9150->core_qif = i2c_new_dummy(client->adapter, qif_addr); + if (!da9150->core_qif) { + dev_err(da9150->dev, "Failed to attach QIF client\n"); + return -ENODEV; + } + + i2c_set_clientdata(da9150->core_qif, da9150); + + if (pdata) { + da9150->irq_base = pdata->irq_base; + + da9150_devs[DA9150_FG_IDX].platform_data = pdata->fg_pdata; + da9150_devs[DA9150_FG_IDX].pdata_size = + sizeof(struct da9150_fg_pdata); + } else { + da9150->irq_base = -1; + } ret = regmap_add_irq_chip(da9150->regmap, da9150->irq, IRQF_TRIGGER_LOW | IRQF_ONESHOT, da9150->irq_base, &da9150_regmap_irq_chip, &da9150->regmap_irq_data); - if (ret) - return ret; + if (ret) { + dev_err(da9150->dev, "Failed to add regmap irq chip: %d\n", + ret); + goto regmap_irq_fail; + } + da9150->irq_base = regmap_irq_chip_get_base(da9150->regmap_irq_data); + enable_irq_wake(da9150->irq); ret = mfd_add_devices(da9150->dev, -1, da9150_devs, @@ -352,11 +487,17 @@ static int da9150_probe(struct i2c_client *client, da9150->irq_base, NULL); if (ret) { dev_err(da9150->dev, "Failed to add child devices: %d\n", ret); - regmap_del_irq_chip(da9150->irq, da9150->regmap_irq_data); - return ret; + goto mfd_fail; } return 0; + +mfd_fail: + regmap_del_irq_chip(da9150->irq, da9150->regmap_irq_data); +regmap_irq_fail: + i2c_unregister_device(da9150->core_qif); + + return ret; } static int da9150_remove(struct i2c_client *client) @@ -365,6 +506,7 @@ static int da9150_remove(struct i2c_client *client) regmap_del_irq_chip(da9150->irq, da9150->regmap_irq_data); mfd_remove_devices(da9150->dev); + i2c_unregister_device(da9150->core_qif); return 0; } diff --git a/include/linux/mfd/da9150/core.h b/include/linux/mfd/da9150/core.h index 76e668933a77..1bf50caeb9fa 100644 --- a/include/linux/mfd/da9150/core.h +++ b/include/linux/mfd/da9150/core.h @@ -15,6 +15,7 @@ #define __DA9150_CORE_H #include +#include #include #include @@ -46,23 +47,39 @@ #define DA9150_IRQ_GPADC 19 #define DA9150_IRQ_WKUP 20 +/* I2C sub-device address */ +#define DA9150_QIF_I2C_ADDR_LSB 0x5 + +struct da9150_fg_pdata { + u32 update_interval; /* msecs */ + u8 warn_soc_lvl; /* % value */ + u8 crit_soc_lvl; /* % value */ +}; + struct da9150_pdata { int irq_base; + struct da9150_fg_pdata *fg_pdata; }; struct da9150 { struct device *dev; struct regmap *regmap; + struct i2c_client *core_qif; + struct regmap_irq_chip_data *regmap_irq_data; int irq; int irq_base; }; -/* Device I/O */ +/* Device I/O - Query Interface for FG and standard register access */ +void da9150_read_qif(struct da9150 *da9150, u8 addr, int count, u8 *buf); +void da9150_write_qif(struct da9150 *da9150, u8 addr, int count, const u8 *buf); + u8 da9150_reg_read(struct da9150 *da9150, u16 reg); void da9150_reg_write(struct da9150 *da9150, u16 reg, u8 val); void da9150_set_bits(struct da9150 *da9150, u16 reg, u8 mask, u8 val); void da9150_bulk_read(struct da9150 *da9150, u16 reg, int count, u8 *buf); void da9150_bulk_write(struct da9150 *da9150, u16 reg, int count, const u8 *buf); + #endif /* __DA9150_CORE_H */ -- cgit v1.2.3 From 42160a041db89807691b2a3fbf42e36a98b6019e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 8 Oct 2015 16:56:07 +0200 Subject: can: at91: remove at91_can_data struct at91_can_data was used to pass a callback to the driver, allowing it to switch the transceiver on and 
off. As all at91 boards are now using DT, this is not used anymore; remove that structure. Signed-off-by: Alexandre Belloni Signed-off-by: Marc Kleine-Budde --- drivers/net/can/at91_can.c | 21 --------------------- include/linux/platform_data/atmel.h | 5 ----- 2 files changed, 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 945c0955a967..8b3275d7792a 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -8,15 +8,6 @@ * Public License ("GPL") version 2 as distributed in the 'COPYING' * file from the main directory of the linux kernel source. * - * - * Your platform definition file should specify something like: - * - * static struct at91_can_data ek_can_data = { - * transceiver_switch = sam9263ek_transceiver_switch, - * }; - * - * at91_add_device_can(&ek_can_data); - * */ #include @@ -33,7 +24,6 @@ #include #include #include -#include #include #include @@ -324,15 +314,6 @@ static inline u32 at91_can_id_to_reg_mid(canid_t can_id) return reg_mid; } -/* - * Swtich transceiver on or off - */ -static void at91_transceiver_switch(const struct at91_priv *priv, int on) -{ - if (priv->pdata && priv->pdata->transceiver_switch) - priv->pdata->transceiver_switch(on); -} - static void at91_setup_mailboxes(struct net_device *dev) { struct at91_priv *priv = netdev_priv(dev); @@ -416,7 +397,6 @@ static void at91_chip_start(struct net_device *dev) at91_set_bittiming(dev); at91_setup_mailboxes(dev); - at91_transceiver_switch(priv, 1); /* enable chip */ if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) @@ -444,7 +424,6 @@ static void at91_chip_stop(struct net_device *dev, enum can_state state) reg_mr = at91_read(priv, AT91_MR); at91_write(priv, AT91_MR, reg_mr & ~AT91_MR_CANEN); - at91_transceiver_switch(priv, 0); priv->can.state = state; } diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..c121ddf74f7f 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -74,11 +74,6 @@ struct atmel_uart_data { struct serial_rs485 rs485; /* rs485 settings */ }; -/* CAN */ -struct at91_can_data { - void (*transceiver_switch)(int on); -}; - /* FIXME: this needs a better location, but gets stuff building again */ extern int at91_suspend_entering_slow_clock(void); -- cgit v1.2.3 From f110711a6053f08731858aa91420104094188973 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:30 +0100 Subject: irqdomain: Convert irqdomain->of_node to fwnode Now that we have everyone accessing the of_node field via the irq_domain_get_of_node accessor, it is pretty easy to swap it for a pointer to a fwnode_handle. This translates into a few limited changes in __irq_domain_add, and an updated irq_domain_get_of_node. Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J.
Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 5 +++-- kernel/irq/irqdomain.c | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f644fdb06dd6..2f508f4ac2ea 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -34,6 +34,7 @@ #include #include +#include #include struct device_node; @@ -130,7 +131,7 @@ struct irq_domain { unsigned int flags; /* Optional data */ - struct device_node *of_node; + struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -163,7 +164,7 @@ enum { static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) { - return d->of_node; + return to_of_node(d->fwnode); } #ifdef CONFIG_IRQ_DOMAIN diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8f8b538b067d..1aee5c118bae 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -46,17 +46,21 @@ struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, void *host_data) { struct irq_domain *domain; + struct fwnode_handle *fwnode; domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); if (WARN_ON(!domain)) return NULL; + of_node_get(of_node); + fwnode = of_node ? &of_node->fwnode : NULL; + /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); domain->ops = ops; domain->host_data = host_data; - domain->of_node = of_node_get(of_node); + domain->fwnode = fwnode; domain->hwirq_max = hwirq_max; domain->revmap_size = size; domain->revmap_direct_max_irq = direct_max; -- cgit v1.2.3 From 130b8c6c8d86075304952241bf2365cea6489df1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:31 +0100 Subject: irqdomain: Allow irq domain lookup by fwnode So far, our irq domains are still looked up by device node. Let's change this and allow a domain to be looked up using a fwnode_handle pointer. The existing interfaces are preserved with a couple of helpers. Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 11 +++++++++-- kernel/irq/irqdomain.c | 16 +++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 2f508f4ac2ea..607c1856cc01 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -183,10 +183,17 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); -extern struct irq_domain *irq_find_matching_host(struct device_node *node, - enum irq_domain_bus_token bus_token); +extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); +static inline struct irq_domain *irq_find_matching_host(struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + return irq_find_matching_fwnode(node ? 
&node->fwnode : NULL, + bus_token); +} + static inline struct irq_domain *irq_find_host(struct device_node *node) { return irq_find_matching_host(node, DOMAIN_BUS_ANY); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1aee5c118bae..023ab6d488f7 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -191,12 +191,12 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, EXPORT_SYMBOL_GPL(irq_domain_add_legacy); /** - * irq_find_matching_host() - Locates a domain for a given device node - * @node: device-tree node of the interrupt controller + * irq_find_matching_fwnode() - Locates a domain for a given fwnode + * @fwnode: FW descriptor of the interrupt controller + * @bus_token: domain-specific data */ -struct irq_domain *irq_find_matching_host(struct device_node *node, - enum irq_domain_bus_token bus_token) +struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { struct irq_domain *h, *found = NULL; int rc; @@ -212,12 +212,10 @@ struct irq_domain *irq_find_matching_host(struct device_node *node, */ mutex_lock(&irq_domain_mutex); list_for_each_entry(h, &irq_domain_list, link) { - struct device_node *of_node; - of_node = irq_domain_get_of_node(h); if (h->ops->match) - rc = h->ops->match(h, node, bus_token); + rc = h->ops->match(h, to_of_node(fwnode), bus_token); else - rc = ((of_node != NULL) && (of_node == node) && + rc = ((fwnode != NULL) && (h->fwnode == fwnode) && ((bus_token == DOMAIN_BUS_ANY) || (h->bus_token == bus_token))); @@ -229,7 +227,7 @@ struct irq_domain *irq_find_matching_host(struct device_node *node, mutex_unlock(&irq_domain_mutex); return found; } -EXPORT_SYMBOL_GPL(irq_find_matching_host); +EXPORT_SYMBOL_GPL(irq_find_matching_fwnode); /** * irq_set_default_host() - Set a "default" irq domain -- cgit v1.2.3 From 11e4438ee330fab0f216ee7cc1b651cb2ddceb5d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:32 +0100 Subject: irqdomain: Introduce a firmware-specific IRQ specifier structure So far the closest thing to a generic IRQ specifier structure is of_phandle_args, which happens to be pretty OF specific (the of_node pointer in there is quite annoying). Let's introduce 'struct irq_fwspec' that can be used in place of of_phandle_args for OF, but also for other firmware implementations (that'd be ACPI). This is used together with a new 'translate' method that is the counterpart of 'xlate'. We convert irq_create_of_mapping to use this new structure (with a small hack that will be removed later). Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J.
Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-5-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 20 ++++++++++++++++ kernel/irq/irqdomain.c | 59 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 68 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 607c1856cc01..533c974b9d94 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -46,6 +46,24 @@ struct irq_data; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 +#define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16 + +/** + * struct irq_fwspec - generic IRQ specifier structure + * + * @fwnode: Pointer to a firmware-specific descriptor + * @param_count: Number of device-specific parameters + * @param: Device-specific parameters + * + * This structure, directly modeled after of_phandle_args, is used to + * pass a device-specific description of an interrupt. + */ +struct irq_fwspec { + struct fwnode_handle *fwnode; + int param_count; + u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; +}; + /* * Should several domains have the same device node, but serve * different purposes (for example one domain is for PCI/MSI, and the @@ -92,6 +110,8 @@ struct irq_domain_ops { unsigned int nr_irqs); void (*activate)(struct irq_domain *d, struct irq_data *irq_data); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); + int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); #endif }; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 023ab6d488f7..86dfd402ed5e 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -484,30 +484,67 @@ int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, } EXPORT_SYMBOL_GPL(irq_create_strict_mappings); +static int irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type) +{ +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (d->ops->translate) + return d->ops->translate(d, fwspec, hwirq, type); +#endif + if (d->ops->xlate) + return d->ops->xlate(d, to_of_node(fwspec->fwnode), + fwspec->param, fwspec->param_count, + hwirq, type); + + /* If domain has no translation, then we assume interrupt line */ + *hwirq = fwspec->param[0]; + return 0; +} + +static void of_phandle_args_to_fwspec(struct of_phandle_args *irq_data, + struct irq_fwspec *fwspec) +{ + int i; + + fwspec->fwnode = irq_data->np ? &irq_data->np->fwnode : NULL; + fwspec->param_count = irq_data->args_count; + + for (i = 0; i < irq_data->args_count; i++) + fwspec->param[i] = irq_data->args[i]; +} + unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) { + struct irq_fwspec fwspec; struct irq_domain *domain; irq_hw_number_t hwirq; unsigned int type = IRQ_TYPE_NONE; int virq; - domain = irq_data->np ? 
irq_find_host(irq_data->np) : irq_default_domain; + of_phandle_args_to_fwspec(irq_data, &fwspec); + + if (fwspec.fwnode) + domain = irq_find_matching_fwnode(fwspec.fwnode, DOMAIN_BUS_ANY); + else + domain = irq_default_domain; + if (!domain) { pr_warn("no irq domain found for %s !\n", - of_node_full_name(irq_data->np)); + of_node_full_name(to_of_node(fwspec.fwnode))); return 0; } - /* If domain has no translation, then we assume interrupt line */ - if (domain->ops->xlate == NULL) - hwirq = irq_data->args[0]; - else { - if (domain->ops->xlate(domain, irq_data->np, irq_data->args, - irq_data->args_count, &hwirq, &type)) - return 0; - } + if (irq_domain_translate(domain, &fwspec, &hwirq, &type)) + return 0; if (irq_domain_is_hierarchy(domain)) { + /* Temporary hack */ + void *desc = &fwspec; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (!domain->ops->translate) + desc = irq_data; +#endif /* * If we've already configured this interrupt, * don't do it again, or hell will break loose. @@ -516,7 +553,7 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) if (virq) return virq; - virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, irq_data); + virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, desc); if (virq <= 0) return 0; } else { -- cgit v1.2.3 From c0131f09de8c2d301814cac86d78f643b8ee0574 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:34 +0100 Subject: irqdomain: Introduce irq_create_fwspec_mapping Just like we have irq_create_of_mapping, irq_create_fwspec_mapping creates an IRQ domain mapping for an interrupt described in a struct irq_fwspec. irq_create_of_mapping gets rewritten in terms of the new function, and the hack we introduced before gets removed (now that no stacked irqchip uses of_phandle_args anymore). Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J.
Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-7-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 1 + kernel/irq/irqdomain.c | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 533c974b9d94..7e7842e90d40 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -267,6 +267,7 @@ extern void irq_domain_disassociate(struct irq_domain *domain, extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); +extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); extern void irq_dispose_mapping(unsigned int virq); /** diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 86dfd402ed5e..b1fda6dad467 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -514,37 +514,28 @@ static void of_phandle_args_to_fwspec(struct of_phandle_args *irq_data, fwspec->param[i] = irq_data->args[i]; } -unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) +unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) { - struct irq_fwspec fwspec; struct irq_domain *domain; irq_hw_number_t hwirq; unsigned int type = IRQ_TYPE_NONE; int virq; - of_phandle_args_to_fwspec(irq_data, &fwspec); - - if (fwspec.fwnode) - domain = irq_find_matching_fwnode(fwspec.fwnode, DOMAIN_BUS_ANY); + if (fwspec->fwnode) + domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY); else domain = irq_default_domain; if (!domain) { pr_warn("no irq domain found for %s !\n", - of_node_full_name(to_of_node(fwspec.fwnode))); + of_node_full_name(to_of_node(fwspec->fwnode))); return 0; } - if (irq_domain_translate(domain, &fwspec, &hwirq, &type)) + if (irq_domain_translate(domain, fwspec, &hwirq, &type)) return 0; if (irq_domain_is_hierarchy(domain)) { - /* Temporary hack */ - void *desc = &fwspec; -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - if (!domain->ops->translate) - desc = irq_data; -#endif /* * If we've already configured this interrupt, * don't do it again, or hell will break loose. @@ -553,7 +544,7 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) if (virq) return virq; - virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, desc); + virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec); if (virq <= 0) return 0; } else { @@ -569,6 +560,15 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) irq_set_irq_type(virq, type); return virq; } +EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping); + +unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) +{ + struct irq_fwspec fwspec; + + of_phandle_args_to_fwspec(irq_data, &fwspec); + return irq_create_fwspec_mapping(&fwspec); +} EXPORT_SYMBOL_GPL(irq_create_of_mapping); /** -- cgit v1.2.3 From 1bf4ddc46c5d6123897a54cea4ffe3e90f30600b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:35 +0100 Subject: irqdomain: Introduce irq_domain_create_{linear, tree} Just like we have irq_domain_add_{linear,tree} to create an irq domain identified by an of_node, introduce irq_domain_create_{linear,tree} that do the same thing, except that they take a struct fwnode_handle. Existing functions get rewritten in terms of the new ones so that everything keeps working as before (and __irq_domain_add is now fwnode_handle based as well).
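A rough usage sketch (my_domain_ops, the domain size of 32, and the wrapper function are hypothetical, driver-local assumptions): OF drivers keep calling the add_* variant, which the diff below turns into a thin wrapper over the fwnode-based __irq_domain_add(), while firmware-agnostic code can hand over a fwnode_handle directly:

	#include <linux/irqdomain.h>
	#include <linux/of.h>

	static const struct irq_domain_ops my_domain_ops; /* assumed driver callbacks */

	static struct irq_domain *my_setup_domain(struct device_node *np,
						  struct fwnode_handle *fwnode)
	{
		/* OF path: unchanged, now a wrapper around the fwnode variant */
		if (np)
			return irq_domain_add_linear(np, 32, &my_domain_ops, NULL);

		/* non-OF path: identify the domain by an arbitrary fwnode_handle */
		return irq_domain_create_linear(fwnode, 32, &my_domain_ops, NULL);
	}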
Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-8-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 31 +++++++++++++++++++++++++------ kernel/irq/irqdomain.c | 11 ++++++----- 2 files changed, 31 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 7e7842e90d40..995d4c5100d3 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -188,7 +188,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) } #ifdef CONFIG_IRQ_DOMAIN -struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); @@ -207,11 +207,15 @@ extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); +static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) +{ + return node ? &node->fwnode : NULL; +} + static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { - return irq_find_matching_fwnode(node ? &node->fwnode : NULL, - bus_token); + return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token); } static inline struct irq_domain *irq_find_host(struct device_node *node) @@ -231,14 +235,14 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, size, size, 0, ops, host_data); + return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data); } static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, 0, max_irq, max_irq, ops, host_data); + return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data); } static inline struct irq_domain *irq_domain_add_legacy_isa( struct device_node *of_node, @@ -252,7 +256,22 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, 0, ~0, 0, ops, host_data); + return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data); +} + +static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) +{ + return __irq_domain_add(fwnode, size, size, 0, ops, host_data); +} + +static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) +{ + return __irq_domain_add(fwnode, 0, ~0, 0, ops, host_data); } extern void irq_domain_remove(struct irq_domain *host); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index b1fda6dad467..de6d7493190b 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -40,13 +40,15 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain); * Allocates and initialize 
and irq_domain structure. * Returns pointer to IRQ domain, or NULL on failure. */ -struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain; - struct fwnode_handle *fwnode; + struct device_node *of_node; + + of_node = to_of_node(fwnode); domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); @@ -54,7 +56,6 @@ struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, return NULL; of_node_get(of_node); - fwnode = of_node ? &of_node->fwnode : NULL; /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); @@ -137,7 +138,7 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, { struct irq_domain *domain; - domain = __irq_domain_add(of_node, size, size, 0, ops, host_data); + domain = __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data); if (!domain) return NULL; @@ -181,7 +182,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, { struct irq_domain *domain; - domain = __irq_domain_add(of_node, first_hwirq + size, + domain = __irq_domain_add(of_node_to_fwnode(of_node), first_hwirq + size, first_hwirq + size, 0, ops, host_data); if (domain) irq_domain_associate_many(domain, first_irq, first_hwirq, size); -- cgit v1.2.3 From b145dcc45a6af0abfcf9b4de8006d40559c50fc6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:36 +0100 Subject: irqdomain: Add a fwnode_handle allocator In order to be able to reference an irqdomain from ACPI, we need to be able to create an identifier, which is usually a struct device_node. This device node doesn't really fit the ACPI infrastructure, so we cunningly allocate a new structure containing a fwnode_handle, and return that. This structure doesn't really point to a device (interrupt controllers are not "real" devices in Linux), but as we cannot really deny that they exist, we create them with a new fwnode_type (FWNODE_IRQCHIP). Signed-off-by: Marc Zyngier Acked-by: Rafael J. Wysocki Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J.
Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-9-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/fwnode.h | 1 + include/linux/irqdomain.h | 2 ++ kernel/irq/irqdomain.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 0408545bce42..37ec668546ab 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -17,6 +17,7 @@ enum fwnode_type { FWNODE_OF, FWNODE_ACPI, FWNODE_PDATA, + FWNODE_IRQCHIP, }; struct fwnode_handle { diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 995d4c5100d3..949caa728758 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -188,6 +188,8 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) } #ifdef CONFIG_IRQ_DOMAIN +struct fwnode_handle *irq_domain_alloc_fwnode(void *data); +void irq_domain_free_fwnode(struct fwnode_handle *fwnode); struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index de6d7493190b..41151d394da7 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -27,6 +27,57 @@ static int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node); static void irq_domain_check_hierarchy(struct irq_domain *domain); +struct irqchip_fwid { + struct fwnode_handle fwnode; + char *name; + void *data; +}; + +/** + * irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for + * identifying an irq domain + * @data: optional user-provided data + * + * Allocate a struct device_node, and return a pointer to the embedded + * fwnode_handle (or NULL on failure). + */ +struct fwnode_handle *irq_domain_alloc_fwnode(void *data) +{ + struct irqchip_fwid *fwid; + char *name; + + fwid = kzalloc(sizeof(*fwid), GFP_KERNEL); + name = kasprintf(GFP_KERNEL, "irqchip@%p", data); + + if (!fwid || !name) { + kfree(fwid); + kfree(name); + return NULL; + } + + fwid->name = name; + fwid->data = data; + fwid->fwnode.type = FWNODE_IRQCHIP; + return &fwid->fwnode; +} + +/** + * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle + * + * Free a fwnode_handle allocated with irq_domain_alloc_fwnode. + */ +void irq_domain_free_fwnode(struct fwnode_handle *fwnode) +{ + struct irqchip_fwid *fwid; + + if (WARN_ON(fwnode->type != FWNODE_IRQCHIP)) + return; + + fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + kfree(fwid->name); + kfree(fwid); +} + /** * __irq_domain_add() - Allocate a new irq_domain data structure * @of_node: optional device-tree node of the interrupt controller -- cgit v1.2.3 From 2bc6eba4a322e70eac8cde76442c4ac90699fb39 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:38 +0100 Subject: acpi/gsi: Add acpi_set_irq_model to initialize the GSI layer In order to start embracing irqdomains at the GSI level, introduce a new initializer: void acpi_set_irq_model(enum acpi_irq_model_id model, struct fwnode_handle *fwnode); where: - model is the value assigned to acpi_irq_model - fwnode is the identifier for the irqdomain mapping GSI interrupts As nobody calls this code yet, the current code is (mostly) left in place. Signed-off-by: Marc Zyngier Acked-by: Rafael J.
Wysocki Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-11-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/acpi/gsi.c | 32 +++++++++++++++++++++++++++----- include/linux/acpi.h | 3 +++ 2 files changed, 30 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/gsi.c b/drivers/acpi/gsi.c index 2c39fe1216e2..202a8fee77dc 100644 --- a/drivers/acpi/gsi.c +++ b/drivers/acpi/gsi.c @@ -75,12 +75,21 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, { unsigned int irq; unsigned int irq_type = acpi_gsi_get_irq_type(trigger, polarity); - struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id, - DOMAIN_BUS_ANY); - irq = irq_create_mapping(d, gsi); - if (!irq) - return -EINVAL; + if (acpi_gsi_domain_id) { + struct irq_fwspec fwspec; + + fwspec.fwnode = acpi_gsi_domain_id; + fwspec.param[0] = gsi; + fwspec.param[1] = irq_type; + fwspec.param_count = 2; + + return irq_create_fwspec_mapping(&fwspec); + } else { + irq = irq_create_mapping(NULL, gsi); + if (!irq) + return -EINVAL; + } /* Set irq type if specified and different than the current one */ if (irq_type != IRQ_TYPE_NONE && @@ -103,3 +112,16 @@ void acpi_unregister_gsi(u32 gsi) irq_dispose_mapping(irq); } EXPORT_SYMBOL_GPL(acpi_unregister_gsi); + +/** + * acpi_set_irq_model - Setup the GSI irqdomain information + * @model: the value assigned to acpi_irq_model + * @fwnode: the irq_domain identifier for mapping and looking up + * GSI interrupts + */ +void __init acpi_set_irq_model(enum acpi_irq_model_id model, + struct fwnode_handle *fwnode) +{ + acpi_irq_model = model; + acpi_gsi_domain_id = fwnode; +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 43856d19cf4d..d863e12bbead 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -201,6 +201,9 @@ int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); +void acpi_set_irq_model(enum acpi_irq_model_id model, + struct fwnode_handle *fwnode); + #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else -- cgit v1.2.3 From e81a7cd96bd55bb57d92486c514b7b8f8c8cd8ce Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:39 +0100 Subject: irqchip/gic: Get rid of gic_init_bases() Since nobody is using gic_init_bases anymore outside of the GIC driver itself, let's do a bit of housekeeping and remove the now useless entry point. Only gic_init() is now exposed to the rest of the kernel for the benefit of legacy systems. Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. 
Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-12-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic.c | 8 +++----- include/linux/irqchip/arm-gic.h | 9 ++------- 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 9262bb9b442b..12b2973530ed 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1127,17 +1127,15 @@ static void __init __gic_init_bases(unsigned int gic_nr, int irq_start, gic_pm_init(gic); } -void __init gic_init_bases(unsigned int gic_nr, int irq_start, - void __iomem *dist_base, void __iomem *cpu_base, - u32 percpu_offset, struct device_node *node) +void __init gic_init(unsigned int gic_nr, int irq_start, + void __iomem *dist_base, void __iomem *cpu_base) { /* * Non-DT/ACPI systems won't run a hypervisor, so let's not * bother with these... */ static_key_slow_dec(&supports_deactivate); - __gic_init_bases(gic_nr, irq_start, dist_base, cpu_base, - percpu_offset, node); + __gic_init_bases(gic_nr, irq_start, dist_base, cpu_base, 0, NULL); } #ifdef CONFIG_OF diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index b8901dfd9e95..bae69e5d693c 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -100,16 +100,11 @@ struct device_node; -void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, - u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); int gic_cpu_if_down(unsigned int gic_nr); -static inline void gic_init(unsigned int nr, int start, - void __iomem *dist , void __iomem *cpu) -{ - gic_init_bases(nr, start, dist, cpu, 0, NULL); -} +void gic_init(unsigned int nr, int start, + void __iomem *dist , void __iomem *cpu); int gicv2m_of_init(struct device_node *node, struct irq_domain *parent); -- cgit v1.2.3 From 2a5e9a072da6469a37d1f0b1577416f51223c280 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:43 +0100 Subject: irqdomain: Introduce irq_domain_create_hierarchy As we're about to start converting the various MSI layers to use fwnode_handle instead of device_node, add irq_domain_create_hierarchy as a direct equivalent of irq_domain_add_hierarchy (which still exists as a compatibility interface). Signed-off-by: Marc Zyngier Tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J.
Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-16-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 17 +++++++++++++++-- kernel/irq/irqdomain.c | 12 ++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 949caa728758..2b3340ae915d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -340,10 +340,23 @@ extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -extern struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, +extern struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, - struct device_node *node, + struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); + +static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, + unsigned int flags, + unsigned int size, + struct device_node *node, + const struct irq_domain_ops *ops, + void *host_data) +{ + return irq_domain_create_hierarchy(parent, flags, size, + of_node_to_fwnode(node), + ops, host_data); +} + extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 41151d394da7..22aa9612ef7c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -854,11 +854,11 @@ static int irq_domain_alloc_descs(int virq, unsigned int cnt, #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** - * irq_domain_add_hierarchy - Add a irqdomain into the hierarchy + * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy * @parent: Parent irq domain to associate with the new domain * @flags: Irq domain flags associated to the domain * @size: Size of the domain. See below - * @node: Optional device-tree node of the interrupt controller + * @fwnode: Optional fwnode of the interrupt controller * @ops: Pointer to the interrupt domain callbacks * @host_data: Controller private data pointer * @@ -868,19 +868,19 @@ static int irq_domain_alloc_descs(int virq, unsigned int cnt, * domain flags are set. * Returns pointer to IRQ domain, or NULL on failure. */ -struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, +struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, - struct device_node *node, + struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain; if (size) - domain = irq_domain_add_linear(node, size, ops, host_data); + domain = irq_domain_create_linear(fwnode, size, ops, host_data); else - domain = irq_domain_add_tree(node, ops, host_data); + domain = irq_domain_create_tree(fwnode, ops, host_data); if (domain) { domain->parent = parent; domain->flags |= flags; -- cgit v1.2.3 From be5436c83ac8921f33fe07323fab03c6644ce52e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:44 +0100 Subject: irqdomain/msi: Use fwnode instead of of_node As we continue to push of_node towards the outskirts of irq domains, let's start tackling the case of msi_create_irq_domain and its little friends. This has limited impact on PCI/MSI, platform MSI, and a few drivers.
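A sketch of the resulting call-site pattern (my_create_msi_domain, np, info and parent are hypothetical stand-ins for a driver's own names; the gicv2m and ITS hunks below make exactly this change):

	#include <linux/irqdomain.h>
	#include <linux/msi.h>

	static struct irq_domain *my_create_msi_domain(struct device_node *np,
						       struct msi_domain_info *info,
						       struct irq_domain *parent)
	{
		/* was: pci_msi_create_irq_domain(np, info, parent) */
		return pci_msi_create_irq_domain(of_node_to_fwnode(np), info, parent);
	}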
Signed-off-by: Marc Zyngier Tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-17-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/base/platform-msi.c | 6 +++--- drivers/irqchip/irq-gic-v2m.c | 5 +++-- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 3 ++- drivers/irqchip/irq-gic-v3-its-platform-msi.c | 3 ++- drivers/pci/host/pci-xgene-msi.c | 2 +- drivers/pci/msi.c | 14 +++++++------- include/linux/msi.h | 9 +++++---- kernel/irq/msi.c | 8 ++++---- 8 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 134483daac25..5df4575b5ba7 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -152,7 +152,7 @@ static int platform_msi_alloc_descs(struct device *dev, int nvec, /** * platform_msi_create_irq_domain - Create a platform MSI interrupt domain - * @np: Optional device-tree node of the interrupt controller + * @fwnode: Optional fwnode of the interrupt controller * @info: MSI domain info * @parent: Parent irq domain * @@ -162,7 +162,7 @@ static int platform_msi_alloc_descs(struct device *dev, int nvec, * Returns: * A domain pointer or NULL in case of failure. */ -struct irq_domain *platform_msi_create_irq_domain(struct device_node *np, +struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { @@ -173,7 +173,7 @@ struct irq_domain *platform_msi_create_irq_domain(struct device_node *np, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) platform_msi_update_chip_ops(info); - domain = msi_create_irq_domain(np, info, parent); + domain = msi_create_irq_domain(fwnode, info, parent); if (domain) domain->bus_token = DOMAIN_BUS_PLATFORM_MSI; diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 3b88e17d237c..bf9b3c0e6978 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -308,9 +308,10 @@ static int __init gicv2m_init_one(struct device_node *node, inner_domain->bus_token = DOMAIN_BUS_NEXUS; inner_domain->parent = parent; - pci_domain = pci_msi_create_irq_domain(node, &gicv2m_msi_domain_info, + pci_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node), + &gicv2m_msi_domain_info, inner_domain); - plat_domain = platform_msi_create_irq_domain(node, + plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node), &gicv2m_pmsi_domain_info, inner_domain); if (!pci_domain || !plat_domain) { diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index a7c8c9ffbafd..693c2f9ae898 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -125,7 +125,8 @@ static int __init its_pci_msi_init(void) continue; } - if (!pci_msi_create_irq_domain(np, &its_pci_msi_domain_info, + if (!pci_msi_create_irq_domain(of_node_to_fwnode(np), + &its_pci_msi_domain_info, parent)) { pr_err("%s: unable to create PCI domain\n", np->full_name); diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c index a86550562779..960a8166a6c0 100644 --- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c @@ -78,7 +78,8 @@ static int __init its_pmsi_init(void) continue; } - if 
(!platform_msi_create_irq_domain(np, &its_pmsi_domain_info, + if (!platform_msi_create_irq_domain(of_node_to_fwnode(np), + &its_pmsi_domain_info, parent)) { pr_err("%s: unable to create platform domain\n", np->full_name); diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c index e491681daf22..a6456b578269 100644 --- a/drivers/pci/host/pci-xgene-msi.c +++ b/drivers/pci/host/pci-xgene-msi.c @@ -256,7 +256,7 @@ static int xgene_allocate_domains(struct xgene_msi *msi) if (!msi->inner_domain) return -ENOMEM; - msi->msi_domain = pci_msi_create_irq_domain(msi->node, + msi->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(msi->node), &xgene_msi_domain_info, msi->inner_domain); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index d4497141d083..ddd59fe786f8 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1246,8 +1246,8 @@ static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info) } /** - * pci_msi_create_irq_domain - Creat a MSI interrupt domain - * @node: Optional device-tree node of the interrupt controller + * pci_msi_create_irq_domain - Create a MSI interrupt domain + * @fwnode: Optional fwnode of the interrupt controller * @info: MSI domain info * @parent: Parent irq domain * @@ -1256,7 +1256,7 @@ static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info) * Returns: * A domain pointer or NULL in case of failure. */ -struct irq_domain *pci_msi_create_irq_domain(struct device_node *node, +struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { @@ -1267,7 +1267,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct device_node *node, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) pci_msi_domain_update_chip_ops(info); - domain = msi_create_irq_domain(node, info, parent); + domain = msi_create_irq_domain(fwnode, info, parent); if (!domain) return NULL; @@ -1303,14 +1303,14 @@ void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev) /** * pci_msi_create_default_irq_domain - Create a default MSI interrupt domain - * @node: Optional device-tree node of the interrupt controller + * @fwnode: Optional fwnode of the interrupt controller * @info: MSI domain info * @parent: Parent irq domain * * Returns: A domain pointer or NULL in case of failure. If successful * the default PCI/MSI irqdomain pointer is updated. 
*/ -struct irq_domain *pci_msi_create_default_irq_domain(struct device_node *node, +struct irq_domain *pci_msi_create_default_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { struct irq_domain *domain; @@ -1320,7 +1320,7 @@ struct irq_domain *pci_msi_create_default_irq_domain(struct device_node *node, pr_err("PCI: default irq domain for PCI MSI has already been created.\n"); domain = NULL; } else { - domain = pci_msi_create_irq_domain(node, info, parent); + domain = pci_msi_create_irq_domain(fwnode, info, parent); pci_msi_default_domain = domain; } mutex_unlock(&pci_msi_domain_lock); diff --git a/include/linux/msi.h b/include/linux/msi.h index ad939d0ba816..32a24b9a9556 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -174,6 +174,7 @@ struct msi_controller { struct irq_domain; struct irq_chip; struct device_node; +struct fwnode_handle; struct msi_domain_info; /** @@ -262,7 +263,7 @@ enum { int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force); -struct irq_domain *msi_create_irq_domain(struct device_node *of_node, +struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, @@ -270,7 +271,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); -struct irq_domain *platform_msi_create_irq_domain(struct device_node *np, +struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, @@ -280,13 +281,13 @@ void platform_msi_domain_free_irqs(struct device *dev); #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); -struct irq_domain *pci_msi_create_irq_domain(struct device_node *node, +struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev, int nvec, int type); void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev); -struct irq_domain *pci_msi_create_default_irq_domain(struct device_node *node, +struct irq_domain *pci_msi_create_default_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 7e6512b9dc1f..95354bb07a14 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -239,11 +239,11 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info) /** * msi_create_irq_domain - Create a MSI interrupt domain - * @of_node: Optional device-tree node of the interrupt controller + * @fwnode: Optional fwnode of the interrupt controller * @info: MSI domain info * @parent: Parent irq domain */ -struct irq_domain *msi_create_irq_domain(struct device_node *node, +struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { @@ -252,8 +252,8 @@ struct irq_domain *msi_create_irq_domain(struct device_node *node, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) msi_domain_update_chip_ops(info); - 
return irq_domain_add_hierarchy(parent, 0, 0, node, &msi_domain_ops, - info); + return irq_domain_create_hierarchy(parent, 0, 0, fwnode, + &msi_domain_ops, info); } /** -- cgit v1.2.3 From e7a46c818564329f977f8fa157b5e9e1d0d83012 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:45 +0100 Subject: irqdomain: Documentation updates Update the IRQ domain documentation to reflect the changes made while divorcing the domain infrastructure from Device Tree. Signed-off-by: Marc Zyngier Tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-18-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- Documentation/IRQ-domain.txt | 8 ++++---- include/linux/irqdomain.h | 23 ++++++++++------------- 2 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt index 3a8e15cba816..8d990bde8693 100644 --- a/Documentation/IRQ-domain.txt +++ b/Documentation/IRQ-domain.txt @@ -32,9 +32,9 @@ top of the irq_alloc_desc*() API. An irq_domain to manage mapping is preferred over interrupt controller drivers open coding their own reverse mapping scheme. -irq_domain also implements translation from Device Tree interrupt -specifiers to hwirq numbers, and can be easily extended to support -other IRQ topology data sources. +irq_domain also implements translation from an abstract irq_fwspec +structure to hwirq numbers (Device Tree and ACPI GSI so far), and can +be easily extended to support other IRQ topology data sources. === irq_domain usage === An interrupt controller driver creates and registers an irq_domain by @@ -184,7 +184,7 @@ There are four major interfaces to use hierarchy irq_domain: related resources associated with these interrupts. 3) irq_domain_activate_irq(): activate interrupt controller hardware to deliver the interrupt. -3) irq_domain_deactivate_irq(): deactivate interrupt controller hardware +4) irq_domain_deactivate_irq(): deactivate interrupt controller hardware to stop delivering the interrupt. Following changes are needed to support hierarchy irq_domain. diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 2b3340ae915d..d5e5c5bef28c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -5,9 +5,10 @@ * helpful for interrupt controllers to implement mapping between hardware * irq numbers and the Linux irq number space. * - * irq_domains also have a hook for translating device tree interrupt - * representation into a hardware irq number that can be mapped back to a - * Linux irq number without any extra platform support code. + * irq_domains also have hooks for translating device tree or other + * firmware interrupt representations into a hardware irq number that + * can be mapped back to a Linux irq number without any extra platform + * support code. * * Interrupt controller "domain" data structure. This could be defined as a * irq domain controller. That is, it handles the mapping between hardware @@ -17,16 +18,12 @@ * model). It's the domain callbacks that are responsible for setting the * irq_chip on a given irq_desc after it's been mapped. * - * The host code and data structures are agnostic to whether or not - * we use an open firmware device-tree. 
We do have references to struct - device_node in two places: in irq_find_host() to find the host matching - a given interrupt controller node, and of course as an argument to its - counterpart domain->ops->match() callback. However, those are treated as - generic pointers by the core and the fact that it's actually a device-node - pointer is purely a convention between callers and implementation. This - code could thus be used on other architectures by replacing those two - by some sort of arch-specific void * "token" used to identify interrupt - controllers. + * The host code and data structures use a fwnode_handle pointer to + * identify the domain. In some cases, and in order to preserve source + * code compatibility, this fwnode pointer is "upgraded" to a DT + * device_node. For those firmware infrastructures that do not provide + * a unique identifier for an interrupt controller, the irq_domain + * code offers a fwnode allocator. */ #ifndef _LINUX_IRQDOMAIN_H -- cgit v1.2.3 From 17a7b0b4d9749f80d365d7baff5dec2f54b0e992 Mon Sep 17 00:00:00 2001 From: Lars Svensson Date: Wed, 7 Oct 2015 09:20:12 +0200 Subject: fb.h: Provide alternate screen_base pointer Some drivers use member screen_base of struct fb_info to store non-__iomem pointers, creating the need for ugly __force typecasts to avoid sparse warnings. This adds an alternate pointer without the __iomem qualifier for this use. Signed-off-by: Lars Svensson Signed-off-by: Greg Kroah-Hartman --- include/linux/fb.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index bc9afa74ee11..41a3b11f7796 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -483,7 +483,10 @@ struct fb_info { #ifdef CONFIG_FB_TILEBLITTING struct fb_tile_ops *tileops; /* Tile Blitting */ #endif - char __iomem *screen_base; /* Virtual address */ + union { + char __iomem *screen_base; /* Virtual address */ + char *screen_buffer; + }; unsigned long screen_size; /* Amount of ioremapped VRAM or 0 */ void *pseudo_palette; /* Fake palette of 16 colors */ #define FBINFO_STATE_RUNNING 0 -- cgit v1.2.3 From f9f9f11dcf0f3b757b282ce7cefea8696212a422 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 12 Oct 2015 14:43:22 +0200 Subject: of/irq: move of_msi_configure to the right guard and add a dummy of_msi_configure is part of of_irq.c, which is compiled in when OF_IRQ is enabled, not just OF. It is also called unconditionally from of_platform_device_create_pdata, which does not depend on OF_IRQ, just OF_ADDRESS, so we need a dummy implementation in case of OF_ADDRESS=y but OF_IRQ=n.
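The fix follows a standard kernel header pattern: the real prototype lives under the config symbol whose code actually provides it, and an empty static inline stub takes its place otherwise, so callers compile and link unchanged either way. Here is a minimal standalone sketch of that pattern; foo_configure() and CONFIG_FOO are invented stand-ins for of_msi_configure() and CONFIG_OF_IRQ, not names from the patch:

#include <stdio.h>

struct device { const char *name; };

/* CONFIG_FOO stands in for a Kconfig symbol; foo_configure() for the
 * function whose implementation is only built when that symbol is set. */
#ifdef CONFIG_FOO
extern void foo_configure(struct device *dev);	/* provided by foo.c */
#else
static inline void foo_configure(struct device *dev)
{
	(void)dev;	/* no-op stub, so callers need no #ifdef of their own */
}
#endif

int main(void)
{
	struct device d = { "demo" };

	foo_configure(&d);	/* compiles and links in both configurations */
	printf("configured %s\n", d.name);
	return 0;
}

With the stub in the #else branch, a caller such as of_platform_device_create_pdata() needs no guard of its own, which is exactly what the patch below restores for OF_ADDRESS=y, OF_IRQ=n builds.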
Fixes: c706c239 ("of/platform: Assign MSI domain to platform device") Signed-off-by: Jonas Gorski Signed-off-by: Rob Herring --- include/linux/of_irq.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 4bcbd586a672..0088038d5ccd 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -46,6 +46,7 @@ extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern void of_msi_configure(struct device *dev, struct device_node *np); #else static inline int of_irq_count(struct device_node *dev) { @@ -64,6 +65,9 @@ static inline int of_irq_to_resource_table(struct device_node *dev, { return 0; } +static inline void of_msi_configure(struct device *dev, struct device_node *np) +{ +} #endif #if defined(CONFIG_OF) @@ -74,7 +78,6 @@ static inline int of_irq_to_resource_table(struct device_node *dev, */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); extern struct device_node *of_irq_find_parent(struct device_node *child); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, -- cgit v1.2.3 From 52493d446141b07c8ba28dd6a529513f8b2342bd Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 12 Oct 2015 14:43:23 +0200 Subject: of/irq: make of_irq_find_parent static of_irq_find_parent has no users outside of of_irq.c, so it does not make sense to expose it in of_irq.h. Therefore remove the prototype and dummy implementation and make the function static instead. Signed-off-by: Jonas Gorski Signed-off-by: Rob Herring --- drivers/of/irq.c | 2 +- include/linux/of_irq.h | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 55317fa9c9dc..0c7f4cbe3434 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(irq_of_parse_and_map); * Returns a pointer to the interrupt parent node, or NULL if the interrupt * parent could not be determined. */ -struct device_node *of_irq_find_parent(struct device_node *child) +static struct device_node *of_irq_find_parent(struct device_node *child) { struct device_node *p; const __be32 *parp; diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 0088038d5ccd..a58e2e51eeb2 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -77,7 +77,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) * so declare it here regardless of the CONFIG_OF_IRQ setting.
*/ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); -extern struct device_node *of_irq_find_parent(struct device_node *child); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, @@ -85,11 +84,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, { return 0; } - -static inline void *of_irq_find_parent(struct device_node *child) -{ - return NULL; -} #endif /* !CONFIG_OF */ #endif /* __OF_IRQ_H */ -- cgit v1.2.3 From 62ebf931935964230d6fe39026bc5fbcfac330d3 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 12 Oct 2015 14:43:24 +0200 Subject: of/irq: fix guards for irq_of_parse_and_map prototype Since OF is now a user-selectable config symbol, having OF=y but OF_IRQ=n is a valid combination for non-OF platforms, and OF=y no longer guarantees that OF_IRQ is enabled (or that we are building for SPARC). Fixes the following build error with OF=y, IRQ_DOMAIN=n and SPI=y: drivers/built-in.o: In function `spi_register_master': (.text+0xc3ae): undefined reference to `irq_of_parse_and_map' Makefile:935: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Signed-off-by: Jonas Gorski Signed-off-by: Rob Herring --- include/linux/of_irq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index a58e2e51eeb2..580818d90475 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -70,7 +70,7 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) } #endif -#if defined(CONFIG_OF) +#if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC) /* * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC * implements it differently.
However, the prototype is the same for all, @@ -78,7 +78,7 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); -#else /* !CONFIG_OF */ +#else /* !CONFIG_OF && !CONFIG_SPARC */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, int index) { -- cgit v1.2.3 From 50d3fb562561fcf5b745e71945834735d7386a1f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 20:48:21 +0100 Subject: iommu/vt-d: Use plain writeq() for dmar_writeq() where available Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6240063bdcac..08802b99f057 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -59,14 +59,11 @@ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) -/* -#define dmar_readl(dmar, reg) readl(dmar + reg) -#define dmar_readq(dmar, reg) ({ \ - u32 lo, hi; \ - lo = readl(dmar + reg); \ - hi = readl(dmar + reg + 4); \ - (((u64) hi) << 32) + lo; }) -*/ + +#ifdef CONFIG_64BIT +#define dmar_readq(a) readq(a) +#define dmar_writeq(a,v) writeq(v,a) +#else static inline u64 dmar_readq(void __iomem *addr) { u32 lo, hi; @@ -80,6 +77,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) writel((u32)val, addr); writel((u32)(val >> 32), addr + 4); } +#endif #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) -- cgit v1.2.3 From 45aaeff947190e4b57b2d0db4d74ab5eea450825 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Tue, 13 Oct 2015 11:22:18 +0200 Subject: mtd: nand: pass page number to ecc->write_xxx() methods The ->read_xxx() methods are all passed the page number the NAND controller is supposed to read, but the ->write_xxx() methods do not have such a parameter. This is a problem if we want to properly implement data scrambling/randomization in order to mitigate MLC sensitivity to repeated patterns: to prevent bitflips in adjacent pages in the same block, we need to avoid repeating the same pattern at the same offset in those pages, hence the randomizer/scrambler engine needs to be passed the page value in order to adapt its seed accordingly. Moreover, adding the page parameter to the ->write_xxx() methods adds some consistency to the current API.
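To make the scrambling argument concrete, here is a minimal standalone sketch of a page-seeded scrambler. The seed-mixing constant, the 16-bit LFSR, and the scramble_seed()/scramble_buf() names are illustrative assumptions, not taken from this patch or any NAND driver; the only point being shown is that the XOR stream depends on the page number, so identical payloads land in adjacent pages as different bit patterns:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical per-page seed: mix the page number so that adjacent
 * pages in a block start the pseudo-random stream differently. */
static uint16_t scramble_seed(int page)
{
	/* any non-zero mixing works; this constant is arbitrary */
	return (uint16_t)((page * 0x4f1du + 1) | 1);
}

/* XOR the buffer with a 16-bit Fibonacci LFSR stream (taps 16/14/13/11). */
static void scramble_buf(uint8_t *buf, size_t len, int page)
{
	uint16_t lfsr = scramble_seed(page);
	size_t i;

	for (i = 0; i < len; i++) {
		uint16_t bit = ((lfsr >> 0) ^ (lfsr >> 2) ^
				(lfsr >> 3) ^ (lfsr >> 5)) & 1u;

		lfsr = (uint16_t)((lfsr >> 1) | (bit << 15));
		buf[i] ^= (uint8_t)lfsr;
	}
}

int main(void)
{
	uint8_t a[8] = {0}, b[8] = {0};		/* identical payloads */

	scramble_buf(a, sizeof(a), 4);		/* "page 4" */
	scramble_buf(b, sizeof(b), 5);		/* "page 5": different pattern */
	printf("page 4: %02x, page 5: %02x\n", a[0], b[0]);

	scramble_buf(a, sizeof(a), 4);		/* XOR again: data restored */
	printf("restored: %02x\n", a[0]);
	return 0;
}

Because XOR with the same stream is its own inverse, the read path descrambles by running the same routine with the same page number, which is why the ->read_xxx() methods already carry @page and, with this patch, the ->write_xxx() methods do as well.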
Signed-off-by: Boris Brezillon CC: Josh Wu CC: Ezequiel Garcia CC: Maxime Ripard CC: Greg Kroah-Hartman CC: Huang Shijie CC: Stefan Agner CC: devel@driverdev.osuosl.org CC: linux-arm-kernel@lists.infradead.org CC: linux-kernel@vger.kernel.org Signed-off-by: Brian Norris --- drivers/mtd/nand/atmel_nand.c | 6 ++++-- drivers/mtd/nand/bf5xx_nand.c | 3 ++- drivers/mtd/nand/brcmnand/brcmnand.c | 4 ++-- drivers/mtd/nand/cafe_nand.c | 3 ++- drivers/mtd/nand/denali.c | 5 +++-- drivers/mtd/nand/docg4.c | 6 +++--- drivers/mtd/nand/fsl_elbc_nand.c | 4 ++-- drivers/mtd/nand/fsl_ifc_nand.c | 2 +- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 8 +++---- drivers/mtd/nand/hisi504_nand.c | 3 ++- drivers/mtd/nand/lpc32xx_mlc.c | 3 ++- drivers/mtd/nand/lpc32xx_slc.c | 5 +++-- drivers/mtd/nand/nand_base.c | 31 ++++++++++++++++++--------- drivers/mtd/nand/omap2.c | 3 ++- drivers/mtd/nand/pxa3xx_nand.c | 3 ++- drivers/mtd/nand/sh_flctl.c | 3 ++- drivers/mtd/nand/sunxi_nand.c | 5 +++-- drivers/mtd/nand/vf610_nfc.c | 2 +- drivers/staging/mt29f_spinand/mt29f_spinand.c | 3 ++- include/linux/mtd/nand.h | 6 +++--- 20 files changed, 66 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 898947378713..583cdd9bb971 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -954,7 +954,8 @@ static int atmel_nand_pmecc_read_page(struct mtd_info *mtd, } static int atmel_nand_pmecc_write_page(struct mtd_info *mtd, - struct nand_chip *chip, const uint8_t *buf, int oob_required) + struct nand_chip *chip, const uint8_t *buf, int oob_required, + int page) { struct atmel_nand_host *host = chip->priv; uint32_t *eccpos = chip->ecc.layout->eccpos; @@ -2005,7 +2006,8 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (likely(!raw)) /* Need to write ecc into oob */ - status = chip->ecc.write_page(mtd, chip, buf, oob_required); + status = chip->ecc.write_page(mtd, chip, buf, oob_required, + page); if (status < 0) return status; diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c index acec34403ad4..61bd2160717c 100644 --- a/drivers/mtd/nand/bf5xx_nand.c +++ b/drivers/mtd/nand/bf5xx_nand.c @@ -566,7 +566,8 @@ static int bf5xx_nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip } static int bf5xx_nand_write_page_raw(struct mtd_info *mtd, - struct nand_chip *chip, const uint8_t *buf, int oob_required) + struct nand_chip *chip, const uint8_t *buf, int oob_required, + int page) { bf5xx_nand_write_buf(mtd, buf, mtd->writesize); bf5xx_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize); diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index ea319a4058b7..7c1c306650a4 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -1606,7 +1606,7 @@ out: } static int brcmnand_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { struct brcmnand_host *host = chip->priv; void *oob = oob_required ? chip->oob_poi : NULL; @@ -1617,7 +1617,7 @@ static int brcmnand_write_page(struct mtd_info *mtd, struct nand_chip *chip, static int brcmnand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, - int oob_required) + int oob_required, int page) { struct brcmnand_host *host = chip->priv; void *oob = oob_required ? 
chip->oob_poi : NULL; diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index aa1dc559f361..9de78d2a2eb1 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -516,7 +516,8 @@ static struct nand_bbt_descr cafe_bbt_mirror_descr_512 = { static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { struct cafe_priv *cafe = mtd->priv; diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 0db16ef5604f..67eb2be0db87 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -1114,7 +1114,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip, * by write_page above. */ static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { /* * for regular page writes, we let HW handle all the ECC @@ -1129,7 +1129,8 @@ static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip, * write_page() function above. */ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { /* * for raw page writes, we want to disable ECC and simply write diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c index 510c12d41761..408cf69b854b 100644 --- a/drivers/mtd/nand/docg4.c +++ b/drivers/mtd/nand/docg4.c @@ -977,13 +977,13 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *nand, } static int docg4_write_page_raw(struct mtd_info *mtd, struct nand_chip *nand, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { return write_page(mtd, nand, buf, false); } static int docg4_write_page(struct mtd_info *mtd, struct nand_chip *nand, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { return write_page(mtd, nand, buf, true); } @@ -1113,7 +1113,7 @@ static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs) /* write first page of block */ write_page_prologue(mtd, g4_addr); - docg4_write_page(mtd, nand, buf, 1); + docg4_write_page(mtd, nand, buf, 1, page); ret = pageprog(mtd); kfree(buf); diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c index 43cf0f961c3e..dcb1f7f4873f 100644 --- a/drivers/mtd/nand/fsl_elbc_nand.c +++ b/drivers/mtd/nand/fsl_elbc_nand.c @@ -715,7 +715,7 @@ static int fsl_elbc_read_page(struct mtd_info *mtd, struct nand_chip *chip, * waitfunc. 
*/ static int fsl_elbc_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { fsl_elbc_write_buf(mtd, buf, mtd->writesize); fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize); @@ -728,7 +728,7 @@ static int fsl_elbc_write_page(struct mtd_info *mtd, struct nand_chip *chip, */ static int fsl_elbc_write_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offset, uint32_t data_len, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { fsl_elbc_write_buf(mtd, buf, mtd->writesize); fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize); diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index b9dd41cba3af..7f4ac8c19001 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -772,7 +772,7 @@ static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip, * waitfunc. */ static int fsl_ifc_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { fsl_ifc_write_buf(mtd, buf, mtd->writesize); fsl_ifc_write_buf(mtd, chip->oob_poi, mtd->oobsize); diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 1dbcc8182f1b..2064adac1d17 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1160,7 +1160,7 @@ static int gpmi_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, } static int gpmi_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { struct gpmi_nand_data *this = chip->priv; struct bch_geometry *nfc_geo = &this->bch_geometry; @@ -1446,7 +1446,7 @@ static int gpmi_ecc_read_page_raw(struct mtd_info *mtd, static int gpmi_ecc_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, - int oob_required) + int oob_required, int page) { struct gpmi_nand_data *this = chip->priv; struct bch_geometry *nfc_geo = &this->bch_geometry; @@ -1533,7 +1533,7 @@ static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, { chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page); - return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1); + return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1, page); } static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs) @@ -1717,7 +1717,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this) /* Write the first page of the current stride. */ dev_dbg(dev, "Writing an NCB fingerprint in page 0x%x\n", page); chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); - chip->ecc.write_page_raw(mtd, chip, buffer, 0); + chip->ecc.write_page_raw(mtd, chip, buffer, 0, page); chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); /* Wait for the write to finish. 
*/ diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c index 6099aaa5ee40..0cb2e886937d 100644 --- a/drivers/mtd/nand/hisi504_nand.c +++ b/drivers/mtd/nand/hisi504_nand.c @@ -590,7 +590,8 @@ static int hisi_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip, } static int hisi_nand_write_page_hwecc(struct mtd_info *mtd, - struct nand_chip *chip, const uint8_t *buf, int oob_required) + struct nand_chip *chip, const uint8_t *buf, int oob_required, + int page) { chip->write_buf(mtd, buf, mtd->writesize); if (oob_required) diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c index 65b7e03a0f5b..9686d025d4c9 100644 --- a/drivers/mtd/nand/lpc32xx_mlc.c +++ b/drivers/mtd/nand/lpc32xx_mlc.c @@ -495,7 +495,8 @@ static int lpc32xx_read_page(struct mtd_info *mtd, struct nand_chip *chip, static int lpc32xx_write_page_lowlevel(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { struct lpc32xx_nand_host *host = chip->priv; const uint8_t *oobbuf = chip->oob_poi; diff --git a/drivers/mtd/nand/lpc32xx_slc.c b/drivers/mtd/nand/lpc32xx_slc.c index cbf4501090b8..40c06b2e6229 100644 --- a/drivers/mtd/nand/lpc32xx_slc.c +++ b/drivers/mtd/nand/lpc32xx_slc.c @@ -663,7 +663,8 @@ static int lpc32xx_nand_read_page_raw_syndrome(struct mtd_info *mtd, */ static int lpc32xx_nand_write_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, + int oob_required, int page) { struct lpc32xx_nand_host *host = chip->priv; uint8_t *pb = chip->oob_poi + chip->ecc.layout->eccpos[0]; @@ -692,7 +693,7 @@ static int lpc32xx_nand_write_page_syndrome(struct mtd_info *mtd, static int lpc32xx_nand_write_page_raw_syndrome(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, - int oob_required) + int oob_required, int page) { /* Raw writes can just use the FIFO interface */ chip->write_buf(mtd, buf, chip->ecc.size * chip->ecc.steps); diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index a8230b164f49..d87c7d0b4675 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2155,11 +2155,12 @@ out: * @chip: nand chip info structure * @buf: data buffer * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write * * Not for syndrome calculating ECC controllers, which use a special oob layout. */ static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { chip->write_buf(mtd, buf, mtd->writesize); if (oob_required) @@ -2174,12 +2175,14 @@ static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, * @chip: nand chip info structure * @buf: data buffer * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write * * We need a special oob layout and handling even when ECC isn't checked. 
*/ static int nand_write_page_raw_syndrome(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { int eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; @@ -2216,9 +2219,11 @@ static int nand_write_page_raw_syndrome(struct mtd_info *mtd, * @chip: nand chip info structure * @buf: data buffer * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write */ static int nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { int i, eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; @@ -2234,7 +2239,7 @@ static int nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, for (i = 0; i < chip->ecc.total; i++) chip->oob_poi[eccpos[i]] = ecc_calc[i]; - return chip->ecc.write_page_raw(mtd, chip, buf, 1); + return chip->ecc.write_page_raw(mtd, chip, buf, 1, page); } /** @@ -2243,9 +2248,11 @@ static int nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, * @chip: nand chip info structure * @buf: data buffer * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write */ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { int i, eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; @@ -2277,11 +2284,12 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, * @data_len: data length * @buf: data buffer * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write */ static int nand_write_subpage_hwecc(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offset, uint32_t data_len, const uint8_t *buf, - int oob_required) + int oob_required, int page) { uint8_t *oob_buf = chip->oob_poi; uint8_t *ecc_calc = chip->buffers->ecccalc; @@ -2336,13 +2344,15 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd, * @chip: nand chip info structure * @buf: data buffer * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write * * The hw generator calculates the error syndrome automatically. Therefore we * need a special oob layout and handling. 
*/ static int nand_write_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { int i, eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; @@ -2406,12 +2416,13 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (unlikely(raw)) status = chip->ecc.write_page_raw(mtd, chip, buf, - oob_required); + oob_required, page); else if (subpage) status = chip->ecc.write_subpage(mtd, chip, offset, data_len, - buf, oob_required); + buf, oob_required, page); else - status = chip->ecc.write_page(mtd, chip, buf, oob_required); + status = chip->ecc.write_page(mtd, chip, buf, oob_required, + page); if (status < 0) return status; diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 7fbf00962709..93f664cd1c90 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1500,11 +1500,12 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data, * @chip: nand chip info structure * @buf: data buffer * @oob_required: must write chip->oob_poi to OOB + * @page: page * * Custom write page method evolved to support multi sector writing in one shot */ static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { int i; uint8_t *ecc_calc = chip->buffers->ecccalc; diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 0024a9c35631..17d7a23f72b2 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1160,7 +1160,8 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd, } static int pxa3xx_nand_write_page_hwecc(struct mtd_info *mtd, - struct nand_chip *chip, const uint8_t *buf, int oob_required) + struct nand_chip *chip, const uint8_t *buf, int oob_required, + int page) { chip->write_buf(mtd, buf, mtd->writesize); chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index 2e4f762775d5..bcba1a924c75 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -569,7 +569,8 @@ static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, } static int flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { chip->write_buf(mtd, buf, mtd->writesize); chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index d8b809b13c0e..ef46ac66248b 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -752,7 +752,8 @@ static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd, static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, + int page) { struct nand_ecc_ctrl *ecc = &chip->ecc; int ret, i, cur_off = 0; @@ -815,7 +816,7 @@ static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd, static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, - int oob_required) + int oob_required, int page) { struct nand_ecc_ctrl *ecc = &chip->ecc; int ret, i, cur_off = 0; diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c index ae1f84e3d635..d275691dbb7c 100644 --- a/drivers/mtd/nand/vf610_nfc.c +++ 
b/drivers/mtd/nand/vf610_nfc.c @@ -612,7 +612,7 @@ static int vf610_nfc_read_page(struct mtd_info *mtd, struct nand_chip *chip, } static int vf610_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required) + const uint8_t *buf, int oob_required, int page) { struct vf610_nfc *nfc = mtd_to_nfc(mtd); diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c index 9a8ddbb0191f..405b643189fd 100644 --- a/drivers/staging/mt29f_spinand/mt29f_spinand.c +++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c @@ -612,7 +612,8 @@ static int spinand_erase_block(struct spi_device *spi_nand, u16 block_id) #ifdef CONFIG_MTD_SPINAND_ONDIEECC static int spinand_write_page_hwecc(struct mtd_info *mtd, - struct nand_chip *chip, const uint8_t *buf, int oob_required) + struct nand_chip *chip, const uint8_t *buf, int oob_required, + int page) { const uint8_t *p = buf; int eccsize = chip->ecc.size; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 50e1f94fa377..5a9d1d4c2487 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -504,16 +504,16 @@ struct nand_ecc_ctrl { int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); int (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required); + const uint8_t *buf, int oob_required, int page); int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offs, uint32_t len, uint8_t *buf, int page); int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offset, uint32_t data_len, - const uint8_t *data_buf, int oob_required); + const uint8_t *data_buf, int oob_required, int page); int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required); + const uint8_t *buf, int oob_required, int page); int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, int page); int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From ef25ba0476015908ef5960f9faac149ddf34ede0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Oct 2015 00:49:34 +0200 Subject: PM / sleep: Add flags to indicate platform firmware involvement There are quite a few cases in which device drivers, bus types or even the PM core itself may benefit from knowing whether or not the platform firmware will be involved in the upcoming system power transition (during system suspend) or whether or not it was involved in it (during system resume). For this reason, introduce global system suspend flags that can be used by the platform code to expose that information for the benefit of the other parts of the kernel and make the ACPI core set them as appropriate. Users of the new flags will be added later. Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/sleep.c | 3 +++ include/linux/suspend.h | 36 ++++++++++++++++++++++++++++++++++++ kernel/power/suspend.c | 4 ++++ 3 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 2f0d4db40a9e..a0b4d781e606 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -487,6 +487,8 @@ static int acpi_suspend_begin(suspend_state_t pm_state) pr_err("ACPI does not support sleep state S%u\n", acpi_state); return -ENOSYS; } + if (acpi_state > ACPI_STATE_S1) + pm_set_suspend_via_firmware(); acpi_pm_start(acpi_state); return 0; @@ -522,6 +524,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state) if (error) return error; pr_info(PREFIX "Low-level resume complete\n"); + pm_set_resume_via_firmware(); break; } trace_suspend_resume(TPS("acpi_suspend"), acpi_state, false); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 33aaf9a9596c..8b6ec7ef0854 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -202,6 +202,36 @@ struct platform_freeze_ops { extern void suspend_set_ops(const struct platform_suspend_ops *ops); extern int suspend_valid_only_mem(suspend_state_t state); +extern unsigned int pm_suspend_global_flags; + +#define PM_SUSPEND_FLAG_FW_SUSPEND (1 << 0) +#define PM_SUSPEND_FLAG_FW_RESUME (1 << 1) + +static inline void pm_suspend_clear_flags(void) +{ + pm_suspend_global_flags = 0; +} + +static inline void pm_set_suspend_via_firmware(void) +{ + pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_SUSPEND; +} + +static inline void pm_set_resume_via_firmware(void) +{ + pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME; +} + +static inline bool pm_suspend_via_firmware(void) +{ + return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_SUSPEND); +} + +static inline bool pm_resume_via_firmware(void) +{ + return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME); +} + /* Suspend-to-idle state machnine. */ enum freeze_state { FREEZE_STATE_NONE, /* Not suspended/suspending. */ @@ -241,6 +271,12 @@ extern int pm_suspend(suspend_state_t state); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL +static inline void pm_suspend_clear_flags(void) {} +static inline void pm_set_suspend_via_firmware(void) {} +static inline void pm_set_resume_via_firmware(void) {} +static inline bool pm_suspend_via_firmware(void) { return false; } +static inline bool pm_resume_via_firmware(void) { return false; } + static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } static inline bool idle_should_freeze(void) { return false; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 7e4cda4a8dd9..f9fe133c13e2 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -35,6 +35,9 @@ const char *pm_labels[] = { "mem", "standby", "freeze", NULL }; const char *pm_states[PM_SUSPEND_MAX]; +unsigned int pm_suspend_global_flags; +EXPORT_SYMBOL_GPL(pm_suspend_global_flags); + static const struct platform_suspend_ops *suspend_ops; static const struct platform_freeze_ops *freeze_ops; static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); @@ -493,6 +496,7 @@ static int enter_state(suspend_state_t state) #endif pr_debug("PM: Preparing system for sleep (%s)\n", pm_states[state]); + pm_suspend_clear_flags(); error = suspend_prepare(state); if (error) goto Unlock; -- cgit v1.2.3 From 58a1fbbb2ee873dd1fe327e80bc7b08e80866269 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 7 Oct 2015 00:50:24 +0200 Subject: PM / PCI / ACPI: Kick devices that might have been reset by firmware There is a concern that if the platform firmware was involved in the system resume that's being completed, some devices might have been reset by it and if those devices had the power.direct_complete flag set during the preceding suspend transition, they may stay in a reset-power-on state indefinitely (until they are runtime-resumed and then suspended again). That may not be a big deal from the individual device's perspective, but if the system is an SoC, it may be prevented from entering deep SoC-wide low-power states on idle because of that. The devices that are most likely to be affected by this issue are PCI devices and ACPI-enumerated devices using the general ACPI PM domain, so to prevent it from happening for those devices, force a runtime resume for them if they have their power.direct_complete flags set and the platform firmware was involved in the resume transition currently in progress. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_lpss.c | 2 +- drivers/acpi/device_pm.c | 19 +------------------ drivers/base/power/generic_ops.c | 23 +++++++++++++++++++++++ drivers/pci/pci-driver.c | 2 +- include/linux/pm.h | 1 + 5 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index f51bd0d0bc17..f9e0d09f7c66 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -664,7 +664,7 @@ static struct dev_pm_domain acpi_lpss_pm_domain = { #ifdef CONFIG_PM #ifdef CONFIG_PM_SLEEP .prepare = acpi_subsys_prepare, - .complete = acpi_subsys_complete, + .complete = pm_complete_with_resume_check, .suspend = acpi_subsys_suspend, .suspend_late = acpi_lpss_suspend_late, .resume_early = acpi_lpss_resume_early, diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 4806b7f856c4..08a02cdc737c 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -962,23 +962,6 @@ int acpi_subsys_prepare(struct device *dev) } EXPORT_SYMBOL_GPL(acpi_subsys_prepare); -/** - * acpi_subsys_complete - Finalize device's resume during system resume. - * @dev: Device to handle. - */ -void acpi_subsys_complete(struct device *dev) -{ - pm_generic_complete(dev); - /* - * If the device had been runtime-suspended before the system went into - * the sleep state it is going out of and it has never been resumed till - * now, resume it in case the firmware powered it up. - */ - if (dev->power.direct_complete) - pm_request_resume(dev); -} -EXPORT_SYMBOL_GPL(acpi_subsys_complete); - /** * acpi_subsys_suspend - Run the device driver's suspend callback. * @dev: Device to handle. 
@@ -1047,7 +1030,7 @@ static struct dev_pm_domain acpi_general_pm_domain = { .runtime_resume = acpi_subsys_runtime_resume, #ifdef CONFIG_PM_SLEEP .prepare = acpi_subsys_prepare, - .complete = acpi_subsys_complete, + .complete = pm_complete_with_resume_check, .suspend = acpi_subsys_suspend, .suspend_late = acpi_subsys_suspend_late, .resume_early = acpi_subsys_resume_early, diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index b2ed606265a8..07c3c4a9522d 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef CONFIG_PM /** @@ -297,4 +298,26 @@ void pm_generic_complete(struct device *dev) if (drv && drv->pm && drv->pm->complete) drv->pm->complete(dev); } + +/** + * pm_complete_with_resume_check - Complete a device power transition. + * @dev: Device to handle. + * + * Complete a device power transition during a system-wide power transition and + * optionally schedule a runtime resume of the device if the system resume in + * progress has been initated by the platform firmware and the device had its + * power.direct_complete flag set. + */ +void pm_complete_with_resume_check(struct device *dev) +{ + pm_generic_complete(dev); + /* + * If the device had been runtime-suspended before the system went into + * the sleep state it is going out of and it has never been resumed till + * now, resume it in case the firmware powered it up. + */ + if (dev->power.direct_complete && pm_resume_via_firmware()) + pm_request_resume(dev); +} +EXPORT_SYMBOL_GPL(pm_complete_with_resume_check); #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index c9ce3073d7fe..306124bba61e 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -687,7 +687,7 @@ static int pci_pm_prepare(struct device *dev) static void pci_pm_complete(struct device *dev) { pci_dev_complete_resume(to_pci_dev(dev)); - pm_generic_complete(dev); + pm_complete_with_resume_check(dev); } #else /* !CONFIG_PM_SLEEP */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 35d599e7250d..528be6787796 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -732,6 +732,7 @@ extern int pm_generic_poweroff_noirq(struct device *dev); extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); +extern void pm_complete_with_resume_check(struct device *dev); #else /* !CONFIG_PM_SLEEP */ -- cgit v1.2.3 From 801cf21bb5806e39c593bad76b257bba20eaf66f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:06 -0700 Subject: mtd: spi-nor: make implicit dependency explicit We use BIT() in the header. No real problem for now, but it's better to be accurate. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 672595a381c5..bc4c321707e3 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -10,6 +10,8 @@ #ifndef __LINUX_MTD_SPI_NOR_H #define __LINUX_MTD_SPI_NOR_H +#include + /* * Note on opcode nomenclature: some opcodes have a format like * SPINOR_OP_FUNCTION{4,}_x_y_z. 
The numbers x, y, and z stand for the number -- cgit v1.2.3 From a8a16454edb364858a0a54e17acaeab329e7b47f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:07 -0700 Subject: mtd: spi-nor: make bitfield constants more consistent These status bits use different ways of representing similar integer constants -- some are decimal, some are hex. Make them more consistent. At the same time, impose my own preference, since IMO it's clearer what these are when using the BIT() macro. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index bc4c321707e3..768b900a72cf 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -63,24 +63,24 @@ #define SPINOR_OP_WD_EVCR 0x61 /* Write EVCR register */ /* Status Register bits. */ -#define SR_WIP 1 /* Write in progress */ -#define SR_WEL 2 /* Write enable latch */ +#define SR_WIP BIT(0) /* Write in progress */ +#define SR_WEL BIT(1) /* Write enable latch */ /* meaning of other SR_* bits may differ between vendors */ -#define SR_BP0 4 /* Block protect 0 */ -#define SR_BP1 8 /* Block protect 1 */ -#define SR_BP2 0x10 /* Block protect 2 */ -#define SR_SRWD 0x80 /* SR write protect */ +#define SR_BP0 BIT(2) /* Block protect 0 */ +#define SR_BP1 BIT(3) /* Block protect 1 */ +#define SR_BP2 BIT(4) /* Block protect 2 */ +#define SR_SRWD BIT(7) /* SR write protect */ -#define SR_QUAD_EN_MX 0x40 /* Macronix Quad I/O */ +#define SR_QUAD_EN_MX BIT(6) /* Macronix Quad I/O */ /* Enhanced Volatile Configuration Register bits */ -#define EVCR_QUAD_EN_MICRON 0x80 /* Micron Quad I/O */ +#define EVCR_QUAD_EN_MICRON BIT(7) /* Micron Quad I/O */ /* Flag Status Register bits */ -#define FSR_READY 0x80 +#define FSR_READY BIT(7) /* Configuration Register bits. */ -#define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ +#define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */ enum read_mode { SPI_NOR_NORMAL = 0, -- cgit v1.2.3 From db4745edb282766f6532d4b0f643c2348e450e25 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:08 -0700 Subject: mtd: spi-nor: add SPI NOR manufacturer IDs These are often similar for CFI (parallel NOR) and for SPI NOR, but they aren't always the same, for various reasons (different namespaces, company acquisitions and renames, etc.). And some don't have CFI_MFR_* entries at all. So let's make a proper place to list the SPI NOR IDs, with all the SPI NOR specific assumptions and comments. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 768b900a72cf..88297ee22de4 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -11,6 +11,21 @@ #define __LINUX_MTD_SPI_NOR_H #include +#include + +/* + * Manufacturer IDs + * + * The first byte returned from the flash after sending opcode SPINOR_OP_RDID. + * Sometimes these are the same as CFI IDs, but sometimes they aren't. 
+ */ +#define SNOR_MFR_ATMEL CFI_MFR_ATMEL +#define SNOR_MFR_INTEL CFI_MFR_INTEL +#define SNOR_MFR_MICRON CFI_MFR_ST /* ST Micro <--> Micron */ +#define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX +#define SNOR_MFR_SPANSION CFI_MFR_AMD +#define SNOR_MFR_SST CFI_MFR_SST +#define SNOR_MFR_WINBOND 0xef /* * Note on opcode nomenclature: some opcodes have a format like -- cgit v1.2.3 From f8900258906c3533b91e779e80f75ec80de816c0 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:10 -0700 Subject: mtd: spi-nor: fixup kernel-doc for flash lock/unlock function pointers I got the names of these fields wrong. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 88297ee22de4..75eb1a079f75 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -145,8 +145,8 @@ struct mtd_info; * @write: [DRIVER-SPECIFIC] write data to the SPI NOR * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR * at the offset @offs - * @lock: [FLASH-SPECIFIC] lock a region of the SPI NOR - * @unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR + * @flash_lock: [FLASH-SPECIFIC] lock a region of the SPI NOR + * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR * @priv: the private data */ struct spi_nor { -- cgit v1.2.3 From 5bf0e69b67a5600ea7ef178acd57606d1fc2c134 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:12 -0700 Subject: mtd: spi-nor: add mtd_is_locked() support This enables ioctl(MEMISLOCKED). Status can now be reported in the mtdinfo or flash_lock utilities found in mtd-utils. Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 37 ++++++++++++++++++++++++++++++++++++- include/linux/mtd/spi-nor.h | 3 +++ 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 3db0f2b0ad45..192aa8c1c8bf 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -549,6 +549,24 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) return write_sr(nor, status_new); } +/* + * Check if a region of the flash is (completely) locked. See stm_lock() for + * more info. + * + * Returns 1 if entire region is locked, 0 if any portion is unlocked, and + * negative on errors. 
+ */ +static int stm_is_locked(struct spi_nor *nor, loff_t ofs, uint64_t len) +{ + int status; + + status = read_sr(nor); + if (status < 0) + return status; + + return stm_is_locked_sr(nor, ofs, len, status); +} + static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct spi_nor *nor = mtd_to_spi_nor(mtd); @@ -579,6 +597,21 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) return ret; } +static int spi_nor_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct spi_nor *nor = mtd_to_spi_nor(mtd); + int ret; + + ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK); + if (ret) + return ret; + + ret = nor->flash_is_locked(nor, ofs, len); + + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK); + return ret; +} + /* Used when the "_ext_id" is two bytes at most */ #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ .id = { \ @@ -1186,11 +1219,13 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) if (JEDEC_MFR(info) == SNOR_MFR_MICRON) { nor->flash_lock = stm_lock; nor->flash_unlock = stm_unlock; + nor->flash_is_locked = stm_is_locked; } - if (nor->flash_lock && nor->flash_unlock) { + if (nor->flash_lock && nor->flash_unlock && nor->flash_is_locked) { mtd->_lock = spi_nor_lock; mtd->_unlock = spi_nor_unlock; + mtd->_is_locked = spi_nor_is_locked; } /* sst nor chips use AAI word program */ diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 75eb1a079f75..c8723b62c4cd 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -147,6 +147,8 @@ struct mtd_info; * at the offset @offs * @flash_lock: [FLASH-SPECIFIC] lock a region of the SPI NOR * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR + * @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is + * completely locked * @priv: the private data */ struct spi_nor { @@ -178,6 +180,7 @@ struct spi_nor { int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); + int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len); void *priv; }; -- cgit v1.2.3 From 870823e629ea194e6cf8e82a9694ac62cad49512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:37 +0200 Subject: configfs: add show and store methods to struct configfs_attribute Add methods to struct configfs_attribute to directly show and store attributes without adding boilerplate code to every user. In addition to the methods this also adds 3 helper macros to define read/write, read-only and write-only attributes with a single line of code. 
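For consumers, the new model reduces each attribute to a show/store pair named after the attribute plus one macro invocation. Below is a sketch of a hypothetical item with one read/write file; myitem_, threshold and the to_myitem() container_of helper (sketched after the next patch) are invented names, while CONFIGFS_ATTR() and the myitem_attr_threshold variable it emits follow the macros this patch adds:

/* A sketch of a configfs item exposing one read/write "threshold" file. */
static ssize_t myitem_threshold_show(struct config_item *item, char *page)
{
	return sprintf(page, "%d\n", to_myitem(item)->threshold);
}

static ssize_t myitem_threshold_store(struct config_item *item,
				      const char *page, size_t len)
{
	int val, ret;

	ret = kstrtoint(page, 0, &val);
	if (ret)
		return ret;
	to_myitem(item)->threshold = val;
	return len;
}

/* Expands to a static struct configfs_attribute named myitem_attr_threshold,
 * wired to the two functions above with mode S_IRUGO | S_IWUSR. */
CONFIGFS_ATTR(myitem_, threshold);

static struct configfs_attribute *myitem_attrs[] = {
	&myitem_attr_threshold,
	NULL,
};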
Signed-off-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Acked-by: Greg Kroah-Hartman Signed-off-by: Nicholas Bellinger --- fs/configfs/file.c | 17 ++++++++++++----- include/linux/configfs.h | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 403269ffcdf3..106ca589e90a 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -74,7 +74,11 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf if (!buffer->page) return -ENOMEM; - count = ops->show_attribute(item,attr,buffer->page); + if (ops->show_attribute) + count = ops->show_attribute(item, attr, buffer->page); + else + count = attr->show(item, buffer->page); + buffer->needs_read_fill = 0; BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); if (count >= 0) @@ -173,7 +177,9 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size struct config_item * item = to_item(dentry->d_parent); struct configfs_item_operations * ops = buffer->ops; - return ops->store_attribute(item,attr,buffer->page,count); + if (ops->store_attribute) + return ops->store_attribute(item, attr, buffer->page, count); + return attr->store(item, buffer->page, count); } @@ -237,8 +243,8 @@ static int check_perm(struct inode * inode, struct file * file) * and we must have a store method. */ if (file->f_mode & FMODE_WRITE) { - - if (!(inode->i_mode & S_IWUGO) || !ops->store_attribute) + if (!(inode->i_mode & S_IWUGO) || + (!ops->store_attribute && !attr->store)) goto Eaccess; } @@ -248,7 +254,8 @@ static int check_perm(struct inode * inode, struct file * file) * must be a show method for it. */ if (file->f_mode & FMODE_READ) { - if (!(inode->i_mode & S_IRUGO) || !ops->show_attribute) + if (!(inode->i_mode & S_IRUGO) || + (!ops->show_attribute && !attr->show)) goto Eaccess; } diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 63a36e89d0eb..85e9956a86de 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -125,8 +125,35 @@ struct configfs_attribute { const char *ca_name; struct module *ca_owner; umode_t ca_mode; + ssize_t (*show)(struct config_item *, char *); + ssize_t (*store)(struct config_item *, const char *, size_t); }; +#define CONFIGFS_ATTR(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO | S_IWUSR, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ + .store = _pfx##_name##_store, \ +} + +#define CONFIGFS_ATTR_RO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ +} + +#define CONFIGFS_ATTR_WO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IWUSR, \ + .ca_owner = THIS_MODULE, \ + .store = _pfx##_name##_store, \ +} + /* * Users often need to create attribute structures for their configurable * attributes, containing a configfs_attribute member and function pointers -- cgit v1.2.3 From 45b6a73f62ebcf3ff067895fb8030e67f4c7b67f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:38 +0200 Subject: usb-gadget: use per-attribute show and store methods To simplify the configfs interface and remove boilerplate code that also causes binary bloat. 
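The conversion that follows leans on one idiom throughout: per-attribute callbacks receive only the bare struct config_item, so each show/store implementation first walks back up to its containing object with container_of(), exactly as the to_gadget_info() and to_config_usb_cfg() helpers in the diff below do. A minimal sketch of the hypothetical to_myitem() helper assumed in the earlier configfs example:

struct myitem {
	struct config_group group;	/* embeds the configfs group */
	int threshold;
};

/* Per-attribute callbacks only get a config_item; recover the
 * containing object via the embedded group. */
static inline struct myitem *to_myitem(struct config_item *item)
{
	return container_of(to_config_group(item), struct myitem, group);
}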
Signed-off-by: Christoph Hellwig Reviewed-by: Andrzej Pietrasiewicz Acked-by: Felipe Balbi Signed-off-by: Nicholas Bellinger --- drivers/usb/gadget/configfs.c | 295 ++++++++++++++---------------------- include/linux/usb/gadget_configfs.h | 19 +-- 2 files changed, 118 insertions(+), 196 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 294eb74fb078..163d305e1200 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -64,6 +64,11 @@ struct gadget_info { char qw_sign[OS_STRING_QW_SIGN_LEN]; }; +static inline struct gadget_info *to_gadget_info(struct config_item *item) +{ + return container_of(to_config_group(item), struct gadget_info, group); +} + struct config_usb_cfg { struct config_group group; struct config_group strings_group; @@ -74,6 +79,12 @@ struct config_usb_cfg { struct usb_gadget_strings *gstrings[MAX_USB_STRING_LANGS + 1]; }; +static inline struct config_usb_cfg *to_config_usb_cfg(struct config_item *item) +{ + return container_of(to_config_group(item), struct config_usb_cfg, + group); +} + struct gadget_strings { struct usb_gadget_strings stringtab_dev; struct usb_string strings[USB_GADGET_FIRST_AVAIL_IDX]; @@ -117,32 +128,25 @@ static int usb_string_copy(const char *s, char **s_copy) return 0; } -CONFIGFS_ATTR_STRUCT(gadget_info); -CONFIGFS_ATTR_STRUCT(config_usb_cfg); - -#define GI_DEVICE_DESC_ITEM_ATTR(name) \ - static struct gadget_info_attribute gadget_cdev_desc_##name = \ - __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ - gadget_dev_desc_##name##_show, \ - gadget_dev_desc_##name##_store) - #define GI_DEVICE_DESC_SIMPLE_R_u8(__name) \ - static ssize_t gadget_dev_desc_##__name##_show(struct gadget_info *gi, \ +static ssize_t gadget_dev_desc_##__name##_show(struct config_item *item, \ char *page) \ { \ - return sprintf(page, "0x%02x\n", gi->cdev.desc.__name); \ + return sprintf(page, "0x%02x\n", \ + to_gadget_info(item)->cdev.desc.__name); \ } #define GI_DEVICE_DESC_SIMPLE_R_u16(__name) \ - static ssize_t gadget_dev_desc_##__name##_show(struct gadget_info *gi, \ +static ssize_t gadget_dev_desc_##__name##_show(struct config_item *item, \ char *page) \ { \ - return sprintf(page, "0x%04x\n", le16_to_cpup(&gi->cdev.desc.__name)); \ + return sprintf(page, "0x%04x\n", \ + le16_to_cpup(&to_gadget_info(item)->cdev.desc.__name)); \ } #define GI_DEVICE_DESC_SIMPLE_W_u8(_name) \ - static ssize_t gadget_dev_desc_##_name##_store(struct gadget_info *gi, \ +static ssize_t gadget_dev_desc_##_name##_store(struct config_item *item, \ const char *page, size_t len) \ { \ u8 val; \ @@ -150,12 +154,12 @@ CONFIGFS_ATTR_STRUCT(config_usb_cfg); ret = kstrtou8(page, 0, &val); \ if (ret) \ return ret; \ - gi->cdev.desc._name = val; \ + to_gadget_info(item)->cdev.desc._name = val; \ return len; \ } #define GI_DEVICE_DESC_SIMPLE_W_u16(_name) \ - static ssize_t gadget_dev_desc_##_name##_store(struct gadget_info *gi, \ +static ssize_t gadget_dev_desc_##_name##_store(struct config_item *item, \ const char *page, size_t len) \ { \ u16 val; \ @@ -163,7 +167,7 @@ CONFIGFS_ATTR_STRUCT(config_usb_cfg); ret = kstrtou16(page, 0, &val); \ if (ret) \ return ret; \ - gi->cdev.desc._name = cpu_to_le16p(&val); \ + to_gadget_info(item)->cdev.desc._name = cpu_to_le16p(&val); \ return len; \ } @@ -193,7 +197,7 @@ static ssize_t is_valid_bcd(u16 bcd_val) return 0; } -static ssize_t gadget_dev_desc_bcdDevice_store(struct gadget_info *gi, +static ssize_t gadget_dev_desc_bcdDevice_store(struct config_item *item, const char *page, 
size_t len) { u16 bcdDevice; @@ -206,11 +210,11 @@ static ssize_t gadget_dev_desc_bcdDevice_store(struct gadget_info *gi, if (ret) return ret; - gi->cdev.desc.bcdDevice = cpu_to_le16(bcdDevice); + to_gadget_info(item)->cdev.desc.bcdDevice = cpu_to_le16(bcdDevice); return len; } -static ssize_t gadget_dev_desc_bcdUSB_store(struct gadget_info *gi, +static ssize_t gadget_dev_desc_bcdUSB_store(struct config_item *item, const char *page, size_t len) { u16 bcdUSB; @@ -223,13 +227,13 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct gadget_info *gi, if (ret) return ret; - gi->cdev.desc.bcdUSB = cpu_to_le16(bcdUSB); + to_gadget_info(item)->cdev.desc.bcdUSB = cpu_to_le16(bcdUSB); return len; } -static ssize_t gadget_dev_desc_UDC_show(struct gadget_info *gi, char *page) +static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page) { - return sprintf(page, "%s\n", gi->udc_name ?: ""); + return sprintf(page, "%s\n", to_gadget_info(item)->udc_name ?: ""); } static int unregister_gadget(struct gadget_info *gi) @@ -247,9 +251,10 @@ static int unregister_gadget(struct gadget_info *gi) return 0; } -static ssize_t gadget_dev_desc_UDC_store(struct gadget_info *gi, +static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, const char *page, size_t len) { + struct gadget_info *gi = to_gadget_info(item); char *name; int ret; @@ -283,34 +288,29 @@ err: return ret; } -GI_DEVICE_DESC_ITEM_ATTR(bDeviceClass); -GI_DEVICE_DESC_ITEM_ATTR(bDeviceSubClass); -GI_DEVICE_DESC_ITEM_ATTR(bDeviceProtocol); -GI_DEVICE_DESC_ITEM_ATTR(bMaxPacketSize0); -GI_DEVICE_DESC_ITEM_ATTR(idVendor); -GI_DEVICE_DESC_ITEM_ATTR(idProduct); -GI_DEVICE_DESC_ITEM_ATTR(bcdDevice); -GI_DEVICE_DESC_ITEM_ATTR(bcdUSB); -GI_DEVICE_DESC_ITEM_ATTR(UDC); +CONFIGFS_ATTR(gadget_dev_desc_, bDeviceClass); +CONFIGFS_ATTR(gadget_dev_desc_, bDeviceSubClass); +CONFIGFS_ATTR(gadget_dev_desc_, bDeviceProtocol); +CONFIGFS_ATTR(gadget_dev_desc_, bMaxPacketSize0); +CONFIGFS_ATTR(gadget_dev_desc_, idVendor); +CONFIGFS_ATTR(gadget_dev_desc_, idProduct); +CONFIGFS_ATTR(gadget_dev_desc_, bcdDevice); +CONFIGFS_ATTR(gadget_dev_desc_, bcdUSB); +CONFIGFS_ATTR(gadget_dev_desc_, UDC); static struct configfs_attribute *gadget_root_attrs[] = { - &gadget_cdev_desc_bDeviceClass.attr, - &gadget_cdev_desc_bDeviceSubClass.attr, - &gadget_cdev_desc_bDeviceProtocol.attr, - &gadget_cdev_desc_bMaxPacketSize0.attr, - &gadget_cdev_desc_idVendor.attr, - &gadget_cdev_desc_idProduct.attr, - &gadget_cdev_desc_bcdDevice.attr, - &gadget_cdev_desc_bcdUSB.attr, - &gadget_cdev_desc_UDC.attr, + &gadget_dev_desc_attr_bDeviceClass, + &gadget_dev_desc_attr_bDeviceSubClass, + &gadget_dev_desc_attr_bDeviceProtocol, + &gadget_dev_desc_attr_bMaxPacketSize0, + &gadget_dev_desc_attr_idVendor, + &gadget_dev_desc_attr_idProduct, + &gadget_dev_desc_attr_bcdDevice, + &gadget_dev_desc_attr_bcdUSB, + &gadget_dev_desc_attr_UDC, NULL, }; -static inline struct gadget_info *to_gadget_info(struct config_item *item) -{ - return container_of(to_config_group(item), struct gadget_info, group); -} - static inline struct gadget_strings *to_gadget_strings(struct config_item *item) { return container_of(to_config_group(item), struct gadget_strings, @@ -324,12 +324,6 @@ static inline struct gadget_config_name *to_gadget_config_name( group); } -static inline struct config_usb_cfg *to_config_usb_cfg(struct config_item *item) -{ - return container_of(to_config_group(item), struct config_usb_cfg, - group); -} - static inline struct usb_function_instance *to_usb_function_instance( struct config_item 
*item) { @@ -348,12 +342,8 @@ static void gadget_info_attr_release(struct config_item *item) kfree(gi); } -CONFIGFS_ATTR_OPS(gadget_info); - static struct configfs_item_operations gadget_root_item_ops = { .release = gadget_info_attr_release, - .show_attribute = gadget_info_attr_show, - .store_attribute = gadget_info_attr_store, }; static void gadget_config_attr_release(struct config_item *item) @@ -454,24 +444,20 @@ static int config_usb_cfg_unlink( return 0; } -CONFIGFS_ATTR_OPS(config_usb_cfg); - static struct configfs_item_operations gadget_config_item_ops = { .release = gadget_config_attr_release, - .show_attribute = config_usb_cfg_attr_show, - .store_attribute = config_usb_cfg_attr_store, .allow_link = config_usb_cfg_link, .drop_link = config_usb_cfg_unlink, }; -static ssize_t gadget_config_desc_MaxPower_show(struct config_usb_cfg *cfg, +static ssize_t gadget_config_desc_MaxPower_show(struct config_item *item, char *page) { - return sprintf(page, "%u\n", cfg->c.MaxPower); + return sprintf(page, "%u\n", to_config_usb_cfg(item)->c.MaxPower); } -static ssize_t gadget_config_desc_MaxPower_store(struct config_usb_cfg *cfg, +static ssize_t gadget_config_desc_MaxPower_store(struct config_item *item, const char *page, size_t len) { u16 val; @@ -481,17 +467,18 @@ static ssize_t gadget_config_desc_MaxPower_store(struct config_usb_cfg *cfg, return ret; if (DIV_ROUND_UP(val, 8) > 0xff) return -ERANGE; - cfg->c.MaxPower = val; + to_config_usb_cfg(item)->c.MaxPower = val; return len; } -static ssize_t gadget_config_desc_bmAttributes_show(struct config_usb_cfg *cfg, +static ssize_t gadget_config_desc_bmAttributes_show(struct config_item *item, char *page) { - return sprintf(page, "0x%02x\n", cfg->c.bmAttributes); + return sprintf(page, "0x%02x\n", + to_config_usb_cfg(item)->c.bmAttributes); } -static ssize_t gadget_config_desc_bmAttributes_store(struct config_usb_cfg *cfg, +static ssize_t gadget_config_desc_bmAttributes_store(struct config_item *item, const char *page, size_t len) { u8 val; @@ -504,22 +491,16 @@ static ssize_t gadget_config_desc_bmAttributes_store(struct config_usb_cfg *cfg, if (val & ~(USB_CONFIG_ATT_ONE | USB_CONFIG_ATT_SELFPOWER | USB_CONFIG_ATT_WAKEUP)) return -EINVAL; - cfg->c.bmAttributes = val; + to_config_usb_cfg(item)->c.bmAttributes = val; return len; } -#define CFG_CONFIG_DESC_ITEM_ATTR(name) \ - static struct config_usb_cfg_attribute gadget_usb_cfg_##name = \ - __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ - gadget_config_desc_##name##_show, \ - gadget_config_desc_##name##_store) - -CFG_CONFIG_DESC_ITEM_ATTR(MaxPower); -CFG_CONFIG_DESC_ITEM_ATTR(bmAttributes); +CONFIGFS_ATTR(gadget_config_desc_, MaxPower); +CONFIGFS_ATTR(gadget_config_desc_, bmAttributes); static struct configfs_attribute *gadget_config_attrs[] = { - &gadget_usb_cfg_MaxPower.attr, - &gadget_usb_cfg_bmAttributes.attr, + &gadget_config_desc_attr_MaxPower, + &gadget_config_desc_attr_bmAttributes, NULL, }; @@ -616,11 +597,10 @@ static struct config_item_type functions_type = { .ct_owner = THIS_MODULE, }; -CONFIGFS_ATTR_STRUCT(gadget_config_name); GS_STRINGS_RW(gadget_config_name, configuration); static struct configfs_attribute *gadget_config_name_langid_attrs[] = { - &gadget_config_name_configuration.attr, + &gadget_config_name_attr_configuration, NULL, }; @@ -719,15 +699,14 @@ static struct config_item_type config_desc_type = { .ct_owner = THIS_MODULE, }; -CONFIGFS_ATTR_STRUCT(gadget_strings); GS_STRINGS_RW(gadget_strings, manufacturer); GS_STRINGS_RW(gadget_strings, product); GS_STRINGS_RW(gadget_strings, 
serialnumber); static struct configfs_attribute *gadget_strings_langid_attrs[] = { - &gadget_strings_manufacturer.attr, - &gadget_strings_product.attr, - &gadget_strings_serialnumber.attr, + &gadget_strings_attr_manufacturer, + &gadget_strings_attr_product, + &gadget_strings_attr_serialnumber, NULL, }; @@ -751,27 +730,25 @@ static inline struct os_desc *to_os_desc(struct config_item *item) return container_of(to_config_group(item), struct os_desc, group); } -CONFIGFS_ATTR_STRUCT(os_desc); -CONFIGFS_ATTR_OPS(os_desc); - -static ssize_t os_desc_use_show(struct os_desc *os_desc, char *page) +static inline struct gadget_info *os_desc_item_to_gadget_info( + struct config_item *item) { - struct gadget_info *gi; - - gi = to_gadget_info(os_desc->group.cg_item.ci_parent); + return to_gadget_info(to_os_desc(item)->group.cg_item.ci_parent); +} - return sprintf(page, "%d", gi->use_os_desc); +static ssize_t os_desc_use_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d", + os_desc_item_to_gadget_info(item)->use_os_desc); } -static ssize_t os_desc_use_store(struct os_desc *os_desc, const char *page, +static ssize_t os_desc_use_store(struct config_item *item, const char *page, size_t len) { - struct gadget_info *gi; + struct gadget_info *gi = os_desc_item_to_gadget_info(item); int ret; bool use; - gi = to_gadget_info(os_desc->group.cg_item.ci_parent); - mutex_lock(&gi->lock); ret = strtobool(page, &use); if (!ret) { @@ -783,29 +760,19 @@ static ssize_t os_desc_use_store(struct os_desc *os_desc, const char *page, return ret; } -static struct os_desc_attribute os_desc_use = - __CONFIGFS_ATTR(use, S_IRUGO | S_IWUSR, - os_desc_use_show, - os_desc_use_store); - -static ssize_t os_desc_b_vendor_code_show(struct os_desc *os_desc, char *page) +static ssize_t os_desc_b_vendor_code_show(struct config_item *item, char *page) { - struct gadget_info *gi; - - gi = to_gadget_info(os_desc->group.cg_item.ci_parent); - - return sprintf(page, "%d", gi->b_vendor_code); + return sprintf(page, "%d", + os_desc_item_to_gadget_info(item)->b_vendor_code); } -static ssize_t os_desc_b_vendor_code_store(struct os_desc *os_desc, +static ssize_t os_desc_b_vendor_code_store(struct config_item *item, const char *page, size_t len) { - struct gadget_info *gi; + struct gadget_info *gi = os_desc_item_to_gadget_info(item); int ret; u8 b_vendor_code; - gi = to_gadget_info(os_desc->group.cg_item.ci_parent); - mutex_lock(&gi->lock); ret = kstrtou8(page, 0, &b_vendor_code); if (!ret) { @@ -817,29 +784,20 @@ static ssize_t os_desc_b_vendor_code_store(struct os_desc *os_desc, return ret; } -static struct os_desc_attribute os_desc_b_vendor_code = - __CONFIGFS_ATTR(b_vendor_code, S_IRUGO | S_IWUSR, - os_desc_b_vendor_code_show, - os_desc_b_vendor_code_store); - -static ssize_t os_desc_qw_sign_show(struct os_desc *os_desc, char *page) +static ssize_t os_desc_qw_sign_show(struct config_item *item, char *page) { - struct gadget_info *gi; - - gi = to_gadget_info(os_desc->group.cg_item.ci_parent); + struct gadget_info *gi = os_desc_item_to_gadget_info(item); memcpy(page, gi->qw_sign, OS_STRING_QW_SIGN_LEN); - return OS_STRING_QW_SIGN_LEN; } -static ssize_t os_desc_qw_sign_store(struct os_desc *os_desc, const char *page, +static ssize_t os_desc_qw_sign_store(struct config_item *item, const char *page, size_t len) { - struct gadget_info *gi; + struct gadget_info *gi = os_desc_item_to_gadget_info(item); int res, l; - gi = to_gadget_info(os_desc->group.cg_item.ci_parent); l = min((int)len, OS_STRING_QW_SIGN_LEN >> 1); if (page[l - 1] == 
'\n') --l; @@ -855,15 +813,14 @@ static ssize_t os_desc_qw_sign_store(struct os_desc *os_desc, const char *page, return res; } -static struct os_desc_attribute os_desc_qw_sign = - __CONFIGFS_ATTR(qw_sign, S_IRUGO | S_IWUSR, - os_desc_qw_sign_show, - os_desc_qw_sign_store); +CONFIGFS_ATTR(os_desc_, use); +CONFIGFS_ATTR(os_desc_, b_vendor_code); +CONFIGFS_ATTR(os_desc_, qw_sign); static struct configfs_attribute *os_desc_attrs[] = { - &os_desc_use.attr, - &os_desc_b_vendor_code.attr, - &os_desc_qw_sign.attr, + &os_desc_attr_use, + &os_desc_attr_b_vendor_code, + &os_desc_attr_qw_sign, NULL, }; @@ -926,8 +883,6 @@ static int os_desc_unlink(struct config_item *os_desc_ci, static struct configfs_item_operations os_desc_ops = { .release = os_desc_attr_release, - .show_attribute = os_desc_attr_show, - .store_attribute = os_desc_attr_store, .allow_link = os_desc_link, .drop_link = os_desc_unlink, }; @@ -938,28 +893,21 @@ static struct config_item_type os_desc_type = { .ct_owner = THIS_MODULE, }; -CONFIGFS_ATTR_STRUCT(usb_os_desc); -CONFIGFS_ATTR_OPS(usb_os_desc); - - static inline struct usb_os_desc_ext_prop *to_usb_os_desc_ext_prop(struct config_item *item) { return container_of(item, struct usb_os_desc_ext_prop, item); } -CONFIGFS_ATTR_STRUCT(usb_os_desc_ext_prop); -CONFIGFS_ATTR_OPS(usb_os_desc_ext_prop); - -static ssize_t ext_prop_type_show(struct usb_os_desc_ext_prop *ext_prop, - char *page) +static ssize_t ext_prop_type_show(struct config_item *item, char *page) { - return sprintf(page, "%d", ext_prop->type); + return sprintf(page, "%d", to_usb_os_desc_ext_prop(item)->type); } -static ssize_t ext_prop_type_store(struct usb_os_desc_ext_prop *ext_prop, +static ssize_t ext_prop_type_store(struct config_item *item, const char *page, size_t len) { + struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item); struct usb_os_desc *desc = to_usb_os_desc(ext_prop->item.ci_parent); u8 type; int ret; @@ -997,9 +945,9 @@ end: return ret; } -static ssize_t ext_prop_data_show(struct usb_os_desc_ext_prop *ext_prop, - char *page) +static ssize_t ext_prop_data_show(struct config_item *item, char *page) { + struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item); int len = ext_prop->data_len; if (ext_prop->type == USB_EXT_PROP_UNICODE || @@ -1011,9 +959,10 @@ static ssize_t ext_prop_data_show(struct usb_os_desc_ext_prop *ext_prop, return len; } -static ssize_t ext_prop_data_store(struct usb_os_desc_ext_prop *ext_prop, +static ssize_t ext_prop_data_store(struct config_item *item, const char *page, size_t len) { + struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item); struct usb_os_desc *desc = to_usb_os_desc(ext_prop->item.ci_parent); char *new_data; size_t ret_len = len; @@ -1044,17 +993,12 @@ static ssize_t ext_prop_data_store(struct usb_os_desc_ext_prop *ext_prop, return ret_len; } -static struct usb_os_desc_ext_prop_attribute ext_prop_type = - __CONFIGFS_ATTR(type, S_IRUGO | S_IWUSR, - ext_prop_type_show, ext_prop_type_store); - -static struct usb_os_desc_ext_prop_attribute ext_prop_data = - __CONFIGFS_ATTR(data, S_IRUGO | S_IWUSR, - ext_prop_data_show, ext_prop_data_store); +CONFIGFS_ATTR(ext_prop_, type); +CONFIGFS_ATTR(ext_prop_, data); static struct configfs_attribute *ext_prop_attrs[] = { - &ext_prop_type.attr, - &ext_prop_data.attr, + &ext_prop_attr_type, + &ext_prop_attr_data, NULL, }; @@ -1067,8 +1011,6 @@ static void usb_os_desc_ext_prop_release(struct config_item *item) static struct configfs_item_operations ext_prop_ops = { .release = 
usb_os_desc_ext_prop_release, - .show_attribute = usb_os_desc_ext_prop_attr_show, - .store_attribute = usb_os_desc_ext_prop_attr_store, }; static struct config_item *ext_prop_make( @@ -1137,21 +1079,17 @@ static struct configfs_group_operations interf_grp_ops = { .drop_item = &ext_prop_drop, }; -static struct configfs_item_operations interf_item_ops = { - .show_attribute = usb_os_desc_attr_show, - .store_attribute = usb_os_desc_attr_store, -}; - -static ssize_t interf_grp_compatible_id_show(struct usb_os_desc *desc, +static ssize_t interf_grp_compatible_id_show(struct config_item *item, char *page) { - memcpy(page, desc->ext_compat_id, 8); + memcpy(page, to_usb_os_desc(item)->ext_compat_id, 8); return 8; } -static ssize_t interf_grp_compatible_id_store(struct usb_os_desc *desc, +static ssize_t interf_grp_compatible_id_store(struct config_item *item, const char *page, size_t len) { + struct usb_os_desc *desc = to_usb_os_desc(item); int l; l = min_t(int, 8, len); @@ -1167,21 +1105,17 @@ static ssize_t interf_grp_compatible_id_store(struct usb_os_desc *desc, return len; } -static struct usb_os_desc_attribute interf_grp_attr_compatible_id = - __CONFIGFS_ATTR(compatible_id, S_IRUGO | S_IWUSR, - interf_grp_compatible_id_show, - interf_grp_compatible_id_store); - -static ssize_t interf_grp_sub_compatible_id_show(struct usb_os_desc *desc, +static ssize_t interf_grp_sub_compatible_id_show(struct config_item *item, char *page) { - memcpy(page, desc->ext_compat_id + 8, 8); + memcpy(page, to_usb_os_desc(item)->ext_compat_id + 8, 8); return 8; } -static ssize_t interf_grp_sub_compatible_id_store(struct usb_os_desc *desc, +static ssize_t interf_grp_sub_compatible_id_store(struct config_item *item, const char *page, size_t len) { + struct usb_os_desc *desc = to_usb_os_desc(item); int l; l = min_t(int, 8, len); @@ -1197,14 +1131,12 @@ static ssize_t interf_grp_sub_compatible_id_store(struct usb_os_desc *desc, return len; } -static struct usb_os_desc_attribute interf_grp_attr_sub_compatible_id = - __CONFIGFS_ATTR(sub_compatible_id, S_IRUGO | S_IWUSR, - interf_grp_sub_compatible_id_show, - interf_grp_sub_compatible_id_store); +CONFIGFS_ATTR(interf_grp_, compatible_id); +CONFIGFS_ATTR(interf_grp_, sub_compatible_id); static struct configfs_attribute *interf_grp_attrs[] = { - &interf_grp_attr_compatible_id.attr, - &interf_grp_attr_sub_compatible_id.attr, + &interf_grp_attr_compatible_id, + &interf_grp_attr_sub_compatible_id, NULL }; @@ -1242,7 +1174,6 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, f_default_groups[0] = os_desc_group; os_desc_group->default_groups = interface_groups; - interface_type->ct_item_ops = &interf_item_ops; interface_type->ct_group_ops = &interf_grp_ops; interface_type->ct_attrs = interf_grp_attrs; interface_type->ct_owner = owner; diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index d74c0ae989d5..c36e95730de1 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -7,9 +7,10 @@ int check_user_usb_string(const char *name, struct usb_gadget_strings *stringtab_dev); #define GS_STRINGS_W(__struct, __name) \ - static ssize_t __struct##_##__name##_store(struct __struct *gs, \ +static ssize_t __struct##_##__name##_store(struct config_item *item, \ const char *page, size_t len) \ { \ + struct __struct *gs = to_##__struct(item); \ int ret; \ \ ret = usb_string_copy(page, &gs->__name); \ @@ -19,30 +20,20 @@ int check_user_usb_string(const char *name, } #define GS_STRINGS_R(__struct, __name) \ - 
static ssize_t __struct##_##__name##_show(struct __struct *gs, \ - char *page) \ +static ssize_t __struct##_##__name##_show(struct config_item *item, char *page) \ { \ + struct __struct *gs = to_##__struct(item); \ return sprintf(page, "%s\n", gs->__name ?: ""); \ } -#define GS_STRING_ITEM_ATTR(struct_name, name) \ - static struct struct_name##_attribute struct_name##_##name = \ - __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ - struct_name##_##name##_show, \ - struct_name##_##name##_store) - #define GS_STRINGS_RW(struct_name, _name) \ GS_STRINGS_R(struct_name, _name) \ GS_STRINGS_W(struct_name, _name) \ - GS_STRING_ITEM_ATTR(struct_name, _name) + CONFIGFS_ATTR(struct_name##_, _name) #define USB_CONFIG_STRING_RW_OPS(struct_in) \ - CONFIGFS_ATTR_OPS(struct_in); \ - \ static struct configfs_item_operations struct_in##_langid_item_ops = { \ .release = struct_in##_attr_release, \ - .show_attribute = struct_in##_attr_show, \ - .store_attribute = struct_in##_attr_store, \ }; \ \ static struct config_item_type struct_in##_langid_type = { \ -- cgit v1.2.3 From 517982229f78b2aebf00a8a337e84e8eeea70b8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:59 +0200 Subject: configfs: remove old API Remove the old show_attribute and store_attribute methods and update the documentation. Also replace the two C samples with a single new one in the proper samples directory where people expect to find it. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/filesystems/Makefile | 2 - Documentation/filesystems/configfs/Makefile | 3 - Documentation/filesystems/configfs/configfs.txt | 38 +- .../configfs/configfs_example_explicit.c | 483 --------------------- .../filesystems/configfs/configfs_example_macros.c | 446 ------------------- fs/configfs/file.c | 15 +- include/linux/configfs.h | 82 ---- samples/Kconfig | 6 + samples/Makefile | 3 +- samples/configfs/Makefile | 2 + samples/configfs/configfs_sample.c | 404 +++++++++++++++++ 11 files changed, 428 insertions(+), 1056 deletions(-) delete mode 100644 Documentation/filesystems/configfs/Makefile delete mode 100644 Documentation/filesystems/configfs/configfs_example_explicit.c delete mode 100644 Documentation/filesystems/configfs/configfs_example_macros.c create mode 100644 samples/configfs/Makefile create mode 100644 samples/configfs/configfs_sample.c (limited to 'include/linux') diff --git a/Documentation/filesystems/Makefile b/Documentation/filesystems/Makefile index 13483d192ebb..883010ce5e35 100644 --- a/Documentation/filesystems/Makefile +++ b/Documentation/filesystems/Makefile @@ -1,5 +1,3 @@ -subdir-y := configfs - # List of programs to build hostprogs-y := dnotify_test diff --git a/Documentation/filesystems/configfs/Makefile b/Documentation/filesystems/configfs/Makefile deleted file mode 100644 index be7ec5e67dbc..000000000000 --- a/Documentation/filesystems/configfs/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -ifneq ($(CONFIG_CONFIGFS_FS),) -obj-m += configfs_example_explicit.o configfs_example_macros.o -endif diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index b40fec9d3f53..af68efdbbfad 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -160,12 +160,6 @@ among other things. For that, it needs a type. 
struct configfs_item_operations { void (*release)(struct config_item *); - ssize_t (*show_attribute)(struct config_item *, - struct configfs_attribute *, - char *); - ssize_t (*store_attribute)(struct config_item *, - struct configfs_attribute *, - const char *, size_t); int (*allow_link)(struct config_item *src, struct config_item *target); int (*drop_link)(struct config_item *src, @@ -183,9 +177,7 @@ The most basic function of a config_item_type is to define what operations can be performed on a config_item. All items that have been allocated dynamically will need to provide the ct_item_ops->release() method. This method is called when the config_item's reference count -reaches zero. Items that wish to display an attribute need to provide -the ct_item_ops->show_attribute() method. Similarly, storing a new -attribute value uses the store_attribute() method. +reaches zero. [struct configfs_attribute] @@ -193,6 +185,8 @@ attribute value uses the store_attribute() method. char *ca_name; struct module *ca_owner; umode_t ca_mode; + ssize_t (*show)(struct config_item *, char *); + ssize_t (*store)(struct config_item *, const char *, size_t); }; When a config_item wants an attribute to appear as a file in the item's @@ -202,10 +196,10 @@ config_item_type->ct_attrs. When the item appears in configfs, the attribute file will appear with the configfs_attribute->ca_name filename. configfs_attribute->ca_mode specifies the file permissions. -If an attribute is readable and the config_item provides a -ct_item_ops->show_attribute() method, that method will be called -whenever userspace asks for a read(2) on the attribute. The converse -will happen for write(2). +If an attribute is readable and provides a ->show method, that method will +be called whenever userspace asks for a read(2) on the attribute. If an +attribute is writable and provides a ->store method, that method will +be called whenever userspace asks for a write(2) on the attribute. [struct config_group] @@ -311,20 +305,10 @@ the subsystem must be ready for it. [An Example] The best example of these basic concepts is the simple_children -subsystem/group and the simple_child item in configfs_example_explicit.c -and configfs_example_macros.c. It shows a trivial object displaying and -storing an attribute, and a simple group creating and destroying these -children. - -The only difference between configfs_example_explicit.c and -configfs_example_macros.c is how the attributes of the childless item -are defined. The childless item has extended attributes, each with -their own show()/store() operation. This follows a convention commonly -used in sysfs. configfs_example_explicit.c creates these attributes -by explicitly defining the structures involved. Conversely -configfs_example_macros.c uses some convenience macros from configfs.h -to define the attributes. These macros are similar to their sysfs -counterparts. +subsystem/group and the simple_child item in +samples/configfs/configfs_sample.c. It shows a trivial object displaying +and storing an attribute, and a simple group creating and destroying +these children.
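As a quick illustration of the read/write semantics just described (reusing the hypothetical "widget" item from the earlier aside), a read-only attribute supplies only a ->show method; since check_perm() now also requires ->store for writable opens, a write(2) can never reach it.

static ssize_t widget_version_show(struct config_item *item, char *page)
{
	/* Illustrative only: report a fixed value. */
	return sprintf(page, "1\n");
}

/* ca_mode is S_IRUGO and no ->store is set. */
CONFIGFS_ATTR_RO(widget_, version);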
[Hierarchy Navigation and the Subsystem Mutex] diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c deleted file mode 100644 index 1420233dfa55..000000000000 --- a/Documentation/filesystems/configfs/configfs_example_explicit.c +++ /dev/null @@ -1,483 +0,0 @@ -/* - * vim: noexpandtab ts=8 sts=0 sw=8: - * - * configfs_example_explicit.c - This file is a demonstration module - * containing a number of configfs subsystems. It explicitly defines - * each structure without using the helper macros defined in - * configfs.h. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - * Based on sysfs: - * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel - * - * configfs Copyright (C) 2005 Oracle. All rights reserved. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> - -#include <linux/configfs.h> - - - -/* - * 01-childless - * - * This first example is a childless subsystem. It cannot create - * any config_items. It just has attributes. - * - * Note that we are enclosing the configfs_subsystem inside a container. - * This is not necessary if a subsystem has no attributes directly - * on the subsystem. See the next example, 02-simple-children, for - * such a subsystem. - */ - -struct childless { - struct configfs_subsystem subsys; - int showme; - int storeme; -}; - -struct childless_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct childless *, char *); - ssize_t (*store)(struct childless *, const char *, size_t); -}; - -static inline struct childless *to_childless(struct config_item *item) -{ - return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; -} - -static ssize_t childless_showme_read(struct childless *childless, - char *page) -{ - ssize_t pos; - - pos = sprintf(page, "%d\n", childless->showme); - childless->showme++; - - return pos; -} - -static ssize_t childless_storeme_read(struct childless *childless, - char *page) -{ - return sprintf(page, "%d\n", childless->storeme); -} - -static ssize_t childless_storeme_write(struct childless *childless, - const char *page, - size_t count) -{ - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if ((*p != '\0') && (*p != '\n')) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - childless->storeme = tmp; - - return count; -} - -static ssize_t childless_description_read(struct childless *childless, - char *page) -{ - return sprintf(page, -"[01-childless]\n" -"\n" -"The childless subsystem is the simplest possible subsystem in\n" -"configfs. It does not support the creation of child config_items.\n" -"It only has a few attributes.
In fact, it isn't much different\n" -"than a directory in /proc.\n"); -} - -static struct childless_attribute childless_attr_showme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, - .show = childless_showme_read, -}; -static struct childless_attribute childless_attr_storeme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, - .show = childless_storeme_read, - .store = childless_storeme_write, -}; -static struct childless_attribute childless_attr_description = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, - .show = childless_description_read, -}; - -static struct configfs_attribute *childless_attrs[] = { - &childless_attr_showme.attr, - &childless_attr_storeme.attr, - &childless_attr_description.attr, - NULL, -}; - -static ssize_t childless_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = 0; - - if (childless_attr->show) - ret = childless_attr->show(childless, page); - return ret; -} - -static ssize_t childless_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = -EINVAL; - - if (childless_attr->store) - ret = childless_attr->store(childless, page, count); - return ret; -} - -static struct configfs_item_operations childless_item_ops = { - .show_attribute = childless_attr_show, - .store_attribute = childless_attr_store, -}; - -static struct config_item_type childless_type = { - .ct_item_ops = &childless_item_ops, - .ct_attrs = childless_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct childless childless_subsys = { - .subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "01-childless", - .ci_type = &childless_type, - }, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 02-simple-children - * - * This example merely has a simple one-attribute child. Note that - * there is no extra attribute structure, as the child's attribute is - * known from the get-go. Also, there is no container for the - * subsystem, as it has no attributes of its own. - */ - -struct simple_child { - struct config_item item; - int storeme; -}; - -static inline struct simple_child *to_simple_child(struct config_item *item) -{ - return item ? 
container_of(item, struct simple_child, item) : NULL; -} - -static struct configfs_attribute simple_child_attr_storeme = { - .ca_owner = THIS_MODULE, - .ca_name = "storeme", - .ca_mode = S_IRUGO | S_IWUSR, -}; - -static struct configfs_attribute *simple_child_attrs[] = { - &simple_child_attr_storeme, - NULL, -}; - -static ssize_t simple_child_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - ssize_t count; - struct simple_child *simple_child = to_simple_child(item); - - count = sprintf(page, "%d\n", simple_child->storeme); - - return count; -} - -static ssize_t simple_child_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct simple_child *simple_child = to_simple_child(item); - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - simple_child->storeme = tmp; - - return count; -} - -static void simple_child_release(struct config_item *item) -{ - kfree(to_simple_child(item)); -} - -static struct configfs_item_operations simple_child_item_ops = { - .release = simple_child_release, - .show_attribute = simple_child_attr_show, - .store_attribute = simple_child_attr_store, -}; - -static struct config_item_type simple_child_type = { - .ct_item_ops = &simple_child_item_ops, - .ct_attrs = simple_child_attrs, - .ct_owner = THIS_MODULE, -}; - - -struct simple_children { - struct config_group group; -}; - -static inline struct simple_children *to_simple_children(struct config_item *item) -{ - return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; -} - -static struct config_item *simple_children_make_item(struct config_group *group, const char *name) -{ - struct simple_child *simple_child; - - simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); - if (!simple_child) - return ERR_PTR(-ENOMEM); - - config_item_init_type_name(&simple_child->item, name, - &simple_child_type); - - simple_child->storeme = 0; - - return &simple_child->item; -} - -static struct configfs_attribute simple_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *simple_children_attrs[] = { - &simple_children_attr_description, - NULL, -}; - -static ssize_t simple_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[02-simple-children]\n" -"\n" -"This subsystem allows the creation of child config_items. These\n" -"items have only one attribute that is readable and writeable.\n"); -} - -static void simple_children_release(struct config_item *item) -{ - kfree(to_simple_children(item)); -} - -static struct configfs_item_operations simple_children_item_ops = { - .release = simple_children_release, - .show_attribute = simple_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. 
- */ -static struct configfs_group_operations simple_children_group_ops = { - .make_item = simple_children_make_item, -}; - -static struct config_item_type simple_children_type = { - .ct_item_ops = &simple_children_item_ops, - .ct_group_ops = &simple_children_group_ops, - .ct_attrs = simple_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem simple_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "02-simple-children", - .ci_type = &simple_children_type, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 03-group-children - * - * This example reuses the simple_children group from above. However, - * the simple_children group is not the subsystem itself, it is a - * child of the subsystem. Creation of a group in the subsystem creates - * a new simple_children group. That group can then have simple_child - * children of its own. - */ - -static struct config_group *group_children_make_group(struct config_group *group, const char *name) -{ - struct simple_children *simple_children; - - simple_children = kzalloc(sizeof(struct simple_children), - GFP_KERNEL); - if (!simple_children) - return ERR_PTR(-ENOMEM); - - config_group_init_type_name(&simple_children->group, name, - &simple_children_type); - - return &simple_children->group; -} - -static struct configfs_attribute group_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *group_children_attrs[] = { - &group_children_attr_description, - NULL, -}; - -static ssize_t group_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[03-group-children]\n" -"\n" -"This subsystem allows the creation of child config_groups. These\n" -"groups are like the subsystem simple-children.\n"); -} - -static struct configfs_item_operations group_children_item_ops = { - .show_attribute = group_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations group_children_group_ops = { - .make_group = group_children_make_group, -}; - -static struct config_item_type group_children_type = { - .ct_item_ops = &group_children_item_ops, - .ct_group_ops = &group_children_group_ops, - .ct_attrs = group_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem group_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "03-group-children", - .ci_type = &group_children_type, - }, - }, -}; - -/* ----------------------------------------------------------------- */ - -/* - * We're now done with our subsystem definitions. - * For convenience in this module, here's a list of them all. It - * allows the init function to easily register them. Most modules - * will only have one subsystem, and will only call register_subsystem - * on it directly. 
- */ -static struct configfs_subsystem *example_subsys[] = { - &childless_subsys.subsys, - &simple_children_subsys, - &group_children_subsys, - NULL, -}; - -static int __init configfs_example_init(void) -{ - int ret; - int i; - struct configfs_subsystem *subsys; - - for (i = 0; example_subsys[i]; i++) { - subsys = example_subsys[i]; - - config_group_init(&subsys->su_group); - mutex_init(&subsys->su_mutex); - ret = configfs_register_subsystem(subsys); - if (ret) { - printk(KERN_ERR "Error %d while registering subsystem %s\n", - ret, - subsys->su_group.cg_item.ci_namebuf); - goto out_unregister; - } - } - - return 0; - -out_unregister: - for (i--; i >= 0; i--) - configfs_unregister_subsystem(example_subsys[i]); - - return ret; -} - -static void __exit configfs_example_exit(void) -{ - int i; - - for (i = 0; example_subsys[i]; i++) - configfs_unregister_subsystem(example_subsys[i]); -} - -module_init(configfs_example_init); -module_exit(configfs_example_exit); -MODULE_LICENSE("GPL"); diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c deleted file mode 100644 index 327dfbc640a9..000000000000 --- a/Documentation/filesystems/configfs/configfs_example_macros.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * vim: noexpandtab ts=8 sts=0 sw=8: - * - * configfs_example_macros.c - This file is a demonstration module - * containing a number of configfs subsystems. It uses the helper - * macros defined by configfs.h - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - * Based on sysfs: - * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel - * - * configfs Copyright (C) 2005 Oracle. All rights reserved. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> - -#include <linux/configfs.h> - - - -/* - * 01-childless - * - * This first example is a childless subsystem. It cannot create - * any config_items. It just has attributes. - * - * Note that we are enclosing the configfs_subsystem inside a container. - * This is not necessary if a subsystem has no attributes directly - * on the subsystem. See the next example, 02-simple-children, for - * such a subsystem. - */ - -struct childless { - struct configfs_subsystem subsys; - int showme; - int storeme; -}; - -static inline struct childless *to_childless(struct config_item *item) -{ - return item ?
container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; -} - -CONFIGFS_ATTR_STRUCT(childless); -#define CHILDLESS_ATTR(_name, _mode, _show, _store) \ -struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store) -#define CHILDLESS_ATTR_RO(_name, _show) \ -struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show); - -static ssize_t childless_showme_read(struct childless *childless, - char *page) -{ - ssize_t pos; - - pos = sprintf(page, "%d\n", childless->showme); - childless->showme++; - - return pos; -} - -static ssize_t childless_storeme_read(struct childless *childless, - char *page) -{ - return sprintf(page, "%d\n", childless->storeme); -} - -static ssize_t childless_storeme_write(struct childless *childless, - const char *page, - size_t count) -{ - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - childless->storeme = tmp; - - return count; -} - -static ssize_t childless_description_read(struct childless *childless, - char *page) -{ - return sprintf(page, -"[01-childless]\n" -"\n" -"The childless subsystem is the simplest possible subsystem in\n" -"configfs. It does not support the creation of child config_items.\n" -"It only has a few attributes. In fact, it isn't much different\n" -"than a directory in /proc.\n"); -} - -CHILDLESS_ATTR_RO(showme, childless_showme_read); -CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read, - childless_storeme_write); -CHILDLESS_ATTR_RO(description, childless_description_read); - -static struct configfs_attribute *childless_attrs[] = { - &childless_attr_showme.attr, - &childless_attr_storeme.attr, - &childless_attr_description.attr, - NULL, -}; - -CONFIGFS_ATTR_OPS(childless); -static struct configfs_item_operations childless_item_ops = { - .show_attribute = childless_attr_show, - .store_attribute = childless_attr_store, -}; - -static struct config_item_type childless_type = { - .ct_item_ops = &childless_item_ops, - .ct_attrs = childless_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct childless childless_subsys = { - .subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "01-childless", - .ci_type = &childless_type, - }, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 02-simple-children - * - * This example merely has a simple one-attribute child. Note that - * there is no extra attribute structure, as the child's attribute is - * known from the get-go. Also, there is no container for the - * subsystem, as it has no attributes of its own. - */ - -struct simple_child { - struct config_item item; - int storeme; -}; - -static inline struct simple_child *to_simple_child(struct config_item *item) -{ - return item ? 
container_of(item, struct simple_child, item) : NULL; -} - -static struct configfs_attribute simple_child_attr_storeme = { - .ca_owner = THIS_MODULE, - .ca_name = "storeme", - .ca_mode = S_IRUGO | S_IWUSR, -}; - -static struct configfs_attribute *simple_child_attrs[] = { - &simple_child_attr_storeme, - NULL, -}; - -static ssize_t simple_child_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - ssize_t count; - struct simple_child *simple_child = to_simple_child(item); - - count = sprintf(page, "%d\n", simple_child->storeme); - - return count; -} - -static ssize_t simple_child_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct simple_child *simple_child = to_simple_child(item); - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - simple_child->storeme = tmp; - - return count; -} - -static void simple_child_release(struct config_item *item) -{ - kfree(to_simple_child(item)); -} - -static struct configfs_item_operations simple_child_item_ops = { - .release = simple_child_release, - .show_attribute = simple_child_attr_show, - .store_attribute = simple_child_attr_store, -}; - -static struct config_item_type simple_child_type = { - .ct_item_ops = &simple_child_item_ops, - .ct_attrs = simple_child_attrs, - .ct_owner = THIS_MODULE, -}; - - -struct simple_children { - struct config_group group; -}; - -static inline struct simple_children *to_simple_children(struct config_item *item) -{ - return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; -} - -static struct config_item *simple_children_make_item(struct config_group *group, const char *name) -{ - struct simple_child *simple_child; - - simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); - if (!simple_child) - return ERR_PTR(-ENOMEM); - - config_item_init_type_name(&simple_child->item, name, - &simple_child_type); - - simple_child->storeme = 0; - - return &simple_child->item; -} - -static struct configfs_attribute simple_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *simple_children_attrs[] = { - &simple_children_attr_description, - NULL, -}; - -static ssize_t simple_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[02-simple-children]\n" -"\n" -"This subsystem allows the creation of child config_items. These\n" -"items have only one attribute that is readable and writeable.\n"); -} - -static void simple_children_release(struct config_item *item) -{ - kfree(to_simple_children(item)); -} - -static struct configfs_item_operations simple_children_item_ops = { - .release = simple_children_release, - .show_attribute = simple_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. 
- */ -static struct configfs_group_operations simple_children_group_ops = { - .make_item = simple_children_make_item, -}; - -static struct config_item_type simple_children_type = { - .ct_item_ops = &simple_children_item_ops, - .ct_group_ops = &simple_children_group_ops, - .ct_attrs = simple_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem simple_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "02-simple-children", - .ci_type = &simple_children_type, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 03-group-children - * - * This example reuses the simple_children group from above. However, - * the simple_children group is not the subsystem itself, it is a - * child of the subsystem. Creation of a group in the subsystem creates - * a new simple_children group. That group can then have simple_child - * children of its own. - */ - -static struct config_group *group_children_make_group(struct config_group *group, const char *name) -{ - struct simple_children *simple_children; - - simple_children = kzalloc(sizeof(struct simple_children), - GFP_KERNEL); - if (!simple_children) - return ERR_PTR(-ENOMEM); - - config_group_init_type_name(&simple_children->group, name, - &simple_children_type); - - return &simple_children->group; -} - -static struct configfs_attribute group_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *group_children_attrs[] = { - &group_children_attr_description, - NULL, -}; - -static ssize_t group_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[03-group-children]\n" -"\n" -"This subsystem allows the creation of child config_groups. These\n" -"groups are like the subsystem simple-children.\n"); -} - -static struct configfs_item_operations group_children_item_ops = { - .show_attribute = group_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations group_children_group_ops = { - .make_group = group_children_make_group, -}; - -static struct config_item_type group_children_type = { - .ct_item_ops = &group_children_item_ops, - .ct_group_ops = &group_children_group_ops, - .ct_attrs = group_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem group_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "03-group-children", - .ci_type = &group_children_type, - }, - }, -}; - -/* ----------------------------------------------------------------- */ - -/* - * We're now done with our subsystem definitions. - * For convenience in this module, here's a list of them all. It - * allows the init function to easily register them. Most modules - * will only have one subsystem, and will only call register_subsystem - * on it directly. 
- */ -static struct configfs_subsystem *example_subsys[] = { - &childless_subsys.subsys, - &simple_children_subsys, - &group_children_subsys, - NULL, -}; - -static int __init configfs_example_init(void) -{ - int ret; - int i; - struct configfs_subsystem *subsys; - - for (i = 0; example_subsys[i]; i++) { - subsys = example_subsys[i]; - - config_group_init(&subsys->su_group); - mutex_init(&subsys->su_mutex); - ret = configfs_register_subsystem(subsys); - if (ret) { - printk(KERN_ERR "Error %d while registering subsystem %s\n", - ret, - subsys->su_group.cg_item.ci_namebuf); - goto out_unregister; - } - } - - return 0; - -out_unregister: - for (i--; i >= 0; i--) - configfs_unregister_subsystem(example_subsys[i]); - - return ret; -} - -static void __exit configfs_example_exit(void) -{ - int i; - - for (i = 0; example_subsys[i]; i++) - configfs_unregister_subsystem(example_subsys[i]); -} - -module_init(configfs_example_init); -module_exit(configfs_example_exit); -MODULE_LICENSE("GPL"); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 106ca589e90a..d39099ea7df7 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -65,7 +65,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf { struct configfs_attribute * attr = to_attr(dentry); struct config_item * item = to_item(dentry->d_parent); - struct configfs_item_operations * ops = buffer->ops; int ret = 0; ssize_t count; @@ -74,10 +73,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf if (!buffer->page) return -ENOMEM; - if (ops->show_attribute) - count = ops->show_attribute(item, attr, buffer->page); - else - count = attr->show(item, buffer->page); + count = attr->show(item, buffer->page); buffer->needs_read_fill = 0; BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); @@ -175,10 +171,7 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size { struct configfs_attribute * attr = to_attr(dentry); struct config_item * item = to_item(dentry->d_parent); - struct configfs_item_operations * ops = buffer->ops; - if (ops->store_attribute) - return ops->store_attribute(item, attr, buffer->page, count); return attr->store(item, buffer->page, count); } @@ -243,8 +236,7 @@ static int check_perm(struct inode * inode, struct file * file) * and we must have a store method. */ if (file->f_mode & FMODE_WRITE) { - if (!(inode->i_mode & S_IWUGO) || - (!ops->store_attribute && !attr->store)) + if (!(inode->i_mode & S_IWUGO) || !attr->store) goto Eaccess; } @@ -254,8 +246,7 @@ static int check_perm(struct inode * inode, struct file * file) * must be a show method for it. */ if (file->f_mode & FMODE_READ) { - if (!(inode->i_mode & S_IRUGO) || - (!ops->show_attribute && !attr->show)) + if (!(inode->i_mode & S_IRUGO) || !attr->show) goto Eaccess; } diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 85e9956a86de..a8a335b7fce0 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -154,86 +154,6 @@ static struct configfs_attribute _pfx##attr_##_name = { \ .store = _pfx##_name##_store, \ } -/* - * Users often need to create attribute structures for their configurable - * attributes, containing a configfs_attribute member and function pointers - * for the show() and store() operations on that attribute. If they don't - * need anything else on the extended attribute structure, they can use - * this macro to define it The argument _item is the name of the - * config_item structure. 
- */ -#define CONFIGFS_ATTR_STRUCT(_item) \ -struct _item##_attribute { \ - struct configfs_attribute attr; \ - ssize_t (*show)(struct _item *, char *); \ - ssize_t (*store)(struct _item *, const char *, size_t); \ -} - -/* - * With the extended attribute structure, users can use this macro - * (similar to sysfs' __ATTR) to make defining attributes easier. - * An example: - * #define MYITEM_ATTR(_name, _mode, _show, _store) \ - * struct myitem_attribute childless_attr_##_name = \ - * __CONFIGFS_ATTR(_name, _mode, _show, _store) - */ -#define __CONFIGFS_ATTR(_name, _mode, _show, _store) \ -{ \ - .attr = { \ - .ca_name = __stringify(_name), \ - .ca_mode = _mode, \ - .ca_owner = THIS_MODULE, \ - }, \ - .show = _show, \ - .store = _store, \ -} -/* Here is a readonly version, only requiring a show() operation */ -#define __CONFIGFS_ATTR_RO(_name, _show) \ -{ \ - .attr = { \ - .ca_name = __stringify(_name), \ - .ca_mode = 0444, \ - .ca_owner = THIS_MODULE, \ - }, \ - .show = _show, \ -} - -/* - * With these extended attributes, the simple show_attribute() and - * store_attribute() operations need to call the show() and store() of the - * attributes. This is a common pattern, so we provide a macro to define - * them. The argument _item is the name of the config_item structure. - * This macro expects the attributes to be named "struct <item>_attribute" - * and the function to_<item>() to exist; - */ -#define CONFIGFS_ATTR_OPS(_item) \ -static ssize_t _item##_attr_show(struct config_item *item, \ - struct configfs_attribute *attr, \ - char *page) \ -{ \ - struct _item *_item = to_##_item(item); \ - struct _item##_attribute *_item##_attr = \ - container_of(attr, struct _item##_attribute, attr); \ - ssize_t ret = 0; \ - \ - if (_item##_attr->show) \ - ret = _item##_attr->show(_item, page); \ - return ret; \ -} \ -static ssize_t _item##_attr_store(struct config_item *item, \ - struct configfs_attribute *attr, \ - const char *page, size_t count) \ -{ \ - struct _item *_item = to_##_item(item); \ - struct _item##_attribute *_item##_attr = \ - container_of(attr, struct _item##_attribute, attr); \ - ssize_t ret = -EINVAL; \ - \ - if (_item##_attr->store) \ - ret = _item##_attr->store(_item, page, count); \ - return ret; \ -} - /* * If allow_link() exists, the item can symlink(2) out to other * items. If the item is a group, it may support mkdir(2). @@ -250,8 +170,6 @@ static ssize_t _item##_attr_store(struct config_item *item, \ */ struct configfs_item_operations { void (*release)(struct config_item *); - ssize_t (*show_attribute)(struct config_item *, struct configfs_attribute *,char *); - ssize_t (*store_attribute)(struct config_item *,struct configfs_attribute *,const char *, size_t); int (*allow_link)(struct config_item *src, struct config_item *target); int (*drop_link)(struct config_item *src, struct config_item *target); }; diff --git a/samples/Kconfig b/samples/Kconfig index 224ebb46bed5..d54f28c6dc5e 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -70,4 +70,10 @@ config SAMPLE_LIVEPATCH Builds a sample live patch that replaces the procfs handler for /proc/cmdline to print "this has been live patched". +config SAMPLE_CONFIGFS + tristate "Build configfs sample -- loadable modules only" + depends on CONFIGFS_FS && m + help + Builds a sample configfs interface.
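The sample enabled by this Kconfig entry registers its subsystems at module load using the canonical configfs pattern; a condensed sketch (error reporting trimmed, names as in the sample):

static struct configfs_subsystem *example_subsys[] = {
	&childless_subsys.subsys,
	&simple_children_subsys,
	&group_children_subsys,
	NULL,
};

static int __init configfs_example_init(void)
{
	int ret, i;

	for (i = 0; example_subsys[i]; i++) {
		struct configfs_subsystem *subsys = example_subsys[i];

		/* Root group and mutex must be initialised before
		 * the subsystem is registered. */
		config_group_init(&subsys->su_group);
		mutex_init(&subsys->su_mutex);
		ret = configfs_register_subsystem(subsys);
		if (ret)
			goto out_unregister;
	}
	return 0;

out_unregister:
	while (--i >= 0)
		configfs_unregister_subsystem(example_subsys[i]);
	return ret;
}
module_init(configfs_example_init);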
+ endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index f00257bcc5a7..48001d7e23f0 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,4 +1,5 @@ # Makefile for Linux samples code obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ - hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ + hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ + configfs/ diff --git a/samples/configfs/Makefile b/samples/configfs/Makefile new file mode 100644 index 000000000000..a9afd99630fc --- /dev/null +++ b/samples/configfs/Makefile @@ -0,0 +1,2 @@ + +obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs_sample.o diff --git a/samples/configfs/configfs_sample.c b/samples/configfs/configfs_sample.c new file mode 100644 index 000000000000..1ea33119e532 --- /dev/null +++ b/samples/configfs/configfs_sample.c @@ -0,0 +1,404 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_sample.c - This file is a demonstration module + * containing a number of configfs subsystems. It uses the helper + * macros defined by configfs.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include <linux/configfs.h> + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ?
container_of(to_configfs_subsystem(to_config_group(item)), + struct childless, subsys) : NULL; +} + +static ssize_t childless_showme_show(struct config_item *item, char *page) +{ + struct childless *childless = to_childless(item); + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", to_childless(item)->storeme); +} + +static ssize_t childless_storeme_store(struct config_item *item, + const char *page, size_t count) +{ + struct childless *childless = to_childless(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_show(struct config_item *item, char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +CONFIGFS_ATTR_RO(childless_, showme); +CONFIGFS_ATTR(childless_, storeme); +CONFIGFS_ATTR_RO(childless_, description); + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme, + &childless_attr_storeme, + &childless_attr_description, + NULL, +}; + +static struct config_item_type childless_type = { + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = { + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? 
container_of(item, struct simple_child, item) : NULL; +} + +static ssize_t simple_child_storeme_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", to_simple_child(item)->storeme); +} + +static ssize_t simple_child_storeme_store(struct config_item *item, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +CONFIGFS_ATTR(simple_child_, storeme); + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, +}; + +static struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), + struct simple_children, group) : NULL; +} + +static struct config_item *simple_children_make_item(struct config_group *group, + const char *name) +{ + struct simple_child *simple_child; + + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static ssize_t simple_children_description_show(struct config_item *item, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +CONFIGFS_ATTR_RO(simple_children_, description); + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + +static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. 
+ */ + +static struct config_group *group_children_make_group( + struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kzalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static ssize_t group_children_description_show(struct config_item *item, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. These\n" +"groups are like the subsystem simple-children.\n"); +} + +CONFIGFS_ATTR_RO(group_children_, description); + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. + */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (i--; i >= 0; i--) + configfs_unregister_subsystem(example_subsys[i]); + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) + configfs_unregister_subsystem(example_subsys[i]); +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 027c71bbae3a6eeff00c11d1b708593a5c790314 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Tue, 13 Oct 2015 23:13:55 -0700 Subject: Input: improve autorepeat initialization Add new function input_enable_softrepeat() that allows drivers to initialize their own values for input_dev->rep[REP_DELAY] and input_dev->rep[REP_PERIOD], but also use the software autorepeat functionality from input.c. 
For example, a HID driver could do: static void xyz_input_configured(struct hid_device *hid, struct hid_input *hidinput) { input_enable_softrepeat(hidinput->input, 400, 100); } static struct hid_driver xyz_driver = { .input_configured = xyz_input_configured, }; Signed-off-by: Petri Gynther Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 25 +++++++++++++++++++------ include/linux/input.h | 2 ++ 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 5391abd28b27..880605959aa6 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -2044,6 +2044,23 @@ static void devm_input_device_unregister(struct device *dev, void *res) __input_unregister_device(input); } +/** + * input_enable_softrepeat - enable software autorepeat + * @dev: input device + * @delay: repeat delay + * @period: repeat period + * + * Enable software autorepeat on the input device. + */ +void input_enable_softrepeat(struct input_dev *dev, int delay, int period) +{ + dev->timer.data = (unsigned long) dev; + dev->timer.function = input_repeat_key; + dev->rep[REP_DELAY] = delay; + dev->rep[REP_PERIOD] = period; +} +EXPORT_SYMBOL(input_enable_softrepeat); + /** * input_register_device - register device with input core * @dev: device to be registered @@ -2108,12 +2125,8 @@ int input_register_device(struct input_dev *dev) * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. */ - if (!dev->rep[REP_DELAY] && !dev->rep[REP_PERIOD]) { - dev->timer.data = (long) dev; - dev->timer.function = input_repeat_key; - dev->rep[REP_DELAY] = 250; - dev->rep[REP_PERIOD] = 33; - } + if (!dev->rep[REP_DELAY] && !dev->rep[REP_PERIOD]) + input_enable_softrepeat(dev, 250, 33); if (!dev->getkeycode) dev->getkeycode = input_default_getkeycode; diff --git a/include/linux/input.h b/include/linux/input.h index 82ce323b9986..1e967694e9a5 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -469,6 +469,8 @@ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke); +void input_enable_softrepeat(struct input_dev *dev, int delay, int period); + extern struct class input_class; /** -- cgit v1.2.3 From 47ec6e5a5f57f96d7d382e2d9f8dc5a5bdb45259 Mon Sep 17 00:00:00 2001 From: Sylvain Rochet Date: Tue, 13 Oct 2015 23:24:36 -0700 Subject: Input: rotary_encoder - add wake up support This patch adds wake up support to GPIO rotary encoders. Signed-off-by: Sylvain Rochet Reviewed-by: Johan Hovold Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/rotary-encoder.txt | 1 + Documentation/input/rotary-encoder.txt | 1 + drivers/input/misc/rotary_encoder.c | 37 ++++++++++++++++++++++ include/linux/rotary_encoder.h | 1 + 4 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/input/rotary-encoder.txt b/Documentation/devicetree/bindings/input/rotary-encoder.txt index 331549593ed5..891ddba2d792 100644 --- a/Documentation/devicetree/bindings/input/rotary-encoder.txt +++ b/Documentation/devicetree/bindings/input/rotary-encoder.txt @@ -15,6 +15,7 @@ Optional properties: - rotary-encoder,rollover: Automatic rollover when the rotary value becomes greater than the specified steps or smaller than 0. For absolute axis only. - rotary-encoder,half-period: Makes the driver work on half-period mode. 
+- wakeup-source: Boolean, rotary encoder can wake up the system. See Documentation/input/rotary-encoder.txt for more information. diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt index 5737e3590adb..bddbee188624 100644 --- a/Documentation/input/rotary-encoder.txt +++ b/Documentation/input/rotary-encoder.txt @@ -109,6 +109,7 @@ static struct rotary_encoder_platform_data my_rotary_encoder_info = { .inverted_a = 0, .inverted_b = 0, .half_period = false, + .wakeup_source = false, }; static struct platform_device rotary_encoder_device = { diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index f27f81ee84ed..d1665544e109 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -26,6 +26,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/of_gpio.h> +#include <linux/pm.h> #define DRV_NAME "rotary-encoder" @@ -180,6 +181,8 @@ static struct rotary_encoder_platform_data *rotary_encoder_parse_dt(struct devic "rotary-encoder,rollover", NULL); pdata->half_period = !!of_get_property(np, "rotary-encoder,half-period", NULL); + pdata->wakeup_source = !!of_get_property(np, + "wakeup-source", NULL); return pdata; } @@ -280,6 +283,8 @@ static int rotary_encoder_probe(struct platform_device *pdev) goto exit_free_irq_b; } + device_init_wakeup(&pdev->dev, pdata->wakeup_source); + platform_set_drvdata(pdev, encoder); return 0; @@ -306,6 +311,8 @@ static int rotary_encoder_remove(struct platform_device *pdev) struct rotary_encoder *encoder = platform_get_drvdata(pdev); const struct rotary_encoder_platform_data *pdata = encoder->pdata; + device_init_wakeup(&pdev->dev, false); + free_irq(encoder->irq_a, encoder); free_irq(encoder->irq_b, encoder); gpio_free(pdata->gpio_a); @@ -320,11 +327,41 @@ static int rotary_encoder_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int rotary_encoder_suspend(struct device *dev) +{ + struct rotary_encoder *encoder = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) { + enable_irq_wake(encoder->irq_a); + enable_irq_wake(encoder->irq_b); + } + + return 0; +} + +static int rotary_encoder_resume(struct device *dev) +{ + struct rotary_encoder *encoder = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) { + disable_irq_wake(encoder->irq_a); + disable_irq_wake(encoder->irq_b); + } + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(rotary_encoder_pm_ops, + rotary_encoder_suspend, rotary_encoder_resume); + static struct platform_driver rotary_encoder_driver = { .probe = rotary_encoder_probe, .remove = rotary_encoder_remove, .driver = { .name = DRV_NAME, + .pm = &rotary_encoder_pm_ops, .of_match_table = of_match_ptr(rotary_encoder_of_match), } }; diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h index 3f594dce5716..b33f2d2a708f 100644 --- a/include/linux/rotary_encoder.h +++ b/include/linux/rotary_encoder.h @@ -11,6 +11,7 @@ struct rotary_encoder_platform_data { bool relative_axis; bool rollover; bool half_period; + bool wakeup_source; }; #endif /* __ROTARY_ENCODER_H__ */ -- cgit v1.2.3 From 0c5a69f432ba1e586ac6ae5e4311c2f1cbd051fa Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 7 Oct 2015 10:54:36 +0200 Subject: s390/compiler.h: Fix sparse vs. hotpatch sparse does not understand the s390 specific hotpatch attribute and floods the log with messages like include/uapi/linux/swab.h:92:8: error: attribute 'hotpatch': unknown attribute Let's just not use it if __CHECKER__ is defined. 
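Sparse defines __CHECKER__ while it parses a translation unit, so a preprocessor guard like the one in this patch is the usual way to hide compiler-only extensions from it. A minimal sketch of the pattern follows; the MY_CC_HAS_HOTPATCH symbol and the my_notrace/my_helper names are invented for illustration and are not part of the patch:

#if defined(MY_CC_HAS_HOTPATCH) && !defined(__CHECKER__)
/* real compiler run: use the s390-only hotpatch attribute */
#define my_notrace	__attribute__((hotpatch(0, 0)))
#else
/* sparse (and compilers without hotpatch): portable fallback */
#define my_notrace	__attribute__((no_instrument_function))
#endif

static my_notrace void my_helper(void)
{
	/* body that must not be instrumented */
}

The same shape works for any attribute sparse does not know about: the checker always takes the #else branch, so its log stays clean while the compiler still sees the attribute.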
Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c836eb2dc44d..449cb674c7fa 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -56,7 +56,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include <linux/compiler-gcc.h> #endif -#ifdef CC_USING_HOTPATCH +#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) #define notrace __attribute__((hotpatch(0,0))) #else #define notrace __attribute__((no_instrument_function)) -- cgit v1.2.3 From 2d4d1eea540b27c72488fd1914674c42473d53df Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 8 Oct 2015 09:26:50 -0700 Subject: lib/mpi: Add mpi sgl helpers Add mpi_read_raw_from_sgl and mpi_write_to_sgl helpers. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- include/linux/mpi.h | 4 ++ lib/mpi/mpicoder.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 641b7d6fd096..8b8269fa8907 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -31,6 +31,7 @@ #define G10_MPI_H #include <linux/types.h> +#include <linux/scatterlist.h> /* DSI defines */ @@ -78,6 +79,7 @@ void mpi_swap(MPI a, MPI b); MPI do_encode_md(const void *sha_buffer, unsigned nbits); MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); +MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); int mpi_fromstr(MPI val, const char *str); u32 mpi_get_keyid(MPI a, u32 *keyid); void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); @@ -85,6 +87,8 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign); +int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned *nbytes, + int *sign); #define log_mpidump g10_log_mpidump diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 95c52a95259e..c20ef27ad876 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -319,3 +319,199 @@ int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign) return 0; } EXPORT_SYMBOL_GPL(mpi_set_buffer); + +/** + * mpi_write_to_sgl() - Function exports MPI to an sgl (msb first) + * + * This function works in the same way as the mpi_read_buffer, but it + * takes an sgl instead of u8 * buf. + * + * @a: a multi precision integer + * @sgl: scatterlist to write to. Needs to be at least + * mpi_get_size(a) long. + * @nbytes: in/out param - it has to be set to the maximum number of + * bytes that can be written to sgl. This has to be at least + * the size of the integer a. On return it receives the actual + * length of the data written. + * @sign: if not NULL, it will be set to the sign of a. 
+ * + * Return: 0 on success or error code in case of error + */ +int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, + int *sign) +{ + u8 *p, *p2; + mpi_limb_t alimb, alimb2; + unsigned int n = mpi_get_size(a); + int i, x, y = 0, lzeros = 0, buf_len; + + if (!nbytes || *nbytes < n) + return -EINVAL; + + if (sign) + *sign = a->sign; + + p = (void *)&a->d[a->nlimbs] - 1; + + for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { + if (!*p) + lzeros++; + else + break; + } + + *nbytes = n - lzeros; + buf_len = sgl->length; + p2 = sg_virt(sgl); + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; + p = (u8 *)&alimb2; +#if BYTES_PER_MPI_LIMB == 4 + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb; +#elif BYTES_PER_MPI_LIMB == 8 + *p++ = alimb >> 56; + *p++ = alimb >> 48; + *p++ = alimb >> 40; + *p++ = alimb >> 32; + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb; +#else +#error please implement for this limb size. +#endif + if (lzeros > 0) { + if (lzeros >= sizeof(alimb)) { + p -= sizeof(alimb); + continue; + } else { + mpi_limb_t *limb1 = (void *)p - sizeof(alimb); + mpi_limb_t *limb2 = (void *)p - sizeof(alimb) + + lzeros; + *limb1 = *limb2; + p -= lzeros; + y = lzeros; + } + lzeros -= sizeof(alimb); + } + + p = p - (sizeof(alimb) - y); + + for (x = 0; x < sizeof(alimb) - y; x++) { + if (!buf_len) { + sgl = sg_next(sgl); + if (!sgl) + return -EINVAL; + buf_len = sgl->length; + p2 = sg_virt(sgl); + } + *p2++ = *p++; + buf_len--; + } + y = 0; + } + return 0; +} +EXPORT_SYMBOL_GPL(mpi_write_to_sgl); + +/* + * mpi_read_raw_from_sgl() - Function allocates an MPI and populates it with + * data from the sgl + * + * This function works in the same way as the mpi_read_raw_data, but it + * takes an sgl instead of void * buffer. i.e. it allocates + * a new MPI and reads the content of the sgl to the MPI. 
+ * + * @sgl: scatterlist to read from + * @len: number of bytes to read + * + * Return: Pointer to a new MPI or NULL on error + */ +MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len) +{ + struct scatterlist *sg; + int x, i, j, z, lzeros, ents; + unsigned int nbits, nlimbs, nbytes; + mpi_limb_t a; + MPI val = NULL; + + lzeros = 0; + ents = sg_nents(sgl); + + for_each_sg(sgl, sg, ents, i) { + const u8 *buff = sg_virt(sg); + int len = sg->length; + + while (len-- && !*buff++) + lzeros++; + + if (len && *buff) + break; + + ents--; + lzeros = 0; + } + + sgl = sg; + + if (!ents) + nbytes = 0; + else + nbytes = len - lzeros; + + nbits = nbytes * 8; + if (nbits > MAX_EXTERN_MPI_BITS) { + pr_info("MPI: mpi too large (%u bits)\n", nbits); + return NULL; + } + + if (nbytes > 0) + nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)); + else + nbits = 0; + + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); + val = mpi_alloc(nlimbs); + if (!val) + return NULL; + + val->nbits = nbits; + val->sign = 0; + val->nlimbs = nlimbs; + + if (nbytes == 0) + return val; + + j = nlimbs - 1; + a = 0; + z = 0; + x = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + x %= BYTES_PER_MPI_LIMB; + + for_each_sg(sgl, sg, ents, i) { + const u8 *buffer = sg_virt(sg) + lzeros; + int len = sg->length - lzeros; + int buf_shift = x; + + if (sg_is_last(sg) && (len % BYTES_PER_MPI_LIMB)) + len += BYTES_PER_MPI_LIMB - (len % BYTES_PER_MPI_LIMB); + + for (; x < len + buf_shift; x++) { + a <<= 8; + a |= *buffer++; + if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) { + val->d[j--] = a; + a = 0; + } + } + z += x; + x = 0; + lzeros = 0; + } + return val; +} +EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); -- cgit v1.2.3 From d28c2b36d6027702585ca93773b3edd6e5f1a5bd Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:44 +0300 Subject: ARM: common: edma: Remove unused functions We no longer have users for these functions so they can be removed. Also remove unused enums from the header file. 
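As a usage note for the lib/mpi patch above: the two new helpers are symmetric, so key material held in a scatterlist can be round-tripped through an MPI. The sketch below is illustrative only; the function name and the single-entry sgl are assumptions, not code from the patch, and it uses only the calls the patch exports plus the existing sg_init_one() and mpi_free():

static int mpi_sgl_roundtrip(void *buf, unsigned int len)
{
	struct scatterlist sg;
	unsigned int nbytes = len;	/* max bytes mpi_write_to_sgl() may emit */
	MPI val;
	int ret;

	sg_init_one(&sg, buf, len);

	/* allocate an MPI and fill it from the (big-endian) sgl data */
	val = mpi_read_raw_from_sgl(&sg, len);
	if (!val)
		return -ENOMEM;

	/* write it back msb-first; nbytes returns the length actually written */
	ret = mpi_write_to_sgl(val, &sg, &nbytes, NULL);

	mpi_free(val);
	return ret;
}

Because mpi_read_raw_from_sgl() strips leading zeros, the value written back can be shorter than len; the updated nbytes reports the real length.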
Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- arch/arm/common/edma.c | 376 ------------------------------------- include/linux/platform_data/edma.h | 33 ---- 2 files changed, 409 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 56fc339571f9..e9c4cb16a47e 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -510,62 +510,6 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) return IRQ_HANDLED; } -static int reserve_contiguous_slots(int ctlr, unsigned int id, - unsigned int num_slots, - unsigned int start_slot) -{ - int i, j; - unsigned int count = num_slots; - int stop_slot = start_slot; - DECLARE_BITMAP(tmp_inuse, EDMA_MAX_PARAMENTRY); - - for (i = start_slot; i < edma_cc[ctlr]->num_slots; ++i) { - j = EDMA_CHAN_SLOT(i); - if (!test_and_set_bit(j, edma_cc[ctlr]->edma_inuse)) { - /* Record our current beginning slot */ - if (count == num_slots) - stop_slot = i; - - count--; - set_bit(j, tmp_inuse); - - if (count == 0) - break; - } else { - clear_bit(j, tmp_inuse); - - if (id == EDMA_CONT_PARAMS_FIXED_EXACT) { - stop_slot = i; - break; - } else { - count = num_slots; - } - } - } - - /* - * We have to clear any bits that we set - * if we run out of parameter RAM slots, i.e. we do find a set - * of contiguous parameter RAM slots but do not find the exact number - * requested as we may reach the total number of parameter RAM slots - */ - if (i == edma_cc[ctlr]->num_slots) - stop_slot = i; - - j = start_slot; - for_each_set_bit_from(j, tmp_inuse, stop_slot) - clear_bit(j, edma_cc[ctlr]->edma_inuse); - - if (count) - return -EBUSY; - - for (j = i - num_slots + 1; j <= i; ++j) - memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(j), - &dummy_paramset, PARM_SIZE); - - return EDMA_CTLR_CHAN(ctlr, i - num_slots + 1); -} - static int prepare_unused_channel_list(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev); @@ -818,185 +762,10 @@ void edma_free_slot(unsigned slot) } EXPORT_SYMBOL(edma_free_slot); - -/** - * edma_alloc_cont_slots - alloc contiguous parameter RAM slots - * The API will return the starting point of a set of - * contiguous parameter RAM slots that have been requested - * - * @id: can only be EDMA_CONT_PARAMS_ANY or EDMA_CONT_PARAMS_FIXED_EXACT - * or EDMA_CONT_PARAMS_FIXED_NOT_EXACT - * @count: number of contiguous Parameter RAM slots - * @slot - the start value of Parameter RAM slot that should be passed if id - * is EDMA_CONT_PARAMS_FIXED_EXACT or EDMA_CONT_PARAMS_FIXED_NOT_EXACT - * - * If id is EDMA_CONT_PARAMS_ANY then the API starts looking for a set of - * contiguous Parameter RAM slots from parameter RAM 64 in the case of - * DaVinci SOCs and 32 in the case of DA8xx SOCs. - * - * If id is EDMA_CONT_PARAMS_FIXED_EXACT then the API starts looking for a - * set of contiguous parameter RAM slots from the "slot" that is passed as an - * argument to the API. - * - * If id is EDMA_CONT_PARAMS_FIXED_NOT_EXACT then the API initially - * starts looking for a set of contiguous parameter RAM slots from the "slot" - * that is passed as an argument to the API. 
On failure the API will try to - * find a set of contiguous Parameter RAM slots from the remaining Parameter - * RAM slots - */ -int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count) -{ - /* - * The start slot requested should be greater than - * the number of channels and lesser than the total number - * of slots - */ - if ((id != EDMA_CONT_PARAMS_ANY) && - (slot < edma_cc[ctlr]->num_channels || - slot >= edma_cc[ctlr]->num_slots)) - return -EINVAL; - - /* - * The number of parameter RAM slots requested cannot be less than 1 - * and cannot be more than the number of slots minus the number of - * channels - */ - if (count < 1 || count > - (edma_cc[ctlr]->num_slots - edma_cc[ctlr]->num_channels)) - return -EINVAL; - - switch (id) { - case EDMA_CONT_PARAMS_ANY: - return reserve_contiguous_slots(ctlr, id, count, - edma_cc[ctlr]->num_channels); - case EDMA_CONT_PARAMS_FIXED_EXACT: - case EDMA_CONT_PARAMS_FIXED_NOT_EXACT: - return reserve_contiguous_slots(ctlr, id, count, slot); - default: - return -EINVAL; - } - -} -EXPORT_SYMBOL(edma_alloc_cont_slots); - -/** - * edma_free_cont_slots - deallocate DMA parameter RAM slots - * @slot: first parameter RAM of a set of parameter RAM slots to be freed - * @count: the number of contiguous parameter RAM slots to be freed - * - * This deallocates the parameter RAM slots allocated by - * edma_alloc_cont_slots. - * Callers/applications need to keep track of sets of contiguous - * parameter RAM slots that have been allocated using the edma_alloc_cont_slots - * API. - * Callers are responsible for ensuring the slots are inactive, and will - * not be activated. - */ -int edma_free_cont_slots(unsigned slot, int count) -{ - unsigned ctlr, slot_to_free; - int i; - - ctlr = EDMA_CTLR(slot); - slot = EDMA_CHAN_SLOT(slot); - - if (slot < edma_cc[ctlr]->num_channels || - slot >= edma_cc[ctlr]->num_slots || - count < 1) - return -EINVAL; - - for (i = slot; i < slot + count; ++i) { - ctlr = EDMA_CTLR(i); - slot_to_free = EDMA_CHAN_SLOT(i); - - memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(slot_to_free), - &dummy_paramset, PARM_SIZE); - clear_bit(slot_to_free, edma_cc[ctlr]->edma_inuse); - } - - return 0; -} -EXPORT_SYMBOL(edma_free_cont_slots); - /*-----------------------------------------------------------------------*/ /* Parameter RAM operations (i) -- read/write partial slots */ -/** - * edma_set_src - set initial DMA source address in parameter RAM slot - * @slot: parameter RAM slot being configured - * @src_port: physical address of source (memory, controller FIFO, etc) - * @addressMode: INCR, except in very rare cases - * @fifoWidth: ignored unless @addressMode is FIFO, else specifies the - * width to use when addressing the fifo (e.g. W8BIT, W32BIT) - * - * Note that the source address is modified during the DMA transfer - * according to edma_set_src_index(). 
- */ -void edma_set_src(unsigned slot, dma_addr_t src_port, - enum address_mode mode, enum fifo_width width) -{ - unsigned ctlr; - - ctlr = EDMA_CTLR(slot); - slot = EDMA_CHAN_SLOT(slot); - - if (slot < edma_cc[ctlr]->num_slots) { - unsigned int i = edma_parm_read(ctlr, PARM_OPT, slot); - - if (mode) { - /* set SAM and program FWID */ - i = (i & ~(EDMA_FWID)) | (SAM | ((width & 0x7) << 8)); - } else { - /* clear SAM */ - i &= ~SAM; - } - edma_parm_write(ctlr, PARM_OPT, slot, i); - - /* set the source port address - in source register of param structure */ - edma_parm_write(ctlr, PARM_SRC, slot, src_port); - } -} -EXPORT_SYMBOL(edma_set_src); - -/** - * edma_set_dest - set initial DMA destination address in parameter RAM slot - * @slot: parameter RAM slot being configured - * @dest_port: physical address of destination (memory, controller FIFO, etc) - * @addressMode: INCR, except in very rare cases - * @fifoWidth: ignored unless @addressMode is FIFO, else specifies the - * width to use when addressing the fifo (e.g. W8BIT, W32BIT) - * - * Note that the destination address is modified during the DMA transfer - * according to edma_set_dest_index(). - */ -void edma_set_dest(unsigned slot, dma_addr_t dest_port, - enum address_mode mode, enum fifo_width width) -{ - unsigned ctlr; - - ctlr = EDMA_CTLR(slot); - slot = EDMA_CHAN_SLOT(slot); - - if (slot < edma_cc[ctlr]->num_slots) { - unsigned int i = edma_parm_read(ctlr, PARM_OPT, slot); - - if (mode) { - /* set DAM and program FWID */ - i = (i & ~(EDMA_FWID)) | (DAM | ((width & 0x7) << 8)); - } else { - /* clear DAM */ - i &= ~DAM; - } - edma_parm_write(ctlr, PARM_OPT, slot, i); - /* set the destination port address - in dest register of param structure */ - edma_parm_write(ctlr, PARM_DST, slot, dest_port); - } -} -EXPORT_SYMBOL(edma_set_dest); - /** * edma_get_position - returns the current transfer point * @slot: parameter RAM slot being examined @@ -1016,110 +785,6 @@ dma_addr_t edma_get_position(unsigned slot, bool dst) return edma_read(ctlr, offs); } -/** - * edma_set_src_index - configure DMA source address indexing - * @slot: parameter RAM slot being configured - * @src_bidx: byte offset between source arrays in a frame - * @src_cidx: byte offset between source frames in a block - * - * Offsets are specified to support either contiguous or discontiguous - * memory transfers, or repeated access to a hardware register, as needed. - * When accessing hardware registers, both offsets are normally zero. - */ -void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx) -{ - unsigned ctlr; - - ctlr = EDMA_CTLR(slot); - slot = EDMA_CHAN_SLOT(slot); - - if (slot < edma_cc[ctlr]->num_slots) { - edma_parm_modify(ctlr, PARM_SRC_DST_BIDX, slot, - 0xffff0000, src_bidx); - edma_parm_modify(ctlr, PARM_SRC_DST_CIDX, slot, - 0xffff0000, src_cidx); - } -} -EXPORT_SYMBOL(edma_set_src_index); - -/** - * edma_set_dest_index - configure DMA destination address indexing - * @slot: parameter RAM slot being configured - * @dest_bidx: byte offset between destination arrays in a frame - * @dest_cidx: byte offset between destination frames in a block - * - * Offsets are specified to support either contiguous or discontiguous - * memory transfers, or repeated access to a hardware register, as needed. - * When accessing hardware registers, both offsets are normally zero. 
- */ -void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx) -{ - unsigned ctlr; - - ctlr = EDMA_CTLR(slot); - slot = EDMA_CHAN_SLOT(slot); - - if (slot < edma_cc[ctlr]->num_slots) { - edma_parm_modify(ctlr, PARM_SRC_DST_BIDX, slot, - 0x0000ffff, dest_bidx << 16); - edma_parm_modify(ctlr, PARM_SRC_DST_CIDX, slot, - 0x0000ffff, dest_cidx << 16); - } -} -EXPORT_SYMBOL(edma_set_dest_index); - -/** - * edma_set_transfer_params - configure DMA transfer parameters - * @slot: parameter RAM slot being configured - * @acnt: how many bytes per array (at least one) - * @bcnt: how many arrays per frame (at least one) - * @ccnt: how many frames per block (at least one) - * @bcnt_rld: used only for A-Synchronized transfers; this specifies - * the value to reload into bcnt when it decrements to zero - * @sync_mode: ASYNC or ABSYNC - * - * See the EDMA3 documentation to understand how to configure and link - * transfers using the fields in PaRAM slots. If you are not doing it - * all at once with edma_write_slot(), you will use this routine - * plus two calls each for source and destination, setting the initial - * address and saying how to index that address. - * - * An example of an A-Synchronized transfer is a serial link using a - * single word shift register. In that case, @acnt would be equal to - * that word size; the serial controller issues a DMA synchronization - * event to transfer each word, and memory access by the DMA transfer - * controller will be word-at-a-time. - * - * An example of an AB-Synchronized transfer is a device using a FIFO. - * In that case, @acnt equals the FIFO width and @bcnt equals its depth. - * The controller with the FIFO issues DMA synchronization events when - * the FIFO threshold is reached, and the DMA transfer controller will - * transfer one frame to (or from) the FIFO. It will probably use - * efficient burst modes to access memory. - */ -void edma_set_transfer_params(unsigned slot, - u16 acnt, u16 bcnt, u16 ccnt, - u16 bcnt_rld, enum sync_dimension sync_mode) -{ - unsigned ctlr; - - ctlr = EDMA_CTLR(slot); - slot = EDMA_CHAN_SLOT(slot); - - if (slot < edma_cc[ctlr]->num_slots) { - edma_parm_modify(ctlr, PARM_LINK_BCNTRLD, slot, - 0x0000ffff, bcnt_rld << 16); - if (sync_mode == ASYNC) - edma_parm_and(ctlr, PARM_OPT, slot, ~SYNCDIM); - else - edma_parm_or(ctlr, PARM_OPT, slot, SYNCDIM); - /* Set the acount, bcount, ccount registers */ - edma_parm_write(ctlr, PARM_A_B_CNT, slot, (bcnt << 16) | acnt); - edma_parm_write(ctlr, PARM_CCNT, slot, ccnt); - } -} -EXPORT_SYMBOL(edma_set_transfer_params); - /** * edma_link - link one parameter RAM slot to another * @from: parameter RAM slot originating the link @@ -1145,26 +810,6 @@ void edma_link(unsigned from, unsigned to) } EXPORT_SYMBOL(edma_link); -/** - * edma_unlink - cut link from one parameter RAM slot - * @from: parameter RAM slot originating the link - * - * The originating slot should not be part of any active DMA transfer. - * Its link is set to 0xffff. 
- */ -void edma_unlink(unsigned from) -{ - unsigned ctlr; - - ctlr = EDMA_CTLR(from); - from = EDMA_CHAN_SLOT(from); - - if (from >= edma_cc[ctlr]->num_slots) - return; - edma_parm_or(ctlr, PARM_LINK_BCNTRLD, from, 0xffff); -} -EXPORT_SYMBOL(edma_unlink); - /*-----------------------------------------------------------------------*/ /* Parameter RAM operations (ii) -- read/write whole parameter sets */ @@ -1401,27 +1046,6 @@ void edma_clean_channel(unsigned channel) } EXPORT_SYMBOL(edma_clean_channel); -/* - * edma_clear_event - clear an outstanding event on the DMA channel - * Arguments: - * channel - channel number - */ -void edma_clear_event(unsigned channel) -{ - unsigned ctlr; - - ctlr = EDMA_CTLR(channel); - channel = EDMA_CHAN_SLOT(channel); - - if (channel >= edma_cc[ctlr]->num_channels) - return; - if (channel < 32) - edma_write(ctlr, EDMA_ECR, BIT(channel)); - else - edma_write(ctlr, EDMA_ECRH, BIT(channel - 32)); -} -EXPORT_SYMBOL(edma_clear_event); - /* * edma_assign_channel_eventq - move given channel to desired eventq * Arguments: diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index bdb2710e2aab..c1862423b356 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -72,20 +72,6 @@ struct edmacc_param { #define EDMA_DMA_TC1_ERROR 3 #define EDMA_DMA_TC2_ERROR 4 -enum address_mode { - INCR = 0, - FIFO = 1 -}; - -enum fifo_width { - W8BIT = 0, - W16BIT = 1, - W32BIT = 2, - W64BIT = 3, - W128BIT = 4, - W256BIT = 5 -}; - enum dma_event_q { EVENTQ_0 = 0, EVENTQ_1 = 1, @@ -94,11 +80,6 @@ enum dma_event_q { EVENTQ_DEFAULT = -1 }; -enum sync_dimension { - ASYNC = 0, - ABSYNC = 1 -}; - #define EDMA_CTLR_CHAN(ctlr, chan) (((ctlr) << 16) | (chan)) #define EDMA_CTLR(i) ((i) >> 16) #define EDMA_CHAN_SLOT(i) ((i) & 0xffff) @@ -121,22 +102,9 @@ void edma_free_channel(unsigned channel); int edma_alloc_slot(unsigned ctlr, int slot); void edma_free_slot(unsigned slot); -/* alloc/free a set of contiguous parameter RAM slots */ -int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count); -int edma_free_cont_slots(unsigned slot, int count); - /* calls that operate on part of a parameter RAM slot */ -void edma_set_src(unsigned slot, dma_addr_t src_port, - enum address_mode mode, enum fifo_width); -void edma_set_dest(unsigned slot, dma_addr_t dest_port, - enum address_mode mode, enum fifo_width); dma_addr_t edma_get_position(unsigned slot, bool dst); -void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx); -void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx); -void edma_set_transfer_params(unsigned slot, u16 acnt, u16 bcnt, u16 ccnt, - u16 bcnt_rld, enum sync_dimension sync_mode); void edma_link(unsigned from, unsigned to); -void edma_unlink(unsigned from); /* calls that operate on an entire parameter RAM slot */ void edma_write_slot(unsigned slot, const struct edmacc_param *params); @@ -146,7 +114,6 @@ void edma_read_slot(unsigned slot, struct edmacc_param *params); int edma_start(unsigned channel); void edma_stop(unsigned channel); void edma_clean_channel(unsigned channel); -void edma_clear_event(unsigned channel); void edma_pause(unsigned channel); void edma_resume(unsigned channel); -- cgit v1.2.3 From ca304fa9bb762f091e851d48de43f623c975d47a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:49 +0300 Subject: ARM/dmaengine: edma: Public API to use private struct pointer Instead of relying on indexes pointing to edma private data in the global pointer 
array, pass the private data pointer via the public API. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- arch/arm/common/edma.c | 305 ++++++++++++++++++------------------- drivers/dma/edma.c | 79 +++++----- include/linux/platform_data/edma.h | 38 +++-- 3 files changed, 214 insertions(+), 208 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 0b4c0ee59ed9..03692520812a 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -130,7 +130,7 @@ struct edma { struct edma_soc_info *info; int id; - + bool unused_chan_list_done; /* The edma_inuse bit for each PaRAM slot is clear unless the * channel is in use ... by ARM or DSP, for QDMA, or whatever. */ @@ -264,7 +264,6 @@ static inline void clear_bits(int offset, int len, unsigned long *p) } /*****************************************************************************/ -static struct edma *edma_cc[EDMA_MAX_CC]; static int arch_num_cc; /* dummy param set used to (re)initialize parameter RAM slots */ @@ -490,14 +489,18 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) static int prepare_unused_channel_list(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev); - int i, count, ctlr; + struct edma *cc = data; + int i, count; struct of_phandle_args dma_spec; if (dev->of_node) { + struct platform_device *dma_pdev; + count = of_property_count_strings(dev->of_node, "dma-names"); if (count < 0) return 0; for (i = 0; i < count; i++) { + if (of_parse_phandle_with_args(dev->of_node, "dmas", "#dma-cells", i, &dma_spec)) @@ -508,8 +511,12 @@ static int prepare_unused_channel_list(struct device *dev, void *data) continue; } + dma_pdev = of_find_device_by_node(dma_spec.np); + if (&dma_pdev->dev != cc->dev) + continue; + clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]), - edma_cc[0]->edma_unused); + cc->edma_unused); of_node_put(dma_spec.np); } return 0; @@ -517,11 +524,11 @@ static int prepare_unused_channel_list(struct device *dev, void *data) /* For non-OF case */ for (i = 0; i < pdev->num_resources; i++) { - if ((pdev->resource[i].flags & IORESOURCE_DMA) && - (int)pdev->resource[i].start >= 0) { - ctlr = EDMA_CTLR(pdev->resource[i].start); + struct resource *res = &pdev->resource[i]; + + if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0) { clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start), - edma_cc[ctlr]->edma_unused); + cc->edma_unused); } } @@ -530,8 +537,6 @@ static int prepare_unused_channel_list(struct device *dev, void *data) /*-----------------------------------------------------------------------*/ -static bool unused_chan_list_done; - /* Resource alloc/free: dma channels, parameter RAM slots */ /** @@ -564,77 +569,73 @@ static bool unused_chan_list_done; * * Returns the number of the channel, else negative errno. */ -int edma_alloc_channel(int channel, +int edma_alloc_channel(struct edma *cc, int channel, void (*callback)(unsigned channel, u16 ch_status, void *data), void *data, enum dma_event_q eventq_no) { - unsigned i, done = 0, ctlr = 0; + unsigned done = 0; int ret = 0; - if (!unused_chan_list_done) { + if (!cc->unused_chan_list_done) { /* * Scan all the platform devices to find out the EDMA channels * used and clear them in the unused list, making the rest * available for ARM usage. 
*/ - ret = bus_for_each_dev(&platform_bus_type, NULL, NULL, - prepare_unused_channel_list); + ret = bus_for_each_dev(&platform_bus_type, NULL, cc, + prepare_unused_channel_list); if (ret < 0) return ret; - unused_chan_list_done = true; + cc->unused_chan_list_done = true; } if (channel >= 0) { - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", + __func__, cc->id, EDMA_CTLR(channel)); + return -EINVAL; + } channel = EDMA_CHAN_SLOT(channel); } if (channel < 0) { - for (i = 0; i < arch_num_cc; i++) { - channel = 0; - for (;;) { - channel = find_next_bit(edma_cc[i]->edma_unused, - edma_cc[i]->num_channels, - channel); - if (channel == edma_cc[i]->num_channels) - break; - if (!test_and_set_bit(channel, - edma_cc[i]->edma_inuse)) { - done = 1; - ctlr = i; - break; - } - channel++; - } - if (done) + channel = 0; + for (;;) { + channel = find_next_bit(cc->edma_unused, + cc->num_channels, channel); + if (channel == cc->num_channels) + break; + if (!test_and_set_bit(channel, cc->edma_inuse)) { + done = 1; break; + } + channel++; } if (!done) return -ENOMEM; - } else if (channel >= edma_cc[ctlr]->num_channels) { + } else if (channel >= cc->num_channels) { return -EINVAL; - } else if (test_and_set_bit(channel, edma_cc[ctlr]->edma_inuse)) { + } else if (test_and_set_bit(channel, cc->edma_inuse)) { return -EBUSY; } /* ensure access through shadow region 0 */ - edma_or_array2(edma_cc[ctlr], EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); + edma_or_array2(cc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); /* ensure no events are pending */ - edma_stop(EDMA_CTLR_CHAN(ctlr, channel)); - memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(channel), &dummy_paramset, + edma_stop(cc, EDMA_CTLR_CHAN(cc->id, channel)); + memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset, PARM_SIZE); if (callback) - setup_dma_interrupt(edma_cc[ctlr], - EDMA_CTLR_CHAN(ctlr, channel), callback, - data); + setup_dma_interrupt(cc, EDMA_CTLR_CHAN(cc->id, channel), + callback, data); - map_dmach_queue(edma_cc[ctlr], channel, eventq_no); + map_dmach_queue(cc, channel, eventq_no); - return EDMA_CTLR_CHAN(ctlr, channel); + return EDMA_CTLR_CHAN(cc->id, channel); } EXPORT_SYMBOL(edma_alloc_channel); @@ -650,22 +651,25 @@ EXPORT_SYMBOL(edma_alloc_channel); * will not be reactivated by linking, chaining, or software calls to * edma_start(). */ -void edma_free_channel(unsigned channel) +void edma_free_channel(struct edma *cc, unsigned channel) { - unsigned ctlr; - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return; + } channel = EDMA_CHAN_SLOT(channel); - if (channel >= edma_cc[ctlr]->num_channels) + if (channel >= cc->num_channels) return; - setup_dma_interrupt(edma_cc[ctlr], channel, NULL, NULL); + setup_dma_interrupt(cc, channel, NULL, NULL); /* REVISIT should probably take out of shadow region 0 */ - memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(channel), &dummy_paramset, + memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset, PARM_SIZE); - clear_bit(channel, edma_cc[ctlr]->edma_inuse); + clear_bit(channel, cc->edma_inuse); } EXPORT_SYMBOL(edma_free_channel); @@ -683,35 +687,29 @@ EXPORT_SYMBOL(edma_free_channel); * * Returns the number of the slot, else negative errno. 
*/ -int edma_alloc_slot(unsigned ctlr, int slot) +int edma_alloc_slot(struct edma *cc, int slot) { - if (!edma_cc[ctlr]) - return -EINVAL; - - if (slot >= 0) + if (slot > 0) slot = EDMA_CHAN_SLOT(slot); - if (slot < 0) { - slot = edma_cc[ctlr]->num_channels; + slot = cc->num_channels; for (;;) { - slot = find_next_zero_bit(edma_cc[ctlr]->edma_inuse, - edma_cc[ctlr]->num_slots, slot); - if (slot == edma_cc[ctlr]->num_slots) + slot = find_next_zero_bit(cc->edma_inuse, cc->num_slots, + slot); + if (slot == cc->num_slots) return -ENOMEM; - if (!test_and_set_bit(slot, edma_cc[ctlr]->edma_inuse)) + if (!test_and_set_bit(slot, cc->edma_inuse)) break; } - } else if (slot < edma_cc[ctlr]->num_channels || - slot >= edma_cc[ctlr]->num_slots) { + } else if (slot < cc->num_channels || slot >= cc->num_slots) { return -EINVAL; - } else if (test_and_set_bit(slot, edma_cc[ctlr]->edma_inuse)) { + } else if (test_and_set_bit(slot, cc->edma_inuse)) { return -EBUSY; } - memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(slot), &dummy_paramset, - PARM_SIZE); + memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE); - return EDMA_CTLR_CHAN(ctlr, slot); + return slot; } EXPORT_SYMBOL(edma_alloc_slot); @@ -723,20 +721,15 @@ EXPORT_SYMBOL(edma_alloc_slot); * Callers are responsible for ensuring the slot is inactive, and will * not be activated. */ -void edma_free_slot(unsigned slot) +void edma_free_slot(struct edma *cc, unsigned slot) { - unsigned ctlr; - ctlr = EDMA_CTLR(slot); slot = EDMA_CHAN_SLOT(slot); - - if (slot < edma_cc[ctlr]->num_channels || - slot >= edma_cc[ctlr]->num_slots) + if (slot < cc->num_channels || slot >= cc->num_slots) return; - memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(slot), &dummy_paramset, - PARM_SIZE); - clear_bit(slot, edma_cc[ctlr]->edma_inuse); + memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE); + clear_bit(slot, cc->edma_inuse); } EXPORT_SYMBOL(edma_free_slot); @@ -751,16 +744,15 @@ EXPORT_SYMBOL(edma_free_slot); * * Returns the position of the current active slot */ -dma_addr_t edma_get_position(unsigned slot, bool dst) +dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst) { - u32 offs, ctlr = EDMA_CTLR(slot); + u32 offs; slot = EDMA_CHAN_SLOT(slot); - offs = PARM_OFFSET(slot); offs += dst ? PARM_DST : PARM_SRC; - return edma_read(edma_cc[ctlr], offs); + return edma_read(cc, offs); } /** @@ -770,21 +762,15 @@ dma_addr_t edma_get_position(unsigned slot, bool dst) * * The originating slot should not be part of any active DMA transfer. */ -void edma_link(unsigned from, unsigned to) +void edma_link(struct edma *cc, unsigned from, unsigned to) { - unsigned ctlr_from, ctlr_to; - - ctlr_from = EDMA_CTLR(from); from = EDMA_CHAN_SLOT(from); - ctlr_to = EDMA_CTLR(to); to = EDMA_CHAN_SLOT(to); - - if (from >= edma_cc[ctlr_from]->num_slots) + if (from >= cc->num_slots || to >= cc->num_slots) return; - if (to >= edma_cc[ctlr_to]->num_slots) - return; - edma_parm_modify(edma_cc[ctlr_from], PARM_LINK_BCNTRLD, from, 0xffff0000, - PARM_OFFSET(to)); + + edma_parm_modify(cc, PARM_LINK_BCNTRLD, from, 0xffff0000, + PARM_OFFSET(to)); } EXPORT_SYMBOL(edma_link); @@ -802,16 +788,13 @@ EXPORT_SYMBOL(edma_link); * calls to set up those parameters in small pieces, and provides * complete control over all transfer options. 
*/ -void edma_write_slot(unsigned slot, const struct edmacc_param *param) +void edma_write_slot(struct edma *cc, unsigned slot, + const struct edmacc_param *param) { - unsigned ctlr; - - ctlr = EDMA_CTLR(slot); slot = EDMA_CHAN_SLOT(slot); - - if (slot >= edma_cc[ctlr]->num_slots) + if (slot >= cc->num_slots) return; - memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(slot), param, PARM_SIZE); + memcpy_toio(cc->base + PARM_OFFSET(slot), param, PARM_SIZE); } EXPORT_SYMBOL(edma_write_slot); @@ -823,17 +806,12 @@ EXPORT_SYMBOL(edma_write_slot); * Use this to read data from a parameter RAM slot, perhaps to * save them as a template for later reuse. */ -void edma_read_slot(unsigned slot, struct edmacc_param *param) +void edma_read_slot(struct edma *cc, unsigned slot, struct edmacc_param *param) { - unsigned ctlr; - - ctlr = EDMA_CTLR(slot); slot = EDMA_CHAN_SLOT(slot); - - if (slot >= edma_cc[ctlr]->num_slots) + if (slot >= cc->num_slots) return; - memcpy_fromio(param, edma_cc[ctlr]->base + PARM_OFFSET(slot), - PARM_SIZE); + memcpy_fromio(param, cc->base + PARM_OFFSET(slot), PARM_SIZE); } EXPORT_SYMBOL(edma_read_slot); @@ -848,18 +826,19 @@ EXPORT_SYMBOL(edma_read_slot); * This temporarily disables EDMA hardware events on the specified channel, * preventing them from triggering new transfers on its behalf */ -void edma_pause(unsigned channel) +void edma_pause(struct edma *cc, unsigned channel) { - unsigned ctlr; - - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return; + } channel = EDMA_CHAN_SLOT(channel); - if (channel < edma_cc[ctlr]->num_channels) { + if (channel < cc->num_channels) { unsigned int mask = BIT(channel & 0x1f); - edma_shadow0_write_array(edma_cc[ctlr], SH_EECR, channel >> 5, - mask); + edma_shadow0_write_array(cc, SH_EECR, channel >> 5, mask); } } EXPORT_SYMBOL(edma_pause); @@ -870,36 +849,39 @@ EXPORT_SYMBOL(edma_pause); * * This re-enables EDMA hardware events on the specified channel. */ -void edma_resume(unsigned channel) +void edma_resume(struct edma *cc, unsigned channel) { - unsigned ctlr; - - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return; + } channel = EDMA_CHAN_SLOT(channel); - if (channel < edma_cc[ctlr]->num_channels) { + if (channel < cc->num_channels) { unsigned int mask = BIT(channel & 0x1f); - edma_shadow0_write_array(edma_cc[ctlr], SH_EESR, channel >> 5, - mask); + edma_shadow0_write_array(cc, SH_EESR, channel >> 5, mask); } } EXPORT_SYMBOL(edma_resume); -int edma_trigger_channel(unsigned channel) +int edma_trigger_channel(struct edma *cc, unsigned channel) { - unsigned ctlr; unsigned int mask; - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return -EINVAL; + } channel = EDMA_CHAN_SLOT(channel); mask = BIT(channel & 0x1f); - edma_shadow0_write_array(edma_cc[ctlr], SH_ESR, (channel >> 5), mask); + edma_shadow0_write_array(cc, SH_ESR, (channel >> 5), mask); pr_debug("EDMA: ESR%d %08x\n", (channel >> 5), - edma_shadow0_read_array(edma_cc[ctlr], SH_ESR, - (channel >> 5))); + edma_shadow0_read_array(cc, SH_ESR, (channel >> 5))); return 0; } EXPORT_SYMBOL(edma_trigger_channel); @@ -915,15 +897,16 @@ EXPORT_SYMBOL(edma_trigger_channel); * * Returns zero on success, else negative errno. 
*/ -int edma_start(unsigned channel) +int edma_start(struct edma *cc, unsigned channel) { - unsigned ctlr; - - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return -EINVAL; + } channel = EDMA_CHAN_SLOT(channel); - if (channel < edma_cc[ctlr]->num_channels) { - struct edma *cc = edma_cc[ctlr]; + if (channel < cc->num_channels) { int j = channel >> 5; unsigned int mask = BIT(channel & 0x1f); @@ -962,15 +945,16 @@ EXPORT_SYMBOL(edma_start); * may not be resumed, and the channel's Parameter RAM should be * reinitialized before being reused. */ -void edma_stop(unsigned channel) +void edma_stop(struct edma *cc, unsigned channel) { - unsigned ctlr; - - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return; + } channel = EDMA_CHAN_SLOT(channel); - if (channel < edma_cc[ctlr]->num_channels) { - struct edma *cc = edma_cc[ctlr]; + if (channel < cc->num_channels) { int j = channel >> 5; unsigned int mask = BIT(channel & 0x1f); @@ -1005,15 +989,16 @@ EXPORT_SYMBOL(edma_stop); * *****************************************************************************/ -void edma_clean_channel(unsigned channel) +void edma_clean_channel(struct edma *cc, unsigned channel) { - unsigned ctlr; - - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return; + } channel = EDMA_CHAN_SLOT(channel); - if (channel < edma_cc[ctlr]->num_channels) { - struct edma *cc = edma_cc[ctlr]; + if (channel < cc->num_channels) { int j = (channel >> 5); unsigned int mask = BIT(channel & 0x1f); @@ -1037,26 +1022,35 @@ EXPORT_SYMBOL(edma_clean_channel); * * Can be used to move a channel to a selected event queue. 
*/ -void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no) +void edma_assign_channel_eventq(struct edma *cc, unsigned channel, + enum dma_event_q eventq_no) { - unsigned ctlr; - - ctlr = EDMA_CTLR(channel); + if (cc->id != EDMA_CTLR(channel)) { + dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + cc->id, EDMA_CTLR(channel)); + return; + } channel = EDMA_CHAN_SLOT(channel); - if (channel >= edma_cc[ctlr]->num_channels) + if (channel >= cc->num_channels) return; /* default to low priority queue */ if (eventq_no == EVENTQ_DEFAULT) - eventq_no = edma_cc[ctlr]->default_queue; - if (eventq_no >= edma_cc[ctlr]->num_tc) + eventq_no = cc->default_queue; + if (eventq_no >= cc->num_tc) return; - map_dmach_queue(edma_cc[ctlr], channel, eventq_no); + map_dmach_queue(cc, channel, eventq_no); } EXPORT_SYMBOL(edma_assign_channel_eventq); +struct edma *edma_get_data(struct device *edma_dev) +{ + return dev_get_drvdata(edma_dev); +} + + static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, struct edma *edma_cc, int cc_id) { @@ -1278,11 +1272,10 @@ static int edma_probe(struct platform_device *pdev) } } - edma_cc[dev_id] = devm_kzalloc(dev, sizeof(struct edma), GFP_KERNEL); - if (!edma_cc[dev_id]) + cc = devm_kzalloc(dev, sizeof(struct edma), GFP_KERNEL); + if (!cc) return -ENOMEM; - cc = edma_cc[dev_id]; cc->dev = dev; cc->id = dev_id; dev_set_drvdata(dev, cc); diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index fcb4680efed7..53d48b2a700d 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -119,6 +119,7 @@ struct edma_chan { }; struct edma_cc { + struct edma *cc; int ctlr; struct dma_device dma_slave; struct edma_chan slave_chans[EDMA_CHANS]; @@ -150,6 +151,7 @@ static void edma_desc_free(struct virt_dma_desc *vdesc) /* Dispatch a queued descriptor to the controller (caller holds lock) */ static void edma_execute(struct edma_chan *echan) { + struct edma *cc = echan->ecc->cc; struct virt_dma_desc *vdesc; struct edma_desc *edesc; struct device *dev = echan->vchan.chan.device->dev; @@ -174,7 +176,7 @@ static void edma_execute(struct edma_chan *echan) /* Write descriptor PaRAM set(s) */ for (i = 0; i < nslots; i++) { j = i + edesc->processed; - edma_write_slot(echan->slot[i], &edesc->pset[j].param); + edma_write_slot(cc, echan->slot[i], &edesc->pset[j].param); edesc->sg_len += edesc->pset[j].len; dev_vdbg(echan->vchan.chan.device->dev, "\n pset[%d]:\n" @@ -199,7 +201,7 @@ static void edma_execute(struct edma_chan *echan) edesc->pset[j].param.link_bcntrld); /* Link to the previous slot if not the last set */ if (i != (nslots - 1)) - edma_link(echan->slot[i], echan->slot[i+1]); + edma_link(cc, echan->slot[i], echan->slot[i+1]); } edesc->processed += nslots; @@ -211,9 +213,9 @@ static void edma_execute(struct edma_chan *echan) */ if (edesc->processed == edesc->pset_nr) { if (edesc->cyclic) - edma_link(echan->slot[nslots-1], echan->slot[1]); + edma_link(cc, echan->slot[nslots-1], echan->slot[1]); else - edma_link(echan->slot[nslots-1], + edma_link(cc, echan->slot[nslots-1], echan->ecc->dummy_slot); } @@ -224,19 +226,19 @@ static void edma_execute(struct edma_chan *echan) * transfers of MAX_NR_SG */ dev_dbg(dev, "missed event on channel %d\n", echan->ch_num); - edma_clean_channel(echan->ch_num); - edma_stop(echan->ch_num); - edma_start(echan->ch_num); - edma_trigger_channel(echan->ch_num); + edma_clean_channel(cc, echan->ch_num); + edma_stop(cc, echan->ch_num); + edma_start(cc, echan->ch_num); + edma_trigger_channel(cc, echan->ch_num); 
echan->missed = 0; } else if (edesc->processed <= MAX_NR_SG) { dev_dbg(dev, "first transfer starting on channel %d\n", echan->ch_num); - edma_start(echan->ch_num); + edma_start(cc, echan->ch_num); } else { dev_dbg(dev, "chan: %d: completed %d elements, resuming\n", echan->ch_num, edesc->processed); - edma_resume(echan->ch_num); + edma_resume(cc, echan->ch_num); } } @@ -254,10 +256,11 @@ static int edma_terminate_all(struct dma_chan *chan) * echan->edesc is NULL and exit.) */ if (echan->edesc) { - edma_stop(echan->ch_num); + edma_stop(echan->ecc->cc, echan->ch_num); /* Move the cyclic channel back to default queue */ if (echan->edesc->cyclic) - edma_assign_channel_eventq(echan->ch_num, + edma_assign_channel_eventq(echan->ecc->cc, + echan->ch_num, EVENTQ_DEFAULT); /* * free the running request descriptor @@ -295,7 +298,7 @@ static int edma_dma_pause(struct dma_chan *chan) if (!echan->edesc) return -EINVAL; - edma_pause(echan->ch_num); + edma_pause(echan->ecc->cc, echan->ch_num); return 0; } @@ -303,7 +306,7 @@ static int edma_dma_resume(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - edma_resume(echan->ch_num); + edma_resume(echan->ecc->cc, echan->ch_num); return 0; } @@ -485,8 +488,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( for (i = 0; i < nslots; i++) { if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(EDMA_CTLR(echan->ch_num), - EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -641,8 +643,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( /* Allocate a PaRAM slot, if needed */ if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(EDMA_CTLR(echan->ch_num), - EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -703,7 +704,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( } /* Place the cyclic channel to highest priority queue */ - edma_assign_channel_eventq(echan->ch_num, EVENTQ_0); + edma_assign_channel_eventq(echan->ecc->cc, echan->ch_num, EVENTQ_0); return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } @@ -711,6 +712,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( static void edma_callback(unsigned ch_num, u16 ch_status, void *data) { struct edma_chan *echan = data; + struct edma *cc = echan->ecc->cc; struct device *dev = echan->vchan.chan.device->dev; struct edma_desc *edesc; struct edmacc_param p; @@ -727,13 +729,13 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data) } else if (edesc->processed == edesc->pset_nr) { dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num); edesc->residue = 0; - edma_stop(echan->ch_num); + edma_stop(cc, echan->ch_num); vchan_cookie_complete(&edesc->vdesc); echan->edesc = NULL; } else { dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num); - edma_pause(echan->ch_num); + edma_pause(cc, echan->ch_num); /* Update statistics for tx_status */ edesc->residue -= edesc->sg_len; @@ -744,7 +746,7 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data) } break; case EDMA_DMA_CC_ERROR: - edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p); + edma_read_slot(cc, echan->slot[0], &p); /* * Issue later based on missed flag which will be sure @@ -767,10 +769,10 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data) * missed, so its safe to issue it here. 
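A note on the plumbing this patch introduces on the dmaengine side: every path that used to call the global private API now walks a chain of back pointers to reach its controller instance. Condensed, with fields trimmed to the ones relevant here:

	struct edma;			/* private-API controller state */

	struct edma_cc {		/* dmaengine wrapper */
		struct edma *cc;	/* fetched via edma_get_data() in probe */
		int ctlr;
		/* ... */
	};

	struct edma_chan {
		struct edma_cc *ecc;	/* owning wrapper */
		int ch_num;
		/* ... */
	};

	/* hence the pattern throughout this patch:
	 * struct edma *cc = echan->ecc->cc;
	 * edma_stop(cc, echan->ch_num); ...
	 */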
*/ dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n"); - edma_clean_channel(echan->ch_num); - edma_stop(echan->ch_num); - edma_start(echan->ch_num); - edma_trigger_channel(echan->ch_num); + edma_clean_channel(cc, echan->ch_num); + edma_stop(cc, echan->ch_num); + edma_start(cc, echan->ch_num); + edma_trigger_channel(cc, echan->ch_num); } break; default: @@ -789,8 +791,8 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) int a_ch_num; LIST_HEAD(descs); - a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback, - echan, EVENTQ_DEFAULT); + a_ch_num = edma_alloc_channel(echan->ecc->cc, echan->ch_num, + edma_callback, echan, EVENTQ_DEFAULT); if (a_ch_num < 0) { ret = -ENODEV; @@ -814,7 +816,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) return 0; err_wrong_chan: - edma_free_channel(a_ch_num); + edma_free_channel(echan->ecc->cc, a_ch_num); err_no_chan: return ret; } @@ -827,21 +829,21 @@ static void edma_free_chan_resources(struct dma_chan *chan) int i; /* Terminate transfers */ - edma_stop(echan->ch_num); + edma_stop(echan->ecc->cc, echan->ch_num); vchan_free_chan_resources(&echan->vchan); /* Free EDMA PaRAM slots */ for (i = 1; i < EDMA_MAX_SLOTS; i++) { if (echan->slot[i] >= 0) { - edma_free_slot(echan->slot[i]); + edma_free_slot(echan->ecc->cc, echan->slot[i]); echan->slot[i] = -1; } } /* Free EDMA channel */ if (echan->alloced) { - edma_free_channel(echan->ch_num); + edma_free_channel(echan->ecc->cc, echan->ch_num); echan->alloced = false; } @@ -871,7 +873,8 @@ static u32 edma_residue(struct edma_desc *edesc) * We always read the dst/src position from the first RamPar * pset. That's the one which is active now. */ - pos = edma_get_position(edesc->echan->slot[0], dst); + pos = edma_get_position(edesc->echan->ecc->cc, edesc->echan->slot[0], + dst); /* * Cyclic is simple. Just subtract pset[0].addr from pos. 
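To make the quoted comment concrete: for a cyclic transfer the controller keeps reloading the same PaRAM set, so the position read by edma_get_position() always lies inside pset[0]'s buffer and the residue falls out of one subtraction. An illustrative sketch, not the driver's exact code (the assumption that buf_len is the full ring size is mine):

	/* Illustrative only: cyclic residue from the current DMA position. */
	static u32 cyclic_residue_sketch(dma_addr_t pos, dma_addr_t buf_addr,
					 u32 buf_len)
	{
		u32 done = pos - buf_addr;	/* bytes consumed this cycle */

		return buf_len - done;		/* bytes left before the wrap */
	}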
@@ -1008,8 +1011,12 @@ static int edma_probe(struct platform_device *pdev) return -ENOMEM; } + ecc->cc = edma_get_data(pdev->dev.parent); + if (!ecc->cc) + return -ENODEV; + ecc->ctlr = pdev->id; - ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY); + ecc->dummy_slot = edma_alloc_slot(ecc->cc, EDMA_SLOT_ANY); if (ecc->dummy_slot < 0) { dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n"); return ecc->dummy_slot; @@ -1042,7 +1049,7 @@ static int edma_probe(struct platform_device *pdev) return 0; err_reg1: - edma_free_slot(ecc->dummy_slot); + edma_free_slot(ecc->cc, ecc->dummy_slot); return ret; } @@ -1055,7 +1062,7 @@ static int edma_remove(struct platform_device *pdev) if (parent_node) of_dma_controller_free(parent_node); dma_async_device_unregister(&ecc->dma_slave); - edma_free_slot(ecc->dummy_slot); + edma_free_slot(ecc->cc, ecc->dummy_slot); return 0; } diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index c1862423b356..466021c03169 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -92,32 +92,40 @@ enum dma_event_q { #define EDMA_MAX_CC 2 +struct edma; + +struct edma *edma_get_data(struct device *edma_dev); + /* alloc/free DMA channels and their dedicated parameter RAM slots */ -int edma_alloc_channel(int channel, +int edma_alloc_channel(struct edma *cc, int channel, void (*callback)(unsigned channel, u16 ch_status, void *data), void *data, enum dma_event_q); -void edma_free_channel(unsigned channel); +void edma_free_channel(struct edma *cc, unsigned channel); /* alloc/free parameter RAM slots */ -int edma_alloc_slot(unsigned ctlr, int slot); -void edma_free_slot(unsigned slot); +int edma_alloc_slot(struct edma *cc, int slot); +void edma_free_slot(struct edma *cc, unsigned slot); /* calls that operate on part of a parameter RAM slot */ -dma_addr_t edma_get_position(unsigned slot, bool dst); -void edma_link(unsigned from, unsigned to); +dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst); +void edma_link(struct edma *cc, unsigned from, unsigned to); /* calls that operate on an entire parameter RAM slot */ -void edma_write_slot(unsigned slot, const struct edmacc_param *params); -void edma_read_slot(unsigned slot, struct edmacc_param *params); +void edma_write_slot(struct edma *cc, unsigned slot, + const struct edmacc_param *params); +void edma_read_slot(struct edma *cc, unsigned slot, + struct edmacc_param *params); /* channel control operations */ -int edma_start(unsigned channel); -void edma_stop(unsigned channel); -void edma_clean_channel(unsigned channel); -void edma_pause(unsigned channel); -void edma_resume(unsigned channel); +int edma_start(struct edma *cc, unsigned channel); +void edma_stop(struct edma *cc, unsigned channel); +void edma_clean_channel(struct edma *cc, unsigned channel); +void edma_pause(struct edma *cc, unsigned channel); +void edma_resume(struct edma *cc, unsigned channel); +int edma_trigger_channel(struct edma *cc, unsigned channel); -void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no); +void edma_assign_channel_eventq(struct edma *cc, unsigned channel, + enum dma_event_q eventq_no); struct edma_rsv_info { @@ -141,6 +149,4 @@ struct edma_soc_info { const s16 (*xbar_chans)[2]; }; -int edma_trigger_channel(unsigned); - #endif -- cgit v1.2.3 From 2b6b3b7420190888793c49e97276e1e73bd7eaed Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:53 +0300 Subject: ARM/dmaengine: edma: Merge the two drivers under 
drivers/dma/ Move the code out from arch/arm/common and merge it into the dmaengine driver. This change is done with as little (if any) functional change to the code as possible, to avoid introducing regressions. Signed-off-by: Peter Ujfalusi Acked-by: Tony Lindgren Signed-off-by: Vinod Koul --- arch/arm/Kconfig | 1 - arch/arm/common/Kconfig | 3 - arch/arm/common/Makefile | 1 - arch/arm/common/edma.c | 1431 ---------------------------------- arch/arm/mach-omap2/Kconfig | 1 - drivers/dma/Kconfig | 1 - drivers/dma/edma.c | 1506 ++++++++++++++++++++++++++++++++++-- include/linux/platform_data/edma.h | 74 -- 8 files changed, 1431 insertions(+), 1587 deletions(-) delete mode 100644 arch/arm/common/edma.c (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 72ad724c67ae..513e38701418 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -736,7 +736,6 @@ config ARCH_DAVINCI select GENERIC_CLOCKEVENTS select GENERIC_IRQ_CHIP select HAVE_IDE - select TI_PRIV_EDMA select USE_OF select ZONE_DMA help diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index c3a4e9ceba34..9353184d730d 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -17,6 +17,3 @@ config SHARP_PARAM config SHARP_SCOOP bool - -config TI_PRIV_EDMA - bool diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 6ee5959a813b..27f23b15b1ea 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -15,6 +15,5 @@ obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o CFLAGS_REMOVE_mcpm_entry.o = -pg AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a -obj-$(CONFIG_TI_PRIV_EDMA) += edma.o obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c deleted file mode 100644 index 5b747f1bc8b5..000000000000 --- a/arch/arm/common/edma.c +++ /dev/null @@ -1,1431 +0,0 @@ -/* - * EDMA3 support for DaVinci - * - * Copyright (C) 2006-2009 Texas Instruments. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* Offsets matching "struct edmacc_param" */ -#define PARM_OPT 0x00 -#define PARM_SRC 0x04 -#define PARM_A_B_CNT 0x08 -#define PARM_DST 0x0c -#define PARM_SRC_DST_BIDX 0x10 -#define PARM_LINK_BCNTRLD 0x14 -#define PARM_SRC_DST_CIDX 0x18 -#define PARM_CCNT 0x1c - -#define PARM_SIZE 0x20 - -/* Offsets for EDMA CC global channel registers and their shadows */ -#define SH_ER 0x00 /* 64 bits */ -#define SH_ECR 0x08 /* 64 bits */ -#define SH_ESR 0x10 /* 64 bits */ -#define SH_CER 0x18 /* 64 bits */ -#define SH_EER 0x20 /* 64 bits */ -#define SH_EECR 0x28 /* 64 bits */ -#define SH_EESR 0x30 /* 64 bits */ -#define SH_SER 0x38 /* 64 bits */ -#define SH_SECR 0x40 /* 64 bits */ -#define SH_IER 0x50 /* 64 bits */ -#define SH_IECR 0x58 /* 64 bits */ -#define SH_IESR 0x60 /* 64 bits */ -#define SH_IPR 0x68 /* 64 bits */ -#define SH_ICR 0x70 /* 64 bits */ -#define SH_IEVAL 0x78 -#define SH_QER 0x80 -#define SH_QEER 0x84 -#define SH_QEECR 0x88 -#define SH_QEESR 0x8c -#define SH_QSER 0x90 -#define SH_QSECR 0x94 -#define SH_SIZE 0x200 - -/* Offsets for EDMA CC global registers */ -#define EDMA_REV 0x0000 -#define EDMA_CCCFG 0x0004 -#define EDMA_QCHMAP 0x0200 /* 8 registers */ -#define EDMA_DMAQNUM 0x0240 /* 8 registers (4 on OMAP-L1xx) */ -#define EDMA_QDMAQNUM 0x0260 -#define EDMA_QUETCMAP 0x0280 -#define EDMA_QUEPRI 0x0284 -#define EDMA_EMR 0x0300 /* 64 bits */ -#define EDMA_EMCR 0x0308 /* 64 bits */ -#define EDMA_QEMR 0x0310 -#define EDMA_QEMCR 0x0314 -#define EDMA_CCERR 0x0318 -#define EDMA_CCERRCLR 0x031c -#define EDMA_EEVAL 0x0320 -#define EDMA_DRAE 0x0340 /* 4 x 64 bits*/ -#define EDMA_QRAE 0x0380 /* 4 registers */ -#define EDMA_QUEEVTENTRY 0x0400 /* 2 x 16 registers */ -#define EDMA_QSTAT 0x0600 /* 2 registers */ -#define EDMA_QWMTHRA 0x0620 -#define EDMA_QWMTHRB 0x0624 -#define EDMA_CCSTAT 0x0640 - -#define EDMA_M 0x1000 /* global channel registers */ -#define EDMA_ECR 0x1008 -#define EDMA_ECRH 0x100C -#define EDMA_SHADOW0 0x2000 /* 4 regions shadowing global channels */ -#define EDMA_PARM 0x4000 /* 128 param entries */ - -#define PARM_OFFSET(param_no) (EDMA_PARM + ((param_no) << 5)) - -#define EDMA_DCHMAP 0x0100 /* 64 registers */ - -/* CCCFG register */ -#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */ -#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */ -#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */ -#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ -#define CHMAP_EXIST BIT(24) - -#define EDMA_MAX_DMACH 64 -#define EDMA_MAX_PARAMENTRY 512 - -/*****************************************************************************/ -struct edma { - struct device *dev; - void __iomem *base; - - /* how many dma resources of each type */ - unsigned num_channels; - unsigned num_region; - unsigned num_slots; - unsigned num_tc; - enum dma_event_q default_queue; - - /* list of channels with no even trigger; terminated by "-1" */ - const s8 *noevent; - - struct edma_soc_info *info; - int id; - bool unused_chan_list_done; - /* The edma_inuse bit for each PaRAM slot is clear unless the - * channel is in use ... by ARM or DSP, for QDMA, or whatever. - */ - DECLARE_BITMAP(edma_inuse, EDMA_MAX_PARAMENTRY); - - /* The edma_unused bit for each channel is clear unless - * it is not being used on this platform. It uses a bit - * of SOC-specific initialization code. 
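For orientation in the register map just quoted: PaRAM sets are PARM_SIZE (0x20) bytes apiece starting at EDMA_PARM, which is why every slot offset in this file is computed with a shift by five. A worked example using the defines above:

	#define PARM_OFFSET(param_no)	(EDMA_PARM + ((param_no) << 5))

	/* slot 5: 0x4000 + (5 << 5) = 0x40a0;
	 * its DST word sits at 0x40a0 + PARM_DST = 0x40ac
	 */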
- */ - DECLARE_BITMAP(edma_unused, EDMA_MAX_DMACH); - - struct dma_interrupt_data { - void (*callback)(unsigned channel, unsigned short ch_status, - void *data); - void *data; - } intr_data[EDMA_MAX_DMACH]; -}; -/*****************************************************************************/ - -static inline unsigned int edma_read(struct edma *cc, int offset) -{ - return (unsigned int)__raw_readl(cc->base + offset); -} - -static inline void edma_write(struct edma *cc, int offset, int val) -{ - __raw_writel(val, cc->base + offset); -} -static inline void edma_modify(struct edma *cc, int offset, unsigned and, - unsigned or) -{ - unsigned val = edma_read(cc, offset); - val &= and; - val |= or; - edma_write(cc, offset, val); -} -static inline void edma_and(struct edma *cc, int offset, unsigned and) -{ - unsigned val = edma_read(cc, offset); - val &= and; - edma_write(cc, offset, val); -} -static inline void edma_or(struct edma *cc, int offset, unsigned or) -{ - unsigned val = edma_read(cc, offset); - val |= or; - edma_write(cc, offset, val); -} -static inline unsigned int edma_read_array(struct edma *cc, int offset, int i) -{ - return edma_read(cc, offset + (i << 2)); -} -static inline void edma_write_array(struct edma *cc, int offset, int i, - unsigned val) -{ - edma_write(cc, offset + (i << 2), val); -} -static inline void edma_modify_array(struct edma *cc, int offset, int i, - unsigned and, unsigned or) -{ - edma_modify(cc, offset + (i << 2), and, or); -} -static inline void edma_or_array(struct edma *cc, int offset, int i, unsigned or) -{ - edma_or(cc, offset + (i << 2), or); -} -static inline void edma_or_array2(struct edma *cc, int offset, int i, int j, - unsigned or) -{ - edma_or(cc, offset + ((i*2 + j) << 2), or); -} -static inline void edma_write_array2(struct edma *cc, int offset, int i, int j, - unsigned val) -{ - edma_write(cc, offset + ((i*2 + j) << 2), val); -} -static inline unsigned int edma_shadow0_read(struct edma *cc, int offset) -{ - return edma_read(cc, EDMA_SHADOW0 + offset); -} -static inline unsigned int edma_shadow0_read_array(struct edma *cc, int offset, - int i) -{ - return edma_read(cc, EDMA_SHADOW0 + offset + (i << 2)); -} -static inline void edma_shadow0_write(struct edma *cc, int offset, unsigned val) -{ - edma_write(cc, EDMA_SHADOW0 + offset, val); -} -static inline void edma_shadow0_write_array(struct edma *cc, int offset, int i, - unsigned val) -{ - edma_write(cc, EDMA_SHADOW0 + offset + (i << 2), val); -} -static inline unsigned int edma_parm_read(struct edma *cc, int offset, - int param_no) -{ - return edma_read(cc, EDMA_PARM + offset + (param_no << 5)); -} -static inline void edma_parm_write(struct edma *cc, int offset, int param_no, - unsigned val) -{ - edma_write(cc, EDMA_PARM + offset + (param_no << 5), val); -} -static inline void edma_parm_modify(struct edma *cc, int offset, int param_no, - unsigned and, unsigned or) -{ - edma_modify(cc, EDMA_PARM + offset + (param_no << 5), and, or); -} -static inline void edma_parm_and(struct edma *cc, int offset, int param_no, - unsigned and) -{ - edma_and(cc, EDMA_PARM + offset + (param_no << 5), and); -} -static inline void edma_parm_or(struct edma *cc, int offset, int param_no, - unsigned or) -{ - edma_or(cc, EDMA_PARM + offset + (param_no << 5), or); -} - -static inline void set_bits(int offset, int len, unsigned long *p) -{ - for (; len > 0; len--) - set_bit(offset + (len - 1), p); -} - -static inline void clear_bits(int offset, int len, unsigned long *p) -{ - for (; len > 0; len--) - clear_bit(offset + (len - 
1), p); -} - -/*****************************************************************************/ -static int arch_num_cc; - -/* dummy param set used to (re)initialize parameter RAM slots */ -static const struct edmacc_param dummy_paramset = { - .link_bcntrld = 0xffff, - .ccnt = 1, -}; - -static const struct of_device_id edma_of_ids[] = { - { .compatible = "ti,edma3", }, - {} -}; - -/*****************************************************************************/ - -static void map_dmach_queue(struct edma *cc, unsigned ch_no, - enum dma_event_q queue_no) -{ - int bit = (ch_no & 0x7) * 4; - - /* default to low priority queue */ - if (queue_no == EVENTQ_DEFAULT) - queue_no = cc->default_queue; - - queue_no &= 7; - edma_modify_array(cc, EDMA_DMAQNUM, (ch_no >> 3), - ~(0x7 << bit), queue_no << bit); -} - -static void assign_priority_to_queue(struct edma *cc, int queue_no, - int priority) -{ - int bit = queue_no * 4; - edma_modify(cc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit)); -} - -/** - * map_dmach_param - Maps channel number to param entry number - * - * This maps the dma channel number to param entry numberter. In - * other words using the DMA channel mapping registers a param entry - * can be mapped to any channel - * - * Callers are responsible for ensuring the channel mapping logic is - * included in that particular EDMA variant (Eg : dm646x) - * - */ -static void map_dmach_param(struct edma *cc) -{ - int i; - for (i = 0; i < EDMA_MAX_DMACH; i++) - edma_write_array(cc, EDMA_DCHMAP , i , (i << 5)); -} - -static inline void setup_dma_interrupt(struct edma *cc, unsigned lch, - void (*callback)(unsigned channel, u16 ch_status, void *data), - void *data) -{ - lch = EDMA_CHAN_SLOT(lch); - - if (!callback) - edma_shadow0_write_array(cc, SH_IECR, lch >> 5, - BIT(lch & 0x1f)); - - cc->intr_data[lch].callback = callback; - cc->intr_data[lch].data = data; - - if (callback) { - edma_shadow0_write_array(cc, SH_ICR, lch >> 5, BIT(lch & 0x1f)); - edma_shadow0_write_array(cc, SH_IESR, lch >> 5, - BIT(lch & 0x1f)); - } -} - -/****************************************************************************** - * - * DMA interrupt handler - * - *****************************************************************************/ -static irqreturn_t dma_irq_handler(int irq, void *data) -{ - struct edma *cc = data; - int ctlr; - u32 sh_ier; - u32 sh_ipr; - u32 bank; - - ctlr = cc->id; - if (ctlr < 0) - return IRQ_NONE; - - dev_dbg(cc->dev, "dma_irq_handler\n"); - - sh_ipr = edma_shadow0_read_array(cc, SH_IPR, 0); - if (!sh_ipr) { - sh_ipr = edma_shadow0_read_array(cc, SH_IPR, 1); - if (!sh_ipr) - return IRQ_NONE; - sh_ier = edma_shadow0_read_array(cc, SH_IER, 1); - bank = 1; - } else { - sh_ier = edma_shadow0_read_array(cc, SH_IER, 0); - bank = 0; - } - - do { - u32 slot; - u32 channel; - - dev_dbg(cc->dev, "IPR%d %08x\n", bank, sh_ipr); - - slot = __ffs(sh_ipr); - sh_ipr &= ~(BIT(slot)); - - if (sh_ier & BIT(slot)) { - channel = (bank << 5) | slot; - /* Clear the corresponding IPR bits */ - edma_shadow0_write_array(cc, SH_ICR, bank, BIT(slot)); - if (cc->intr_data[channel].callback) - cc->intr_data[channel].callback( - EDMA_CTLR_CHAN(ctlr, channel), - EDMA_DMA_COMPLETE, - cc->intr_data[channel].data); - } - } while (sh_ipr); - - edma_shadow0_write(cc, SH_IEVAL, 1); - return IRQ_HANDLED; -} - -/****************************************************************************** - * - * DMA error interrupt handler - * - *****************************************************************************/ -static irqreturn_t 
dma_ccerr_handler(int irq, void *data) -{ - struct edma *cc = data; - int i; - int ctlr; - unsigned int cnt = 0; - - ctlr = cc->id; - if (ctlr < 0) - return IRQ_NONE; - - dev_dbg(cc->dev, "dma_ccerr_handler\n"); - - if ((edma_read_array(cc, EDMA_EMR, 0) == 0) && - (edma_read_array(cc, EDMA_EMR, 1) == 0) && - (edma_read(cc, EDMA_QEMR) == 0) && - (edma_read(cc, EDMA_CCERR) == 0)) - return IRQ_NONE; - - while (1) { - int j = -1; - if (edma_read_array(cc, EDMA_EMR, 0)) - j = 0; - else if (edma_read_array(cc, EDMA_EMR, 1)) - j = 1; - if (j >= 0) { - dev_dbg(cc->dev, "EMR%d %08x\n", j, - edma_read_array(cc, EDMA_EMR, j)); - for (i = 0; i < 32; i++) { - int k = (j << 5) + i; - if (edma_read_array(cc, EDMA_EMR, j) & - BIT(i)) { - /* Clear the corresponding EMR bits */ - edma_write_array(cc, EDMA_EMCR, j, - BIT(i)); - /* Clear any SER */ - edma_shadow0_write_array(cc, SH_SECR, - j, BIT(i)); - if (cc->intr_data[k].callback) { - cc->intr_data[k].callback( - EDMA_CTLR_CHAN(ctlr, k), - EDMA_DMA_CC_ERROR, - cc->intr_data[k].data); - } - } - } - } else if (edma_read(cc, EDMA_QEMR)) { - dev_dbg(cc->dev, "QEMR %02x\n", - edma_read(cc, EDMA_QEMR)); - for (i = 0; i < 8; i++) { - if (edma_read(cc, EDMA_QEMR) & BIT(i)) { - /* Clear the corresponding IPR bits */ - edma_write(cc, EDMA_QEMCR, BIT(i)); - edma_shadow0_write(cc, SH_QSECR, - BIT(i)); - - /* NOTE: not reported!! */ - } - } - } else if (edma_read(cc, EDMA_CCERR)) { - dev_dbg(cc->dev, "CCERR %08x\n", - edma_read(cc, EDMA_CCERR)); - /* FIXME: CCERR.BIT(16) ignored! much better - * to just write CCERRCLR with CCERR value... - */ - for (i = 0; i < 8; i++) { - if (edma_read(cc, EDMA_CCERR) & BIT(i)) { - /* Clear the corresponding IPR bits */ - edma_write(cc, EDMA_CCERRCLR, BIT(i)); - - /* NOTE: not reported!! */ - } - } - } - if ((edma_read_array(cc, EDMA_EMR, 0) == 0) && - (edma_read_array(cc, EDMA_EMR, 1) == 0) && - (edma_read(cc, EDMA_QEMR) == 0) && - (edma_read(cc, EDMA_CCERR) == 0)) - break; - cnt++; - if (cnt > 10) - break; - } - edma_write(cc, EDMA_EEVAL, 1); - return IRQ_HANDLED; -} - -static int prepare_unused_channel_list(struct device *dev, void *data) -{ - struct platform_device *pdev = to_platform_device(dev); - struct edma *cc = data; - int i, count; - struct of_phandle_args dma_spec; - - if (dev->of_node) { - struct platform_device *dma_pdev; - - count = of_property_count_strings(dev->of_node, "dma-names"); - if (count < 0) - return 0; - for (i = 0; i < count; i++) { - - if (of_parse_phandle_with_args(dev->of_node, "dmas", - "#dma-cells", i, - &dma_spec)) - continue; - - if (!of_match_node(edma_of_ids, dma_spec.np)) { - of_node_put(dma_spec.np); - continue; - } - - dma_pdev = of_find_device_by_node(dma_spec.np); - if (&dma_pdev->dev != cc->dev) - continue; - - clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]), - cc->edma_unused); - of_node_put(dma_spec.np); - } - return 0; - } - - /* For non-OF case */ - for (i = 0; i < pdev->num_resources; i++) { - struct resource *res = &pdev->resource[i]; - - if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0) { - clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start), - cc->edma_unused); - } - } - - return 0; -} - -/*-----------------------------------------------------------------------*/ - -/* Resource alloc/free: dma channels, parameter RAM slots */ - -/** - * edma_alloc_channel - allocate DMA channel and paired parameter RAM - * @channel: specific channel to allocate; negative for "any unmapped channel" - * @callback: optional; to be issued on DMA completion or errors - * @data: passed to callback - * 
@eventq_no: an EVENTQ_* constant, used to choose which Transfer - * Controller (TC) executes requests using this channel. Use - * EVENTQ_DEFAULT unless you really need a high priority queue. - * - * This allocates a DMA channel and its associated parameter RAM slot. - * The parameter RAM is initialized to hold a dummy transfer. - * - * Normal use is to pass a specific channel number as @channel, to make - * use of hardware events mapped to that channel. When the channel will - * be used only for software triggering or event chaining, channels not - * mapped to hardware events (or mapped to unused events) are preferable. - * - * DMA transfers start from a channel using edma_start(), or by - * chaining. When the transfer described in that channel's parameter RAM - * slot completes, that slot's data may be reloaded through a link. - * - * DMA errors are only reported to the @callback associated with the - * channel driving that transfer, but transfer completion callbacks can - * be sent to another channel under control of the TCC field in - * the option word of the transfer's parameter RAM set. Drivers must not - * use DMA transfer completion callbacks for channels they did not allocate. - * (The same applies to TCC codes used in transfer chaining.) - * - * Returns the number of the channel, else negative errno. - */ -int edma_alloc_channel(struct edma *cc, int channel, - void (*callback)(unsigned channel, u16 ch_status, void *data), - void *data, - enum dma_event_q eventq_no) -{ - unsigned done = 0; - int ret = 0; - - if (!cc->unused_chan_list_done) { - /* - * Scan all the platform devices to find out the EDMA channels - * used and clear them in the unused list, making the rest - * available for ARM usage. - */ - ret = bus_for_each_dev(&platform_bus_type, NULL, cc, - prepare_unused_channel_list); - if (ret < 0) - return ret; - - cc->unused_chan_list_done = true; - } - - if (channel >= 0) { - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", - __func__, cc->id, EDMA_CTLR(channel)); - return -EINVAL; - } - channel = EDMA_CHAN_SLOT(channel); - } - - if (channel < 0) { - channel = 0; - for (;;) { - channel = find_next_bit(cc->edma_unused, - cc->num_channels, channel); - if (channel == cc->num_channels) - break; - if (!test_and_set_bit(channel, cc->edma_inuse)) { - done = 1; - break; - } - channel++; - } - if (!done) - return -ENOMEM; - } else if (channel >= cc->num_channels) { - return -EINVAL; - } else if (test_and_set_bit(channel, cc->edma_inuse)) { - return -EBUSY; - } - - /* ensure access through shadow region 0 */ - edma_or_array2(cc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); - - /* ensure no events are pending */ - edma_stop(cc, EDMA_CTLR_CHAN(cc->id, channel)); - memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset, - PARM_SIZE); - - if (callback) - setup_dma_interrupt(cc, EDMA_CTLR_CHAN(cc->id, channel), - callback, data); - - map_dmach_queue(cc, channel, eventq_no); - - return EDMA_CTLR_CHAN(cc->id, channel); -} -EXPORT_SYMBOL(edma_alloc_channel); - - -/** - * edma_free_channel - deallocate DMA channel - * @channel: dma channel returned from edma_alloc_channel() - * - * This deallocates the DMA channel and associated parameter RAM slot - * allocated by edma_alloc_channel(). - * - * Callers are responsible for ensuring the channel is inactive, and - * will not be reactivated by linking, chaining, or software calls to - * edma_start(). 
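Taken together, the kerneldoc above implies the following allocation/teardown pairing for a private-API user. This is a hypothetical sketch: my_callback, my_data and my_setup are placeholders, and cc would come from edma_get_data() on the controller device:

	/* runs on EDMA_DMA_COMPLETE or EDMA_DMA_CC_ERROR */
	static void my_callback(unsigned channel, u16 ch_status, void *data)
	{
		/* notify whoever owns the transfer */
	}

	static int my_setup(struct edma *cc, void *my_data)
	{
		int ch = edma_alloc_channel(cc, EDMA_CHANNEL_ANY, my_callback,
					    my_data, EVENTQ_DEFAULT);
		if (ch < 0)
			return ch;	/* negative errno */

		/* ... edma_write_slot(), edma_start(cc, ch), wait ... */

		edma_stop(cc, ch);	/* callers must park the channel first */
		edma_free_channel(cc, ch);
		return 0;
	}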
- */ -void edma_free_channel(struct edma *cc, unsigned channel) -{ - - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return; - } - channel = EDMA_CHAN_SLOT(channel); - - if (channel >= cc->num_channels) - return; - - setup_dma_interrupt(cc, channel, NULL, NULL); - /* REVISIT should probably take out of shadow region 0 */ - - memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset, - PARM_SIZE); - clear_bit(channel, cc->edma_inuse); -} -EXPORT_SYMBOL(edma_free_channel); - -/** - * edma_alloc_slot - allocate DMA parameter RAM - * @slot: specific slot to allocate; negative for "any unused slot" - * - * This allocates a parameter RAM slot, initializing it to hold a - * dummy transfer. Slots allocated using this routine have not been - * mapped to a hardware DMA channel, and will normally be used by - * linking to them from a slot associated with a DMA channel. - * - * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific - * slots may be allocated on behalf of DSP firmware. - * - * Returns the number of the slot, else negative errno. - */ -int edma_alloc_slot(struct edma *cc, int slot) -{ - if (slot > 0) - slot = EDMA_CHAN_SLOT(slot); - if (slot < 0) { - slot = cc->num_channels; - for (;;) { - slot = find_next_zero_bit(cc->edma_inuse, cc->num_slots, - slot); - if (slot == cc->num_slots) - return -ENOMEM; - if (!test_and_set_bit(slot, cc->edma_inuse)) - break; - } - } else if (slot < cc->num_channels || slot >= cc->num_slots) { - return -EINVAL; - } else if (test_and_set_bit(slot, cc->edma_inuse)) { - return -EBUSY; - } - - memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE); - - return slot; -} -EXPORT_SYMBOL(edma_alloc_slot); - -/** - * edma_free_slot - deallocate DMA parameter RAM - * @slot: parameter RAM slot returned from edma_alloc_slot() - * - * This deallocates the parameter RAM slot allocated by edma_alloc_slot(). - * Callers are responsible for ensuring the slot is inactive, and will - * not be activated. - */ -void edma_free_slot(struct edma *cc, unsigned slot) -{ - - slot = EDMA_CHAN_SLOT(slot); - if (slot < cc->num_channels || slot >= cc->num_slots) - return; - - memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE); - clear_bit(slot, cc->edma_inuse); -} -EXPORT_SYMBOL(edma_free_slot); - -/*-----------------------------------------------------------------------*/ - -/* Parameter RAM operations (i) -- read/write partial slots */ - -/** - * edma_get_position - returns the current transfer point - * @slot: parameter RAM slot being examined - * @dst: true selects the dest position, false the source - * - * Returns the position of the current active slot - */ -dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst) -{ - u32 offs; - - slot = EDMA_CHAN_SLOT(slot); - offs = PARM_OFFSET(slot); - offs += dst ? PARM_DST : PARM_SRC; - - return edma_read(cc, offs); -} - -/** - * edma_link - link one parameter RAM slot to another - * @from: parameter RAM slot originating the link - * @to: parameter RAM slot which is the link target - * - * The originating slot should not be part of any active DMA transfer. 
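The link mechanics, for reference: the low half-word of a slot's LINK_BCNTRLD register holds the byte offset of the PaRAM set to reload next, and 0xffff there means "stop, no reload" (which is why dummy_paramset uses .link_bcntrld = 0xffff). edma_link() below therefore reduces to one masked write that preserves the reload count in the upper half-word:

	edma_parm_modify(cc, PARM_LINK_BCNTRLD, from, 0xffff0000,
			 PARM_OFFSET(to));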
- */ -void edma_link(struct edma *cc, unsigned from, unsigned to) -{ - from = EDMA_CHAN_SLOT(from); - to = EDMA_CHAN_SLOT(to); - if (from >= cc->num_slots || to >= cc->num_slots) - return; - - edma_parm_modify(cc, PARM_LINK_BCNTRLD, from, 0xffff0000, - PARM_OFFSET(to)); -} -EXPORT_SYMBOL(edma_link); - -/*-----------------------------------------------------------------------*/ - -/* Parameter RAM operations (ii) -- read/write whole parameter sets */ - -/** - * edma_write_slot - write parameter RAM data for slot - * @slot: number of parameter RAM slot being modified - * @param: data to be written into parameter RAM slot - * - * Use this to assign all parameters of a transfer at once. This - * allows more efficient setup of transfers than issuing multiple - * calls to set up those parameters in small pieces, and provides - * complete control over all transfer options. - */ -void edma_write_slot(struct edma *cc, unsigned slot, - const struct edmacc_param *param) -{ - slot = EDMA_CHAN_SLOT(slot); - if (slot >= cc->num_slots) - return; - memcpy_toio(cc->base + PARM_OFFSET(slot), param, PARM_SIZE); -} -EXPORT_SYMBOL(edma_write_slot); - -/** - * edma_read_slot - read parameter RAM data from slot - * @slot: number of parameter RAM slot being copied - * @param: where to store copy of parameter RAM data - * - * Use this to read data from a parameter RAM slot, perhaps to - * save them as a template for later reuse. - */ -void edma_read_slot(struct edma *cc, unsigned slot, struct edmacc_param *param) -{ - slot = EDMA_CHAN_SLOT(slot); - if (slot >= cc->num_slots) - return; - memcpy_fromio(param, cc->base + PARM_OFFSET(slot), PARM_SIZE); -} -EXPORT_SYMBOL(edma_read_slot); - -/*-----------------------------------------------------------------------*/ - -/* Various EDMA channel control operations */ - -/** - * edma_pause - pause dma on a channel - * @channel: on which edma_start() has been called - * - * This temporarily disables EDMA hardware events on the specified channel, - * preventing them from triggering new transfers on its behalf - */ -void edma_pause(struct edma *cc, unsigned channel) -{ - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return; - } - channel = EDMA_CHAN_SLOT(channel); - - if (channel < cc->num_channels) { - unsigned int mask = BIT(channel & 0x1f); - - edma_shadow0_write_array(cc, SH_EECR, channel >> 5, mask); - } -} -EXPORT_SYMBOL(edma_pause); - -/** - * edma_resume - resumes dma on a paused channel - * @channel: on which edma_pause() has been called - * - * This re-enables EDMA hardware events on the specified channel. 
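Pause and resume map onto the EECR/EESR set/clear register pair in shadow region 0: writing a 1 bit to EECR disables the channel's hardware event (pause), and writing the same bit to EESR re-enables it (resume). With 32 channels per register word, as the bodies here show:

	int j = channel >> 5;			/* word index for this channel */
	unsigned int mask = BIT(channel & 0x1f);	/* bit within the word */

	edma_shadow0_write_array(cc, SH_EECR, j, mask);	/* pause */
	edma_shadow0_write_array(cc, SH_EESR, j, mask);	/* resume */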
- */ -void edma_resume(struct edma *cc, unsigned channel) -{ - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return; - } - channel = EDMA_CHAN_SLOT(channel); - - if (channel < cc->num_channels) { - unsigned int mask = BIT(channel & 0x1f); - - edma_shadow0_write_array(cc, SH_EESR, channel >> 5, mask); - } -} -EXPORT_SYMBOL(edma_resume); - -int edma_trigger_channel(struct edma *cc, unsigned channel) -{ - unsigned int mask; - - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return -EINVAL; - } - channel = EDMA_CHAN_SLOT(channel); - mask = BIT(channel & 0x1f); - - edma_shadow0_write_array(cc, SH_ESR, (channel >> 5), mask); - - pr_debug("EDMA: ESR%d %08x\n", (channel >> 5), - edma_shadow0_read_array(cc, SH_ESR, (channel >> 5))); - return 0; -} -EXPORT_SYMBOL(edma_trigger_channel); - -/** - * edma_start - start dma on a channel - * @channel: channel being activated - * - * Channels with event associations will be triggered by their hardware - * events, and channels without such associations will be triggered by - * software. (At this writing there is no interface for using software - * triggers except with channels that don't support hardware triggers.) - * - * Returns zero on success, else negative errno. - */ -int edma_start(struct edma *cc, unsigned channel) -{ - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return -EINVAL; - } - channel = EDMA_CHAN_SLOT(channel); - - if (channel < cc->num_channels) { - int j = channel >> 5; - unsigned int mask = BIT(channel & 0x1f); - - /* EDMA channels without event association */ - if (test_bit(channel, cc->edma_unused)) { - pr_debug("EDMA: ESR%d %08x\n", j, - edma_shadow0_read_array(cc, SH_ESR, j)); - edma_shadow0_write_array(cc, SH_ESR, j, mask); - return 0; - } - - /* EDMA channel with event association */ - pr_debug("EDMA: ER%d %08x\n", j, - edma_shadow0_read_array(cc, SH_ER, j)); - /* Clear any pending event or error */ - edma_write_array(cc, EDMA_ECR, j, mask); - edma_write_array(cc, EDMA_EMCR, j, mask); - /* Clear any SER */ - edma_shadow0_write_array(cc, SH_SECR, j, mask); - edma_shadow0_write_array(cc, SH_EESR, j, mask); - pr_debug("EDMA: EER%d %08x\n", j, - edma_shadow0_read_array(cc, SH_EER, j)); - return 0; - } - - return -EINVAL; -} -EXPORT_SYMBOL(edma_start); - -/** - * edma_stop - stops dma on the channel passed - * @channel: channel being deactivated - * - * When @lch is a channel, any active transfer is paused and - * all pending hardware events are cleared. The current transfer - * may not be resumed, and the channel's Parameter RAM should be - * reinitialized before being reused. 
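The stop sequence in the body below touches five set/clear registers in order, each clearing a different piece of latched state:

	edma_shadow0_write_array(cc, SH_EECR, j, mask);	/* stop taking events */
	edma_shadow0_write_array(cc, SH_ECR, j, mask);	/* drop a latched event */
	edma_shadow0_write_array(cc, SH_SECR, j, mask);	/* clear secondary event */
	edma_write_array(cc, EDMA_EMCR, j, mask);	/* clear missed-event flag */
	edma_shadow0_write_array(cc, SH_ICR, j, mask);	/* drop stale completion irq */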
- */ -void edma_stop(struct edma *cc, unsigned channel) -{ - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return; - } - channel = EDMA_CHAN_SLOT(channel); - - if (channel < cc->num_channels) { - int j = channel >> 5; - unsigned int mask = BIT(channel & 0x1f); - - edma_shadow0_write_array(cc, SH_EECR, j, mask); - edma_shadow0_write_array(cc, SH_ECR, j, mask); - edma_shadow0_write_array(cc, SH_SECR, j, mask); - edma_write_array(cc, EDMA_EMCR, j, mask); - - /* clear possibly pending completion interrupt */ - edma_shadow0_write_array(cc, SH_ICR, j, mask); - - pr_debug("EDMA: EER%d %08x\n", j, - edma_shadow0_read_array(cc, SH_EER, j)); - - /* REVISIT: consider guarding against inappropriate event - * chaining by overwriting with dummy_paramset. - */ - } -} -EXPORT_SYMBOL(edma_stop); - -/****************************************************************************** - * - * It cleans ParamEntry qand bring back EDMA to initial state if media has - * been removed before EDMA has finished.It is usedful for removable media. - * Arguments: - * ch_no - channel no - * - * Return: zero on success, or corresponding error no on failure - * - * FIXME this should not be needed ... edma_stop() should suffice. - * - *****************************************************************************/ - -void edma_clean_channel(struct edma *cc, unsigned channel) -{ - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return; - } - channel = EDMA_CHAN_SLOT(channel); - - if (channel < cc->num_channels) { - int j = (channel >> 5); - unsigned int mask = BIT(channel & 0x1f); - - pr_debug("EDMA: EMR%d %08x\n", j, - edma_read_array(cc, EDMA_EMR, j)); - edma_shadow0_write_array(cc, SH_ECR, j, mask); - /* Clear the corresponding EMR bits */ - edma_write_array(cc, EDMA_EMCR, j, mask); - /* Clear any SER */ - edma_shadow0_write_array(cc, SH_SECR, j, mask); - edma_write(cc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0)); - } -} -EXPORT_SYMBOL(edma_clean_channel); - -/* - * edma_assign_channel_eventq - move given channel to desired eventq - * Arguments: - * channel - channel number - * eventq_no - queue to move the channel - * - * Can be used to move a channel to a selected event queue. 
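Queue assignment itself is a 4-bit field per channel in the DMAQNUM register file (eight channels per 32-bit register), as map_dmach_queue() earlier in this file shows:

	int bit = (ch_no & 0x7) * 4;	/* 4 bits per channel, 8 per register */

	edma_modify_array(cc, EDMA_DMAQNUM, ch_no >> 3,
			  ~(0x7 << bit), queue_no << bit);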
- */ -void edma_assign_channel_eventq(struct edma *cc, unsigned channel, - enum dma_event_q eventq_no) -{ - if (cc->id != EDMA_CTLR(channel)) { - dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, - cc->id, EDMA_CTLR(channel)); - return; - } - channel = EDMA_CHAN_SLOT(channel); - - if (channel >= cc->num_channels) - return; - - /* default to low priority queue */ - if (eventq_no == EVENTQ_DEFAULT) - eventq_no = cc->default_queue; - if (eventq_no >= cc->num_tc) - return; - - map_dmach_queue(cc, channel, eventq_no); -} -EXPORT_SYMBOL(edma_assign_channel_eventq); - -struct edma *edma_get_data(struct device *edma_dev) -{ - return dev_get_drvdata(edma_dev); -} - - -static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, - struct edma *edma_cc, int cc_id) -{ - int i; - u32 value, cccfg; - s8 (*queue_priority_map)[2]; - - /* Decode the eDMA3 configuration from CCCFG register */ - cccfg = edma_read(edma_cc, EDMA_CCCFG); - - value = GET_NUM_REGN(cccfg); - edma_cc->num_region = BIT(value); - - value = GET_NUM_DMACH(cccfg); - edma_cc->num_channels = BIT(value + 1); - - value = GET_NUM_PAENTRY(cccfg); - edma_cc->num_slots = BIT(value + 4); - - value = GET_NUM_EVQUE(cccfg); - edma_cc->num_tc = value + 1; - - dev_dbg(dev, "eDMA3 CC%d HW configuration (cccfg: 0x%08x):\n", cc_id, - cccfg); - dev_dbg(dev, "num_region: %u\n", edma_cc->num_region); - dev_dbg(dev, "num_channel: %u\n", edma_cc->num_channels); - dev_dbg(dev, "num_slot: %u\n", edma_cc->num_slots); - dev_dbg(dev, "num_tc: %u\n", edma_cc->num_tc); - - /* Nothing need to be done if queue priority is provided */ - if (pdata->queue_priority_mapping) - return 0; - - /* - * Configure TC/queue priority as follows: - * Q0 - priority 0 - * Q1 - priority 1 - * Q2 - priority 2 - * ... - * The meaning of priority numbers: 0 highest priority, 7 lowest - * priority. So Q0 is the highest priority queue and the last queue has - * the lowest priority. 
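For reference, the CCCFG decode in edma_setup_from_hw() above expands to the following resource counts; as a worked example, a cccfg whose DMACH field reads 5 yields 1 << (5 + 1) = 64 channels:

	num_region   = 1 << GET_NUM_REGN(cccfg);
	num_channels = 1 << (GET_NUM_DMACH(cccfg) + 1);		/* e.g. 5 -> 64 */
	num_slots    = 1 << (GET_NUM_PAENTRY(cccfg) + 4);	/* e.g. 4 -> 256 */
	num_tc       = GET_NUM_EVQUE(cccfg) + 1;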
- */ - queue_priority_map = devm_kzalloc(dev, - (edma_cc->num_tc + 1) * sizeof(s8), - GFP_KERNEL); - if (!queue_priority_map) - return -ENOMEM; - - for (i = 0; i < edma_cc->num_tc; i++) { - queue_priority_map[i][0] = i; - queue_priority_map[i][1] = i; - } - queue_priority_map[i][0] = -1; - queue_priority_map[i][1] = -1; - - pdata->queue_priority_mapping = queue_priority_map; - /* Default queue has the lowest priority */ - pdata->default_queue = i - 1; - - return 0; -} - -#if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_DMADEVICES) - -static int edma_xbar_event_map(struct device *dev, struct device_node *node, - struct edma_soc_info *pdata, size_t sz) -{ - const char pname[] = "ti,edma-xbar-event-map"; - struct resource res; - void __iomem *xbar; - s16 (*xbar_chans)[2]; - size_t nelm = sz / sizeof(s16); - u32 shift, offset, mux; - int ret, i; - - xbar_chans = devm_kzalloc(dev, (nelm + 2) * sizeof(s16), GFP_KERNEL); - if (!xbar_chans) - return -ENOMEM; - - ret = of_address_to_resource(node, 1, &res); - if (ret) - return -ENOMEM; - - xbar = devm_ioremap(dev, res.start, resource_size(&res)); - if (!xbar) - return -ENOMEM; - - ret = of_property_read_u16_array(node, pname, (u16 *)xbar_chans, nelm); - if (ret) - return -EIO; - - /* Invalidate last entry for the other user of this mess */ - nelm >>= 1; - xbar_chans[nelm][0] = xbar_chans[nelm][1] = -1; - - for (i = 0; i < nelm; i++) { - shift = (xbar_chans[i][1] & 0x03) << 3; - offset = xbar_chans[i][1] & 0xfffffffc; - mux = readl(xbar + offset); - mux &= ~(0xff << shift); - mux |= xbar_chans[i][0] << shift; - writel(mux, (xbar + offset)); - } - - pdata->xbar_chans = (const s16 (*)[2]) xbar_chans; - return 0; -} - -static int edma_of_parse_dt(struct device *dev, - struct device_node *node, - struct edma_soc_info *pdata) -{ - int ret = 0; - struct property *prop; - size_t sz; - struct edma_rsv_info *rsv_info; - - rsv_info = devm_kzalloc(dev, sizeof(struct edma_rsv_info), GFP_KERNEL); - if (!rsv_info) - return -ENOMEM; - pdata->rsv = rsv_info; - - prop = of_find_property(node, "ti,edma-xbar-event-map", &sz); - if (prop) - ret = edma_xbar_event_map(dev, node, pdata, sz); - - return ret; -} - -static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, - struct device_node *node) -{ - struct edma_soc_info *info; - int ret; - - info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL); - if (!info) - return ERR_PTR(-ENOMEM); - - ret = edma_of_parse_dt(dev, node, info); - if (ret) - return ERR_PTR(ret); - - return info; -} -#else -static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, - struct device_node *node) -{ - return ERR_PTR(-ENOSYS); -} -#endif - -static int edma_probe(struct platform_device *pdev) -{ - struct edma_soc_info *info = pdev->dev.platform_data; - s8 (*queue_priority_mapping)[2]; - int i, off, ln; - const s16 (*rsv_chans)[2]; - const s16 (*rsv_slots)[2]; - const s16 (*xbar_chans)[2]; - int irq; - char *irq_name; - struct resource *mem; - struct device_node *node = pdev->dev.of_node; - struct device *dev = &pdev->dev; - int dev_id = pdev->id; - struct edma *cc; - int ret; - struct platform_device_info edma_dev_info = { - .name = "edma-dma-engine", - .dma_mask = DMA_BIT_MASK(32), - .parent = &pdev->dev, - }; - - if (node) { - info = edma_setup_info_from_dt(dev, node); - if (IS_ERR(info)) { - dev_err(dev, "failed to get DT data\n"); - return PTR_ERR(info); - } - } - - if (!info) - return -ENODEV; - - pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - dev_err(dev, 
"pm_runtime_get_sync() failed\n"); - return ret; - } - - mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc"); - if (!mem) { - dev_dbg(dev, "mem resource not found, using index 0\n"); - mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!mem) { - dev_err(dev, "no mem resource?\n"); - return -ENODEV; - } - } - - cc = devm_kzalloc(dev, sizeof(struct edma), GFP_KERNEL); - if (!cc) - return -ENOMEM; - - cc->dev = dev; - cc->id = dev_id; - /* When booting with DT the pdev->id is -1 */ - if (dev_id < 0) { - cc->id = 0; - dev_id = arch_num_cc; - } - dev_set_drvdata(dev, cc); - - cc->base = devm_ioremap_resource(dev, mem); - if (IS_ERR(cc->base)) - return PTR_ERR(cc->base); - - /* Get eDMA3 configuration from IP */ - ret = edma_setup_from_hw(dev, info, cc, dev_id); - if (ret) - return ret; - - cc->default_queue = info->default_queue; - - for (i = 0; i < cc->num_slots; i++) - memcpy_toio(cc->base + PARM_OFFSET(i), &dummy_paramset, - PARM_SIZE); - - /* Mark all channels as unused */ - memset(cc->edma_unused, 0xff, sizeof(cc->edma_unused)); - - if (info->rsv) { - - /* Clear the reserved channels in unused list */ - rsv_chans = info->rsv->rsv_chans; - if (rsv_chans) { - for (i = 0; rsv_chans[i][0] != -1; i++) { - off = rsv_chans[i][0]; - ln = rsv_chans[i][1]; - clear_bits(off, ln, cc->edma_unused); - } - } - - /* Set the reserved slots in inuse list */ - rsv_slots = info->rsv->rsv_slots; - if (rsv_slots) { - for (i = 0; rsv_slots[i][0] != -1; i++) { - off = rsv_slots[i][0]; - ln = rsv_slots[i][1]; - set_bits(off, ln, cc->edma_inuse); - } - } - } - - /* Clear the xbar mapped channels in unused list */ - xbar_chans = info->xbar_chans; - if (xbar_chans) { - for (i = 0; xbar_chans[i][1] != -1; i++) { - off = xbar_chans[i][1]; - clear_bits(off, 1, cc->edma_unused); - } - } - - irq = platform_get_irq_byname(pdev, "edma3_ccint"); - if (irq < 0 && node) - irq = irq_of_parse_and_map(node, 0); - - if (irq >= 0) { - irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", - dev_name(dev)); - ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, - cc); - if (ret) { - dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret); - return ret; - } - } - - irq = platform_get_irq_byname(pdev, "edma3_ccerrint"); - if (irq < 0 && node) - irq = irq_of_parse_and_map(node, 2); - - if (irq >= 0) { - irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", - dev_name(dev)); - ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, - cc); - if (ret) { - dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret); - return ret; - } - } - - for (i = 0; i < cc->num_channels; i++) - map_dmach_queue(cc, i, info->default_queue); - - queue_priority_mapping = info->queue_priority_mapping; - - /* Event queue priority mapping */ - for (i = 0; queue_priority_mapping[i][0] != -1; i++) - assign_priority_to_queue(cc, queue_priority_mapping[i][0], - queue_priority_mapping[i][1]); - - /* Map the channel to param entry if channel mapping logic exist */ - if (edma_read(cc, EDMA_CCCFG) & CHMAP_EXIST) - map_dmach_param(cc); - - for (i = 0; i < cc->num_region; i++) { - edma_write_array2(cc, EDMA_DRAE, i, 0, 0x0); - edma_write_array2(cc, EDMA_DRAE, i, 1, 0x0); - edma_write_array(cc, EDMA_QRAE, i, 0x0); - } - cc->info = info; - arch_num_cc++; - - edma_dev_info.id = dev_id; - - platform_device_register_full(&edma_dev_info); - - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int edma_pm_resume(struct device *dev) -{ - struct edma *cc = dev_get_drvdata(dev); - int i; - s8 (*queue_priority_mapping)[2]; - - 
queue_priority_mapping = cc->info->queue_priority_mapping; - - /* Event queue priority mapping */ - for (i = 0; queue_priority_mapping[i][0] != -1; i++) - assign_priority_to_queue(cc, queue_priority_mapping[i][0], - queue_priority_mapping[i][1]); - - /* Map the channel to param entry if channel mapping logic */ - if (edma_read(cc, EDMA_CCCFG) & CHMAP_EXIST) - map_dmach_param(cc); - - for (i = 0; i < cc->num_channels; i++) { - if (test_bit(i, cc->edma_inuse)) { - /* ensure access through shadow region 0 */ - edma_or_array2(cc, EDMA_DRAE, 0, i >> 5, BIT(i & 0x1f)); - - setup_dma_interrupt(cc, EDMA_CTLR_CHAN(cc->id, i), - cc->intr_data[i].callback, - cc->intr_data[i].data); - } - } - - return 0; -} -#endif - -static const struct dev_pm_ops edma_pm_ops = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume) -}; - -static struct platform_driver edma_driver = { - .driver = { - .name = "edma", - .pm = &edma_pm_ops, - .of_match_table = edma_of_ids, - }, - .probe = edma_probe, -}; - -static int __init edma_init(void) -{ - return platform_driver_probe(&edma_driver, edma_probe); -} -arch_initcall(edma_init); - diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 07d2e100caab..e0b6736db984 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -90,7 +90,6 @@ config ARCH_OMAP2PLUS select OMAP_GPMC select PINCTRL select SOC_BUS - select TI_PRIV_EDMA select OMAP_IRQCHIP help Systems based on OMAP2, OMAP3, OMAP4 or OMAP5 diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index b4584757dae0..992efc8e465e 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -486,7 +486,6 @@ config TI_EDMA depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE select DMA_ENGINE select DMA_VIRTUAL_CHANNELS - select TI_PRIV_EDMA default n help Enable support for the TI EDMA controller. 
This DMA diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index fc91ab9dd1bb..aeb67e0cc523 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -26,12 +26,92 @@ #include #include #include +#include +#include +#include +#include #include #include "dmaengine.h" #include "virt-dma.h" +/* Offsets matching "struct edmacc_param" */ +#define PARM_OPT 0x00 +#define PARM_SRC 0x04 +#define PARM_A_B_CNT 0x08 +#define PARM_DST 0x0c +#define PARM_SRC_DST_BIDX 0x10 +#define PARM_LINK_BCNTRLD 0x14 +#define PARM_SRC_DST_CIDX 0x18 +#define PARM_CCNT 0x1c + +#define PARM_SIZE 0x20 + +/* Offsets for EDMA CC global channel registers and their shadows */ +#define SH_ER 0x00 /* 64 bits */ +#define SH_ECR 0x08 /* 64 bits */ +#define SH_ESR 0x10 /* 64 bits */ +#define SH_CER 0x18 /* 64 bits */ +#define SH_EER 0x20 /* 64 bits */ +#define SH_EECR 0x28 /* 64 bits */ +#define SH_EESR 0x30 /* 64 bits */ +#define SH_SER 0x38 /* 64 bits */ +#define SH_SECR 0x40 /* 64 bits */ +#define SH_IER 0x50 /* 64 bits */ +#define SH_IECR 0x58 /* 64 bits */ +#define SH_IESR 0x60 /* 64 bits */ +#define SH_IPR 0x68 /* 64 bits */ +#define SH_ICR 0x70 /* 64 bits */ +#define SH_IEVAL 0x78 +#define SH_QER 0x80 +#define SH_QEER 0x84 +#define SH_QEECR 0x88 +#define SH_QEESR 0x8c +#define SH_QSER 0x90 +#define SH_QSECR 0x94 +#define SH_SIZE 0x200 + +/* Offsets for EDMA CC global registers */ +#define EDMA_REV 0x0000 +#define EDMA_CCCFG 0x0004 +#define EDMA_QCHMAP 0x0200 /* 8 registers */ +#define EDMA_DMAQNUM 0x0240 /* 8 registers (4 on OMAP-L1xx) */ +#define EDMA_QDMAQNUM 0x0260 +#define EDMA_QUETCMAP 0x0280 +#define EDMA_QUEPRI 0x0284 +#define EDMA_EMR 0x0300 /* 64 bits */ +#define EDMA_EMCR 0x0308 /* 64 bits */ +#define EDMA_QEMR 0x0310 +#define EDMA_QEMCR 0x0314 +#define EDMA_CCERR 0x0318 +#define EDMA_CCERRCLR 0x031c +#define EDMA_EEVAL 0x0320 +#define EDMA_DRAE 0x0340 /* 4 x 64 bits*/ +#define EDMA_QRAE 0x0380 /* 4 registers */ +#define EDMA_QUEEVTENTRY 0x0400 /* 2 x 16 registers */ +#define EDMA_QSTAT 0x0600 /* 2 registers */ +#define EDMA_QWMTHRA 0x0620 +#define EDMA_QWMTHRB 0x0624 +#define EDMA_CCSTAT 0x0640 + +#define EDMA_M 0x1000 /* global channel registers */ +#define EDMA_ECR 0x1008 +#define EDMA_ECRH 0x100C +#define EDMA_SHADOW0 0x2000 /* 4 shadow regions */ +#define EDMA_PARM 0x4000 /* PaRAM entries */ + +#define PARM_OFFSET(param_no) (EDMA_PARM + ((param_no) << 5)) + +#define EDMA_DCHMAP 0x0100 /* 64 registers */ + +/* CCCFG register */ +#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */ +#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */ +#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */ +#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ +#define CHMAP_EXIST BIT(24) + /* * This will go away when the private EDMA API is folded * into this driver and the platform device(s) are @@ -60,6 +140,47 @@ #define EDMA_MAX_SLOTS MAX_NR_SG #define EDMA_DESCRIPTORS 16 +#define EDMA_MAX_PARAMENTRY 512 + +#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */ +#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */ +#define EDMA_CONT_PARAMS_ANY 1001 +#define EDMA_CONT_PARAMS_FIXED_EXACT 1002 +#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003 + +#define EDMA_MAX_CC 2 + +/* PaRAM slots are laid out like this */ +struct edmacc_param { + u32 opt; + u32 src; + u32 a_b_cnt; + u32 dst; + u32 src_dst_bidx; + u32 link_bcntrld; + u32 src_dst_cidx; + u32 ccnt; +} __packed; + +/* fields in edmacc_param.opt */ +#define SAM BIT(0) +#define DAM BIT(1) +#define SYNCDIM BIT(2) +#define STATIC BIT(3) +#define 
EDMA_FWID (0x07 << 8) +#define TCCMODE BIT(11) +#define EDMA_TCC(t) ((t) << 12) +#define TCINTEN BIT(20) +#define ITCINTEN BIT(21) +#define TCCHEN BIT(22) +#define ITCCHEN BIT(23) + +/*ch_status parameter of callback function possible values*/ +#define EDMA_DMA_COMPLETE 1 +#define EDMA_DMA_CC_ERROR 2 +#define EDMA_DMA_TC1_ERROR 3 +#define EDMA_DMA_TC2_ERROR 4 + struct edma_pset { u32 len; dma_addr_t addr; @@ -119,14 +240,929 @@ struct edma_chan { }; struct edma_cc { - struct edma *cc; - int ctlr; + struct device *dev; + struct edma_soc_info *info; + void __iomem *base; + int id; + + /* eDMA3 resource information */ + unsigned num_channels; + unsigned num_region; + unsigned num_slots; + unsigned num_tc; + enum dma_event_q default_queue; + + bool unused_chan_list_done; + /* The edma_inuse bit for each PaRAM slot is clear unless the + * channel is in use ... by ARM or DSP, for QDMA, or whatever. + */ + DECLARE_BITMAP(edma_inuse, EDMA_MAX_PARAMENTRY); + + /* The edma_unused bit for each channel is clear unless + * it is not being used on this platform. It uses a bit + * of SOC-specific initialization code. + */ + DECLARE_BITMAP(edma_unused, EDMA_CHANS); + + struct dma_interrupt_data { + void (*callback)(unsigned channel, unsigned short ch_status, + void *data); + void *data; + } intr_data[EDMA_CHANS]; + struct dma_device dma_slave; struct edma_chan slave_chans[EDMA_CHANS]; - int num_slave_chans; int dummy_slot; }; +/* dummy param set used to (re)initialize parameter RAM slots */ +static const struct edmacc_param dummy_paramset = { + .link_bcntrld = 0xffff, + .ccnt = 1, +}; + +static const struct of_device_id edma_of_ids[] = { + { .compatible = "ti,edma3", }, + {} +}; + +static inline unsigned int edma_read(struct edma_cc *ecc, int offset) +{ + return (unsigned int)__raw_readl(ecc->base + offset); +} + +static inline void edma_write(struct edma_cc *ecc, int offset, int val) +{ + __raw_writel(val, ecc->base + offset); +} + +static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and, + unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val &= and; + val |= or; + edma_write(ecc, offset, val); +} + +static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and) +{ + unsigned val = edma_read(ecc, offset); + + val &= and; + edma_write(ecc, offset, val); +} + +static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val |= or; + edma_write(ecc, offset, val); +} + +static inline unsigned int edma_read_array(struct edma_cc *ecc, int offset, + int i) +{ + return edma_read(ecc, offset + (i << 2)); +} + +static inline void edma_write_array(struct edma_cc *ecc, int offset, int i, + unsigned val) +{ + edma_write(ecc, offset + (i << 2), val); +} + +static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i, + unsigned and, unsigned or) +{ + edma_modify(ecc, offset + (i << 2), and, or); +} + +static inline void edma_or_array(struct edma_cc *ecc, int offset, int i, + unsigned or) +{ + edma_or(ecc, offset + (i << 2), or); +} + +static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j, + unsigned or) +{ + edma_or(ecc, offset + ((i * 2 + j) << 2), or); +} + +static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i, + int j, unsigned val) +{ + edma_write(ecc, offset + ((i * 2 + j) << 2), val); +} + +static inline unsigned int edma_shadow0_read(struct edma_cc *ecc, int offset) +{ + return edma_read(ecc, EDMA_SHADOW0 + offset); +} + +static inline unsigned 
int edma_shadow0_read_array(struct edma_cc *ecc, + int offset, int i) +{ + return edma_read(ecc, EDMA_SHADOW0 + offset + (i << 2)); +} + +static inline void edma_shadow0_write(struct edma_cc *ecc, int offset, + unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset, val); +} + +static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset, + int i, unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val); +} + +static inline unsigned int edma_parm_read(struct edma_cc *ecc, int offset, + int param_no) +{ + return edma_read(ecc, EDMA_PARM + offset + (param_no << 5)); +} + +static inline void edma_parm_write(struct edma_cc *ecc, int offset, + int param_no, unsigned val) +{ + edma_write(ecc, EDMA_PARM + offset + (param_no << 5), val); +} + +static inline void edma_parm_modify(struct edma_cc *ecc, int offset, + int param_no, unsigned and, unsigned or) +{ + edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or); +} + +static inline void edma_parm_and(struct edma_cc *ecc, int offset, int param_no, + unsigned and) +{ + edma_and(ecc, EDMA_PARM + offset + (param_no << 5), and); +} + +static inline void edma_parm_or(struct edma_cc *ecc, int offset, int param_no, + unsigned or) +{ + edma_or(ecc, EDMA_PARM + offset + (param_no << 5), or); +} + +static inline void set_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + set_bit(offset + (len - 1), p); +} + +static inline void clear_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + clear_bit(offset + (len - 1), p); +} + +static void edma_map_dmach_to_queue(struct edma_cc *ecc, unsigned ch_no, + enum dma_event_q queue_no) +{ + int bit = (ch_no & 0x7) * 4; + + /* default to low priority queue */ + if (queue_no == EVENTQ_DEFAULT) + queue_no = ecc->default_queue; + + queue_no &= 7; + edma_modify_array(ecc, EDMA_DMAQNUM, (ch_no >> 3), ~(0x7 << bit), + queue_no << bit); +} + +static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no, + int priority) +{ + int bit = queue_no * 4; + + edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit)); +} + +static void edma_direct_dmach_to_param_mapping(struct edma_cc *ecc) +{ + int i; + + for (i = 0; i < ecc->num_channels; i++) + edma_write_array(ecc, EDMA_DCHMAP, i, (i << 5)); +} + +static int prepare_unused_channel_list(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct edma_cc *ecc = data; + int i, count; + struct of_phandle_args dma_spec; + + if (dev->of_node) { + struct platform_device *dma_pdev; + + count = of_property_count_strings(dev->of_node, "dma-names"); + if (count < 0) + return 0; + for (i = 0; i < count; i++) { + if (of_parse_phandle_with_args(dev->of_node, "dmas", + "#dma-cells", i, + &dma_spec)) + continue; + + if (!of_match_node(edma_of_ids, dma_spec.np)) { + of_node_put(dma_spec.np); + continue; + } + + dma_pdev = of_find_device_by_node(dma_spec.np); + if (&dma_pdev->dev != ecc->dev) + continue; + + clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]), + ecc->edma_unused); + of_node_put(dma_spec.np); + } + return 0; + } + + /* For non-OF case */ + for (i = 0; i < pdev->num_resources; i++) { + struct resource *res = &pdev->resource[i]; + + if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0) { + clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start), + ecc->edma_unused); + } + } + + return 0; +} + +static void edma_setup_interrupt(struct edma_cc *ecc, unsigned lch, + void (*callback)(unsigned channel, u16 ch_status, void *data), + void *data) +{ 
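+	/* + * A NULL callback disables this channel's completion interrupt via + * IECR; a non-NULL callback first clears any stale pending bit via + * ICR and then enables the interrupt via IESR, all in shadow region 0. + */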
+ lch = EDMA_CHAN_SLOT(lch); + + if (!callback) + edma_shadow0_write_array(ecc, SH_IECR, lch >> 5, + BIT(lch & 0x1f)); + + ecc->intr_data[lch].callback = callback; + ecc->intr_data[lch].data = data; + + if (callback) { + edma_shadow0_write_array(ecc, SH_ICR, lch >> 5, + BIT(lch & 0x1f)); + edma_shadow0_write_array(ecc, SH_IESR, lch >> 5, + BIT(lch & 0x1f)); + } +} + +/* + * paRAM management functions + */ + +/** + * edma_write_slot - write parameter RAM data for slot + * @ecc: pointer to edma_cc struct + * @slot: number of parameter RAM slot being modified + * @param: data to be written into parameter RAM slot + * + * Use this to assign all parameters of a transfer at once. This + * allows more efficient setup of transfers than issuing multiple + * calls to set up those parameters in small pieces, and provides + * complete control over all transfer options. + */ +static void edma_write_slot(struct edma_cc *ecc, unsigned slot, + const struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE); +} + +/** + * edma_read_slot - read parameter RAM data from slot + * @ecc: pointer to edma_cc struct + * @slot: number of parameter RAM slot being copied + * @param: where to store copy of parameter RAM data + * + * Use this to read data from a parameter RAM slot, perhaps to + * save them as a template for later reuse. + */ +static void edma_read_slot(struct edma_cc *ecc, unsigned slot, + struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + memcpy_fromio(param, ecc->base + PARM_OFFSET(slot), PARM_SIZE); +} + +/** + * edma_alloc_slot - allocate DMA parameter RAM + * @ecc: pointer to edma_cc struct + * @slot: specific slot to allocate; negative for "any unused slot" + * + * This allocates a parameter RAM slot, initializing it to hold a + * dummy transfer. Slots allocated using this routine have not been + * mapped to a hardware DMA channel, and will normally be used by + * linking to them from a slot associated with a DMA channel. + * + * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific + * slots may be allocated on behalf of DSP firmware. + * + * Returns the number of the slot, else negative errno. + */ +static int edma_alloc_slot(struct edma_cc *ecc, int slot) +{ + if (slot > 0) + slot = EDMA_CHAN_SLOT(slot); + if (slot < 0) { + slot = ecc->num_channels; + for (;;) { + slot = find_next_zero_bit(ecc->edma_inuse, + ecc->num_slots, + slot); + if (slot == ecc->num_slots) + return -ENOMEM; + if (!test_and_set_bit(slot, ecc->edma_inuse)) + break; + } + } else if (slot < ecc->num_channels || slot >= ecc->num_slots) { + return -EINVAL; + } else if (test_and_set_bit(slot, ecc->edma_inuse)) { + return -EBUSY; + } + + edma_write_slot(ecc, slot, &dummy_paramset); + + return EDMA_CTLR_CHAN(ecc->id, slot); +} + +/** + * edma_free_slot - deallocate DMA parameter RAM + * @ecc: pointer to edma_cc struct + * @slot: parameter RAM slot returned from edma_alloc_slot() + * + * This deallocates the parameter RAM slot allocated by edma_alloc_slot(). + * Callers are responsible for ensuring the slot is inactive, and will + * not be activated. 
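+ * The slot is rewritten with the dummy PaRAM set before its in-use bit + * is cleared, so a stale link into the slot moves no data.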
+ */ +static void edma_free_slot(struct edma_cc *ecc, unsigned slot) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot < ecc->num_channels || slot >= ecc->num_slots) + return; + + edma_write_slot(ecc, slot, &dummy_paramset); + clear_bit(slot, ecc->edma_inuse); +} + +/** + * edma_link - link one parameter RAM slot to another + * @ecc: pointer to edma_cc struct + * @from: parameter RAM slot originating the link + * @to: parameter RAM slot which is the link target + * + * The originating slot should not be part of any active DMA transfer. + */ +static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to) +{ + from = EDMA_CHAN_SLOT(from); + to = EDMA_CHAN_SLOT(to); + if (from >= ecc->num_slots || to >= ecc->num_slots) + return; + + edma_parm_modify(ecc, PARM_LINK_BCNTRLD, from, 0xffff0000, + PARM_OFFSET(to)); +} + +/** + * edma_get_position - returns the current transfer point + * @ecc: pointer to edma_cc struct + * @slot: parameter RAM slot being examined + * @dst: true selects the dest position, false the source + * + * Returns the position of the current active slot + */ +static dma_addr_t edma_get_position(struct edma_cc *ecc, unsigned slot, + bool dst) +{ + u32 offs; + + slot = EDMA_CHAN_SLOT(slot); + offs = PARM_OFFSET(slot); + offs += dst ? PARM_DST : PARM_SRC; + + return edma_read(ecc, offs); +} + +/*-----------------------------------------------------------------------*/ +/** + * edma_start - start dma on a channel + * @ecc: pointer to edma_cc struct + * @channel: channel being activated + * + * Channels with event associations will be triggered by their hardware + * events, and channels without such associations will be triggered by + * software. (At this writing there is no interface for using software + * triggers except with channels that don't support hardware triggers.) + * + * Returns zero on success, else negative errno. + */ +static int edma_start(struct edma_cc *ecc, unsigned channel) +{ + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return -EINVAL; + } + channel = EDMA_CHAN_SLOT(channel); + + if (channel < ecc->num_channels) { + int j = channel >> 5; + unsigned int mask = BIT(channel & 0x1f); + + /* EDMA channels without event association */ + if (test_bit(channel, ecc->edma_unused)) { + pr_debug("EDMA: ESR%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_ESR, j)); + edma_shadow0_write_array(ecc, SH_ESR, j, mask); + return 0; + } + + /* EDMA channel with event association */ + pr_debug("EDMA: ER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_ER, j)); + /* Clear any pending event or error */ + edma_write_array(ecc, EDMA_ECR, j, mask); + edma_write_array(ecc, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_shadow0_write_array(ecc, SH_EESR, j, mask); + pr_debug("EDMA: EER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_EER, j)); + return 0; + } + + return -EINVAL; +} + +/** + * edma_stop - stops dma on the channel passed + * @ecc: pointer to edma_cc struct + * @channel: channel being deactivated + * + * When @lch is a channel, any active transfer is paused and + * all pending hardware events are cleared. The current transfer + * may not be resumed, and the channel's Parameter RAM should be + * reinitialized before being reused. 
+ */ +static void edma_stop(struct edma_cc *ecc, unsigned channel) +{ + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return; + } + channel = EDMA_CHAN_SLOT(channel); + + if (channel < ecc->num_channels) { + int j = channel >> 5; + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_EECR, j, mask); + edma_shadow0_write_array(ecc, SH_ECR, j, mask); + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_write_array(ecc, EDMA_EMCR, j, mask); + + /* clear possibly pending completion interrupt */ + edma_shadow0_write_array(ecc, SH_ICR, j, mask); + + pr_debug("EDMA: EER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_EER, j)); + + /* REVISIT: consider guarding against inappropriate event + * chaining by overwriting with dummy_paramset. + */ + } +} + +/** + * edma_pause - pause dma on a channel + * @ecc: pointer to edma_cc struct + * @channel: on which edma_start() has been called + * + * This temporarily disables EDMA hardware events on the specified channel, + * preventing them from triggering new transfers on its behalf. + */ +static void edma_pause(struct edma_cc *ecc, unsigned channel) +{ + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return; + } + channel = EDMA_CHAN_SLOT(channel); + + if (channel < ecc->num_channels) { + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_EECR, channel >> 5, mask); + } +} + +/** + * edma_resume - resumes dma on a paused channel + * @ecc: pointer to edma_cc struct + * @channel: on which edma_pause() has been called + * + * This re-enables EDMA hardware events on the specified channel. + */ +static void edma_resume(struct edma_cc *ecc, unsigned channel) +{ + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return; + } + channel = EDMA_CHAN_SLOT(channel); + + if (channel < ecc->num_channels) { + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_EESR, channel >> 5, mask); + } +} + +static int edma_trigger_channel(struct edma_cc *ecc, unsigned channel) +{ + unsigned int mask; + + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return -EINVAL; + } + channel = EDMA_CHAN_SLOT(channel); + mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_ESR, (channel >> 5), mask); + + pr_debug("EDMA: ESR%d %08x\n", (channel >> 5), + edma_shadow0_read_array(ecc, SH_ESR, (channel >> 5))); + return 0; +} + +/****************************************************************************** + * + * This cleans the ParamEntry and brings EDMA back to its initial state if + * media has been removed before EDMA has finished. It is useful for + * removable media. + * Arguments: + * ch_no - channel number + * + * Return: zero on success, or corresponding error number on failure + * + * FIXME this should not be needed ... edma_stop() should suffice.
+ * + *****************************************************************************/ + +static void edma_clean_channel(struct edma_cc *ecc, unsigned channel) +{ + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return; + } + channel = EDMA_CHAN_SLOT(channel); + + if (channel < ecc->num_channels) { + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + pr_debug("EDMA: EMR%d %08x\n", j, + edma_read_array(ecc, EDMA_EMR, j)); + edma_shadow0_write_array(ecc, SH_ECR, j, mask); + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_write(ecc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0)); + } +} + +/** + * edma_alloc_channel - allocate DMA channel and paired parameter RAM + * @ecc: pointer to edma_cc struct + * @channel: specific channel to allocate; negative for "any unmapped channel" + * @callback: optional; to be issued on DMA completion or errors + * @data: passed to callback + * @eventq_no: an EVENTQ_* constant, used to choose which Transfer + * Controller (TC) executes requests using this channel. Use + * EVENTQ_DEFAULT unless you really need a high priority queue. + * + * This allocates a DMA channel and its associated parameter RAM slot. + * The parameter RAM is initialized to hold a dummy transfer. + * + * Normal use is to pass a specific channel number as @channel, to make + * use of hardware events mapped to that channel. When the channel will + * be used only for software triggering or event chaining, channels not + * mapped to hardware events (or mapped to unused events) are preferable. + * + * DMA transfers start from a channel using edma_start(), or by + * chaining. When the transfer described in that channel's parameter RAM + * slot completes, that slot's data may be reloaded through a link. + * + * DMA errors are only reported to the @callback associated with the + * channel driving that transfer, but transfer completion callbacks can + * be sent to another channel under control of the TCC field in + * the option word of the transfer's parameter RAM set. Drivers must not + * use DMA transfer completion callbacks for channels they did not allocate. + * (The same applies to TCC codes used in transfer chaining.) + * + * Returns the number of the channel, else negative errno. + */ +static int edma_alloc_channel(struct edma_cc *ecc, int channel, + void (*callback)(unsigned channel, u16 ch_status, void *data), + void *data, + enum dma_event_q eventq_no) +{ + unsigned done = 0; + int ret = 0; + + if (!ecc->unused_chan_list_done) { + /* + * Scan all the platform devices to find out the EDMA channels + * used and clear them in the unused list, making the rest + * available for ARM usage. 
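+ * The scan runs once per controller; the result is cached in + * unused_chan_list_done.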
+ */ + ret = bus_for_each_dev(&platform_bus_type, NULL, ecc, + prepare_unused_channel_list); + if (ret < 0) + return ret; + + ecc->unused_chan_list_done = true; + } + + if (channel >= 0) { + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", + __func__, ecc->id, EDMA_CTLR(channel)); + return -EINVAL; + } + channel = EDMA_CHAN_SLOT(channel); + } + + if (channel < 0) { + channel = 0; + for (;;) { + channel = find_next_bit(ecc->edma_unused, + ecc->num_channels, channel); + if (channel == ecc->num_channels) + break; + if (!test_and_set_bit(channel, ecc->edma_inuse)) { + done = 1; + break; + } + channel++; + } + if (!done) + return -ENOMEM; + } else if (channel >= ecc->num_channels) { + return -EINVAL; + } else if (test_and_set_bit(channel, ecc->edma_inuse)) { + return -EBUSY; + } + + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); + + /* ensure no events are pending */ + edma_stop(ecc, EDMA_CTLR_CHAN(ecc->id, channel)); + edma_write_slot(ecc, channel, &dummy_paramset); + + if (callback) + edma_setup_interrupt(ecc, EDMA_CTLR_CHAN(ecc->id, channel), + callback, data); + + edma_map_dmach_to_queue(ecc, channel, eventq_no); + + return EDMA_CTLR_CHAN(ecc->id, channel); +} + +/** + * edma_free_channel - deallocate DMA channel + * @ecc: pointer to edma_cc struct + * @channel: dma channel returned from edma_alloc_channel() + * + * This deallocates the DMA channel and associated parameter RAM slot + * allocated by edma_alloc_channel(). + * + * Callers are responsible for ensuring the channel is inactive, and + * will not be reactivated by linking, chaining, or software calls to + * edma_start(). + */ +static void edma_free_channel(struct edma_cc *ecc, unsigned channel) +{ + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return; + } + channel = EDMA_CHAN_SLOT(channel); + + if (channel >= ecc->num_channels) + return; + + edma_setup_interrupt(ecc, channel, NULL, NULL); + /* REVISIT should probably take out of shadow region 0 */ + + memcpy_toio(ecc->base + PARM_OFFSET(channel), &dummy_paramset, + PARM_SIZE); + clear_bit(channel, ecc->edma_inuse); +} + +/* + * edma_assign_channel_eventq - move given channel to desired eventq + * Arguments: + * channel - channel number + * eventq_no - queue to move the channel + * + * Can be used to move a channel to a selected event queue. 
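+ * EVENTQ_DEFAULT falls back to the controller's default queue; a request + * for a queue beyond num_tc is silently ignored.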
+ */ +static void edma_assign_channel_eventq(struct edma_cc *ecc, unsigned channel, + enum dma_event_q eventq_no) +{ + if (ecc->id != EDMA_CTLR(channel)) { + dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__, + ecc->id, EDMA_CTLR(channel)); + return; + } + channel = EDMA_CHAN_SLOT(channel); + + if (channel >= ecc->num_channels) + return; + + /* default to low priority queue */ + if (eventq_no == EVENTQ_DEFAULT) + eventq_no = ecc->default_queue; + if (eventq_no >= ecc->num_tc) + return; + + edma_map_dmach_to_queue(ecc, channel, eventq_no); +} + +static irqreturn_t dma_irq_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int ctlr; + u32 sh_ier; + u32 sh_ipr; + u32 bank; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_dbg(ecc->dev, "dma_irq_handler\n"); + + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0); + if (!sh_ipr) { + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 1); + if (!sh_ipr) + return IRQ_NONE; + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 1); + bank = 1; + } else { + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 0); + bank = 0; + } + + do { + u32 slot; + u32 channel; + + dev_dbg(ecc->dev, "IPR%d %08x\n", bank, sh_ipr); + + slot = __ffs(sh_ipr); + sh_ipr &= ~(BIT(slot)); + + if (sh_ier & BIT(slot)) { + channel = (bank << 5) | slot; + /* Clear the corresponding IPR bits */ + edma_shadow0_write_array(ecc, SH_ICR, bank, BIT(slot)); + if (ecc->intr_data[channel].callback) + ecc->intr_data[channel].callback( + EDMA_CTLR_CHAN(ctlr, channel), + EDMA_DMA_COMPLETE, + ecc->intr_data[channel].data); + } + } while (sh_ipr); + + edma_shadow0_write(ecc, SH_IEVAL, 1); + return IRQ_HANDLED; +} + +/****************************************************************************** + * + * DMA error interrupt handler + * + *****************************************************************************/ +static irqreturn_t dma_ccerr_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int i; + int ctlr; + unsigned int cnt = 0; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_dbg(ecc->dev, "dma_ccerr_handler\n"); + + if ((edma_read_array(ecc, EDMA_EMR, 0) == 0) && + (edma_read_array(ecc, EDMA_EMR, 1) == 0) && + (edma_read(ecc, EDMA_QEMR) == 0) && + (edma_read(ecc, EDMA_CCERR) == 0)) + return IRQ_NONE; + + while (1) { + int j = -1; + + if (edma_read_array(ecc, EDMA_EMR, 0)) + j = 0; + else if (edma_read_array(ecc, EDMA_EMR, 1)) + j = 1; + if (j >= 0) { + dev_dbg(ecc->dev, "EMR%d %08x\n", j, + edma_read_array(ecc, EDMA_EMR, j)); + for (i = 0; i < 32; i++) { + int k = (j << 5) + i; + + if (edma_read_array(ecc, EDMA_EMR, j) & + BIT(i)) { + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, j, + BIT(i)); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, + j, BIT(i)); + if (ecc->intr_data[k].callback) { + ecc->intr_data[k].callback( + EDMA_CTLR_CHAN(ctlr, k), + EDMA_DMA_CC_ERROR, + ecc->intr_data[k].data); + } + } + } + } else if (edma_read(ecc, EDMA_QEMR)) { + dev_dbg(ecc->dev, "QEMR %02x\n", + edma_read(ecc, EDMA_QEMR)); + for (i = 0; i < 8; i++) { + if (edma_read(ecc, EDMA_QEMR) & BIT(i)) { + /* Clear the corresponding IPR bits */ + edma_write(ecc, EDMA_QEMCR, BIT(i)); + edma_shadow0_write(ecc, SH_QSECR, + BIT(i)); + + /* NOTE: not reported!! */ + } + } + } else if (edma_read(ecc, EDMA_CCERR)) { + dev_dbg(ecc->dev, "CCERR %08x\n", + edma_read(ecc, EDMA_CCERR)); + /* FIXME: CCERR.BIT(16) ignored! much better + * to just write CCERRCLR with CCERR value... 
+ */ + for (i = 0; i < 8; i++) { + if (edma_read(ecc, EDMA_CCERR) & BIT(i)) { + /* Clear the corresponding IPR bits */ + edma_write(ecc, EDMA_CCERRCLR, BIT(i)); + + /* NOTE: not reported!! */ + } + } + } + if ((edma_read_array(ecc, EDMA_EMR, 0) == 0) && + (edma_read_array(ecc, EDMA_EMR, 1) == 0) && + (edma_read(ecc, EDMA_QEMR) == 0) && + (edma_read(ecc, EDMA_CCERR) == 0)) + break; + cnt++; + if (cnt > 10) + break; + } + edma_write(ecc, EDMA_EEVAL, 1); + return IRQ_HANDLED; +} + static inline struct edma_cc *to_edma_cc(struct dma_device *d) { return container_of(d, struct edma_cc, dma_slave); @@ -137,8 +1173,7 @@ static inline struct edma_chan *to_edma_chan(struct dma_chan *c) return container_of(c, struct edma_chan, vchan.chan); } -static inline struct edma_desc -*to_edma_desc(struct dma_async_tx_descriptor *tx) +static inline struct edma_desc *to_edma_desc(struct dma_async_tx_descriptor *tx) { return container_of(tx, struct edma_desc, vdesc.tx); } @@ -151,7 +1186,7 @@ static void edma_desc_free(struct virt_dma_desc *vdesc) /* Dispatch a queued descriptor to the controller (caller holds lock) */ static void edma_execute(struct edma_chan *echan) { - struct edma *cc = echan->ecc->cc; + struct edma_cc *ecc = echan->ecc; struct virt_dma_desc *vdesc; struct edma_desc *edesc; struct device *dev = echan->vchan.chan.device->dev; @@ -176,7 +1211,7 @@ static void edma_execute(struct edma_chan *echan) /* Write descriptor PaRAM set(s) */ for (i = 0; i < nslots; i++) { j = i + edesc->processed; - edma_write_slot(cc, echan->slot[i], &edesc->pset[j].param); + edma_write_slot(ecc, echan->slot[i], &edesc->pset[j].param); edesc->sg_len += edesc->pset[j].len; dev_vdbg(echan->vchan.chan.device->dev, "\n pset[%d]:\n" @@ -201,7 +1236,7 @@ static void edma_execute(struct edma_chan *echan) edesc->pset[j].param.link_bcntrld); /* Link to the previous slot if not the last set */ if (i != (nslots - 1)) - edma_link(cc, echan->slot[i], echan->slot[i+1]); + edma_link(ecc, echan->slot[i], echan->slot[i + 1]); } edesc->processed += nslots; @@ -213,9 +1248,9 @@ static void edma_execute(struct edma_chan *echan) */ if (edesc->processed == edesc->pset_nr) { if (edesc->cyclic) - edma_link(cc, echan->slot[nslots-1], echan->slot[1]); + edma_link(ecc, echan->slot[nslots - 1], echan->slot[1]); else - edma_link(cc, echan->slot[nslots-1], + edma_link(ecc, echan->slot[nslots - 1], echan->ecc->dummy_slot); } @@ -226,19 +1261,19 @@ static void edma_execute(struct edma_chan *echan) * transfers of MAX_NR_SG */ dev_dbg(dev, "missed event on channel %d\n", echan->ch_num); - edma_clean_channel(cc, echan->ch_num); - edma_stop(cc, echan->ch_num); - edma_start(cc, echan->ch_num); - edma_trigger_channel(cc, echan->ch_num); + edma_clean_channel(ecc, echan->ch_num); + edma_stop(ecc, echan->ch_num); + edma_start(ecc, echan->ch_num); + edma_trigger_channel(ecc, echan->ch_num); echan->missed = 0; } else if (edesc->processed <= MAX_NR_SG) { dev_dbg(dev, "first transfer starting on channel %d\n", echan->ch_num); - edma_start(cc, echan->ch_num); + edma_start(ecc, echan->ch_num); } else { dev_dbg(dev, "chan: %d: completed %d elements, resuming\n", echan->ch_num, edesc->processed); - edma_resume(cc, echan->ch_num); + edma_resume(ecc, echan->ch_num); } } @@ -256,11 +1291,10 @@ static int edma_terminate_all(struct dma_chan *chan) * echan->edesc is NULL and exit.) 
*/ if (echan->edesc) { - edma_stop(echan->ecc->cc, echan->ch_num); + edma_stop(echan->ecc, echan->ch_num); /* Move the cyclic channel back to default queue */ if (echan->edesc->cyclic) - edma_assign_channel_eventq(echan->ecc->cc, - echan->ch_num, + edma_assign_channel_eventq(echan->ecc, echan->ch_num, EVENTQ_DEFAULT); /* * free the running request descriptor @@ -298,7 +1332,7 @@ static int edma_dma_pause(struct dma_chan *chan) if (!echan->edesc) return -EINVAL; - edma_pause(echan->ecc->cc, echan->ch_num); + edma_pause(echan->ecc, echan->ch_num); return 0; } @@ -306,7 +1340,7 @@ static int edma_dma_resume(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - edma_resume(echan->ecc->cc, echan->ch_num); + edma_resume(echan->ecc, echan->ch_num); return 0; } @@ -322,9 +1356,10 @@ static int edma_dma_resume(struct dma_chan *chan) * @direction: Direction of the transfer */ static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset, - dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, - enum dma_slave_buswidth dev_width, unsigned int dma_length, - enum dma_transfer_direction direction) + dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, + enum dma_slave_buswidth dev_width, + unsigned int dma_length, + enum dma_transfer_direction direction) { struct edma_chan *echan = to_edma_chan(chan); struct device *dev = chan->device->dev; @@ -470,8 +1505,8 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( return NULL; } - edesc = kzalloc(sizeof(*edesc) + sg_len * - sizeof(edesc->pset[0]), GFP_ATOMIC); + edesc = kzalloc(sizeof(*edesc) + sg_len * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__); return NULL; @@ -488,7 +1523,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( for (i = 0; i < nslots; i++) { if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -623,8 +1658,8 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( if (nslots > MAX_NR_SG) return NULL; - edesc = kzalloc(sizeof(*edesc) + nslots * - sizeof(edesc->pset[0]), GFP_ATOMIC); + edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__); return NULL; @@ -643,7 +1678,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( /* Allocate a PaRAM slot, if needed */ if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -704,7 +1739,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( } /* Place the cyclic channel to highest priority queue */ - edma_assign_channel_eventq(echan->ecc->cc, echan->ch_num, EVENTQ_0); + edma_assign_channel_eventq(echan->ecc, echan->ch_num, EVENTQ_0); return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } @@ -712,7 +1747,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( static void edma_callback(unsigned ch_num, u16 ch_status, void *data) { struct edma_chan *echan = data; - struct edma *cc = echan->ecc->cc; + struct edma_cc *ecc = echan->ecc; struct device *dev = echan->vchan.chan.device->dev; struct edma_desc *edesc; struct edmacc_param p; @@ -727,15 +1762,19 @@ static void edma_callback(unsigned 
ch_num, u16 ch_status, void *data) vchan_cyclic_callback(&edesc->vdesc); goto out; } else if (edesc->processed == edesc->pset_nr) { - dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num); + dev_dbg(dev, + "Transfer completed on channel %d\n", + ch_num); edesc->residue = 0; - edma_stop(cc, echan->ch_num); + edma_stop(ecc, echan->ch_num); vchan_cookie_complete(&edesc->vdesc); echan->edesc = NULL; } else { - dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num); + dev_dbg(dev, + "Sub transfer completed on channel %d\n", + ch_num); - edma_pause(cc, echan->ch_num); + edma_pause(ecc, echan->ch_num); /* Update statistics for tx_status */ edesc->residue -= edesc->sg_len; @@ -746,7 +1785,7 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data) } break; case EDMA_DMA_CC_ERROR: - edma_read_slot(cc, echan->slot[0], &p); + edma_read_slot(ecc, echan->slot[0], &p); /* * Issue later based on missed flag which will be sure @@ -761,18 +1800,18 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data) * slot. So we avoid doing so and set the missed flag. */ if (p.a_b_cnt == 0 && p.ccnt == 0) { - dev_dbg(dev, "Error occurred, looks like slot is null, just setting miss\n"); + dev_dbg(dev, "Error on null slot, setting miss\n"); echan->missed = 1; } else { /* * The slot is already programmed but the event got * missed, so its safe to issue it here. */ - dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n"); - edma_clean_channel(cc, echan->ch_num); - edma_stop(cc, echan->ch_num); - edma_start(cc, echan->ch_num); - edma_trigger_channel(cc, echan->ch_num); + dev_dbg(dev, "Missed event, TRIGGERING\n"); + edma_clean_channel(ecc, echan->ch_num); + edma_stop(ecc, echan->ch_num); + edma_start(ecc, echan->ch_num); + edma_trigger_channel(ecc, echan->ch_num); } break; default: @@ -791,7 +1830,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) int a_ch_num; LIST_HEAD(descs); - a_ch_num = edma_alloc_channel(echan->ecc->cc, echan->ch_num, + a_ch_num = edma_alloc_channel(echan->ecc, echan->ch_num, edma_callback, echan, EVENTQ_DEFAULT); if (a_ch_num < 0) { @@ -816,7 +1855,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) return 0; err_wrong_chan: - edma_free_channel(echan->ecc->cc, a_ch_num); + edma_free_channel(echan->ecc, a_ch_num); err_no_chan: return ret; } @@ -829,21 +1868,21 @@ static void edma_free_chan_resources(struct dma_chan *chan) int i; /* Terminate transfers */ - edma_stop(echan->ecc->cc, echan->ch_num); + edma_stop(echan->ecc, echan->ch_num); vchan_free_chan_resources(&echan->vchan); /* Free EDMA PaRAM slots */ for (i = 1; i < EDMA_MAX_SLOTS; i++) { if (echan->slot[i] >= 0) { - edma_free_slot(echan->ecc->cc, echan->slot[i]); + edma_free_slot(echan->ecc, echan->slot[i]); echan->slot[i] = -1; } } /* Free EDMA channel */ if (echan->alloced) { - edma_free_channel(echan->ecc->cc, echan->ch_num); + edma_free_channel(echan->ecc, echan->ch_num); echan->alloced = false; } @@ -873,8 +1912,7 @@ static u32 edma_residue(struct edma_desc *edesc) * We always read the dst/src position from the first RamPar * pset. That's the one which is active now. */ - pos = edma_get_position(edesc->echan->ecc->cc, edesc->echan->slot[0], - dst); + pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst); /* * Cyclic is simple. Just subtract pset[0].addr from pos. 
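The cyclic case referenced above reduces to a single subtraction: the controller wraps within one contiguous buffer, so the bytes still to be moved are the buffer length minus the distance from the buffer base to the position just read from PaRAM slot 0. A minimal sketch of that rule in isolation (hypothetical helper, not the driver's function; the real edma_residue() additionally walks the remaining psets for the slave_sg case):

	/* Remaining bytes of a cyclic transfer; pos is the DMA position read
	 * from PaRAM slot 0 and is assumed to lie inside the buffer. */
	static u32 cyclic_residue(dma_addr_t pos, dma_addr_t buf_addr, u32 buf_len)
	{
		return buf_len - (u32)(pos - buf_addr);
	}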
@@ -935,15 +1973,14 @@ static enum dma_status edma_tx_status(struct dma_chan *chan, return ret; } -static void __init edma_chan_init(struct edma_cc *ecc, - struct dma_device *dma, +static void __init edma_chan_init(struct edma_cc *ecc, struct dma_device *dma, struct edma_chan *echans) { int i, j; for (i = 0; i < EDMA_CHANS; i++) { struct edma_chan *echan = &echans[i]; - echan->ch_num = EDMA_CTLR_CHAN(ecc->ctlr, i); + echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i); echan->ecc = ecc; echan->vchan.desc_free = edma_desc_free; @@ -991,14 +2028,189 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma, INIT_LIST_HEAD(&dma->channels); } +static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, + struct edma_cc *ecc) +{ + int i; + u32 value, cccfg; + s8 (*queue_priority_map)[2]; + + /* Decode the eDMA3 configuration from CCCFG register */ + cccfg = edma_read(ecc, EDMA_CCCFG); + + value = GET_NUM_REGN(cccfg); + ecc->num_region = BIT(value); + + value = GET_NUM_DMACH(cccfg); + ecc->num_channels = BIT(value + 1); + + value = GET_NUM_PAENTRY(cccfg); + ecc->num_slots = BIT(value + 4); + + value = GET_NUM_EVQUE(cccfg); + ecc->num_tc = value + 1; + + dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg); + dev_dbg(dev, "num_region: %u\n", ecc->num_region); + dev_dbg(dev, "num_channels: %u\n", ecc->num_channels); + dev_dbg(dev, "num_slots: %u\n", ecc->num_slots); + dev_dbg(dev, "num_tc: %u\n", ecc->num_tc); + + /* Nothing needs to be done if queue priority is provided */ + if (pdata->queue_priority_mapping) + return 0; + + /* + * Configure TC/queue priority as follows: + * Q0 - priority 0 + * Q1 - priority 1 + * Q2 - priority 2 + * ... + * The meaning of priority numbers: 0 highest priority, 7 lowest + * priority. So Q0 is the highest priority queue and the last queue has + * the lowest priority.
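+ * The generated table is terminated by a {-1, -1} pair, the same + * sentinel a platform-supplied queue_priority_mapping uses.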
+ */ + queue_priority_map = devm_kzalloc(dev, (ecc->num_tc + 1) * sizeof(s8), + GFP_KERNEL); + if (!queue_priority_map) + return -ENOMEM; + + for (i = 0; i < ecc->num_tc; i++) { + queue_priority_map[i][0] = i; + queue_priority_map[i][1] = i; + } + queue_priority_map[i][0] = -1; + queue_priority_map[i][1] = -1; + + pdata->queue_priority_mapping = queue_priority_map; + /* Default queue has the lowest priority */ + pdata->default_queue = i - 1; + + return 0; +} + +#if IS_ENABLED(CONFIG_OF) +static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata, + size_t sz) +{ + const char pname[] = "ti,edma-xbar-event-map"; + struct resource res; + void __iomem *xbar; + s16 (*xbar_chans)[2]; + size_t nelm = sz / sizeof(s16); + u32 shift, offset, mux; + int ret, i; + + xbar_chans = devm_kzalloc(dev, (nelm + 2) * sizeof(s16), GFP_KERNEL); + if (!xbar_chans) + return -ENOMEM; + + ret = of_address_to_resource(dev->of_node, 1, &res); + if (ret) + return -ENOMEM; + + xbar = devm_ioremap(dev, res.start, resource_size(&res)); + if (!xbar) + return -ENOMEM; + + ret = of_property_read_u16_array(dev->of_node, pname, (u16 *)xbar_chans, + nelm); + if (ret) + return -EIO; + + /* Invalidate last entry for the other user of this mess */ + nelm >>= 1; + xbar_chans[nelm][0] = -1; + xbar_chans[nelm][1] = -1; + + for (i = 0; i < nelm; i++) { + shift = (xbar_chans[i][1] & 0x03) << 3; + offset = xbar_chans[i][1] & 0xfffffffc; + mux = readl(xbar + offset); + mux &= ~(0xff << shift); + mux |= xbar_chans[i][0] << shift; + writel(mux, (xbar + offset)); + } + + pdata->xbar_chans = (const s16 (*)[2]) xbar_chans; + return 0; +} + +static int edma_of_parse_dt(struct device *dev, struct edma_soc_info *pdata) +{ + int ret = 0; + struct property *prop; + size_t sz; + struct edma_rsv_info *rsv_info; + + rsv_info = devm_kzalloc(dev, sizeof(struct edma_rsv_info), GFP_KERNEL); + if (!rsv_info) + return -ENOMEM; + pdata->rsv = rsv_info; + + prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", &sz); + if (prop) + ret = edma_xbar_event_map(dev, pdata, sz); + + return ret; +} + +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev) +{ + struct edma_soc_info *info; + int ret; + + info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + ret = edma_of_parse_dt(dev, info); + if (ret) + return ERR_PTR(ret); + + return info; +} +#else +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev) +{ + return ERR_PTR(-EINVAL); +} +#endif + static int edma_probe(struct platform_device *pdev) { - struct edma_cc *ecc; - struct device_node *parent_node = pdev->dev.parent->of_node; - struct platform_device *parent_pdev = - to_platform_device(pdev->dev.parent); + struct edma_soc_info *info = pdev->dev.platform_data; + s8 (*queue_priority_mapping)[2]; + int i, off, ln; + const s16 (*rsv_chans)[2]; + const s16 (*rsv_slots)[2]; + const s16 (*xbar_chans)[2]; + int irq; + char *irq_name; + struct resource *mem; + struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct edma_cc *ecc; int ret; + if (node) { + info = edma_setup_info_from_dt(dev); + if (IS_ERR(info)) { + dev_err(dev, "failed to get DT data\n"); + return PTR_ERR(info); + } + } + + if (!info) + return -ENODEV; + + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get_sync() failed\n"); + return ret; + } + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) return ret; @@ -1009,15 
+2221,123 @@ static int edma_probe(struct platform_device *pdev) return -ENOMEM; } - ecc->cc = edma_get_data(pdev->dev.parent); - if (!ecc->cc) - return -ENODEV; + ecc->dev = dev; + ecc->id = pdev->id; + /* When booting with DT the pdev->id is -1 */ + if (ecc->id < 0) + ecc->id = 0; + + mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc"); + if (!mem) { + dev_dbg(dev, "mem resource not found, using index 0\n"); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(dev, "no mem resource?\n"); + return -ENODEV; + } + } + ecc->base = devm_ioremap_resource(dev, mem); + if (IS_ERR(ecc->base)) + return PTR_ERR(ecc->base); + + platform_set_drvdata(pdev, ecc); + + /* Get eDMA3 configuration from IP */ + ret = edma_setup_from_hw(dev, info, ecc); + if (ret) + return ret; + + ecc->default_queue = info->default_queue; + + for (i = 0; i < ecc->num_slots; i++) + edma_write_slot(ecc, i, &dummy_paramset); + + /* Mark all channels as unused */ + memset(ecc->edma_unused, 0xff, sizeof(ecc->edma_unused)); + + if (info->rsv) { + /* Clear the reserved channels in unused list */ + rsv_chans = info->rsv->rsv_chans; + if (rsv_chans) { + for (i = 0; rsv_chans[i][0] != -1; i++) { + off = rsv_chans[i][0]; + ln = rsv_chans[i][1]; + clear_bits(off, ln, ecc->edma_unused); + } + } + + /* Set the reserved slots in inuse list */ + rsv_slots = info->rsv->rsv_slots; + if (rsv_slots) { + for (i = 0; rsv_slots[i][0] != -1; i++) { + off = rsv_slots[i][0]; + ln = rsv_slots[i][1]; + set_bits(off, ln, ecc->edma_inuse); + } + } + } + + /* Clear the xbar mapped channels in unused list */ + xbar_chans = info->xbar_chans; + if (xbar_chans) { + for (i = 0; xbar_chans[i][1] != -1; i++) { + off = xbar_chans[i][1]; + clear_bits(off, 1, ecc->edma_unused); + } + } + + irq = platform_get_irq_byname(pdev, "edma3_ccint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 0); + + if (irq >= 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret); + return ret; + } + } + + irq = platform_get_irq_byname(pdev, "edma3_ccerrint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 2); + + if (irq >= 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret); + return ret; + } + } + + for (i = 0; i < ecc->num_channels; i++) + edma_map_dmach_to_queue(ecc, i, info->default_queue); + + queue_priority_mapping = info->queue_priority_mapping; + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); - ecc->ctlr = parent_pdev->id; - if (ecc->ctlr < 0) - ecc->ctlr = 0; + /* Map the channel to param entry if channel mapping logic exist */ + if (edma_read(ecc, EDMA_CCCFG) & CHMAP_EXIST) + edma_direct_dmach_to_param_mapping(ecc); - ecc->dummy_slot = edma_alloc_slot(ecc->cc, EDMA_SLOT_ANY); + for (i = 0; i < ecc->num_region; i++) { + edma_write_array2(ecc, EDMA_DRAE, i, 0, 0x0); + edma_write_array2(ecc, EDMA_DRAE, i, 1, 0x0); + edma_write_array(ecc, EDMA_QRAE, i, 0x0); + } + ecc->info = info; + + ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY); if (ecc->dummy_slot < 0) { dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n"); return 
ecc->dummy_slot; @@ -1036,19 +2356,16 @@ static int edma_probe(struct platform_device *pdev) if (ret) goto err_reg1; - platform_set_drvdata(pdev, ecc); - - if (parent_node) { - of_dma_controller_register(parent_node, of_dma_xlate_by_chan_id, + if (node) + of_dma_controller_register(node, of_dma_xlate_by_chan_id, &ecc->dma_slave); - } dev_info(&pdev->dev, "TI EDMA DMA engine driver\n"); return 0; err_reg1: - edma_free_slot(ecc->cc, ecc->dummy_slot); + edma_free_slot(ecc, ecc->dummy_slot); return ret; } @@ -1056,21 +2373,60 @@ static int edma_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct edma_cc *ecc = dev_get_drvdata(dev); - struct device_node *parent_node = pdev->dev.parent->of_node; - if (parent_node) - of_dma_controller_free(parent_node); + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&ecc->dma_slave); - edma_free_slot(ecc->cc, ecc->dummy_slot); + edma_free_slot(ecc, ecc->dummy_slot); return 0; } +#ifdef CONFIG_PM_SLEEP +static int edma_pm_resume(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + int i; + s8 (*queue_priority_mapping)[2]; + + queue_priority_mapping = ecc->info->queue_priority_mapping; + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); + + /* Map the channel to param entry if channel mapping logic */ + if (edma_read(ecc, EDMA_CCCFG) & CHMAP_EXIST) + edma_direct_dmach_to_param_mapping(ecc); + + for (i = 0; i < ecc->num_channels; i++) { + if (test_bit(i, ecc->edma_inuse)) { + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, i >> 5, + BIT(i & 0x1f)); + + edma_setup_interrupt(ecc, EDMA_CTLR_CHAN(ecc->id, i), + ecc->intr_data[i].callback, + ecc->intr_data[i].data); + } + } + + return 0; +} +#endif + +static const struct dev_pm_ops edma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume) +}; + static struct platform_driver edma_driver = { .probe = edma_probe, .remove = edma_remove, .driver = { - .name = "edma-dma-engine", + .name = "edma", + .pm = &edma_pm_ops, + .of_match_table = edma_of_ids, }, }; diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index 466021c03169..6b9d500956e4 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -41,37 +41,6 @@ #ifndef EDMA_H_ #define EDMA_H_ -/* PaRAM slots are laid out like this */ -struct edmacc_param { - u32 opt; - u32 src; - u32 a_b_cnt; - u32 dst; - u32 src_dst_bidx; - u32 link_bcntrld; - u32 src_dst_cidx; - u32 ccnt; -} __packed; - -/* fields in edmacc_param.opt */ -#define SAM BIT(0) -#define DAM BIT(1) -#define SYNCDIM BIT(2) -#define STATIC BIT(3) -#define EDMA_FWID (0x07 << 8) -#define TCCMODE BIT(11) -#define EDMA_TCC(t) ((t) << 12) -#define TCINTEN BIT(20) -#define ITCINTEN BIT(21) -#define TCCHEN BIT(22) -#define ITCCHEN BIT(23) - -/*ch_status paramater of callback function possible values*/ -#define EDMA_DMA_COMPLETE 1 -#define EDMA_DMA_CC_ERROR 2 -#define EDMA_DMA_TC1_ERROR 3 -#define EDMA_DMA_TC2_ERROR 4 - enum dma_event_q { EVENTQ_0 = 0, EVENTQ_1 = 1, @@ -84,49 +53,6 @@ enum dma_event_q { #define EDMA_CTLR(i) ((i) >> 16) #define EDMA_CHAN_SLOT(i) ((i) & 0xffff) -#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */ -#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */ -#define EDMA_CONT_PARAMS_ANY 1001 -#define EDMA_CONT_PARAMS_FIXED_EXACT 1002 -#define 
EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003 - -#define EDMA_MAX_CC 2 - -struct edma; - -struct edma *edma_get_data(struct device *edma_dev); - -/* alloc/free DMA channels and their dedicated parameter RAM slots */ -int edma_alloc_channel(struct edma *cc, int channel, - void (*callback)(unsigned channel, u16 ch_status, void *data), - void *data, enum dma_event_q); -void edma_free_channel(struct edma *cc, unsigned channel); - -/* alloc/free parameter RAM slots */ -int edma_alloc_slot(struct edma *cc, int slot); -void edma_free_slot(struct edma *cc, unsigned slot); - -/* calls that operate on part of a parameter RAM slot */ -dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst); -void edma_link(struct edma *cc, unsigned from, unsigned to); - -/* calls that operate on an entire parameter RAM slot */ -void edma_write_slot(struct edma *cc, unsigned slot, - const struct edmacc_param *params); -void edma_read_slot(struct edma *cc, unsigned slot, - struct edmacc_param *params); - -/* channel control operations */ -int edma_start(struct edma *cc, unsigned channel); -void edma_stop(struct edma *cc, unsigned channel); -void edma_clean_channel(struct edma *cc, unsigned channel); -void edma_pause(struct edma *cc, unsigned channel); -void edma_resume(struct edma *cc, unsigned channel); -int edma_trigger_channel(struct edma *cc, unsigned channel); - -void edma_assign_channel_eventq(struct edma *cc, unsigned channel, - enum dma_event_q eventq_no); - struct edma_rsv_info { const s16 (*rsv_chans)[2]; -- cgit v1.2.3 From 1a7caca20ed56a80cea045327deaeb4e4379cbd1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 28 Aug 2015 10:39:20 -0700 Subject: soc: qcom: smd: Implement id_table driver matching Implement an id_table-based driver matching mechanism for drivers that bind to fixed channels and don't need any additional configuration, e.g. IPCRTR and DIAG. Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/smd.c | 25 ++++++++++++++++++------- include/linux/soc/qcom/smd.h | 11 +++++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c index a6155c917d52..d883c16d1775 100644 --- a/drivers/soc/qcom/smd.c +++ b/drivers/soc/qcom/smd.c @@ -727,6 +727,19 @@ static struct qcom_smd_driver *to_smd_driver(struct device *dev) static int qcom_smd_dev_match(struct device *dev, struct device_driver *drv) { + struct qcom_smd_device *qsdev = to_smd_device(dev); + struct qcom_smd_driver *qsdrv = container_of(drv, struct qcom_smd_driver, driver); + const struct qcom_smd_id *match = qsdrv->smd_match_table; + const char *name = qsdev->channel->name; + + if (match) { + while (match->name[0]) { + if (!strcmp(match->name, name)) + return 1; + match++; + } + } + return of_driver_match_device(dev, drv); } @@ -880,19 +893,17 @@ static int qcom_smd_create_device(struct qcom_smd_channel *channel) if (channel->qsdev) return -EEXIST; - node = qcom_smd_match_channel(edge->of_node, channel->name); - if (!node) { - dev_dbg(smd->dev, "no match for '%s'\n", channel->name); - return -ENXIO; - } - dev_dbg(smd->dev, "registering '%s'\n", channel->name); qsdev = kzalloc(sizeof(*qsdev), GFP_KERNEL); if (!qsdev) return -ENOMEM; - dev_set_name(&qsdev->dev, "%s.%s", edge->of_node->name, node->name); + node = qcom_smd_match_channel(edge->of_node, channel->name); + dev_set_name(&qsdev->dev, "%s.%s", + edge->of_node->name, + node ?
node->name : channel->name); + qsdev->dev.parent = smd->dev; qsdev->dev.bus = &qcom_smd_bus; qsdev->dev.release = qcom_smd_release_device; diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index d7e50aa6a4ac..d0cb6d189a0a 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -8,6 +8,14 @@ struct qcom_smd; struct qcom_smd_channel; struct qcom_smd_lookup; +/** + * struct qcom_smd_id - struct used for matching a smd device + * @name: name of the channel + */ +struct qcom_smd_id { + char name[20]; +}; + /** * struct qcom_smd_device - smd device struct * @dev: the device struct @@ -21,6 +29,7 @@ struct qcom_smd_device { /** * struct qcom_smd_driver - smd driver struct * @driver: underlying device driver + * @smd_match_table: static channel match table * @probe: invoked when the smd channel is found * @remove: invoked when the smd channel is closed * @callback: invoked when an inbound message is received on the channel, @@ -29,6 +38,8 @@ struct qcom_smd_device { */ struct qcom_smd_driver { struct device_driver driver; + const struct qcom_smd_id *smd_match_table; + int (*probe)(struct qcom_smd_device *dev); void (*remove)(struct qcom_smd_device *dev); int (*callback)(struct qcom_smd_device *, const void *, size_t); -- cgit v1.2.3 From 1a03964dec3cecb6382d172b9dfe318735c2cad7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 2 Sep 2015 15:46:44 -0700 Subject: soc: qcom: Make qcom_smem_get() return a pointer Passing a void ** almost always requires a cast at the call site. Instead of littering the code with casts every time this function is called, have qcom_smem_get() return a void pointer to the location of the smem item. This frees the caller from having to cast the pointer. Cc: Bjorn Andersson Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/smd.c | 30 +++++++++--------- drivers/soc/qcom/smem.c | 72 +++++++++++++++++++------------------------ include/linux/soc/qcom/smem.h | 2 +- 3 files changed, 48 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c index d883c16d1775..beaea1d5169f 100644 --- a/drivers/soc/qcom/smd.c +++ b/drivers/soc/qcom/smd.c @@ -989,10 +989,11 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed spin_lock_init(&channel->recv_lock); init_waitqueue_head(&channel->fblockread_event); - ret = qcom_smem_get(edge->remote_pid, smem_info_item, (void **)&info, - &info_size); - if (ret) + info = qcom_smem_get(edge->remote_pid, smem_info_item, &info_size); + if (IS_ERR(info)) { + ret = PTR_ERR(info); goto free_name_and_channel; + } /* * Use the size of the item to figure out which channel info struct to @@ -1011,10 +1012,11 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed goto free_name_and_channel; } - ret = qcom_smem_get(edge->remote_pid, smem_fifo_item, &fifo_base, - &fifo_size); - if (ret) + fifo_base = qcom_smem_get(edge->remote_pid, smem_fifo_item, &fifo_size); + if (IS_ERR(fifo_base)) { + ret = PTR_ERR(fifo_base); goto free_name_and_channel; + } /* The channel consist of a rx and tx fifo of equal size */ fifo_size /= 2; @@ -1051,16 +1053,13 @@ static void qcom_discover_channels(struct qcom_smd_edge *edge) unsigned long flags; unsigned fifo_id; unsigned info_id; - int ret; int tbl; int i; for (tbl = 0; tbl < SMD_ALLOC_TBL_COUNT; tbl++) { - ret = qcom_smem_get(edge->remote_pid, - smem_items[tbl].alloc_tbl_id, - (void **)&alloc_tbl, - NULL); 
- if (ret < 0) + alloc_tbl = qcom_smem_get(edge->remote_pid, + smem_items[tbl].alloc_tbl_id, NULL); + if (IS_ERR(alloc_tbl)) continue; for (i = 0; i < SMD_ALLOC_TBL_SIZE; i++) { @@ -1238,11 +1237,12 @@ static int qcom_smd_probe(struct platform_device *pdev) int num_edges; int ret; int i = 0; + void *p; /* Wait for smem */ - ret = qcom_smem_get(QCOM_SMEM_HOST_ANY, smem_items[0].alloc_tbl_id, NULL, NULL); - if (ret == -EPROBE_DEFER) - return ret; + p = qcom_smem_get(QCOM_SMEM_HOST_ANY, smem_items[0].alloc_tbl_id, NULL); + if (PTR_ERR(p) == -EPROBE_DEFER) + return PTR_ERR(p); num_edges = of_get_available_child_count(pdev->dev.of_node); array_size = sizeof(*smd) + num_edges * sizeof(struct qcom_smd_edge); diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c index f402a606eb7d..e6d0dae63845 100644 --- a/drivers/soc/qcom/smem.c +++ b/drivers/soc/qcom/smem.c @@ -378,10 +378,9 @@ int qcom_smem_alloc(unsigned host, unsigned item, size_t size) } EXPORT_SYMBOL(qcom_smem_alloc); -static int qcom_smem_get_global(struct qcom_smem *smem, - unsigned item, - void **ptr, - size_t *size) +static void *qcom_smem_get_global(struct qcom_smem *smem, + unsigned item, + size_t *size) { struct smem_header *header; struct smem_region *area; @@ -390,36 +389,32 @@ static int qcom_smem_get_global(struct qcom_smem *smem, unsigned i; if (WARN_ON(item >= SMEM_ITEM_COUNT)) - return -EINVAL; + return ERR_PTR(-EINVAL); header = smem->regions[0].virt_base; entry = &header->toc[item]; if (!entry->allocated) - return -ENXIO; + return ERR_PTR(-ENXIO); - if (ptr != NULL) { - aux_base = entry->aux_base & AUX_BASE_MASK; + aux_base = entry->aux_base & AUX_BASE_MASK; - for (i = 0; i < smem->num_regions; i++) { - area = &smem->regions[i]; + for (i = 0; i < smem->num_regions; i++) { + area = &smem->regions[i]; - if (area->aux_base == aux_base || !aux_base) { - *ptr = area->virt_base + entry->offset; - break; - } + if (area->aux_base == aux_base || !aux_base) { + if (size != NULL) + *size = entry->size; + return area->virt_base + entry->offset; } } - if (size != NULL) - *size = entry->size; - return 0; + return ERR_PTR(-ENOENT); } -static int qcom_smem_get_private(struct qcom_smem *smem, - unsigned host, - unsigned item, - void **ptr, - size_t *size) +static void *qcom_smem_get_private(struct qcom_smem *smem, + unsigned host, + unsigned item, + size_t *size) { struct smem_partition_header *phdr; struct smem_private_entry *hdr; @@ -435,55 +430,54 @@ static int qcom_smem_get_private(struct qcom_smem *smem, dev_err(smem->dev, "Found invalid canary in host %d partition\n", host); - return -EINVAL; + return ERR_PTR(-EINVAL); } if (hdr->item == item) { - if (ptr != NULL) - *ptr = p + sizeof(*hdr) + hdr->padding_hdr; - if (size != NULL) *size = hdr->size - hdr->padding_data; - return 0; + return p + sizeof(*hdr) + hdr->padding_hdr; } p += sizeof(*hdr) + hdr->padding_hdr + hdr->size; } - return -ENOENT; + return ERR_PTR(-ENOENT); } /** * qcom_smem_get() - resolve ptr of size of a smem item * @host: the remote processor, or -1 * @item: smem item handle - * @ptr: pointer to be filled out with address of the item * @size: pointer to be filled out with size of the item * - * Looks up pointer and size of a smem item. + * Looks up smem item and returns pointer to it. Size of smem + * item is returned in @size. 
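+ * On failure an ERR_PTR() is returned and must be checked with IS_ERR(); + * before the smem driver has probed, the return is ERR_PTR(-EPROBE_DEFER).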
*/ -int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size) +void *qcom_smem_get(unsigned host, unsigned item, size_t *size) { unsigned long flags; int ret; + void *ptr = ERR_PTR(-EPROBE_DEFER); if (!__smem) - return -EPROBE_DEFER; + return ptr; ret = hwspin_lock_timeout_irqsave(__smem->hwlock, HWSPINLOCK_TIMEOUT, &flags); if (ret) - return ret; + return ERR_PTR(ret); if (host < SMEM_HOST_COUNT && __smem->partitions[host]) - ret = qcom_smem_get_private(__smem, host, item, ptr, size); + ptr = qcom_smem_get_private(__smem, host, item, size); else - ret = qcom_smem_get_global(__smem, item, ptr, size); + ptr = qcom_smem_get_global(__smem, item, size); hwspin_unlock_irqrestore(__smem->hwlock, &flags); - return ret; + + return ptr; } EXPORT_SYMBOL(qcom_smem_get); @@ -520,11 +514,9 @@ static int qcom_smem_get_sbl_version(struct qcom_smem *smem) { unsigned *versions; size_t size; - int ret; - ret = qcom_smem_get_global(smem, SMEM_ITEM_VERSION, - (void **)&versions, &size); - if (ret < 0) { + versions = qcom_smem_get_global(smem, SMEM_ITEM_VERSION, &size); + if (IS_ERR(versions)) { dev_err(smem->dev, "Unable to read the version item\n"); return -ENOENT; } diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index bc9630d3aced..785e196ee2ca 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -4,7 +4,7 @@ #define QCOM_SMEM_HOST_ANY -1 int qcom_smem_alloc(unsigned host, unsigned item, size_t size); -int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size); +void *qcom_smem_get(unsigned host, unsigned item, size_t *size); int qcom_smem_get_free_space(unsigned host); -- cgit v1.2.3 From 2d3c277ca5b1a9c12cde1f760ff925b87608bc76 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 29 Sep 2015 15:48:55 -0400 Subject: qcom-scm: add missing prototype for qcom_scm_is_available() Signed-off-by: Rob Clark Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 6e7d5ec65838..9e12000914b3 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -23,6 +23,8 @@ struct qcom_scm_hdcp_req { u32 val; }; +extern bool qcom_scm_is_available(void); + extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); -- cgit v1.2.3 From 2f8e2c877784a0b23f02b41550170a24e14f5c95 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Aug 2015 09:27:14 +0200 Subject: move io-64-nonatomic*.h out of asm-generic These are not implementations of default architecture code but helpers for drivers. Move them to the place they belong to. 
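On 32-bit builds these headers implement readq()/writeq() as two readl()/writel() accesses, and a driver picks the header whose half-word ordering its hardware latches correctly. A minimal sketch of the consumer side under the new location (hypothetical device and register offset):

	#include <linux/io.h>
	#include <linux/io-64-nonatomic-lo-hi.h>	/* low word first, as i40e requests */

	#define MYDEV_CYCLE_COUNT	0x40		/* hypothetical 64-bit counter register */

	static u64 mydev_read_cycles(void __iomem *base)
	{
		/* Native readq() where the architecture has one; otherwise two
		 * readl()s, low word then high, via lo_hi_readq(). */
		return readq(base + MYDEV_CYCLE_COUNT);
	}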
Signed-off-by: Christoph Hellwig Acked-by: Darren Hart Acked-by: Hitoshi Mitake Signed-off-by: Arnd Bergmann --- drivers/acpi/nfit.c | 2 +- drivers/acpi/osl.c | 2 +- drivers/block/nvme-core.c | 2 +- drivers/edac/i3200_edac.c | 2 +- drivers/edac/ie31200_edac.c | 2 +- drivers/edac/x38_edac.c | 2 +- drivers/i2c/busses/i2c-ismt.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_osdep.h | 2 +- drivers/net/ethernet/intel/i40evf/i40e_osdep.h | 2 +- drivers/net/ethernet/rocker/rocker.c | 2 +- drivers/platform/x86/ibm_rtl.c | 2 +- drivers/platform/x86/intel_ips.c | 2 +- drivers/scsi/qla4xxx/ql4_nx.c | 2 +- include/asm-generic/io-64-nonatomic-hi-lo.h | 32 -------------------------- include/asm-generic/io-64-nonatomic-lo-hi.h | 32 -------------------------- include/linux/io-64-nonatomic-hi-lo.h | 32 ++++++++++++++++++++++++++ include/linux/io-64-nonatomic-lo-hi.h | 32 ++++++++++++++++++++++++++ 17 files changed, 77 insertions(+), 77 deletions(-) delete mode 100644 include/asm-generic/io-64-nonatomic-hi-lo.h delete mode 100644 include/asm-generic/io-64-nonatomic-lo-hi.h create mode 100644 include/linux/io-64-nonatomic-hi-lo.h create mode 100644 include/linux/io-64-nonatomic-lo-hi.h (limited to 'include/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index c1b8d03e262e..a14ee291d1a7 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -27,7 +27,7 @@ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is * irrelevant. */ -#include +#include static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 739a4a6b3b9b..7d0848190b75 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -43,7 +43,7 @@ #include #include -#include +#include #include "internal.h" diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 6f04771f1019..c962527305f7 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #define NVME_MINORS (1U << MINORBITS) #define NVME_Q_DEPTH 1024 diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 4ad062b0ef26..1f453382258a 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -15,7 +15,7 @@ #include #include "edac_core.h" -#include +#include #define I3200_REVISION "1.1" diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index a981dc6fd88e..18d77ace4813 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include "edac_core.h" #define IE31200_REVISION "1.0" diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index 7c5cdc62f31c..314cf5cf268c 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include "edac_core.h" #define X38_REVISION "1.1" diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index f994712d0904..39becbbdfd99 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -67,7 +67,7 @@ #include #include -#include +#include /* PCI Address Constants */ #define SMBBAR 0 diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h index ad802dd0f67a..5b6feb7edeb1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h @@ -35,7 +35,7 @@ #include /* get readq/writeq support for 32 bit kernels, use the low-first 
version */ -#include +#include /* File to be the magic between shared code and * actual OS primitives diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h index 21a91b14bf81..5e314fd3c016 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h @@ -34,7 +34,7 @@ #include /* get readq/writeq support for 32 bit kernels, use the low-first version */ -#include +#include /* File to be the magic between shared code and * actual OS primitives diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 34ac41ac9e61..7ca1abbc0d05 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include "rocker.h" diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index 97c2be195efc..c62e5e11ca4b 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -33,7 +33,7 @@ #include #include -#include +#include static bool force; module_param(force, bool, 0); diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index e2065e06a3f3..55663b3d7282 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -78,7 +78,7 @@ #include #include "intel_ips.h" -#include +#include #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32 diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index 7c3365864242..ae87d6c19f17 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -12,7 +12,7 @@ #include "ql4_glbl.h" #include "ql4_inline.h" -#include +#include #define TIMEOUT_100_MS 100 #define MASK(n) DMA_BIT_MASK(n) diff --git a/include/asm-generic/io-64-nonatomic-hi-lo.h b/include/asm-generic/io-64-nonatomic-hi-lo.h deleted file mode 100644 index 2e29d13fc154..000000000000 --- a/include/asm-generic/io-64-nonatomic-hi-lo.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _ASM_IO_64_NONATOMIC_HI_LO_H_ -#define _ASM_IO_64_NONATOMIC_HI_LO_H_ - -#include -#include - -static inline __u64 hi_lo_readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - high = readl(p + 1); - low = readl(p); - - return low + ((u64)high << 32); -} - -static inline void hi_lo_writeq(__u64 val, volatile void __iomem *addr) -{ - writel(val >> 32, addr + 4); - writel(val, addr); -} - -#ifndef readq -#define readq hi_lo_readq -#endif - -#ifndef writeq -#define writeq hi_lo_writeq -#endif - -#endif /* _ASM_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/asm-generic/io-64-nonatomic-lo-hi.h b/include/asm-generic/io-64-nonatomic-lo-hi.h deleted file mode 100644 index 0efacff0a1ce..000000000000 --- a/include/asm-generic/io-64-nonatomic-lo-hi.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _ASM_IO_64_NONATOMIC_LO_HI_H_ -#define _ASM_IO_64_NONATOMIC_LO_HI_H_ - -#include -#include - -static inline __u64 lo_hi_readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - low = readl(p); - high = readl(p + 1); - - return low + ((u64)high << 32); -} - -static inline void lo_hi_writeq(__u64 val, volatile void __iomem *addr) -{ - writel(val, addr); - writel(val >> 32, addr + 4); -} - -#ifndef readq -#define readq lo_hi_readq -#endif - -#ifndef writeq -#define writeq lo_hi_writeq -#endif - -#endif /* _ASM_IO_64_NONATOMIC_LO_HI_H_ */ diff --git a/include/linux/io-64-nonatomic-hi-lo.h 
b/include/linux/io-64-nonatomic-hi-lo.h new file mode 100644 index 000000000000..11d7e840d913 --- /dev/null +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_IO_64_NONATOMIC_HI_LO_H_ +#define _LINUX_IO_64_NONATOMIC_HI_LO_H_ + +#include +#include + +static inline __u64 hi_lo_readq(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + high = readl(p + 1); + low = readl(p); + + return low + ((u64)high << 32); +} + +static inline void hi_lo_writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val >> 32, addr + 4); + writel(val, addr); +} + +#ifndef readq +#define readq hi_lo_readq +#endif + +#ifndef writeq +#define writeq hi_lo_writeq +#endif + +#endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h new file mode 100644 index 000000000000..1a4315f97360 --- /dev/null +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_IO_64_NONATOMIC_LO_HI_H_ +#define _LINUX_IO_64_NONATOMIC_LO_HI_H_ + +#include +#include + +static inline __u64 lo_hi_readq(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + low = readl(p); + high = readl(p + 1); + + return low + ((u64)high << 32); +} + +static inline void lo_hi_writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val, addr); + writel(val >> 32, addr + 4); +} + +#ifndef readq +#define readq lo_hi_readq +#endif + +#ifndef writeq +#define writeq lo_hi_writeq +#endif + +#endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ -- cgit v1.2.3 From a1164a3ac75feeab86f6c02fabdfbf24b81e3c1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Aug 2015 09:27:15 +0200 Subject: move count_zeros.h out of asm-generic This header contains a few helpers currently used only by the mpi implementation, not a default implementation of architecture code. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann --- include/asm-generic/bitops/count_zeros.h | 57 -------------------------------- include/linux/count_zeros.h | 57 ++++++++++++++++++++++++++++++++ lib/mpi/longlong.h | 2 +- lib/mpi/mpicoder.c | 2 +- 4 files changed, 59 insertions(+), 59 deletions(-) delete mode 100644 include/asm-generic/bitops/count_zeros.h create mode 100644 include/linux/count_zeros.h (limited to 'include/linux') diff --git a/include/asm-generic/bitops/count_zeros.h b/include/asm-generic/bitops/count_zeros.h deleted file mode 100644 index 97520d21fe62..000000000000 --- a/include/asm-generic/bitops/count_zeros.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Count leading and trailing zeros functions - * - * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _ASM_GENERIC_BITOPS_COUNT_ZEROS_H_ -#define _ASM_GENERIC_BITOPS_COUNT_ZEROS_H_ - -#include - -/** - * count_leading_zeros - Count the number of zeros from the MSB back - * @x: The value - * - * Count the number of leading zeros from the MSB going towards the LSB in @x. - * - * If the MSB of @x is set, the result is 0. - * If only the LSB of @x is set, then the result is BITS_PER_LONG-1. - * If @x is 0 then the result is COUNT_LEADING_ZEROS_0.
- */ -static inline int count_leading_zeros(unsigned long x) -{ - if (sizeof(x) == 4) - return BITS_PER_LONG - fls(x); - else - return BITS_PER_LONG - fls64(x); -} - -#define COUNT_LEADING_ZEROS_0 BITS_PER_LONG - -/** - * count_trailing_zeros - Count the number of zeros from the LSB forwards - * @x: The value - * - * Count the number of trailing zeros from the LSB going towards the MSB in @x. - * - * If the LSB of @x is set, the result is 0. - * If only the MSB of @x is set, then the result is BITS_PER_LONG-1. - * If @x is 0 then the result is COUNT_TRAILING_ZEROS_0. - */ -static inline int count_trailing_zeros(unsigned long x) -{ -#define COUNT_TRAILING_ZEROS_0 (-1) - - if (sizeof(x) == 4) - return ffs(x); - else - return (x != 0) ? __ffs(x) : COUNT_TRAILING_ZEROS_0; -} - -#endif /* _ASM_GENERIC_BITOPS_COUNT_ZEROS_H_ */ diff --git a/include/linux/count_zeros.h b/include/linux/count_zeros.h new file mode 100644 index 000000000000..363da78c4f64 --- /dev/null +++ b/include/linux/count_zeros.h @@ -0,0 +1,57 @@ +/* Count leading and trailing zeros functions + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_BITOPS_COUNT_ZEROS_H_ +#define _LINUX_BITOPS_COUNT_ZEROS_H_ + +#include + +/** + * count_leading_zeros - Count the number of zeros from the MSB back + * @x: The value + * + * Count the number of leading zeros from the MSB going towards the LSB in @x. + * + * If the MSB of @x is set, the result is 0. + * If only the LSB of @x is set, then the result is BITS_PER_LONG-1. + * If @x is 0 then the result is COUNT_LEADING_ZEROS_0. + */ +static inline int count_leading_zeros(unsigned long x) +{ + if (sizeof(x) == 4) + return BITS_PER_LONG - fls(x); + else + return BITS_PER_LONG - fls64(x); +} + +#define COUNT_LEADING_ZEROS_0 BITS_PER_LONG + +/** + * count_trailing_zeros - Count the number of zeros from the LSB forwards + * @x: The value + * + * Count the number of trailing zeros from the LSB going towards the MSB in @x. + * + * If the LSB of @x is set, the result is 0. + * If only the MSB of @x is set, then the result is BITS_PER_LONG-1. + * If @x is 0 then the result is COUNT_TRAILING_ZEROS_0. + */ +static inline int count_trailing_zeros(unsigned long x) +{ +#define COUNT_TRAILING_ZEROS_0 (-1) + + if (sizeof(x) == 4) + return ffs(x); + else + return (x != 0) ? __ffs(x) : COUNT_TRAILING_ZEROS_0; +} + +#endif /* _LINUX_BITOPS_COUNT_ZEROS_H_ */ diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index a89d041592c8..b90e255c2a68 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -19,7 +19,7 @@ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. 
*/ -#include +#include /* You have to define the following before including this file: * diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 95c52a95259e..d30549fcc506 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include "mpi-internal.h" #define MAX_EXTERN_MPI_BITS 16384 -- cgit v1.2.3 From 9b3fedde27d3d63055c43c05e8254e252e58ba48 Mon Sep 17 00:00:00 2001 From: Lukasz Anaczkowski Date: Wed, 9 Sep 2015 15:47:28 +0200 Subject: ACPI / tables: Add acpi_subtable_proc to ACPI table parsers ACPI subtable parsing needs to be extended to allow two or more handlers to be run in the same ACPI table walk, thus adding an acpi_subtable_proc structure which stores (i) the ACPI table id, (ii) the handler that processes the table and (iii) a counter of how many items have been processed, and passing it to acpi_parse_entries_array() and acpi_table_parse_entries_array(). This is needed to fix CPU enumeration when APIC/X2APIC entries are interleaved. Signed-off-by: Lukasz Anaczkowski Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Rafael J. Wysocki --- drivers/acpi/tables.c | 93 +++++++++++++++++++++++++++++++++++++++++---------- include/linux/acpi.h | 19 +++++++++-- 2 files changed, 91 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 17a6fa01a338..c1ff58de99e9 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -210,20 +210,39 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) } } -int __init -acpi_parse_entries(char *id, unsigned long table_size, - acpi_tbl_entry_handler handler, +/** + * acpi_parse_entries_array - for each proc entry, find suitable subtables + * + * @id: table id (for debugging purposes) + * @table_size: single entry size + * @table_header: where does the table start? + * @proc: array of acpi_subtable_proc structs, each containing an entry id + * and the handler associated with it + * @proc_num: how many handlers are in @proc? + * @max_entries: how many entries can we process? + * + * For each entry in @proc, find subtables with a matching id and run the + * associated handler on them. The assumption is that there is only a single + * handler for a particular entry id. + * + * On success returns sum of all matching entries for all proc handlers. + * Otherwise, -ENODEV or -EINVAL is returned. + */ +static int __init +acpi_parse_entries_array(char *id, unsigned long table_size, struct acpi_table_header *table_header, - int entry_id, unsigned int max_entries) + struct acpi_subtable_proc *proc, int proc_num, + unsigned int max_entries) { struct acpi_subtable_header *entry; - int count = 0; unsigned long table_end; + int count = 0; + int i; if (acpi_disabled) return -ENODEV; - if (!id || !handler) + if (!id) return -EINVAL; if (!table_size) @@ -243,20 +262,27 @@ acpi_parse_entries(char *id, unsigned long table_size, while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) < table_end) { - if (entry->type == entry_id - && (!max_entries || count < max_entries)) { - if (handler(entry, table_end)) + if (max_entries && count >= max_entries) + break; + + for (i = 0; i < proc_num; i++) { + if (entry->type != proc[i].id) + continue; + if (!proc[i].handler || proc[i].handler(entry, table_end)) return -EINVAL; - count++; + proc[i].count++; + break; } + if (i != proc_num) + count++; /* * If entry->length is 0, break from this loop to avoid * infinite loop.
*/ if (entry->length == 0) { - pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, entry_id); + pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id); return -EINVAL; } @@ -266,17 +292,32 @@ acpi_parse_entries(char *id, unsigned long table_size, if (max_entries && count > max_entries) { pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n", - id, entry_id, count - max_entries, count); + id, proc->id, count - max_entries, count); } return count; } int __init -acpi_table_parse_entries(char *id, +acpi_parse_entries(char *id, + unsigned long table_size, + acpi_tbl_entry_handler handler, + struct acpi_table_header *table_header, + int entry_id, unsigned int max_entries) +{ + struct acpi_subtable_proc proc = { + .id = entry_id, + .handler = handler, + }; + + return acpi_parse_entries_array(id, table_size, table_header, + &proc, 1, max_entries); +} + +int __init +acpi_table_parse_entries_array(char *id, unsigned long table_size, - int entry_id, - acpi_tbl_entry_handler handler, + struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) { struct acpi_table_header *table_header = NULL; @@ -287,7 +328,7 @@ acpi_table_parse_entries(char *id, if (acpi_disabled) return -ENODEV; - if (!id || !handler) + if (!id) return -EINVAL; if (!strncmp(id, ACPI_SIG_MADT, 4)) @@ -299,13 +340,29 @@ acpi_table_parse_entries(char *id, return -ENODEV; } - count = acpi_parse_entries(id, table_size, handler, table_header, - entry_id, max_entries); + count = acpi_parse_entries_array(id, table_size, table_header, + proc, proc_num, max_entries); early_acpi_os_unmap_memory((char *)table_header, tbl_size); return count; } +int __init +acpi_table_parse_entries(char *id, + unsigned long table_size, + int entry_id, + acpi_tbl_entry_handler handler, + unsigned int max_entries) +{ + struct acpi_subtable_proc proc = { + .id = entry_id, + .handler = handler, + }; + + return acpi_table_parse_entries_array(id, table_size, &proc, 1, + max_entries); +} + int __init acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..b0299f8db660 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -131,6 +131,12 @@ static inline void acpi_initrd_override(void *data, size_t size) (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) +struct acpi_subtable_proc { + int id; + acpi_tbl_entry_handler handler; + int count; +}; + char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); @@ -146,9 +152,16 @@ int __init acpi_parse_entries(char *id, unsigned long table_size, struct acpi_table_header *table_header, int entry_id, unsigned int max_entries); int __init acpi_table_parse_entries(char *id, unsigned long table_size, - int entry_id, - acpi_tbl_entry_handler handler, - unsigned int max_entries); + int entry_id, + acpi_tbl_entry_handler handler, + unsigned int max_entries); +int __init acpi_table_parse_entries(char *id, unsigned long table_size, + int entry_id, + acpi_tbl_entry_handler handler, + unsigned int max_entries); +int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, + struct acpi_subtable_proc *proc, int proc_num, + unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); -- cgit v1.2.3 From 
fd76ee4da55abb21babfc69310d321b9cb9a32e0 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 14 Oct 2015 17:43:45 +0300 Subject: net/mlx5_core: Fix internal error detection conditions The detection of a fatal condition has been updated to take into account the state reported by the device, or an all-ones read of the firmware version, which indicates that the device is not accessible. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 51 ++++++++++++++++++++---- include/linux/mlx5/driver.h | 1 + 2 files changed, 45 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9b81e1ceb8de..f1eb686c45b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -57,6 +57,31 @@ enum { MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10 }; +enum { + MLX5_NIC_IFC_FULL = 0, + MLX5_NIC_IFC_DISABLED = 1, + MLX5_NIC_IFC_NO_DRAM_NIC = 2 +}; + +static u8 get_nic_interface(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; +} + +static int in_fatal(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + + if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED) + return 1; + + if (ioread32be(&h->fw_ver) == 0xffffffff) + return 1; + + return 0; +} + static void health_care(struct work_struct *work) { struct mlx5_core_health *health; @@ -136,11 +161,21 @@ static void print_health_info(struct mlx5_core_dev *dev) dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); } +static unsigned long get_next_poll_jiffies(void) +{ + unsigned long next; + + get_random_bytes(&next, sizeof(next)); + next %= HZ; + next += jiffies + MLX5_HEALTH_POLL_INTERVAL; + + return next; +} + static void poll_health(unsigned long data) { struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; struct mlx5_core_health *health = &dev->priv.health; - unsigned long next; u32 count; count = ioread32be(health->health_counter); @@ -151,14 +186,16 @@ static void poll_health(unsigned long data) health->prev = count; if (health->miss_counter == MAX_MISSES) { - mlx5_core_err(dev, "device's health compromised\n"); + dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n"); print_health_info(dev); - queue_work(health->wq, &health->work); } else { - get_random_bytes(&next, sizeof(next)); - next %= HZ; - next += jiffies + MLX5_HEALTH_POLL_INTERVAL; - mod_timer(&health->timer, next); + mod_timer(&health->timer, get_next_poll_jiffies()); + } + + if (in_fatal(dev) && !health->sick) { + health->sick = true; + print_health_info(dev); + queue_work(health->wq, &health->work); } } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 41a32873f608..62b7d439813d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -393,6 +393,7 @@ struct mlx5_core_health { struct timer_list timer; u32 prev; int miss_counter; + bool sick; struct workqueue_struct *wq; struct work_struct work; }; -- cgit v1.2.3 From 89d44f0a6c732db23b219be708e2fe1e03ee4842 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 14 Oct 2015 17:43:46 +0300 Subject: net/mlx5_core: Add pci error handlers to mlx5_core driver This patch implements the pci_error_handlers for mlx5_core, which allows the driver to recover from PCI errors.
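
Before the mlx5-specific sequence below, a hedged sketch of the generic struct pci_error_handlers contract being implemented; the foo_* names are invented, while the callback signatures and PCI_ERS_* codes come from the PCI core:

#include <linux/pci.h>

static pci_ers_result_t foo_err_detected(struct pci_dev *pdev,
					 pci_channel_state_t state)
{
	/* Quiesce the device; a permanent failure means disconnecting. */
	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t foo_slot_reset(struct pci_dev *pdev)
{
	/* The link has been reset; re-enable and reinitialize the device. */
	if (pci_enable_device(pdev))
		return PCI_ERS_RESULT_DISCONNECT;
	pci_set_master(pdev);
	return PCI_ERS_RESULT_RECOVERED;
}

static void foo_resume(struct pci_dev *pdev)
{
	/* I/O is allowed again; restart normal operation. */
}

static const struct pci_error_handlers foo_err_handler = {
	.error_detected	= foo_err_detected,
	.slot_reset	= foo_slot_reset,
	.resume		= foo_resume,
};
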
Once an error is detected on the PCI bus, mlx5_pci_err_detected is called and it: 1) Marks the device to be in 'Internal Error' state. 2) Dispatches an event to the mlx5_ib to flush all the outstanding cqes with error. 3) Returns all the ongoing commands with error. 4) Unloads the driver. Afterwards, the FW is reset and mlx5_pci_slot_reset is called; it enables the device and restores its PCI state. If the latter succeeds, mlx5_pci_resume is called, and it loads the SW stack. Signed-off-by: Majd Dibbiny Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 170 +++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/health.c | 72 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 182 ++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 4 + .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 13 +- include/linux/mlx5/driver.h | 22 +++ 6 files changed, 451 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c3e54b7e8780..fabfc9e0a948 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -256,8 +256,154 @@ static void dump_buf(void *buf, int size, int data_only, int offset) enum { MLX5_DRIVER_STATUS_ABORTED = 0xfe, + MLX5_DRIVER_SYND = 0xbadd00de, }; +static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, + u32 *synd, u8 *status) +{ + *synd = 0; + *status = 0; + + switch (op) { + case MLX5_CMD_OP_TEARDOWN_HCA: + case MLX5_CMD_OP_DISABLE_HCA: + case MLX5_CMD_OP_MANAGE_PAGES: + case MLX5_CMD_OP_DESTROY_MKEY: + case MLX5_CMD_OP_DESTROY_EQ: + case MLX5_CMD_OP_DESTROY_CQ: + case MLX5_CMD_OP_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_PSV: + case MLX5_CMD_OP_DESTROY_SRQ: + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + case MLX5_CMD_OP_DESTROY_DCT: + case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DEALLOC_PD: + case MLX5_CMD_OP_DEALLOC_UAR: + case MLX5_CMD_OP_DETTACH_FROM_MCG: + case MLX5_CMD_OP_DEALLOC_XRCD: + case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: + case MLX5_CMD_OP_DESTROY_TIR: + case MLX5_CMD_OP_DESTROY_SQ: + case MLX5_CMD_OP_DESTROY_RQ: + case MLX5_CMD_OP_DESTROY_RMP: + case MLX5_CMD_OP_DESTROY_TIS: + case MLX5_CMD_OP_DESTROY_RQT: + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + case MLX5_CMD_OP_DESTROY_FLOW_GROUP: + case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: + return MLX5_CMD_STAT_OK; + + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_INIT_HCA: + case MLX5_CMD_OP_ENABLE_HCA: + case MLX5_CMD_OP_QUERY_PAGES: + case MLX5_CMD_OP_SET_HCA_CAP: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_SET_ISSI: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_CREATE_EQ: + case MLX5_CMD_OP_QUERY_EQ: + case MLX5_CMD_OP_GEN_EQE: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_SQD_RTS_QP: + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_CREATE_PSV: + case
MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_MODIFY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_SET_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: + case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_UAR: + case MLX5_CMD_OP_CONFIG_INT_MODERATION: + case MLX5_CMD_OP_ACCESS_REG: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_MAD_IFC: + case MLX5_CMD_OP_QUERY_MAD_DEMUX: + case MLX5_CMD_OP_SET_MAD_DEMUX: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_ALLOC_XRCD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_MODIFY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_MODIFY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + *status = MLX5_DRIVER_STATUS_ABORTED; + *synd = MLX5_DRIVER_SYND; + return -EIO; + default: + mlx5_core_err(dev, "Unknown FW command (%d)\n", op); + return -EINVAL; + } +} + const char *mlx5_command_str(int command) { switch (command) { @@ -592,6 +738,16 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) return err; } +static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out) +{ + return &out->syndrome; +} + +static u8 *get_status_ptr(struct mlx5_outbox_hdr *out) +{ + return &out->status; +} + /* Notes: * 1. Callback functions may not sleep * 2. 
page queue commands do not support asynchronous completion @@ -1200,6 +1356,11 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, return msg; } +static u16 opcode_from_in(struct mlx5_inbox_hdr *in) +{ + return be16_to_cpu(in->opcode); +} + static int is_manage_pages(struct mlx5_inbox_hdr *in) { return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; @@ -1214,6 +1375,15 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, gfp_t gfp; int err; u8 status = 0; + u32 drv_synd; + + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status); + *get_synd_ptr(out) = cpu_to_be32(drv_synd); + *get_status_ptr(out) = status; + return err; + } pages_queue = is_manage_pages(in); gfp = callback ? GFP_ATOMIC : GFP_KERNEL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index f1eb686c45b1..f5deb642d0d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include "mlx5_core.h" @@ -68,6 +69,29 @@ static u8 get_nic_interface(struct mlx5_core_dev *dev) return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; } +static void trigger_cmd_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; + + /* wait for pending handlers to complete */ + synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} + static int in_fatal(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -82,6 +106,43 @@ static int in_fatal(struct mlx5_core_dev *dev) return 0; } +void mlx5_enter_error_state(struct mlx5_core_dev *dev) +{ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return; + + mlx5_core_err(dev, "start\n"); + if (pci_channel_offline(dev->pdev) || in_fatal(dev)) + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + + mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); + mlx5_core_err(dev, "end\n"); +} + +static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) +{ + u8 nic_interface = get_nic_interface(dev); + + switch (nic_interface) { + case MLX5_NIC_IFC_FULL: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n"); + break; + + case MLX5_NIC_IFC_DISABLED: + mlx5_core_warn(dev, "starting teardown\n"); + break; + + case MLX5_NIC_IFC_NO_DRAM_NIC: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); + break; + default: + mlx5_core_warn(dev, "Expected to see disabled NIC but it has invalid value %d\n", + nic_interface); + } + + mlx5_disable_device(dev); +} + static void health_care(struct work_struct *work) { struct mlx5_core_health *health; @@ -92,6 +153,7 @@ static void health_care(struct work_struct *work) priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); + mlx5_handle_bad_state(dev); } static const char *hsynd_str(u8 synd) { @@
-147,6 +209,10 @@ static void print_health_info(struct mlx5_core_dev *dev) u32 fw; int i; + /* If the syndrome is 0, the device is OK and there is no need to print the buffer */ + if (!ioread8(&h->synd)) + return; + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i)); @@ -178,6 +244,12 @@ static void poll_health(unsigned long data) struct mlx5_core_health *health = &dev->priv.health; u32 count; + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + trigger_cmd_completions(dev); + mod_timer(&health->timer, get_next_poll_jiffies()); + return; + } + count = ioread32be(health->health_counter); if (count == health->prev) ++health->miss_counter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b6edc58766ad..a103a54d6660 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "mlx5_core.h" @@ -181,6 +182,34 @@ static int set_dma_caps(struct pci_dev *pdev) return err; } +static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err = 0; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { + err = pci_enable_device(pdev); + if (!err) + dev->pci_status = MLX5_PCI_STATUS_ENABLED; + } + mutex_unlock(&dev->pci_status_mutex); + + return err; +} + +static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { + pci_disable_device(pdev); + dev->pci_status = MLX5_PCI_STATUS_DISABLED; + } + mutex_unlock(&dev->pci_status_mutex); +} + static int request_bar(struct pci_dev *pdev) { int err = 0; @@ -807,7 +836,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (!priv->dbg_root) return -ENOMEM; - err = pci_enable_device(pdev); + err = mlx5_pci_enable_device(dev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); goto err_dbg; @@ -841,7 +870,7 @@ err_clr_master: pci_clear_master(dev->pdev); release_bar(dev->pdev); err_disable: - pci_disable_device(dev->pdev); + mlx5_pci_disable_device(dev); err_dbg: debugfs_remove(priv->dbg_root); @@ -853,7 +882,7 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) iounmap(dev->iseg); pci_clear_master(dev->pdev); release_bar(dev->pdev); - pci_disable_device(dev->pdev); + mlx5_pci_disable_device(dev); debugfs_remove(priv->dbg_root); } @@ -863,13 +892,25 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) struct pci_dev *pdev = dev->pdev; int err; + mutex_lock(&dev->intf_state_mutex); + if (dev->interface_state == MLX5_INTERFACE_STATE_UP) { + dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", + __func__); + goto out; + } + dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); + /* on load, remove any previous indication of internal error; device is + * up + */ + dev->state = MLX5_DEVICE_STATE_UP; + err = mlx5_cmd_init(dev); if (err) { dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); - return err; + goto out_err; } mlx5_pagealloc_init(dev); @@ -994,6 +1035,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (err) pr_info("failed request module on %s\n", MLX5_IB_MOD); +
dev->interface_state = MLX5_INTERFACE_STATE_UP; +out: + mutex_unlock(&dev->intf_state_mutex); + return 0; err_reg_dev: @@ -1024,7 +1069,7 @@ err_stop_poll: mlx5_stop_health_poll(dev); if (mlx5_cmd_teardown_hca(dev)) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); - return err; + goto out_err; } err_pagealloc_stop: @@ -1040,13 +1085,23 @@ err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); +out_err: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mutex_unlock(&dev->intf_state_mutex); + return err; } static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { - int err; + int err = 0; + mutex_lock(&dev->intf_state_mutex); + if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { + dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", + __func__); + goto out; + } mlx5_unregister_device(dev); mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); @@ -1072,10 +1127,12 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_cmd_cleanup(dev); out: + dev->interface_state = MLX5_INTERFACE_STATE_DOWN; + mutex_unlock(&dev->intf_state_mutex); return err; } -static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param) { struct mlx5_priv *priv = &dev->priv; @@ -1125,6 +1182,8 @@ static int init_one(struct pci_dev *pdev, INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); + mutex_init(&dev->pci_status_mutex); + mutex_init(&dev->intf_state_mutex); err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); @@ -1172,6 +1231,112 @@ static void remove_one(struct pci_dev *pdev) kfree(dev); } +static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + + dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_enter_error_state(dev); + mlx5_unload_one(dev, priv); + mlx5_pci_disable_device(dev); + return state == pci_channel_io_perm_failure ? + PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err = 0; + + dev_info(&pdev->dev, "%s was called\n", __func__); + + err = mlx5_pci_enable_device(dev); + if (err) { + dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" + , __func__, err); + return PCI_ERS_RESULT_DISCONNECT; + } + pci_set_master(pdev); + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + + return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +} + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +/* wait for the device to show vital signs. 
For now we check + * that we can read the device ID and that the health buffer + * shows a non-zero value which is different from 0xffffffff + */ +static void wait_vital(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_health *health = &dev->priv.health; + const int niter = 100; + u32 count; + u16 did; + int i; + + /* Wait for firmware to be ready after reset */ + msleep(1000); + for (i = 0; i < niter; i++) { + if (pci_read_config_word(pdev, 2, &did)) { + dev_warn(&pdev->dev, "failed reading config word\n"); + break; + } + if (did == pdev->device) { + dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i); + break; + } + msleep(50); + } + if (i == niter) + dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); + + for (i = 0; i < niter; i++) { + count = ioread32be(health->health_counter); + if (count && count != 0xffffffff) { + dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); + break; + } + msleep(50); + } + + if (i == niter) + dev_warn(&pdev->dev, "%s-%d: could not read health counter\n", __func__, __LINE__); +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + + pci_save_state(pdev); + wait_vital(pdev); + + err = mlx5_load_one(dev, priv); + if (err) + dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" + , __func__, err); + else + dev_info(&pdev->dev, "%s: device recovered\n", __func__); +} + +static const struct pci_error_handlers mlx5_err_handler = { + .error_detected = mlx5_pci_err_detected, + .slot_reset = mlx5_pci_slot_reset, + .resume = mlx5_pci_resume +}; + static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ @@ -1188,7 +1353,8 @@ static struct pci_driver mlx5_core_driver = { .name = DRIVER_NAME, .id_table = mlx5_core_pci_table, .probe = init_one, - .remove = remove_one + .remove = remove_one, + .err_handler = &mlx5_err_handler }; static int __init init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 30c0be721b08..cee5b7a839bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -86,6 +86,10 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param); +void mlx5_enter_error_state(struct mlx5_core_dev *dev); +void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 76432a510ac2..1cda5d268ec9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -493,15 +493,20 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) struct fw_page *fwp; struct rb_node *p; int nclaimed = 0; - int err; + int err = 0; do { p = rb_first(&dev->priv.page_root); if (p) { fwp = rb_entry(p, struct fw_page, rb_node); - err = reclaim_pages(dev, fwp->func_id, -
optimal_reclaimed_pages(), - &nclaimed); + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + free_4k(dev, fwp->addr); + nclaimed = 1; + } else { + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + &nclaimed); + } if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 62b7d439813d..9aba8d5139fa 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -487,8 +487,26 @@ struct mlx5_priv { spinlock_t ctx_lock; }; +enum mlx5_device_state { + MLX5_DEVICE_STATE_UP, + MLX5_DEVICE_STATE_INTERNAL_ERROR, +}; + +enum mlx5_interface_state { + MLX5_INTERFACE_STATE_DOWN, + MLX5_INTERFACE_STATE_UP, +}; + +enum mlx5_pci_status { + MLX5_PCI_STATUS_DISABLED, + MLX5_PCI_STATUS_ENABLED, +}; + struct mlx5_core_dev { struct pci_dev *pdev; + /* sync pci state */ + struct mutex pci_status_mutex; + enum mlx5_pci_status pci_status; u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; @@ -497,6 +515,10 @@ struct mlx5_core_dev { u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; + enum mlx5_device_state state; + /* sync interface state */ + struct mutex intf_state_mutex; + enum mlx5_interface_state interface_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -- cgit v1.2.3 From e3297246c2c8cf8548ba722da3e3a8104cdcd035 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 14 Oct 2015 17:43:47 +0300 Subject: net/mlx5_core: Wait for FW readiness on startup On device initialization, wait until the firmware indicates that it is done with initialization before proceeding to initialize the device. Also update initialization segment layout to match driver/firmware interface definitions. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 27 ++++++++++++++++++++++++++ include/linux/mlx5/device.h | 3 ++- include/linux/mlx5/driver.h | 5 +++++ 3 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a103a54d6660..2388aec208fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -152,6 +153,25 @@ static struct mlx5_profile profile[] = { }, }; +#define FW_INIT_TIMEOUT_MILI 2000 +#define FW_INIT_WAIT_MS 2 + +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +{ + unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); + int err = 0; + + while (fw_initializing(dev)) { + if (time_after(jiffies, end)) { + err = -EBUSY; + break; + } + msleep(FW_INIT_WAIT_MS); + } + + return err; +} + static int set_dma_caps(struct pci_dev *pdev) { int err; @@ -913,6 +933,13 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out_err; } + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", + FW_INIT_TIMEOUT_MILI); + goto out_err; + } + mlx5_pagealloc_init(dev); err = mlx5_core_enable_hca(dev); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2a0b95662548..0b473cbfa7ef 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -439,7 +439,8 @@ struct mlx5_init_seg { __be32 cmdq_addr_h; __be32 cmdq_addr_l_sz; __be32 cmd_dbell; - __be32 rsvd1[121]; + __be32 rsvd1[120]; + __be32 initializing; struct health_buffer health; __be32 rsvd2[884]; __be32 health_counter; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9aba8d5139fa..5c857f2a20d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -826,6 +826,11 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); +static inline int fw_initializing(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->initializing) >> 31; +} + static inline u32 mlx5_mkey_to_idx(u32 mkey) { return mkey >> 8; -- cgit v1.2.3 From 2b3ddf27f48c8061f0676c5a8796008099945280 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 14 Oct 2015 17:43:48 +0300 Subject: net/mlx4_core: Replace VF zero mac with random mac in mlx4_core By design, when no default MAC addresses are set in the Hypervisor for VFs, the VFs are passed zero-macs. When such a MAC is received by the VF, it generates a random MAC address and registers that MAC address with the Hypervisor. This random mac generation is currently done in the mlx4_en module. There is a problem, though, if the mlx4_ib module is loaded by a VF before the mlx4_en module. In this case, for RoCE, mlx4_ib will see the un-replaced zero-mac and register that zero-mac as part of QP1 initialization. Having a zero-mac in the port's MAC table creates problems for a Baseboard Management Console. The BMC occasionally sends packets with a zero-mac destination MAC. If there is a zero-mac present in the port's MAC table, the FW will send such BMC packets to the host driver rather than to the wire, and BMC will stop working. 
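
For reference, the stock netdev idiom for this situation is sketched below (the helper name is invented; is_valid_ether_addr() and eth_hw_addr_random() are the interfaces the old mlx4_en path used, and the latter also marks the address NET_ADDR_RANDOM). The next paragraph describes how the patch moves this responsibility into mlx4_core:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

static void example_fixup_zero_mac(struct net_device *dev)
{
	/* A zero MAC fails is_valid_ether_addr(); replace it randomly. */
	if (!is_valid_ether_addr(dev->dev_addr)) {
		eth_hw_addr_random(dev);
		netdev_warn(dev, "assigned random MAC %pM\n", dev->dev_addr);
	}
}
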
To address this problem, we move the replacement of zero-mac addresses with random-mac addresses to procedure mlx4_slave_cap(), which is part of the driver startup for VFs, and is before activation of mlx4_ib and mlx4_en. As a result, zero-mac addresses will never be registered in the port MAC table by the driver. In addition, when mlx4_en does initialize the net device, it needs to set the NET_ADDR_RANDOM flag in the netdev structure if the address was randomly generated. This is done so that udev on the VM does not create a new device name after each VF probe (VM boot and such). To accomplish this, we add a per-port flag in mlx4_dev which gets set whenever mlx4_core replaces a zero-mac with a randomly-generated mac. This flag is examined when mlx4_en initializes the net-device. Fix was suggested by Matan Barak Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 23 +++++++++++------------ drivers/net/ethernet/mellanox/mlx4/fw.c | 16 ++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ include/linux/mlx4/device.h | 1 + 5 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 597d8923c8e1..886e1bc86374 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2816,7 +2816,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_priv *priv; int i; int err; - u64 mac_u64; dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv), MAX_TX_RINGS, MAX_RX_RINGS); @@ -2908,17 +2907,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->addr_len = ETH_ALEN; mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]); if (!is_valid_ether_addr(dev->dev_addr)) { - if (mlx4_is_slave(priv->mdev->dev)) { - eth_hw_addr_random(dev); - en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr); - mac_u64 = mlx4_mac_to_u64(dev->dev_addr); - mdev->dev->caps.def_mac[priv->port] = mac_u64; - } else { - en_err(priv, "Port: %d, invalid mac burned: %pM, quitting\n", - priv->port, dev->dev_addr); - err = -EINVAL; - goto out; - } + en_err(priv, "Port: %d, invalid mac burned: %pM, quitting\n", + priv->port, dev->dev_addr); + err = -EINVAL; + goto out; + } else if (mlx4_is_slave(priv->mdev->dev) && + (priv->mdev->dev->port_random_macs & 1 << priv->port)) { + /* Random MAC was assigned in mlx4_slave_cap + * in mlx4_core module + */ + dev->addr_assign_type |= NET_ADDR_RANDOM; + en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr); } memcpy(priv->current_mac, dev->dev_addr, sizeof(priv->current_mac)); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e8ec1dec5789..f13a4d7bbf95 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -2840,3 +2840,19 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) return -EOPNOTSUPP; } EXPORT_SYMBOL(set_phv_bit); + +void mlx4_replace_zero_macs(struct mlx4_dev *dev) +{ + int i; + u8 mac_addr[ETH_ALEN]; + + dev->port_random_macs = 0; + for (i = 1; i <= dev->caps.num_ports; ++i) + if (!dev->caps.def_mac[i] && + dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { + eth_random_addr(mac_addr); + dev->port_random_macs |= 1 << i; + dev->caps.def_mac[i] = mlx4_mac_to_u64(mac_addr); + } +}
+EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 006757f80988..bcbdfab1fe19 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -863,6 +863,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + mlx4_replace_zero_macs(dev); + dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 232b2b55f23b..e1cf9036af22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1378,6 +1378,8 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); void mlx4_init_quotas(struct mlx4_dev *dev); +/* for VFs, replace zero MACs with randomly-generated MACs at driver start */ +void mlx4_replace_zero_macs(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index baad4cb8e9b0..5a8677bafe04 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -833,6 +833,7 @@ struct mlx4_dev { struct mlx4_quotas quotas; struct radix_tree_root qp_table_tree; u8 rev_id; + u8 port_random_macs; char board_id[MLX4_BOARD_ID_LEN]; int numa_node; int oper_log_mgm_entry_size; -- cgit v1.2.3 From d5c373eb5610686162ff50429f63f4c00c554799 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 14 Oct 2015 12:07:55 -0700 Subject: posix_cpu_timer: Convert cputimer->running to bool In the next patch in this series, a new field 'checking_timer' will be added to 'struct thread_group_cputimer'. Both this and the existing 'running' integer field are just used as boolean values. To save space in the structure, we can make both of these fields booleans. This is a preparatory patch to convert the existing running integer field to a boolean. Suggested-by: George Spelvin Signed-off-by: Jason Low Reviewed-by: George Spelvin Cc: Oleg Nesterov Cc: Paul E.
McKenney Cc: Frederic Weisbecker Cc: Davidlohr Bueso Cc: Steven Rostedt Cc: hideaki.kimura@hpe.com Cc: terry.rudd@hpe.com Cc: scott.norton@hpe.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444849677-29330-4-git-send-email-jason.low2@hp.com Signed-off-by: Thomas Gleixner --- include/linux/init_task.h | 2 +- include/linux/sched.h | 6 +++--- kernel/fork.c | 2 +- kernel/time/posix-cpu-timers.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e38681f4912d..c43b80f3f875 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -59,7 +59,7 @@ extern struct fs_struct init_fs; .rlim = INIT_RLIMITS, \ .cputimer = { \ .cputime_atomic = INIT_CPUTIME_ATOMIC, \ - .running = 0, \ + .running = false, \ }, \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..6c8504ade2ba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -617,15 +617,15 @@ struct task_cputime_atomic { /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. - * @running: non-zero when there are timers running and - * @cputime receives updates. + * @running: true when there are timers running and + * @cputime_atomic receives updates. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. */ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; - int running; + bool running; }; #include diff --git a/kernel/fork.c b/kernel/fork.c index 2845623fb582..6ac894244d39 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1101,7 +1101,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); if (cpu_limit != RLIM_INFINITY) { sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); - sig->cputimer.running = 1; + sig->cputimer.running = true; } /* The timer lists. */ diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 6f6e252ec761..2d58153074d9 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -249,7 +249,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) * but barriers are not required because update_gt_cputime() * can handle concurrent updates. */ - WRITE_ONCE(cputimer->running, 1); + WRITE_ONCE(cputimer->running, true); } sample_cputime_atomic(times, &cputimer->cputime_atomic); } @@ -918,7 +918,7 @@ static inline void stop_process_timers(struct signal_struct *sig) struct thread_group_cputimer *cputimer = &sig->cputimer; /* Turn off cputimer->running. This is done without locking. */ - WRITE_ONCE(cputimer->running, 0); + WRITE_ONCE(cputimer->running, false); } static u32 onecputick; -- cgit v1.2.3 From c8d75aa47dd585c9538a8205e9bb9847e12cfb84 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 14 Oct 2015 12:07:56 -0700 Subject: posix_cpu_timer: Reduce unnecessary sighand lock contention It was found while running a database workload on large systems that significant time was spent trying to acquire the sighand lock. The issue was that whenever an itimer expired, many threads ended up simultaneously trying to send the signal. Most of the time, nothing happened after acquiring the sighand lock because another thread had already sent the signal and updated the "next expire" time.
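
The heuristic can be sketched as follows; this is a simplified, hypothetical rendering, not the kernel's code, with all names invented. The flag is written only while holding the lock and is peeked at locklessly, so all but one contending thread skip the lock entirely:

#include <linux/spinlock.h>
#include <linux/compiler.h>

static DEFINE_SPINLOCK(example_lock);
static bool example_checking;	/* written only with example_lock held */

static void example_maybe_check_timers(void)
{
	/* Lockless fastpath: another thread is already doing the check. */
	if (READ_ONCE(example_checking))
		return;

	spin_lock(&example_lock);
	example_checking = true;
	/* ... check and re-arm any expired group timers ... */
	example_checking = false;
	spin_unlock(&example_lock);
}
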
fastpath_timer_check() didn't help much, since the "next expire" time was only updated after the threads had exited fastpath_timer_check(). This patch addresses this by having the thread_group_cputimer structure maintain a boolean to signify when a thread in the group is already checking for process-wide timers, and adds extra logic in the fastpath to check the boolean. Signed-off-by: Jason Low Reviewed-by: Oleg Nesterov Reviewed-by: George Spelvin Cc: Paul E. McKenney Cc: Frederic Weisbecker Cc: Davidlohr Bueso Cc: Steven Rostedt Cc: hideaki.kimura@hpe.com Cc: terry.rudd@hpe.com Cc: scott.norton@hpe.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444849677-29330-5-git-send-email-jason.low2@hp.com Signed-off-by: Thomas Gleixner --- include/linux/init_task.h | 1 + include/linux/sched.h | 3 +++ kernel/time/posix-cpu-timers.c | 26 ++++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index c43b80f3f875..810a34f60424 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -60,6 +60,7 @@ extern struct fs_struct init_fs; .cputimer = { \ .cputime_atomic = INIT_CPUTIME_ATOMIC, \ .running = false, \ + .checking_timer = false, \ }, \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6c8504ade2ba..f87559df5b75 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -619,6 +619,8 @@ struct task_cputime_atomic { * @cputime_atomic: atomic thread group interval timers. * @running: true when there are timers running and * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. @@ -626,6 +628,7 @@ struct task_cputime_atomic { struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; bool running; + bool checking_timer; }; #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 2d58153074d9..f5e86d282d52 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -975,6 +975,12 @@ static void check_process_timers(struct task_struct *tsk, if (!READ_ONCE(tsk->signal->cputimer.running)) return; + /* + * Signify that a thread is checking for process timers. + * Write access to this field is protected by the sighand lock. + */ + sig->cputimer.checking_timer = true; + /* * Collect the current process totals. */ @@ -1029,6 +1035,8 @@ static void check_process_timers(struct task_struct *tsk, sig->cputime_expires.sched_exp = sched_expires; if (task_cputime_zero(&sig->cputime_expires)) stop_process_timers(sig); + + sig->cputimer.checking_timer = false; } /* @@ -1142,8 +1150,22 @@ static inline int fastpath_timer_check(struct task_struct *tsk) } sig = tsk->signal; - /* Check if cputimer is running. This is accessed without locking. */ - if (READ_ONCE(sig->cputimer.running)) { + /* + * Check if thread group timers expired when the cputimer is + * running and no other thread in the group is already checking + * for thread group cputimers. These fields are read without the + * sighand lock. However, this is fine because this is meant to + * be a fastpath heuristic to determine whether we should try to + * acquire the sighand lock to check/handle timers.
+ * + * In the worst case scenario, if 'running' or 'checking_timer' gets + * set but the current thread doesn't see the change yet, we'll wait + * until the next thread in the group gets a scheduler interrupt to + * handle the timer. This isn't an issue in practice because these + * types of delays with signals actually getting sent are expected. + */ + if (READ_ONCE(sig->cputimer.running) && + !READ_ONCE(sig->cputimer.checking_timer)) { struct task_cputime group_sample; sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic); -- cgit v1.2.3 From ae853ddb9ad5e7c01cad3fbf016040acd961f407 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:58:59 +0100 Subject: iommu/vt-d: Introduce intel_iommu=pasid28, and pasid_enabled() macro As long as we use an identity mapping to work around the worst of the hardware bugs which caused us to defeature it and change the definition of the capability bit, we *can* use PASID support on the devices which advertised it in bit 28 of the Extended Capability Register. Allow people to do so with 'intel_iommu=pasid28' on the command line. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 20 ++++++++++++++------ include/linux/intel-iommu.h | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 17b4744628ab..6a01735f4df3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -497,13 +497,21 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int intel_iommu_ecs = 1; +static int intel_iommu_pasid28; +static int iommu_identity_mapping; + +#define IDENTMAP_ALL 1 +#define IDENTMAP_GFX 2 +#define IDENTMAP_AZALIA 4 /* We only actually use ECS when PASID support (on the new bit 40) * is also advertised. Some early implementations — the ones with * PASID support on bit 28 — have issues even when we *only* use * extended root/context tables. 
*/ +#define pasid_enabled(iommu) (ecap_pasid(iommu->ecap) || \ + (intel_iommu_pasid28 && ecap_broken_pasid(iommu->ecap))) #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - ecap_pasid(iommu->ecap)) + pasid_enabled(iommu)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -566,6 +574,11 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable extended context table support\n"); intel_iommu_ecs = 0; + } else if (!strncmp(str, "pasid28", 7)) { + printk(KERN_INFO + "Intel-IOMMU: enable pre-production PASID support\n"); + intel_iommu_pasid28 = 1; + iommu_identity_mapping |= IDENTMAP_GFX; } str += strcspn(str, ","); @@ -2403,11 +2416,6 @@ found_domain: return domain; } -static int iommu_identity_mapping; -#define IDENTMAP_ALL 1 -#define IDENTMAP_GFX 2 -#define IDENTMAP_AZALIA 4 - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 08802b99f057..1d69c1d3aa9a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -121,7 +121,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -/* PASID support used to be on bit 28 */ +#define ecap_broken_pasid(e) ((e >> 28) & 0x1) #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) -- cgit v1.2.3 From 8a94ade4ce6df22006b96c5c9a8d6d12fce67585 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 24 Mar 2015 14:54:56 +0000 Subject: iommu/vt-d: Add initial support for PASID tables Add CONFIG_INTEL_IOMMU_SVM, and allocate PASID tables on supported hardware. Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 8 ++++++ drivers/iommu/Makefile | 1 + drivers/iommu/intel-iommu.c | 14 ++++++++++ drivers/iommu/intel-svm.c | 65 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 15 +++++++++++ 5 files changed, 103 insertions(+) create mode 100644 drivers/iommu/intel-svm.c (limited to 'include/linux') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9da766719c8..e3b2c2e62e32 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -135,6 +135,14 @@ config INTEL_IOMMU and include PCI device scope covered by these DMA remapping devices. +config INTEL_IOMMU_SVM + bool "Support for Shared Virtual Memory with Intel IOMMU" + depends on INTEL_IOMMU && X86 + help + Shared Virtual Memory (SVM) provides a facility for devices + to access DMA resources through process address space by + means of a Process Address Space ID (PASID). 
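(Editorial aside: the pasid_enabled() test added in the previous patch is easy to lose in the diff noise. Here is a compilable restatement of its logic as standalone C. The bit positions, 40 for the architectural PASID capability and 28 for the pre-production one, are the ones named in the patches above; the sample ecap value and the main() harness are invented for the demo.)

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ecap_pasid(e)        (((e) >> 40) & 0x1) /* new location, bit 40 */
    #define ecap_broken_pasid(e) (((e) >> 28) & 0x1) /* old location, bit 28 */

    static bool intel_iommu_pasid28; /* set by intel_iommu=pasid28 */

    static bool pasid_enabled(uint64_t ecap)
    {
        return ecap_pasid(ecap) ||
               (intel_iommu_pasid28 && ecap_broken_pasid(ecap));
    }

    int main(void)
    {
        uint64_t ecap = UINT64_C(1) << 28; /* early silicon: bit 28 only */

        printf("without pasid28: %d\n", pasid_enabled(ecap)); /* prints 0 */
        intel_iommu_pasid28 = true;
        printf("with pasid28:    %d\n", pasid_enabled(ecap)); /* prints 1 */
        return 0;
    }

On hardware with the architectural bit 40 set, the command-line flag is irrelevant; the flag only rescues the early, defeatured implementations.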
+ config INTEL_IOMMU_DEFAULT_ON def_bool y prompt "Enable Intel DMA Remapping Devices by default" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6dcc513d711..dc6f511f45a3 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o +obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6a01735f4df3..ab99fcfd9f2d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1680,6 +1680,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_free_pasid_tables(iommu); +#endif } static struct dmar_domain *alloc_domain(int flags) @@ -3107,6 +3112,10 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif } if (iommu_pass_through) @@ -4122,6 +4131,11 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif + if (dmaru->ignored) { /* * we always have to disable PMRs or DMA may fail on this device diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c new file mode 100644 index 000000000000..5b42a95b3f80 --- /dev/null +++ b/drivers/iommu/intel-svm.c @@ -0,0 +1,65 @@ +/* + * Copyright © 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * Authors: David Woodhouse + */ + +#include + +int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) +{ + struct page *pages; + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate PASID table\n", + iommu->name); + return -ENOMEM; + } + iommu->pasid_table = page_address(pages); + pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); + + if (ecap_dis(iommu->ecap)) { + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (pages) + iommu->pasid_state_table = page_address(pages); + else + pr_warn("IOMMU: %s: Failed to allocate PASID state table\n", + iommu->name); + } + + return 0; +} + +int intel_svm_free_pasid_tables(struct intel_iommu *iommu) +{ + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + if (iommu->pasid_table) { + free_pages((unsigned long)iommu->pasid_table, order); + iommu->pasid_table = NULL; + } + if (iommu->pasid_state_table) { + free_pages((unsigned long)iommu->pasid_state_table, order); + iommu->pasid_state_table = NULL; + } + return 0; +} diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1d69c1d3aa9a..0f38e60d40ad 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -325,6 +325,9 @@ enum { #define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) +struct pasid_entry; +struct pasid_state_entry; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -347,6 +350,15 @@ struct intel_iommu { struct root_entry *root_entry; /* virtual address */ struct iommu_flush flush; +#endif +#ifdef CONFIG_INTEL_IOMMU_SVM + /* These are large and need to be contiguous, so we allocate just + * one for now. We'll maybe want to rethink that if we truly give + * devices away to userspace processes (e.g. for DPDK) and don't + * want to trust that userspace will use *only* the PASID it was + * told to. But while it's all driver-arbitrated, we're fine. */ + struct pasid_entry *pasid_table; + struct pasid_state_entry *pasid_state_table; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -387,6 +399,9 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); +extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); + extern const struct attribute_group *intel_iommu_groups[]; #endif -- cgit v1.2.3 From 2f26e0a9c9860db290d63e9d85c2c8c09813677f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:40:47 +0100 Subject: iommu/vt-d: Add basic SVM PASID support This provides basic PASID support for endpoint devices, tested with a version of the i915 driver. 
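(Editorial aside: to make the API concrete, here is a hypothetical consumer of the calls this patch introduces. 'struct mydev' and the mydev_* helpers are invented for illustration; intel_svm_bind_mm(), intel_svm_unbind_mm() and intel_svm_available() are used with the signatures defined at this point in the series, before a later patch adds 'flags' and 'ops' parameters.)

    #include <linux/device.h>
    #include <linux/errno.h>
    #include <linux/intel-svm.h>

    struct mydev {
        struct device *dev;
        int pasid;
    };

    /* Invented device-specific hooks, stubbed out for the sketch. */
    static void mydev_program_pasid(struct mydev *md) { }
    static void mydev_quiesce(struct mydev *md) { }

    static int mydev_attach_current_mm(struct mydev *md)
    {
        int ret;

        if (!intel_svm_available(md->dev))
            return -ENODEV;

        ret = intel_svm_bind_mm(md->dev, &md->pasid);
        if (ret)
            return ret;

        /* DMA tagged with md->pasid now translates via current->mm. */
        mydev_program_pasid(md);
        return 0;
    }

    static void mydev_detach(struct mydev *md)
    {
        /* No page requests may be outstanding when unbinding. */
        mydev_quiesce(md);
        intel_svm_unbind_mm(md->dev, md->pasid);
    }

Note that binding is per-process: the device ends up sharing whatever current->mm was at bind time, so the bind call belongs in a path like open() or an explicit ioctl issued by the process that will own the PASID.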
Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 1 + drivers/iommu/intel-iommu.c | 104 +++++++++++++++ drivers/iommu/intel-svm.c | 291 ++++++++++++++++++++++++++++++++++++++++++ include/linux/dma_remapping.h | 7 + include/linux/intel-iommu.h | 68 +++++++++- include/linux/intel-svm.h | 82 ++++++++++++ 6 files changed, 548 insertions(+), 5 deletions(-) create mode 100644 include/linux/intel-svm.h (limited to 'include/linux') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 8d23f5ed8ae2..be18b214c31e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -139,6 +139,7 @@ config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on INTEL_IOMMU && X86 select PCI_PASID + select MMU_NOTIFIER help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9995ea84e23a..60b66d27e655 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4929,6 +4929,110 @@ static void intel_iommu_remove_device(struct device *dev) iommu_device_unlink(iommu->iommu_dev, dev); } +#ifdef CONFIG_INTEL_IOMMU_SVM +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) +{ + struct device_domain_info *info; + struct context_entry *context; + struct dmar_domain *domain; + unsigned long flags; + u64 ctx_lo; + int ret; + + domain = get_valid_domain_for_dev(sdev->dev); + if (!domain) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + + ret = -EINVAL; + info = sdev->dev->archdata.iommu; + if (!info || !info->pasid_supported) + goto out; + + context = iommu_context_addr(iommu, info->bus, info->devfn, 0); + if (WARN_ON(!context)) + goto out; + + ctx_lo = context[0].lo; + + sdev->did = domain->iommu_did[iommu->seq_id]; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + + if (!(ctx_lo & CONTEXT_PASIDE)) { + context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + wmb(); + /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both + * extended to permit requests-with-PASID if the PASIDE bit + * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, + * however, the PASIDE bit is ignored and requests-with-PASID + * are unconditionally blocked. Which makes less sense. + * So convert from CONTEXT_TT_PASS_THROUGH to one of the new + * "guest mode" translation types depending on whether ATS + * is available or not. Annoyingly, we can't use the new + * modes *unless* PASIDE is set. 
*/ + if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { + ctx_lo &= ~CONTEXT_TT_MASK; + if (info->ats_supported) + ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; + else + ctx_lo |= CONTEXT_TT_PT_PASID << 2; + } + ctx_lo |= CONTEXT_PASIDE; + context[0].lo = ctx_lo; + wmb(); + iommu->flush.flush_context(iommu, sdev->did, sdev->sid, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + + /* Enable PASID support in the device, if it wasn't already */ + if (!info->pasid_enabled) + iommu_enable_dev_iotlb(info); + + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + ret = 0; + + out: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} + +struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + + if (iommu_dummy(dev)) { + dev_warn(dev, + "No IOMMU translation for device; cannot enable SVM\n"); + return NULL; + } + + iommu = device_to_iommu(dev, &bus, &devfn); + if ((!iommu)) { + dev_dbg(dev, "No IOMMU for device; cannot enable SVM\n"); + return NULL; + } + + if (!iommu->pasid_table) { + dev_dbg(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); + return NULL; + } + + return iommu; +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + static const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 5b42a95b3f80..82d53e15b865 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -14,6 +14,17 @@ */ #include +#include +#include +#include +#include +#include +#include +#include + +struct pasid_entry { + u64 val; +}; int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) { @@ -42,6 +53,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) iommu->name); } + idr_init(&iommu->pasid_idr); + return 0; } @@ -61,5 +74,283 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu) free_pages((unsigned long)iommu->pasid_state_table, order); iommu->pasid_state_table = NULL; } + idr_destroy(&iommu->pasid_idr); return 0; } + +static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, + unsigned long address, int pages, int ih) +{ + struct qi_desc desc; + int mask = ilog2(__roundup_pow_of_two(pages)); + + if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) || + mask > cap_max_amask_val(svm->iommu->cap)) { + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.high = 0; + } else { + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; + desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) | + QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); + } + + qi_submit_sync(&desc, svm->iommu); + + if (sdev->dev_iotlb) { + desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; + if (mask) { + unsigned long adr, delta; + + /* Least significant zero bits in the address indicate the + * range of the request. So mask them out according to the + * size. */ + adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1); + + /* Now ensure that we round down further if the original + * request was not aligned w.r.t. 
its size */ + delta = address - adr; + if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask))) + adr &= ~(1 << (VTD_PAGE_SHIFT + mask)); + desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE; + } else { + desc.high = QI_DEV_EIOTLB_ADDR(address); + } + qi_submit_sync(&desc, svm->iommu); + } +} + +static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, + int pages, int ih) +{ + struct intel_svm_dev *sdev; + + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_flush_svm_range_dev(svm, sdev, address, pages, ih); + rcu_read_unlock(); +} + +static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address, pte_t pte) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1); +} + +static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1); +} + +/* Pages have been freed at this point */ +static void intel_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, start, + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0); +} + + +static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev) +{ + struct qi_desc desc; + + desc.high = 0; + desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid); + + qi_submit_sync(&desc, svm->iommu); +} + +static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + svm->iommu->pasid_table[svm->pasid].val = 0; + + /* There's no need to do any flush because we can't get here if there + * are any devices left anyway. */ + WARN_ON(!list_empty(&svm->devs)); +} + +static const struct mmu_notifier_ops intel_mmuops = { + .release = intel_mm_release, + .change_pte = intel_change_pte, + .invalidate_page = intel_invalidate_page, + .invalidate_range = intel_invalidate_range, +}; + +static DEFINE_MUTEX(pasid_mutex); + +int intel_svm_bind_mm(struct device *dev, int *pasid) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm = NULL; + int pasid_max; + int ret; + + BUG_ON(pasid && !current->mm); + + if (WARN_ON(!iommu)) + return -EINVAL; + + if (dev_is_pci(dev)) { + pasid_max = pci_max_pasids(to_pci_dev(dev)); + if (pasid_max < 0) + return -EINVAL; + } else + pasid_max = 1 << 20; + + mutex_lock(&pasid_mutex); + if (pasid) { + int i; + + idr_for_each_entry(&iommu->pasid_idr, svm, i) { + if (svm->mm != current->mm) + continue; + + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + sdev->users++; + goto success; + } + } + + break; + } + } + + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + ret = intel_iommu_enable_pasid(iommu, sdev); + if (ret || !pasid) { + /* If they don't actually want to assign a PASID, this is + * just an enabling check/preparation. 
*/ + kfree(sdev); + goto out; + } + /* Finish the setup now we know we're keeping it */ + sdev->users = 1; + init_rcu_head(&sdev->rcu); + + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + kfree(sdev); + goto out; + } + svm->iommu = iommu; + + if (pasid_max > 2 << ecap_pss(iommu->ecap)) + pasid_max = 2 << ecap_pss(iommu->ecap); + + ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1, + GFP_KERNEL); + if (ret < 0) { + kfree(svm); + goto out; + } + svm->pasid = ret; + svm->notifier.ops = &intel_mmuops; + svm->mm = get_task_mm(current); + INIT_LIST_HEAD_RCU(&svm->devs); + ret = -ENOMEM; + if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1; + wmb(); + } + list_add_rcu(&sdev->list, &svm->devs); + + success: + *pasid = svm->pasid; + ret = 0; + out: + mutex_unlock(&pasid_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_bind_mm); + +int intel_svm_unbind_mm(struct device *dev, int pasid) +{ + struct intel_svm_dev *sdev; + struct intel_iommu *iommu; + struct intel_svm *svm; + int ret = -EINVAL; + + mutex_lock(&pasid_mutex); + iommu = intel_svm_device_to_iommu(dev); + if (!iommu || !iommu->pasid_table) + goto out; + + svm = idr_find(&iommu->pasid_idr, pasid); + if (!svm) + goto out; + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + ret = 0; + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + /* Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. So it's + * hard to be as defensive as we might like. */ + intel_flush_pasid_dev(svm, sdev); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + mmu_notifier_unregister(&svm->notifier, svm->mm); + + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + mmput(svm->mm); + /* We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. + * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... */ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); + } + } + break; + } + } + out: + mutex_unlock(&pasid_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 7ac17f57250e..0e114bfabeed 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -20,6 +20,13 @@ #define CONTEXT_TT_MULTI_LEVEL 0 #define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 +/* Extended context entry types */ +#define CONTEXT_TT_PT_PASID 4 +#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 +#define CONTEXT_TT_MASK (7ULL << 2) + +#define CONTEXT_PRS (1ULL << 9) +#define CONTEXT_PASIDE (1ULL << 11) struct intel_iommu; struct dmar_domain; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0f38e60d40ad..46add607567b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -1,5 +1,9 @@ /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2015, Intel Corporation. 
+ * + * Authors: Ashok Raj + * Anil S Keshavamurthy + * David Woodhouse * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -13,10 +17,6 @@ * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj - * Author: Anil S Keshavamurthy */ #ifndef _INTEL_IOMMU_H_ @@ -25,7 +25,10 @@ #include #include #include +#include #include +#include +#include #include #include @@ -251,6 +254,9 @@ enum { #define QI_DIOTLB_TYPE 0x3 #define QI_IEC_TYPE 0x4 #define QI_IWD_TYPE 0x5 +#define QI_EIOTLB_TYPE 0x6 +#define QI_PC_TYPE 0x7 +#define QI_DEIOTLB_TYPE 0x8 #define QI_IEC_SELECTIVE (((u64)1) << 4) #define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) @@ -278,6 +284,34 @@ enum { #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 +#define QI_PC_PASID(pasid) (((u64)pasid) << 32) +#define QI_PC_DID(did) (((u64)did) << 16) +#define QI_PC_GRAN(gran) (((u64)gran) << 4) + +#define QI_PC_ALL_PASIDS (QI_PC_TYPE | QI_PC_GRAN(0)) +#define QI_PC_PASID_SEL (QI_PC_TYPE | QI_PC_GRAN(1)) + +#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_EIOTLB_GL(gl) (((u64)gl) << 7) +#define QI_EIOTLB_IH(ih) (((u64)ih) << 6) +#define QI_EIOTLB_AM(am) (((u64)am)) +#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32) +#define QI_EIOTLB_DID(did) (((u64)did) << 16) +#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4) + +#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) +#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) +#define QI_DEV_EIOTLB_GLOB(g) ((u64)g) +#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) +#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) +#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) +#define QI_DEV_EIOTLB_MAX_INVS 32 + +#define QI_GRAN_ALL_ALL 0 +#define QI_GRAN_NONG_ALL 1 +#define QI_GRAN_NONG_PASID 2 +#define QI_GRAN_PSI_PASID 3 + struct qi_desc { u64 low, high; }; @@ -359,6 +393,7 @@ struct intel_iommu { * told to. But while it's all driver-arbitrated, we're fine. */ struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; + struct idr pasid_idr; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -399,9 +434,32 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +#ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +struct intel_svm_dev { + struct list_head list; + struct rcu_head rcu; + struct device *dev; + int users; + u16 did; + u16 dev_iotlb:1; + u16 sid, qdep; +}; + +struct intel_svm { + struct mmu_notifier notifier; + struct mm_struct *mm; + struct intel_iommu *iommu; + int pasid; + struct list_head devs; +}; + +extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); +extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); +#endif + extern const struct attribute_group *intel_iommu_groups[]; #endif diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h new file mode 100644 index 000000000000..42f80ad7a7a0 --- /dev/null +++ b/include/linux/intel-svm.h @@ -0,0 +1,82 @@ +/* + * Copyright © 2015 Intel Corporation. 
+ * + * Authors: David Woodhouse + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_SVM_H__ +#define __INTEL_SVM_H__ + +#ifdef CONFIG_INTEL_IOMMU_SVM + +struct device; + +/** + * intel_svm_bind_mm() - Bind the current process to a PASID + * @dev: Device to be granted access + * @pasid: Address for allocated PASID + * + * This function attempts to enable PASID support for the given device. + * If the @pasid argument is non-%NULL, a PASID is allocated for access + * to the MM of the current process. + * + * By using a %NULL value for the @pasid argument, this function can + * be used to simply validate that PASID support is available for the + * given device — i.e. that it is behind an IOMMU which has the + * requisite support, and is enabled. + * + * Page faults are handled transparently by the IOMMU code, and there + * should be no need for the device driver to be involved. If a page + * fault cannot be handled (i.e. the address is invalid, rather than + * merely not yet paged in), then the page request will be completed by + * the core IOMMU code with appropriate status, and the device itself + * can then report the resulting fault to its driver via whatever + * mechanism is appropriate. + * + * Multiple calls from the same process may result in the same PASID + * being re-used. A reference count is kept. + */ +extern int intel_svm_bind_mm(struct device *dev, int *pasid); + +/** + * intel_svm_unbind_mm() - Unbind a specified PASID + * @dev: Device for which PASID was allocated + * @pasid: PASID value to be unbound + * + * This function allows a PASID to be retired when the device no + * longer requires access to the address space of a given process. + * + * If the use count for the PASID in question reaches zero, the + * PASID is revoked and may no longer be used by hardware. + * + * Device drivers are required to ensure that no access (including + * page requests) is currently outstanding for the PASID in question, + * before calling this function.
+ */ +extern int intel_svm_unbind_mm(struct device *dev, int pasid); + +#else /* CONFIG_INTEL_IOMMU_SVM */ + +static inline int intel_svm_bind_mm(struct device *dev, int *pasid) +{ + return -ENOSYS; +} + +static inline int intel_svm_unbind_mm(struct device *dev, int pasid) +{ + BUG(); +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + +#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL)) + +#endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 907fea3491d52063bb37b1f1ce0cf8a4ae70944c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 14:11:13 +0100 Subject: iommu/vt-d: Implement deferred invalidate for SVM Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 2 ++ drivers/iommu/intel-svm.c | 9 +++++++++ include/linux/dma_remapping.h | 1 + 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 60b66d27e655..58ecd52af43b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4981,6 +4981,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo |= CONTEXT_TT_PT_PASID << 2; } ctx_lo |= CONTEXT_PASIDE; + if (iommu->pasid_state_table) + ctx_lo |= CONTEXT_DINVE; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 82d53e15b865..a64720b5bd34 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -26,6 +26,10 @@ struct pasid_entry { u64 val; }; +struct pasid_state_entry { + u64 val; +}; + int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) { struct page *pages; @@ -127,6 +131,11 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, { struct intel_svm_dev *sdev; + /* Try deferred invalidate if available */ + if (svm->iommu->pasid_state_table && + !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63)) + return; + rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) intel_flush_svm_range_dev(svm, sdev, address, pages, ih); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 0e114bfabeed..187c10299722 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -25,6 +25,7 @@ #define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 #define CONTEXT_TT_MASK (7ULL << 2) +#define CONTEXT_DINVE (1ULL << 8) #define CONTEXT_PRS (1ULL << 9) #define CONTEXT_PASIDE (1ULL << 11) -- cgit v1.2.3 From 1208225cf48fa3b170b6dfe7369f15c295260755 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 15:37:03 +0100 Subject: iommu/vt-d: Generalise DMAR MSI setup to allow for page request events Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 42 +++++++++++++++++++++++++++++++----------- include/linux/intel-iommu.h | 10 +++++++++- 2 files changed, 40 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8757f8dfc4e5..80e3c176008e 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1086,6 +1086,11 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_destroy(iommu->iommu_dev); if (iommu->irq) { + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + } free_irq(iommu->irq, iommu); dmar_free_hwirq(iommu->irq); iommu->irq = 0; @@ -1493,53 +1498,68 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) } } + +static inline int 
dmar_msi_reg(struct intel_iommu *iommu, int irq) +{ + if (iommu->irq == irq) + return DMAR_FECTL_REG; + else if (iommu->pr_irq == irq) + return DMAR_PECTL_REG; + else + BUG(); +} + void dmar_msi_unmask(struct irq_data *data) { struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); unsigned long flag; /* unmask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(0, iommu->reg + DMAR_FECTL_REG); + writel(0, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_mask(struct irq_data *data) { - unsigned long flag; struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); + unsigned long flag; /* mask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); + writel(DMA_FECTL_IM, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_write(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(msg->data, iommu->reg + DMAR_FEDATA_REG); - writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); - writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); + writel(msg->data, iommu->reg + reg + 4); + writel(msg->address_lo, iommu->reg + reg + 8); + writel(msg->address_hi, iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_read(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - msg->data = readl(iommu->reg + DMAR_FEDATA_REG); - msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); - msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); + msg->data = readl(iommu->reg + reg + 4); + msg->address_lo = readl(iommu->reg + reg + 8); + msg->address_hi = readl(iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 46add607567b..f16a2b9124d1 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -60,6 +60,14 @@ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ +#define DMAR_PQH_REG 0xc0 /* Page request queue head register */ +#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ +#define DMAR_PQA_REG 0xd0 /* Page request queue address register */ +#define DMAR_PRS_REG 0xdc /* Page request status register */ +#define DMAR_PECTL_REG 0xe0 /* Page request event control register */ +#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ +#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ +#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ #define OFFSET_STRIDE (9) @@ -373,7 +381,7 @@ struct intel_iommu { int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ - unsigned int irq; + 
unsigned int irq, pr_irq; u16 segment; /* PCI segment# */ unsigned char name[13]; /* Device Name */ -- cgit v1.2.3 From 8595798ca34d186d39abcb277591e541776c0ef5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 15 Oct 2015 10:30:36 -0400 Subject: jbd2: gate checksum calculations on crc driver presence, not sb flags Change the journal's checksum functions to gate on whether or not the crc32c driver is loaded, and gate the loading on the superblock bits. This prevents a journal crash if someone loads a journal in no-csum mode and then randomizes the superblock, thus flipping on the feature bits. Tested-By: Nikolay Borisov Reported-by: Nikolay Borisov Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 6 +++--- include/linux/jbd2.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8270fe9e3641..00f7dbdd64b0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); /* Checksumming functions */ static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { - if (!jbd2_journal_has_csum_v2or3(j)) + if (!jbd2_journal_has_csum_v2or3_feature(j)) return 1; return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; @@ -1531,7 +1531,7 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (jbd2_journal_has_csum_v2or3(journal) && + if (jbd2_journal_has_csum_v2or3_feature(journal) && JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " @@ -1545,7 +1545,7 @@ static int journal_get_superblock(journal_t *journal) } /* Load the checksum driver */ - if (jbd2_journal_has_csum_v2or3(journal)) { + if (jbd2_journal_has_csum_v2or3_feature(journal)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index df07e78487d5..6da6f89722e1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1338,13 +1338,18 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) +{ + return JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2) || + JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3); +} + static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) || - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) - return 1; + WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && + journal->j_chksum_driver == NULL); - return 0; + return journal->j_chksum_driver != NULL; } /* -- cgit v1.2.3 From a222a7f0bb6c94c31cc9c755110593656f19de89 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 23:35:18 +0100 Subject: iommu/vt-d: Implement page request handling Largely based on the driver-mode implementation by Jesse Barnes. 
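(Editorial aside: at its core the patch below is a head/tail walk of a ring that hardware appends to. The following is a userspace analogue of that ring discipline with invented names; PRQ_ORDER and the ring mask match the patch, while the PQH/PQT register accesses become plain struct fields.)

    #include <stdint.h>

    #define PRQ_ORDER     0
    #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) /* 4KiB of 16-byte entries */

    struct page_req {
        uint64_t lo, hi;           /* stands in for the 128-bit descriptor */
    };

    struct prq {
        struct page_req ring[0x1000 / sizeof(struct page_req)];
        uint64_t pqh, pqt;         /* software head / hardware tail */
    };

    static void handle_page_request(struct page_req *req)
    {
        /* look up the PASID, fault the page in, queue a response ... */
    }

    static int prq_drain(struct prq *q)
    {
        uint64_t tail = q->pqt & PRQ_RING_MASK; /* snapshot hardware's tail */
        uint64_t head = q->pqh & PRQ_RING_MASK;
        int handled = 0;

        while (head != tail) {
            handle_page_request(&q->ring[head / sizeof(struct page_req)]);
            head = (head + sizeof(struct page_req)) & PRQ_RING_MASK;
            handled = 1;
        }
        q->pqh = tail; /* everything up to tail is now consumed */
        return handled;
    }

Masking with PRQ_RING_MASK both wraps the offset at the end of the 4KiB queue and keeps it 16-byte aligned, which is why the mask subtracts 0x10 rather than 1.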
Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 22 +++++- drivers/iommu/intel-svm.c | 173 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 26 +++++++ 3 files changed, 220 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 58ecd52af43b..68d71f81dfa0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1698,8 +1698,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_enabled(iommu)) { + if (ecap_prs(iommu->ecap)) + intel_svm_finish_prq(iommu); intel_svm_free_pasid_tables(iommu); + } #endif } @@ -3243,6 +3246,13 @@ domains_done: iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto free_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; @@ -4187,6 +4197,14 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) intel_iommu_init_qi(iommu); iommu_flush_write_buffer(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto disable_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto disable_iommu; @@ -4983,6 +5001,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo |= CONTEXT_PASIDE; if (iommu->pasid_state_table) ctx_lo |= CONTEXT_DINVE; + if (info->pri_supported) + ctx_lo |= CONTEXT_PRS; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a64720b5bd34..0e8654282484 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -21,6 +21,10 @@ #include #include #include +#include +#include + +static irqreturn_t prq_event_thread(int irq, void *d); struct pasid_entry { u64 val; @@ -82,6 +86,66 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu) return 0; } +#define PRQ_ORDER 0 + +int intel_svm_enable_prq(struct intel_iommu *iommu) +{ + struct page *pages; + int irq, ret; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate page request queue\n", + iommu->name); + return -ENOMEM; + } + iommu->prq = page_address(pages); + + irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); + if (irq <= 0) { + pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", + iommu->name); + ret = -EINVAL; + err: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + return ret; + } + iommu->pr_irq = irq; + + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); + + ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, + iommu->prq_name, iommu); + if (ret) { + pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", + iommu->name); + dmar_free_hwirq(irq); + goto err; + } + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); + + return 0; +} + +int intel_svm_finish_prq(struct intel_iommu *iommu) +{ + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); + + 
free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return 0; +} + static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, unsigned long address, int pages, int ih) { @@ -363,3 +427,112 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) return ret; } EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); + +/* Page request queue descriptor */ +struct page_req_dsc { + u64 srr:1; + u64 bof:1; + u64 pasid_present:1; + u64 lpig:1; + u64 pasid:20; + u64 bus:8; + u64 private:23; + u64 prg_index:9; + u64 rd_req:1; + u64 wr_req:1; + u64 exe_req:1; + u64 priv_req:1; + u64 devfn:8; + u64 addr:52; +}; + +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) +static irqreturn_t prq_event_thread(int irq, void *d) +{ + struct intel_iommu *iommu = d; + struct intel_svm *svm = NULL; + int head, tail, handled = 0; + + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct vm_area_struct *vma; + struct page_req_dsc *req; + struct qi_desc resp; + int ret, result; + u64 address; + + handled = 1; + + req = &iommu->prq[head / sizeof(*req)]; + + result = QI_RESP_FAILURE; + address = req->addr << PAGE_SHIFT; + if (!req->pasid_present) { + pr_err("%s: Page request without PASID: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto bad_req; + } + + if (!svm || svm->pasid != req->pasid) { + rcu_read_lock(); + svm = idr_find(&iommu->pasid_idr, req->pasid); + /* It *can't* go away, because the driver is not permitted + * to unbind the mm while any page faults are outstanding. + * So we only need RCU to protect the internal idr code. */ + rcu_read_unlock(); + + if (!svm) { + pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", + iommu->name, req->pasid, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto bad_req; + } + } + + result = QI_RESP_INVALID; + down_read(&svm->mm->mmap_sem); + vma = find_extend_vma(svm->mm, address); + if (!vma || address < vma->vm_start) + goto invalid; + + ret = handle_mm_fault(svm->mm, vma, address, + req->wr_req ? FAULT_FLAG_WRITE : 0); + if (ret & VM_FAULT_ERROR) + goto invalid; + + result = QI_RESP_SUCCESS; + invalid: + up_read(&svm->mm->mmap_sem); + bad_req: + /* Accounting for major/minor faults? 
*/ + + if (req->lpig) { + /* Page Group Response */ + resp.low = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_RESP_TYPE; + resp.high = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); + + qi_submit_sync(&resp, svm->iommu); + } else if (req->srr) { + /* Page Stream Response */ + resp.low = QI_PSTRM_IDX(req->prg_index) | + QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | + QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; + resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | + QI_PSTRM_RESP_CODE(result); + + qi_submit_sync(&resp, svm->iommu); + } + + head = (head + sizeof(*req)) & PRQ_RING_MASK; + } + + dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + + return IRQ_RETVAL(handled); +} diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f16a2b9124d1..e5b80d31eb1b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -265,6 +265,8 @@ enum { #define QI_EIOTLB_TYPE 0x6 #define QI_PC_TYPE 0x7 #define QI_DEIOTLB_TYPE 0x8 +#define QI_PGRP_RESP_TYPE 0x9 +#define QI_PSTRM_RESP_TYPE 0xa #define QI_IEC_SELECTIVE (((u64)1) << 4) #define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) @@ -315,6 +317,25 @@ enum { #define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) #define QI_DEV_EIOTLB_MAX_INVS 32 +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) +#define QI_PGRP_PRIV(priv) (((u64)(priv)) << 32) +#define QI_PGRP_RESP_CODE(res) ((u64)(res)) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) +#define QI_PGRP_DID(did) (((u64)(did)) << 16) +#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) + +#define QI_PSTRM_ADDR(addr) (((u64)(addr)) & VTD_PAGE_MASK) +#define QI_PSTRM_DEVFN(devfn) (((u64)(devfn)) << 4) +#define QI_PSTRM_RESP_CODE(res) ((u64)(res)) +#define QI_PSTRM_IDX(idx) (((u64)(idx)) << 55) +#define QI_PSTRM_PRIV(priv) (((u64)(priv)) << 32) +#define QI_PSTRM_BUS(bus) (((u64)(bus)) << 24) +#define QI_PSTRM_PASID(pasid) (((u64)(pasid)) << 4) + +#define QI_RESP_SUCCESS 0x0 +#define QI_RESP_INVALID 0x1 +#define QI_RESP_FAILURE 0xf + #define QI_GRAN_ALL_ALL 0 #define QI_GRAN_NONG_ALL 1 #define QI_GRAN_NONG_PASID 2 @@ -369,6 +390,7 @@ enum { struct pasid_entry; struct pasid_state_entry; +struct page_req_dsc; struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ @@ -401,6 +423,8 @@ struct intel_iommu { * told to. But while it's all driver-arbitrated, we're fine. 
*/ struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; + struct page_req_dsc *prq; + unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; #endif struct q_inval *qi; /* Queued invalidation info */ @@ -445,6 +469,8 @@ extern int dmar_ir_support(void); #ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +extern int intel_svm_enable_prq(struct intel_iommu *iommu); +extern int intel_svm_finish_prq(struct intel_iommu *iommu); struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; int users; u16 did; u16 dev_iotlb:1; u16 sid, qdep; }; -- cgit v1.2.3 From 0204a49609824163092c32a8aeb073f7e9acc76d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 17:18:10 +0100 Subject: iommu/vt-d: Add callback to device driver on page faults Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 26 +++++++++++++++++++++++++- include/linux/intel-iommu.h | 3 +++ include/linux/intel-svm.h | 21 ++++++++++++++++++--- 3 files changed, 46 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 0e8654282484..006e95dd64ae 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -264,7 +264,7 @@ static const struct mmu_notifier_ops intel_mmuops = { static DEFINE_MUTEX(pasid_mutex); -int intel_svm_bind_mm(struct device *dev, int *pasid) +int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_svm_dev *sdev; @@ -302,6 +302,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid) list_for_each_entry(sdev, &svm->devs, list) { if (dev == sdev->dev) { + if (sdev->ops != ops) { + ret = -EBUSY; + goto out; + } sdev->users++; goto success; } @@ -327,6 +331,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid) } /* Finish the setup now we know we're keeping it */ sdev->users = 1; + sdev->ops = ops; init_rcu_head(&sdev->rcu); if (!svm) { @@ -456,6 +461,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; while (head != tail) { + struct intel_svm_dev *sdev; struct vm_area_struct *vma; struct page_req_dsc *req; struct qi_desc resp; @@ -507,6 +513,24 @@ static irqreturn_t prq_event_thread(int irq, void *d) up_read(&svm->mm->mmap_sem); bad_req: /* Accounting for major/minor faults? */ + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) { + if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) + break; + } + /* Other devices can go away, but the drivers are not permitted + * to unbind while any page faults might be in flight. So it's + * OK to drop the 'lock' here now we have it.
*/ + rcu_read_unlock(); + + if (WARN_ON(&sdev->list == &svm->devs)) + sdev = NULL; + + if (sdev && sdev->ops && sdev->ops->fault_cb) { + int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | + (req->exe_req << 1) | (req->priv_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + } if (req->lpig) { /* Page Group Response */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e5b80d31eb1b..57be14fce640 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -472,10 +472,13 @@ extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); +struct svm_dev_ops; + struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct svm_dev_ops *ops; int users; u16 did; u16 dev_iotlb:1; u16 sid, qdep; }; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 42f80ad7a7a0..239f8a13a332 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -20,10 +20,23 @@ struct device; +struct svm_dev_ops { + void (*fault_cb)(struct device *dev, int pasid, u64 address, + u32 private, int rwxp, int response); +}; + +/* Values for rwxp in fault_cb callback */ +#define SVM_REQ_READ (1<<3) +#define SVM_REQ_WRITE (1<<2) +#define SVM_REQ_EXEC (1<<1) +#define SVM_REQ_PRIV (1<<0) + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted access + * @pasid: Address for allocated PASID + * @flags: Flags. Later for requesting supervisor mode, etc. + * @ops: Callbacks to device driver * * This function attempts to enable PASID support for the given device. * If the @pasid argument is non-%NULL, a PASID is allocated for access @@ -45,7 +58,8 @@ struct device; * Multiple calls from the same process may result in the same PASID * being re-used. A reference count is kept.
*/ -extern int intel_svm_bind_mm(struct device *dev, int *pasid); +extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, + struct svm_dev_ops *ops); /** * intel_svm_unbind_mm() - Unbind a specified PASID @@ -66,7 +80,8 @@ extern int intel_svm_unbind_mm(struct device *dev, int pasid); #else /* CONFIG_INTEL_IOMMU_SVM */ -static inline int intel_svm_bind_mm(struct device *dev, int *pasid) +static inline int intel_svm_bind_mm(struct device *dev, int *pasid, + int flags, struct svm_dev_ops *ops) { return -ENOSYS; } @@ -77,6 +92,6 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) } #endif /* CONFIG_INTEL_IOMMU_SVM */ -#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL)) +#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL)) #endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 569e4f7782fb92d0e1b395b5fb01de642dd74dcf Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 13:59:14 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_PRIVATE_PASID to allocate unique PASIDs Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 6 ++++-- include/linux/intel-iommu.h | 1 + include/linux/intel-svm.h | 11 +++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 006e95dd64ae..89d4d47d0ab3 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -285,11 +285,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_max = 1 << 20; mutex_lock(&pasid_mutex); - if (pasid) { + if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { int i; idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != current->mm) + if (svm->mm != current->mm || + (svm->flags & SVM_FLAG_PRIVATE_PASID)) continue; if (svm->pasid >= pasid_max) { @@ -355,6 +356,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ svm->pasid = ret; svm->notifier.ops = &intel_mmuops; svm->mm = get_task_mm(current); + svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); ret = -ENOMEM; if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 57be14fce640..821273ca4873 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -489,6 +489,7 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; struct intel_iommu *iommu; + int flags; int pasid; struct list_head devs; }; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 239f8a13a332..dd94ab45a4db 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -31,6 +31,17 @@ struct svm_dev_ops { #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) + +/* + * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" + * PASID for the current process. Even if a PASID already exists, a new one + * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID + * will not be given to subsequent callers. This facility allows a driver to + * disambiguate between multiple device contexts which access the same MM, + * if there is no other way to do so. It should be used sparingly, if at all. 
+ */ +#define SVM_FLAG_PRIVATE_PASID (1<<0) + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted access -- cgit v1.2.3 From 0db2e5d18f76a66ca945447d9f610bed0a94ca5a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Oct 2015 20:13:58 +0100 Subject: iommu: Implement common IOMMU ops for DMA mapping Taking inspiration from the existing arch/arm code, break out some generic functions to interface the DMA-API to the IOMMU-API. This will do the bulk of the heavy lifting for IOMMU-backed dma-mapping. Since associating an IOVA allocator with an IOMMU domain is a fairly common need, rather than introduce yet another private structure just to do this for ourselves, extend the top-level struct iommu_domain with the notion. A simple opaque cookie allows reuse by other IOMMU API users with their various different incompatible allocator types. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 7 + drivers/iommu/Makefile | 1 + drivers/iommu/dma-iommu.c | 524 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dma-iommu.h | 85 ++++++++ include/linux/iommu.h | 1 + 5 files changed, 618 insertions(+) create mode 100644 drivers/iommu/dma-iommu.c create mode 100644 include/linux/dma-iommu.h (limited to 'include/linux') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9da766719c8..7414f33acfba 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -49,6 +49,13 @@ config OF_IOMMU def_bool y depends on OF && IOMMU_API +# IOMMU-agnostic DMA-mapping layer +config IOMMU_DMA + bool + depends on NEED_SG_DMA_LENGTH + select IOMMU_API + select IOMMU_IOVA + config FSL_PAMU bool "Freescale IOMMU support" depends on PPC32 diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6dcc513d711..f465cfbdb183 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IOVA) += iova.o diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c new file mode 100644 index 000000000000..3a20db4f8604 --- /dev/null +++ b/drivers/iommu/dma-iommu.c @@ -0,0 +1,524 @@ +/* + * A fairly generic DMA-API to IOMMU-API glue layer. + * + * Copyright (C) 2014-2015 ARM Ltd. + * + * based in part on arch/arm/mm/dma-mapping.c: + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/device.h> +#include <linux/dma-iommu.h> +#include <linux/huge_mm.h> +#include <linux/iommu.h> +#include <linux/iova.h> +#include <linux/mm.h> + +int iommu_dma_init(void) +{ + return iova_cache_get(); +} + +/** + * iommu_get_dma_cookie - Acquire DMA-API resources for a domain + * @domain: IOMMU domain to prepare for DMA-API usage + * + * IOMMU drivers should normally call this from their domain_alloc + * callback when domain->type == IOMMU_DOMAIN_DMA.
+ */ +int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad; + + if (domain->iova_cookie) + return -EEXIST; + + iovad = kzalloc(sizeof(*iovad), GFP_KERNEL); + domain->iova_cookie = iovad; + + return iovad ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(iommu_get_dma_cookie); + +/** + * iommu_put_dma_cookie - Release a domain's DMA mapping resources + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * + * IOMMU drivers should normally call this from their domain_free callback. + */ +void iommu_put_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad = domain->iova_cookie; + + if (!iovad) + return; + + put_iova_domain(iovad); + kfree(iovad); + domain->iova_cookie = NULL; +} +EXPORT_SYMBOL(iommu_put_dma_cookie); + +/** + * iommu_dma_init_domain - Initialise a DMA mapping domain + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * @base: IOVA at which the mappable address space starts + * @size: Size of IOVA space + * + * @base and @size should be exact multiples of IOMMU page granularity to + * avoid rounding surprises. If necessary, we reserve the page at address 0 + * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but + * any change which could make prior IOVAs invalid will fail. + */ +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long order, base_pfn, end_pfn; + + if (!iovad) + return -ENODEV; + + /* Use the smallest supported page size for IOVA granularity */ + order = __ffs(domain->ops->pgsize_bitmap); + base_pfn = max_t(unsigned long, 1, base >> order); + end_pfn = (base + size - 1) >> order; + + /* Check the domain allows at least some access to the device... */ + if (domain->geometry.force_aperture) { + if (base > domain->geometry.aperture_end || + base + size <= domain->geometry.aperture_start) { + pr_warn("specified DMA range outside IOMMU capability\n"); + return -EFAULT; + } + /* ...then finally give it a kicking to make sure it fits */ + base_pfn = max_t(unsigned long, base_pfn, + domain->geometry.aperture_start >> order); + end_pfn = min_t(unsigned long, end_pfn, + domain->geometry.aperture_end >> order); + } + + /* All we can safely do with an existing domain is enlarge it */ + if (iovad->start_pfn) { + if (1UL << order != iovad->granule || + base_pfn != iovad->start_pfn || + end_pfn < iovad->dma_32bit_pfn) { + pr_warn("Incompatible range for DMA domain\n"); + return -EFAULT; + } + iovad->dma_32bit_pfn = end_pfn; + } else { + init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + } + return 0; +} +EXPORT_SYMBOL(iommu_dma_init_domain); + +/** + * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags + * @dir: Direction of DMA transfer + * @coherent: Is the DMA master cache-coherent? + * + * Return: corresponding IOMMU API page protection flags + */ +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) +{ + int prot = coherent ? 
IOMMU_CACHE : 0; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return 0; + } +} + +static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, + dma_addr_t dma_limit) +{ + unsigned long shift = iova_shift(iovad); + unsigned long length = iova_align(iovad, size) >> shift; + + /* + * Enforce size-alignment to be safe - there could perhaps be an + * attribute to control this per-device, or at least per-domain... + */ + return alloc_iova(iovad, length, dma_limit >> shift, true); +} + +/* The IOVA allocator knows what we mapped, so just unmap whatever that was */ +static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long shift = iova_shift(iovad); + unsigned long pfn = dma_addr >> shift; + struct iova *iova = find_iova(iovad, pfn); + size_t size; + + if (WARN_ON(!iova)) + return; + + size = iova_size(iova) << shift; + size -= iommu_unmap(domain, pfn << shift, size); + /* ...and if we can't, then something is horribly, horribly wrong */ + WARN_ON(size > 0); + __free_iova(iovad, iova); +} + +static void __iommu_dma_free_pages(struct page **pages, int count) +{ + while (count--) + __free_page(pages[count]); + kvfree(pages); +} + +static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) +{ + struct page **pages; + unsigned int i = 0, array_size = count * sizeof(*pages); + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, GFP_KERNEL); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + /* IOMMU can map any pages, so highmem can also be used here */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + struct page *page = NULL; + int j, order = __fls(count); + + /* + * Higher-order allocations are a convenience rather + * than a necessity, hence using __GFP_NORETRY until + * falling back to single-page allocations. + */ + for (order = min(order, MAX_ORDER); order > 0; order--) { + page = alloc_pages(gfp | __GFP_NORETRY, order); + if (!page) + continue; + if (PageCompound(page)) { + if (!split_huge_page(page)) + break; + __free_pages(page, order); + } else { + split_page(page, order); + break; + } + } + if (!page) + page = alloc_page(gfp); + if (!page) { + __iommu_dma_free_pages(pages, i); + return NULL; + } + j = 1 << order; + count -= j; + while (j--) + pages[i++] = page++; + } + return pages; +} + +/** + * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc() + * @dev: Device which owns this buffer + * @pages: Array of buffer pages as returned by iommu_dma_alloc() + * @size: Size of buffer in bytes + * @handle: DMA address of buffer + * + * Frees both the pages associated with the buffer, and the array + * describing them + */ +void iommu_dma_free(struct device *dev, struct page **pages, size_t size, + dma_addr_t *handle) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle); + __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); + *handle = DMA_ERROR_CODE; +} + +/** + * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space + * @dev: Device to allocate memory for.
Must be a real device + * attached to an iommu_dma_domain + * @size: Size of buffer in bytes + * @gfp: Allocation flags + * @prot: IOMMU mapping flags + * @handle: Out argument for allocated DMA handle + * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the + * given VA/PA are visible to the given non-coherent device. + * + * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, + * but an IOMMU which supports smaller pages might not map the whole thing. + * + * Return: Array of struct page pointers describing the buffer, + * or NULL on failure. + */ +struct page **iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, int prot, dma_addr_t *handle, + void (*flush_page)(struct device *, const void *, phys_addr_t)) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct page **pages; + struct sg_table sgt; + dma_addr_t dma_addr; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + *handle = DMA_ERROR_CODE; + + pages = __iommu_dma_alloc_pages(count, gfp); + if (!pages) + return NULL; + + iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); + if (!iova) + goto out_free_pages; + + size = iova_align(iovad, size); + if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL)) + goto out_free_iova; + + if (!(prot & IOMMU_CACHE)) { + struct sg_mapping_iter miter; + /* + * The CPU-centric flushing implied by SG_MITER_TO_SG isn't + * sufficient here, so skip it by using the "wrong" direction. + */ + sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG); + while (sg_miter_next(&miter)) + flush_page(dev, miter.addr, page_to_phys(miter.page)); + sg_miter_stop(&miter); + } + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot) + < size) + goto out_free_sg; + + *handle = dma_addr; + sg_free_table(&sgt); + return pages; + +out_free_sg: + sg_free_table(&sgt); +out_free_iova: + __free_iova(iovad, iova); +out_free_pages: + __iommu_dma_free_pages(pages, count); + return NULL; +} + +/** + * iommu_dma_mmap - Map a buffer into provided user VMA + * @pages: Array representing buffer from iommu_dma_alloc() + * @size: Size of buffer in bytes + * @vma: VMA describing requested userspace mapping + * + * Maps the pages of the buffer in @pages into @vma. The caller is responsible + * for verifying the correct size and protection of @vma beforehand. 
+ */ + +int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) +{ + unsigned long uaddr = vma->vm_start; + unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int ret = -ENXIO; + + for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) { + ret = vm_insert_page(vma, uaddr, pages[i]); + if (ret) + break; + uaddr += PAGE_SIZE; + } + return ret; +} + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int prot) +{ + dma_addr_t dma_addr; + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + phys_addr_t phys = page_to_phys(page) + offset; + size_t iova_off = iova_offset(iovad, phys); + size_t len = iova_align(iovad, size + iova_off); + struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); + + if (!iova) + return DMA_ERROR_CODE; + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) { + __free_iova(iovad, iova); + return DMA_ERROR_CODE; + } + return dma_addr + iova_off; +} + +void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); +} + +/* + * Prepare a successfully-mapped scatterlist to give back to the caller. + * Handling IOVA concatenation can come later, if needed + */ +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + dma_addr_t dma_addr) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + /* Un-swizzling the fields here, hence the naming mismatch */ + unsigned int s_offset = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_dma_len = s->length; + + s->offset = s_offset; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_offset; + dma_addr += s_dma_len; + } + return i; +} + +/* + * If mapping failed, then just restore the original list, + * but making sure the DMA fields are invalidated. + */ +static void __invalidate_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_address(s) != DMA_ERROR_CODE) + s->offset = sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_len(s) = 0; + } +} + +/* + * The DMA API client is passing in a scatterlist which could describe + * any old buffer layout, but the IOMMU API requires everything to be + * aligned to IOMMU pages. Hence the need for this complicated bit of + * impedance-matching, to be able to hand off a suitably-aligned list, + * but still preserve the original offsets and sizes for the caller. + */ +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int prot) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct scatterlist *s, *prev = NULL; + dma_addr_t dma_addr; + size_t iova_len = 0; + int i; + + /* + * Work out how much IOVA space we need, and align the segments to + * IOVA granules for the IOMMU driver to handle. With some clever + * trickery we can modify the list in-place, but reversibly, by + * hiding the original data in the as-yet-unused DMA fields. 
+ */ + for_each_sg(sg, s, nents, i) { + size_t s_offset = iova_offset(iovad, s->offset); + size_t s_length = s->length; + + sg_dma_address(s) = s->offset; + sg_dma_len(s) = s_length; + s->offset -= s_offset; + s_length = iova_align(iovad, s_length + s_offset); + s->length = s_length; + + /* + * The simple way to avoid the rare case of a segment + * crossing the boundary mask is to pad the previous one + * to end at a naturally-aligned IOVA for this one's size, + * at the cost of potentially over-allocating a little. + */ + if (prev) { + size_t pad_len = roundup_pow_of_two(s_length); + + pad_len = (pad_len - iova_len) & (pad_len - 1); + prev->length += pad_len; + iova_len += pad_len; + } + + iova_len += s_length; + prev = s; + } + + iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); + if (!iova) + goto out_restore_sg; + + /* + * We'll leave any physical concatenation to the IOMMU driver's + * implementation - it knows better than we do. + */ + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len) + goto out_free_iova; + + return __finalise_sg(dev, sg, nents, dma_addr); + +out_free_iova: + __free_iova(iovad, iova); +out_restore_sg: + __invalidate_sg(sg, nents); + return 0; +} + +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + /* + * The scatterlist segments are mapped into a single + * contiguous IOVA allocation, so this is incredibly easy. + */ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg)); +} + +int iommu_dma_supported(struct device *dev, u64 mask) +{ + /* + * 'Special' IOMMUs which don't have the same addressing capability + * as the CPU will have to wait until we have some way to query that + * before they'll be able to use this framework. + */ + return 1; +} + +int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DMA_ERROR_CODE; +} diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h new file mode 100644 index 000000000000..fc481037478a --- /dev/null +++ b/include/linux/dma-iommu.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014-2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +#ifndef __DMA_IOMMU_H +#define __DMA_IOMMU_H + +#ifdef __KERNEL__ +#include <asm/errno.h> + +#ifdef CONFIG_IOMMU_DMA +#include <linux/iommu.h> + +int iommu_dma_init(void); + +/* Domain management interface for IOMMU drivers */ +int iommu_get_dma_cookie(struct iommu_domain *domain); +void iommu_put_dma_cookie(struct iommu_domain *domain); + +/* Setup call for arch DMA mapping code */ +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size); + +/* General helpers for DMA-API <-> IOMMU-API interaction */ +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent); + +/* + * These implement the bulk of the relevant DMA mapping callbacks, but require + * the arch code to take care of attributes and cache maintenance + */ +struct page **iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, int prot, dma_addr_t *handle, + void (*flush_page)(struct device *, const void *, phys_addr_t)); +void iommu_dma_free(struct device *dev, struct page **pages, size_t size, + dma_addr_t *handle); + +int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma); + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int prot); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int prot); + +/* + * Arch code with no special attribute handling may use these + * directly as DMA mapping callbacks for simplicity + */ +void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs); +int iommu_dma_supported(struct device *dev, u64 mask); +int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); + +#else + +struct iommu_domain; + +static inline int iommu_dma_init(void) +{ + return 0; +} + +static inline int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + return -ENODEV; +} + +static inline void iommu_put_dma_cookie(struct iommu_domain *domain) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ +#endif /* __KERNEL__ */ +#endif /* __DMA_IOMMU_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f9c1b6d0f2e4..f174506a5343 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -81,6 +81,7 @@ struct iommu_domain { iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; + void *iova_cookie; }; enum iommu_cap { -- cgit v1.2.3 From 5cec753709adf1a20c8b15edf8e5245cf4fd4e82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 15:52:15 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is only usable for the static 1:1 mapping of physical memory. Any access to vmalloc or module regions will require some way of doing an IOTLB flush. It's theoretically possible to hook into the flush_tlb_kernel_range() function, but that seems like overkill — most of the addresses accessed through a kernel PASID *will* be in the 1:1 mapping. If we really need to allow access to more interesting kernel regions, then the answer will probably be an explicit IOTLB flush call after use, akin to the DMA API's unmap function. In fact, it might be worth introducing that sooner rather than later, and making it just BUG() if the address isn't in the static 1:1 mapping.
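As a rough usage sketch (not part of the patch; the wrapper name is invented for illustration), a driver wanting a kernel-only PASID under these rules would pass SVM_FLAG_SUPERVISOR_MODE and no svm_dev_ops:

	#include <linux/intel-svm.h>

	/* Hypothetical caller: the PASID obtained here may only be used for
	 * addresses in the kernel's static 1:1 mapping, and no page-fault
	 * callback is registered. */
	static int example_bind_kernel_pasid(struct device *dev, int *pasid)
	{
		return intel_svm_bind_mm(dev, pasid, SVM_FLAG_SUPERVISOR_MODE, NULL);
	}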
Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 43 +++++++++++++++++++++++++++++++------------ include/linux/intel-svm.h | 15 ++++++++++++++- 2 files changed, 45 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 89d4d47d0ab3..817be769e94f 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -269,11 +269,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; + struct mm_struct *mm = NULL; int pasid_max; int ret; - BUG_ON(pasid && !current->mm); - if (WARN_ON(!iommu)) return -EINVAL; @@ -284,12 +283,20 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } else pasid_max = 1 << 20; + if ((flags & SVM_FLAG_SUPERVISOR_MODE)) { + if (!ecap_srs(iommu->ecap)) + return -EINVAL; + } else if (pasid) { + mm = get_task_mm(current); + BUG_ON(!mm); + } + mutex_lock(&pasid_mutex); if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { int i; idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != current->mm || + if (svm->mm != mm || (svm->flags & SVM_FLAG_PRIVATE_PASID)) continue; @@ -355,17 +362,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } svm->pasid = ret; svm->notifier.ops = &intel_mmuops; - svm->mm = get_task_mm(current); + svm->mm = mm; svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); ret = -ENOMEM; - if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { - idr_remove(&svm->iommu->pasid_idr, svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } - iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1; + if (mm) { + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1; + mm = NULL; + } else + iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11); wmb(); } list_add_rcu(&sdev->list, &svm->devs); @@ -375,6 +387,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ret = 0; out: mutex_unlock(&pasid_mutex); + if (mm) + mmput(mm); return ret; } EXPORT_SYMBOL_GPL(intel_svm_bind_mm); @@ -416,7 +430,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) mmu_notifier_unregister(&svm->notifier, svm->mm); idr_remove(&svm->iommu->pasid_idr, svm->pasid); - mmput(svm->mm); + if (svm->mm) + mmput(svm->mm); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -500,6 +515,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) } result = QI_RESP_INVALID; + /* Since we're using init_mm.pgd directly, we should never take + * any faults on kernel addresses. */ + if (!svm->mm) + goto bad_req; down_read(&svm->mm->mmap_sem); vma = find_extend_vma(svm->mm, address); if (!vma || address < vma->vm_start) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index dd94ab45a4db..0a48ccff24ae 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -40,7 +40,20 @@ struct svm_dev_ops { * disambiguate between multiple device contexts which access the same MM, * if there is no other way to do so. It should be used sparingly, if at all. 
*/ -#define SVM_FLAG_PRIVATE_PASID (1<<0) +#define SVM_FLAG_PRIVATE_PASID (1<<0) + +/* + * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only + * for access to kernel addresses. No IOTLB flushes are automatically done + * for kernel mappings; it is valid only for access to the kernel's static + * 1:1 mapping of physical memory — not to vmalloc or even module mappings. + * A future API addition may permit the use of such ranges, by means of an + * explicit IOTLB flush call (akin to the DMA API's unmap method). + * + * It is unlikely that we will ever hook into flush_tlb_kernel_range() to + * do such IOTLB flushes automatically. + */ +#define SVM_FLAG_SUPERVISOR_MODE (1<<1) /** * intel_svm_bind_mm() - Bind the current process to a PASID -- cgit v1.2.3 From b02176f30cd30acccd3b633ab7d9aed8b5da52ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Sep 2015 12:20:22 -0400 Subject: block: don't release bdi while request_queue has live references bdi's are initialized in two steps, bdi_init() and bdi_register(), but destroyed in a single step by bdi_destroy() which, for a bdi embedded in a request_queue, is called during blk_cleanup_queue() which makes the queue invisible and starts the draining of remaining usages. A request_queue's user can access the congestion state of the embedded bdi as long as it holds a reference to the queue. As such, it may access the congested state of a queue which finished blk_cleanup_queue() but hasn't reached blk_release_queue() yet. Because the congested state was embedded in backing_dev_info which in turn is embedded in request_queue, accessing the congested state after bdi_destroy() was called was fine. The bdi was destroyed but the memory region for the congested state remained accessible till the queue got released. a13f35e87140 ("writeback: don't embed root bdi_writeback_congested in bdi_writeback") changed the situation. Now, the root congested state which is expected to be pinned while request_queue remains accessible is separately reference counted and the base ref is put during bdi_destroy(). This means that the root congested state may go away prematurely while the queue is between bdi_destroy() and blk_release_queue(), which was detected by Andrey's KASAN tests. The root cause of this problem is that bdi doesn't distinguish the two steps of destruction, unregistration and release, and now the root congested state actually requires a separate release step. To fix the issue, this patch separates out bdi_unregister() and bdi_exit() from bdi_destroy(). bdi_unregister() is called from blk_cleanup_queue() and bdi_exit() from blk_release_queue(). bdi_destroy() is now just a simple wrapper calling the two steps back-to-back. While at it, the prototype of bdi_destroy() is moved right below bdi_setup_and_register() so that the counterpart operations are located together.
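A minimal sketch of the resulting lifecycle for a queue's embedded bdi (illustrative only; the surrounding function is invented, the helpers are the ones this patch introduces):

	#include <linux/blkdev.h>

	static void example_drop_queue(struct request_queue *q)
	{
		blk_cleanup_queue(q);	/* calls bdi_unregister(): the bdi is
					 * unhashed and its device released,
					 * but the root congested state stays
					 * valid for queue reference holders */
		blk_put_queue(q);	/* once the last reference is dropped,
					 * blk_release_queue() calls bdi_exit()
					 * to free the remaining wb state */
	}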
Signed-off-by: Tejun Heo Fixes: a13f35e87140 ("writeback: don't embed root bdi_writeback_congested in bdi_writeback") Cc: stable@vger.kernel.org # v4.2+ Reported-and-tested-by: Andrey Konovalov Link: http://lkml.kernel.org/g/CAAeHK+zUJ74Zn17=rOyxacHU18SgCfC6bsYW=6kCY5GXJBwGfQ@mail.gmail.com Reviewed-by: Jan Kara Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-sysfs.c | 1 + include/linux/backing-dev.h | 6 +++++- mm/backing-dev.c | 12 +++++++++++- 4 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 2eb722d48773..18e92a6645e2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -576,7 +576,7 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - bdi_destroy(&q->backing_dev_info); + bdi_unregister(&q->backing_dev_info); /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3e44a9da2a13..07b42f5ad797 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -540,6 +540,7 @@ static void blk_release_queue(struct kobject *kobj) struct request_queue *q = container_of(kobj, struct request_queue, kobj); + bdi_exit(&q->backing_dev_info); blkcg_exit_queue(q); if (q->elevator) { diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 78677e5a65bf..c85f74946a8b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -19,13 +19,17 @@ #include int __must_check bdi_init(struct backing_dev_info *bdi); -void bdi_destroy(struct backing_dev_info *bdi); +void bdi_exit(struct backing_dev_info *bdi); __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); +void bdi_unregister(struct backing_dev_info *bdi); + int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); +void bdi_destroy(struct backing_dev_info *bdi); + void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); void wb_start_background_writeback(struct bdi_writeback *wb); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index e92d77937fd3..9e841399041a 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -835,7 +835,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) synchronize_rcu_expedited(); } -void bdi_destroy(struct backing_dev_info *bdi) +void bdi_unregister(struct backing_dev_info *bdi) { /* make sure nobody finds us on the bdi_list anymore */ bdi_remove_from_list(bdi); @@ -847,9 +847,19 @@ void bdi_destroy(struct backing_dev_info *bdi) device_unregister(bdi->dev); bdi->dev = NULL; } +} +void bdi_exit(struct backing_dev_info *bdi) +{ + WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); } + +void bdi_destroy(struct backing_dev_info *bdi) +{ + bdi_unregister(bdi); + bdi_exit(bdi); +} EXPORT_SYMBOL(bdi_destroy); /* -- cgit v1.2.3 From 36022770de6cf9a403c40a68712ed2d2ea2746be Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:34 +0800 Subject: nfs42: add CLONE xdr functions xdr definitions per draft-ietf-nfsv4-minorversion2-38.txt Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 2 + include/linux/nfs_xdr.h | 19 ++++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git 
a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0eb29e14070d..0ca482a51e53 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -34,6 +34,12 @@ 1 /* opaque devaddr4 length */ + \ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) +#define encode_clone_maxsz (encode_stateid_maxsz + \ + encode_stateid_maxsz + \ + 2 /* src offset */ + \ + 2 /* dst offset */ + \ + 2 /* count */) +#define decode_clone_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -65,7 +71,20 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) - +#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_clone_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_clone_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_clone_maxsz + \ + decode_getattr_maxsz) static void encode_fallocate(struct xdr_stream *xdr, struct nfs42_falloc_args *args) @@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr, encode_uint32(xdr, 0); } +static void encode_clone(struct xdr_stream *xdr, + struct nfs42_clone_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->src_stateid); + encode_nfs4_stateid(xdr, &args->dst_stateid); + p = reserve_space(xdr, 3*8); + p = xdr_encode_hyper(p, args->src_offset); + p = xdr_encode_hyper(p, args->dst_offset); + xdr_encode_hyper(p, args->count); +} + /* * Encode ALLOCATE request */ @@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode CLONE request + */ +static void nfs4_xdr_enc_clone(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_clone_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->src_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dst_fh, &hdr); + encode_clone(xdr, args, &hdr); + encode_getfattr(xdr, args->dst_bitmask, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTSTATS); } +static int decode_clone(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_CLONE); +} + /* * Decode ALLOCATE request */ @@ -351,4 +411,39 @@ out: return status; } +/* + * Decode CLONE request + */ +static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_clone_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_savefh(xdr); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_clone(xdr); + if (status) + goto out; + status = decode_getfattr(xdr, res->dst_fattr, res->server); + +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ 
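For orientation, the CLONE argument body produced by encode_clone() above lays out on the wire as below (a sketch inferred from the encoder; XDR stateids are 16 bytes, the offsets and count are 8-byte hypers):

	/*
	 * opnum (OP_CLONE = 71)	 4 bytes
	 * src stateid			16 bytes
	 * dst stateid			16 bytes
	 * src offset			 8 bytes
	 * dst offset			 8 bytes
	 * count			 8 bytes
	 */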
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 788adf3897c7..868472b6b303 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7465,6 +7465,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), + PROC(CLONE, enc_clone, dec_clone), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 00121f298269..c0c695b634d0 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -130,6 +130,7 @@ enum nfs_opnum4 { OP_READ_PLUS = 68, OP_SEEK = 69, OP_WRITE_SAME = 70, + OP_CLONE = 71, OP_ILLEGAL = 10044, }; @@ -501,6 +502,7 @@ enum { NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, + NFSPROC4_CLNT_CLONE, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..ac678b7a65ed 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -359,6 +359,25 @@ struct nfs42_layoutstat_data { struct nfs42_layoutstat_res res; }; +struct nfs42_clone_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *src_fh; + struct nfs_fh *dst_fh; + nfs4_stateid src_stateid; + nfs4_stateid dst_stateid; + __u64 src_offset; + __u64 dst_offset; + __u64 count; + const u32 *dst_bitmask; +}; + +struct nfs42_clone_res { + struct nfs4_sequence_res seq_res; + unsigned int rpc_status; + struct nfs_fattr *dst_fattr; + const struct nfs_server *server; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; -- cgit v1.2.3 From e5341f3a5762d17be9cdd06257c02c0098bdcab8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:35 +0800 Subject: nfs42: add CLONE proc functions Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 1 + fs/nfs/nfs42proc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 3 +- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 75 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 814c1255f1d2..b587ccd31083 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, struct nfs42_layoutstat_data *); +int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0f020e4d8421..3e92a3cde15d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -271,3 +271,74 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, return PTR_ERR(task); return 0; } + +static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, + struct file *dst_f, loff_t src_offset, + loff_t dst_offset, loff_t count) +{ + struct inode *src_inode = file_inode(src_f); + struct inode *dst_inode = file_inode(dst_f); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct nfs42_clone_args args = { + .src_fh = NFS_FH(src_inode), + .dst_fh = NFS_FH(dst_inode), + .src_offset = src_offset, + .dst_offset = dst_offset, + .dst_bitmask = server->cache_consistency_bitmask, + }; + struct nfs42_clone_res res = { + .server = server, + }; + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); + if (status) + return status; + + status = nfs42_set_rw_stateid(&args.dst_stateid, 
dst_f, FMODE_WRITE); + if (status) + return status; + + res.dst_fattr = nfs_alloc_fattr(); + if (!res.dst_fattr) + return -ENOMEM; + + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); + if (status == 0) + status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); + + kfree(res.dst_fattr); + return status; +} + +int nfs42_proc_clone(struct file *src_f, struct file *dst_f, + loff_t src_offset, loff_t dst_offset, loff_t count) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE], + }; + struct inode *inode = file_inode(src_f); + struct nfs_server *server = NFS_SERVER(file_inode(src_f)); + struct nfs4_exception exception = { }; + int err; + + if (!nfs_server_capable(inode, NFS_CAP_CLONE)) + return -EOPNOTSUPP; + + do { + err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, + dst_offset, count); + if (err == -ENOTSUPP || err == -EOPNOTSUPP) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; + return -EOPNOTSUPP; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; + +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5133bb18830e..9688b1a9787f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8729,7 +8729,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK - | NFS_CAP_LAYOUTSTATS, + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 570a7df2775b..a50de1002b20 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -243,5 +243,6 @@ struct nfs_server { #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) +#define NFS_CAP_CLONE (1U << 23) #endif -- cgit v1.2.3 From 2a92ee92d4545448066fb664674c0ae5a9d5ea99 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:37 +0800 Subject: nfs: get clone_blksize when probing fsinfo NFSv42 CLONE operation is supposed to respect it. 
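Callers are expected to honour that granularity; a hypothetical alignment check (the helper below is invented, server->clone_blksize is the field this patch adds) might look like:

	#include <linux/kernel.h>
	#include <linux/nfs_fs_sb.h>

	static bool example_clone_aligned(struct nfs_server *server,
					  loff_t src_off, loff_t dst_off,
					  loff_t count)
	{
		unsigned int bs = server->clone_blksize;

		if (bs == 0)	/* server reported no particular granularity */
			return true;
		return IS_ALIGNED(src_off, bs) && IS_ALIGNED(dst_off, bs) &&
		       IS_ALIGNED(count, bs);
	}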
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 25 +++++++++++++++++++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 6 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 57c5a02f6213..d6d5d2a48e83 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->time_delta = fsinfo->time_delta; + server->clone_blksize = fsinfo->clone_blksize; /* We're airborne. Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9688b1a9787f..8814fbe62623 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -239,6 +239,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE + | FATTR4_WORD2_CLONE_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[3] = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 868472b6b303..9f656791a338 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4764,6 +4764,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +/* + * The granularity of a CLONE operation. + */ +static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { unsigned int savep; @@ -4796,6 +4818,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if (status != 0) goto xdr_error; status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); + if (status) + goto xdr_error; + status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize); if (status) goto xdr_error; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c0c695b634d0..e7e78537aea2 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -422,6 +422,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) +#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) /* MDS threshold bitmap bits */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a50de1002b20..2469ab0bb3a1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -147,6 +147,7 @@ struct nfs_server { unsigned int acdirmax; unsigned int namelen; unsigned int options; /* extra options enabled by mount */ + unsigned int clone_blksize; /* granularity of a CLONE operation */ #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac678b7a65ed..92ff445e60a0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -141,6 +141,7 @@ struct nfs_fsinfo { __u32 lease_time; /* in seconds */ __u32 layouttype; /* supported pnfs layout driver */ __u32 blksize;
/* preferred pnfs io block size */ + __u32 clone_blksize; /* granularity of a CLONE operation */ }; struct nfs_fsstat { -- cgit v1.2.3 From 0de0942db2b36dd91c088a7950398d2e87f23b23 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:49 -0400 Subject: cgroup: make cgroup->nr_populated count the number of populated css_sets Currently, cgroup->nr_populated counts whether the cgroup has any css_sets linked to it and the number of children which have non-zero ->nr_populated. This works because a css_set's refcnt converges with the number of tasks linked to it and thus there's no css_set linked to a cgroup if it doesn't have any live tasks. To help track resource usage of zombie tasks, putting the ref of css_set will be separated from disassociating the task from the css_set which means that a cgroup may have css_sets linked to it even when it doesn't have any live tasks. This patch updates cgroup->nr_populated so that for the cgroup itself it counts the number of css_sets which have tasks associated with them so that empty css_sets don't skew the populated test. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 8 +++--- kernel/cgroup.c | 65 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index df589a097539..17444505c870 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -232,10 +232,10 @@ struct cgroup { int id; /* - * If this cgroup contains any tasks, it contributes one to - * populated_cnt. All children with non-zero populated_cnt of - * their own contribute one. The count is zero iff there's no task - * in this cgroup or its subtree. + * Each non-empty css_set associated with this cgroup contributes + * one to populated_cnt. All children with non-zero populated_cnt + * of their own contribute one. The count is zero iff there's no + * task in this cgroup or its subtree. */ int populated_cnt; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 49f30f1b7e0a..e5231d045eb9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -581,15 +581,26 @@ struct css_set init_css_set = { static int css_set_count = 1; /* 1 for init_css_set */ +/** + * css_set_populated - does a css_set contain any tasks? + * @cset: target css_set + */ +static bool css_set_populated(struct css_set *cset) +{ + lockdep_assert_held(&css_set_rwsem); + + return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks); +} + /** * cgroup_update_populated - update populated count of a cgroup * @cgrp: the target cgroup * @populated: inc or dec populated count * - * @cgrp is either getting the first task (css_set) or losing the last. - * Update @cgrp->populated_cnt accordingly. The count is propagated - * towards root so that a given cgroup's populated_cnt is zero iff the - * cgroup and all its descendants are empty. + * One of the css_sets associated with @cgrp is either getting its first + * task or losing the last. Update @cgrp->populated_cnt accordingly. The + * count is propagated towards root so that a given cgroup's populated_cnt + * is zero iff the cgroup and all its descendants don't contain any tasks. * * @cgrp's interface file "cgroup.populated" is zero if * @cgrp->populated_cnt is zero and 1 otherwise.
When @cgrp->populated_cnt @@ -618,6 +629,24 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) } while (cgrp); } +/** + * css_set_update_populated - update populated state of a css_set + * @cset: target css_set + * @populated: whether @cset is populated or depopulated + * + * @cset is either getting the first task or losing the last. Update the + * ->populated_cnt of all associated cgroups accordingly. + */ +static void css_set_update_populated(struct css_set *cset, bool populated) +{ + struct cgrp_cset_link *link; + + lockdep_assert_held(&css_set_rwsem); + + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) + cgroup_update_populated(link->cgrp, populated); +} + /* * hash table for cgroup groups. This improves the performance to find * an existing css_set. This hash doesn't (currently) take into @@ -663,10 +692,8 @@ static void put_css_set_locked(struct css_set *cset) list_del(&link->cgrp_link); /* @cgrp can't go away while we're holding css_set_rwsem */ - if (list_empty(&cgrp->cset_links)) { - cgroup_update_populated(cgrp, false); + if (list_empty(&cgrp->cset_links)) check_for_release(cgrp); - } kfree(link); } @@ -875,8 +902,6 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, link->cset = cset; link->cgrp = cgrp; - if (list_empty(&cgrp->cset_links)) - cgroup_update_populated(cgrp, true); list_move(&link->cset_link, &cgrp->cset_links); /* @@ -1754,6 +1779,8 @@ static void cgroup_enable_task_cg_lists(void) if (!(p->flags & PF_EXITING)) { struct css_set *cset = task_css_set(p); + if (!css_set_populated(cset)) + css_set_update_populated(cset, true); list_add(&p->cg_list, &cset->tasks); get_css_set(cset); } @@ -1868,8 +1895,11 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) * objects. */ down_write(&css_set_rwsem); - hash_for_each(css_set_table, i, cset, hlist) + hash_for_each(css_set_table, i, cset, hlist) { link_css_set(&tmp_links, cset, root_cgrp); + if (css_set_populated(cset)) + cgroup_update_populated(root_cgrp, true); + } up_write(&css_set_rwsem); BUG_ON(!list_empty(&root_cgrp->self.children)); @@ -2256,10 +2286,16 @@ static void cgroup_task_migrate(struct task_struct *tsk, WARN_ON_ONCE(tsk->flags & PF_EXITING); old_cset = task_css_set(tsk); + if (!css_set_populated(new_cset)) + css_set_update_populated(new_cset, true); + get_css_set(new_cset); rcu_assign_pointer(tsk->cgroups, new_cset); list_move_tail(&tsk->cg_list, &new_cset->mg_tasks); + if (!css_set_populated(old_cset)) + css_set_update_populated(old_cset, false); + /* * We just gained a reference on old_cset by taking it from the * task. As trading it for new_cset is protected by cgroup_mutex, @@ -3774,7 +3810,7 @@ static void css_advance_task_iter(struct css_task_iter *it) link = list_entry(l, struct cgrp_cset_link, cset_link); cset = link->cset; } - } while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks)); + } while (!css_set_populated(cset)); it->cset_pos = l; @@ -5492,17 +5528,20 @@ void cgroup_exit(struct task_struct *tsk) /* * Unlink from @tsk from its css_set. As migration path can't race - * with us, we can check cg_list without grabbing css_set_rwsem. + * with us, we can check css_set and cg_list without synchronization. */ + cset = task_css_set(tsk); + if (!list_empty(&tsk->cg_list)) { down_write(&css_set_rwsem); list_del_init(&tsk->cg_list); + if (!css_set_populated(cset)) + css_set_update_populated(cset, false); up_write(&css_set_rwsem); put_cset = true; } /* Reassign the task to the init_css_set. 
*/ - cset = task_css_set(tsk); RCU_INIT_POINTER(tsk->cgroups, &init_css_set); /* see cgroup_post_fork() for details */ -- cgit v1.2.3 From 27bd4dbb8d51c476298e62bd088225317b7853de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:50 -0400 Subject: cgroup: replace cgroup_has_tasks() with cgroup_is_populated() Currently, cgroup_has_tasks() tests whether the target cgroup has any css_set linked to it. This works because a css_set's refcnt converges with the number of tasks linked to it and thus there's no css_set linked to a cgroup if it doesn't have any live tasks. To help track resource usage of zombie tasks, putting the ref of css_set will be separated from disassociating the task from the css_set which means that a cgroup may have css_sets linked to it even when it doesn't have any live tasks. This patch replaces cgroup_has_tasks() with cgroup_is_populated() which tests cgroup->nr_populated instead which locally counts the number of populated css_sets. Unlike cgroup_has_tasks(), cgroup_is_populated() is recursive - if any of the descendants is populated, the cgroup is populated too. While this changes the meaning of the test, all the existing users are okay with the change. While at it, replace the open-coded ->populated_cnt test in cgroup_events_show() with cgroup_is_populated(). Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- include/linux/cgroup.h | 4 ++-- kernel/cgroup.c | 6 +++--- kernel/cpuset.c | 2 +- mm/memcontrol.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e9c3eac074e2..bdfdb3a1a83c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -456,9 +456,9 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, } /* no synchronization, the result can only be used as a hint */ -static inline bool cgroup_has_tasks(struct cgroup *cgrp) +static inline bool cgroup_is_populated(struct cgroup *cgrp) { - return !list_empty(&cgrp->cset_links); + return cgrp->populated_cnt; } /* returns ino associated with a cgroup */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e5231d045eb9..435aa686567b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3121,7 +3121,7 @@ err_undo_css: static int cgroup_events_show(struct seq_file *seq, void *v) { seq_printf(seq, "populated %d\n", - (bool)seq_css(seq)->cgroup->populated_cnt); + cgroup_is_populated(seq_css(seq)->cgroup)); return 0; } @@ -5558,7 +5558,7 @@ void cgroup_exit(struct task_struct *tsk) static void check_for_release(struct cgroup *cgrp) { - if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) && + if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) && !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp)) schedule_work(&cgrp->release_agent_work); } @@ -5806,7 +5806,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) { - return (!cgroup_has_tasks(css->cgroup) && + return (!cgroup_is_populated(css->cgroup) && !css_has_online_children(&css->cgroup->self)); } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e4d999929903..d7ccb87a6714 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -498,7 +498,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) * be changed to have empty cpus_allowed or mems_allowed.
*/ ret = -ENOSPC; - if ((cgroup_has_tasks(cur->css.cgroup) || cur->attach_in_progress)) { + if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { if (!cpumask_empty(cur->cpus_allowed) && cpumask_empty(trial->cpus_allowed)) goto out; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 33c8dad6830f..0ddd0ff2b52e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2920,7 +2920,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg, * of course permitted. */ mutex_lock(&memcg_create_mutex); - if (cgroup_has_tasks(memcg->css.cgroup) || + if (cgroup_is_populated(memcg->css.cgroup) || (memcg->use_hierarchy && memcg_has_children(memcg))) err = -EBUSY; mutex_unlock(&memcg_create_mutex); -- cgit v1.2.3 From ed27b9f7a17ddfbc007e16d4d11f33dff4fc2de7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:52 -0400 Subject: cgroup: don't hold css_set_rwsem across css task iteration css_sets are synchronized through css_set_rwsem but the locking scheme is kinda bizarre. The hot paths - fork and exit - have to write lock the rwsem making the rw part pointless; furthermore, many readers already hold cgroup_mutex. One of the readers is css task iteration. It read locks the rwsem over the entire duration of iteration. This leads to silly locking behavior. When cpuset tries to migrate processes of a cgroup to a different NUMA node, css_set_rwsem is held across the entire migration attempt which can take a long time locking out forking, exiting and other cgroup operations. This patch updates css task iteration so that it locks css_set_rwsem only while the iterator is being advanced. css task iteration involves two levels - css_set and task iteration. As css_sets in use are practically immutable, simply pinning the current one is enough for resuming iteration afterwards. Task iteration is tricky as tasks may leave their css_set while iteration is in progress. This is solved by keeping track of active iterators and advancing them if their next task leaves its css_set. v2: put_task_struct() in css_task_iter_next() moved outside css_set_rwsem. A later patch will add cgroup operations to task_struct free path which may grab the same lock and this avoids deadlock possibilities. css_set_move_task() updated to use list_for_each_entry_safe() when walking task_iters and advancing them. This is necessary as advancing an iter may remove it from the list. 
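Stripped of the cgroup specifics, the central trick is that removing an element first advances every registered iterator whose next position is that element; a self-contained sketch with invented names (the real code uses the _safe walk because advancing an iterator may drop it from the list, kept here for parity):

	#include <linux/list.h>

	struct example_iter {
		struct list_head *pos;	/* next element to visit */
		struct list_head node;	/* on the list's iterator list */
	};

	static void example_remove(struct list_head *elem,
				   struct list_head *iters)
	{
		struct example_iter *it, *tmp;

		/* Advance iterators pointing at @elem before unlinking it,
		 * so each one can resume at the following element. */
		list_for_each_entry_safe(it, tmp, iters, node)
			if (it->pos == elem)
				it->pos = elem->next;
		list_del_init(elem);
	}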
Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 3 ++ include/linux/cgroup.h | 4 +++ kernel/cgroup.c | 87 +++++++++++++++++++++++++++++++++++++-------- 3 files changed, 80 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 17444505c870..62413c3e2f4b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -211,6 +211,9 @@ struct css_set { */ struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + /* all css_task_iters currently walking this cset */ + struct list_head task_iters; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bdfdb3a1a83c..a9dcf0e76865 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -42,6 +42,10 @@ struct css_task_iter { struct list_head *task_pos; struct list_head *tasks_head; struct list_head *mg_tasks_head; + + struct css_set *cur_cset; + struct task_struct *cur_task; + struct list_head iters_node; /* css_set->task_iters */ }; extern struct cgroup_root cgrp_dfl_root; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 56e2b772b143..0c5b0e605c47 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -216,6 +216,7 @@ static struct cftype cgroup_legacy_base_files[]; static int rebind_subsystems(struct cgroup_root *dst_root, unsigned long ss_mask); +static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, bool visible); @@ -593,6 +594,7 @@ struct css_set init_css_set = { .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), + .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), }; static int css_set_count = 1; /* 1 for init_css_set */ @@ -675,8 +677,9 @@ static void css_set_update_populated(struct css_set *cset, bool populated) * css_set, @from_cset can be NULL. If @task is being disassociated * instead of moved, @to_cset can be NULL. * - * This function automatically handles populated_cnt updates but the caller - * is responsible for managing @from_cset and @to_cset's reference counts. + * This function automatically handles populated_cnt updates and + * css_task_iter adjustments but the caller is responsible for managing + * @from_cset and @to_cset's reference counts. */ static void css_set_move_task(struct task_struct *task, struct css_set *from_cset, struct css_set *to_cset, @@ -685,7 +688,22 @@ static void css_set_move_task(struct task_struct *task, lockdep_assert_held(&css_set_rwsem); if (from_cset) { + struct css_task_iter *it, *pos; + WARN_ON_ONCE(list_empty(&task->cg_list)); + + /* + * @task is leaving, advance task iterators which are + * pointing to it so that they can resume at the next + * position. Advancing an iterator might remove it from + * the list, use safe walk. See css_task_iter_advance*() + * for details. 
+ */ + list_for_each_entry_safe(it, pos, &from_cset->task_iters, + iters_node) + if (it->task_pos == &task->cg_list) + css_task_iter_advance(it); + list_del_init(&task->cg_list); if (!css_set_populated(from_cset)) css_set_update_populated(from_cset, false); @@ -1019,6 +1037,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->mg_tasks); INIT_LIST_HEAD(&cset->mg_preload_node); INIT_LIST_HEAD(&cset->mg_node); + INIT_LIST_HEAD(&cset->task_iters); INIT_HLIST_NODE(&cset->hlist); /* Copy the set of subsystem state objects generated in @@ -3804,6 +3823,8 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) struct cgrp_cset_link *link; struct css_set *cset; + lockdep_assert_held(&css_set_rwsem); + /* Advance to the next non-empty css_set */ do { l = l->next; @@ -3831,12 +3852,36 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; + + /* + * We don't keep css_sets locked across iteration steps and thus + * need to take steps to ensure that iteration can be resumed after + * the lock is re-acquired. Iteration is performed at two levels - + * css_sets and tasks in them. + * + * Once created, a css_set never leaves its cgroup lists, so a + * pinned css_set is guaranteed to stay put and we can resume + * iteration afterwards. + * + * Tasks may leave @cset across iteration steps. This is resolved + * by registering each iterator with the css_set currently being + * walked and making css_set_move_task() advance iterators whose + * next task is leaving. + */ + if (it->cur_cset) { + list_del(&it->iters_node); + put_css_set_locked(it->cur_cset); + } + get_css_set(cset); + it->cur_cset = cset; + list_add(&it->iters_node, &cset->task_iters); } static void css_task_iter_advance(struct css_task_iter *it) { struct list_head *l = it->task_pos; + lockdep_assert_held(&css_set_rwsem); WARN_ON_ONCE(!l); /* @@ -3864,19 +3909,16 @@ static void css_task_iter_advance(struct css_task_iter *it) * css_task_iter_next() to walk through the tasks until the function * returns NULL. On completion of iteration, css_task_iter_end() must be * called. - * - * Note that this function acquires a lock which is released when the - * iteration finishes. The caller can't sleep while iteration is in - * progress. 
*/ void css_task_iter_start(struct cgroup_subsys_state *css, struct css_task_iter *it) - __acquires(css_set_rwsem) { /* no one should try to iterate before mounting cgroups */ WARN_ON_ONCE(!use_task_css_set_links); - down_read(&css_set_rwsem); + memset(it, 0, sizeof(*it)); + + down_write(&css_set_rwsem); it->ss = css->ss; @@ -3888,6 +3930,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css, it->cset_head = it->cset_pos; css_task_iter_advance_css_set(it); + + up_write(&css_set_rwsem); } /** @@ -3900,14 +3944,22 @@ void css_task_iter_start(struct cgroup_subsys_state *css, */ struct task_struct *css_task_iter_next(struct css_task_iter *it) { - struct task_struct *res; - if (!it->cset_pos) return NULL; - res = list_entry(it->task_pos, struct task_struct, cg_list); + if (it->cur_task) + put_task_struct(it->cur_task); + + down_write(&css_set_rwsem); + + it->cur_task = list_entry(it->task_pos, struct task_struct, cg_list); + get_task_struct(it->cur_task); + css_task_iter_advance(it); - return res; + + up_write(&css_set_rwsem); + + return it->cur_task; } /** @@ -3917,9 +3969,16 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) * Finish task iteration started by css_task_iter_start(). */ void css_task_iter_end(struct css_task_iter *it) - __releases(css_set_rwsem) { - up_read(&css_set_rwsem); + if (it->cur_cset) { + down_write(&css_set_rwsem); + list_del(&it->iters_node); + put_css_set_locked(it->cur_cset); + up_write(&css_set_rwsem); + } + + if (it->cur_task) + put_task_struct(it->cur_task); } /** -- cgit v1.2.3 From f0d9a5f175753a371bc7fdff0d584a8d9cd72bb0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:53 -0400 Subject: cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock css_set_rwsem is the inner lock protecting css_sets and is accessed from hot paths such as fork and exit. Internally, it has no reason to be an rwsem or even a mutex. There are no internal blocking operations while holding it. It was an rwsem because css task iteration used to expose it to external iterator users. As the previous patch updated css task iteration such that the locking is not leaked to its users, there's no reason to keep it an rwsem. This patch converts css_set_rwsem to a spinlock and renames it to css_set_lock. It uses bh-safe operations as a planned usage needs to access it from RCU callback context.
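As an aside, the conversion follows the usual before/after pattern (a sketch mirroring the hunks below; once the lock is a spinlock, critical sections must not sleep):

	/* before: sleepable rwsem */
	down_write(&css_set_rwsem);
	/* ... update css_sets and task->cgroups linkage ... */
	up_write(&css_set_rwsem);

	/* after: bh-safe spinlock, usable from RCU (softirq) context later */
	spin_lock_bh(&css_set_lock);
	/* ... update css_sets and task->cgroups linkage ... */
	spin_unlock_bh(&css_set_lock);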
Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 +- kernel/cgroup.c | 144 ++++++++++++++++++++++++------------------------- 2 files changed, 74 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a9dcf0e76865..46020735bcbb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -367,11 +366,11 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) */ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; -extern struct rw_semaphore css_set_rwsem; +extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ lockdep_is_held(&cgroup_mutex) || \ - lockdep_is_held(&css_set_rwsem) || \ + lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) #else #define task_css_set_check(task, __c) \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0c5b0e605c47..ba7b3284c2e4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -76,7 +75,7 @@ * cgroup_mutex is the master lock. Any modification to cgroup or its * hierarchy must be performed while holding it. * - * css_set_rwsem protects task->cgroups pointer, the list of css_set + * css_set_lock protects task->cgroups pointer, the list of css_set * objects, and the chain of tasks off each css_set. * * These locks are exported if CONFIG_PROVE_RCU so that accessors in @@ -84,12 +83,12 @@ */ #ifdef CONFIG_PROVE_RCU DEFINE_MUTEX(cgroup_mutex); -DECLARE_RWSEM(css_set_rwsem); +DEFINE_SPINLOCK(css_set_lock); EXPORT_SYMBOL_GPL(cgroup_mutex); -EXPORT_SYMBOL_GPL(css_set_rwsem); +EXPORT_SYMBOL_GPL(css_set_lock); #else static DEFINE_MUTEX(cgroup_mutex); -static DECLARE_RWSEM(css_set_rwsem); +static DEFINE_SPINLOCK(css_set_lock); #endif /* @@ -605,7 +604,7 @@ static int css_set_count = 1; /* 1 for init_css_set */ */ static bool css_set_populated(struct css_set *cset) { - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks); } @@ -628,7 +627,7 @@ static bool css_set_populated(struct css_set *cset) */ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) { - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); do { bool trigger; @@ -660,7 +659,7 @@ static void css_set_update_populated(struct css_set *cset, bool populated) { struct cgrp_cset_link *link; - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) cgroup_update_populated(link->cgrp, populated); @@ -685,7 +684,7 @@ static void css_set_move_task(struct task_struct *task, struct css_set *from_cset, struct css_set *to_cset, bool use_mg_tasks) { - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); if (from_cset) { struct css_task_iter *it, *pos; @@ -755,7 +754,7 @@ static void put_css_set_locked(struct css_set *cset) struct cgroup_subsys *ss; int ssid; - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); if (!atomic_dec_and_test(&cset->refcount)) return; @@ -787,9 +786,9 @@ static void put_css_set(struct css_set *cset) if (atomic_add_unless(&cset->refcount, -1, 1)) return; - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); put_css_set_locked(cset); - up_write(&css_set_rwsem); + 
spin_unlock_bh(&css_set_lock); } /* @@ -1012,11 +1011,11 @@ static struct css_set *find_css_set(struct css_set *old_cset, /* First see if we already have a cgroup group that matches * the desired set */ - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); cset = find_existing_css_set(old_cset, cgrp, template); if (cset) get_css_set(cset); - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); if (cset) return cset; @@ -1044,7 +1043,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, * find_existing_css_set() */ memcpy(cset->subsys, template, sizeof(cset->subsys)); - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); /* Add reference counts and links from the new css_set. */ list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; @@ -1066,7 +1065,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, list_add_tail(&cset->e_cset_node[ssid], &cset->subsys[ssid]->cgroup->e_csets[ssid]); - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); return cset; } @@ -1130,14 +1129,15 @@ static void cgroup_destroy_root(struct cgroup_root *root) * Release all the links from cset_links to this hierarchy's * root cgroup */ - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) { list_del(&link->cset_link); list_del(&link->cgrp_link); kfree(link); } - up_write(&css_set_rwsem); + + spin_unlock_bh(&css_set_lock); if (!list_empty(&root->root_list)) { list_del(&root->root_list); @@ -1159,7 +1159,7 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup *res = NULL; lockdep_assert_held(&cgroup_mutex); - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); if (cset == &init_css_set) { res = &root->cgrp; @@ -1182,7 +1182,7 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, /* * Return the cgroup for "task" from the given hierarchy. Must be - * called with cgroup_mutex and css_set_rwsem held. + * called with cgroup_mutex and css_set_lock held. */ static struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root) @@ -1531,11 +1531,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root, ss->root = dst_root; css->cgroup = dcgrp; - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); hash_for_each(css_set_table, i, cset, hlist) list_move_tail(&cset->e_cset_node[ss->id], &dcgrp->e_csets[ss->id]); - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); src_root->subsys_mask &= ~(1 << ssid); scgrp->subtree_control &= ~(1 << ssid); @@ -1812,7 +1812,7 @@ static void cgroup_enable_task_cg_lists(void) { struct task_struct *p, *g; - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); if (use_task_css_set_links) goto out_unlock; @@ -1851,7 +1851,7 @@ static void cgroup_enable_task_cg_lists(void) } while_each_thread(g, p); read_unlock(&tasklist_lock); out_unlock: - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); } static void init_cgroup_housekeeping(struct cgroup *cgrp) @@ -1915,7 +1915,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) goto out; /* - * We're accessing css_set_count without locking css_set_rwsem here, + * We're accessing css_set_count without locking css_set_lock here, * but that's OK - it can only be increased by someone holding * cgroup_lock, and that's us. 
The worst that can happen is that we * have some link structures left over @@ -1957,13 +1957,13 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) * Link the root cgroup in this hierarchy into all the css_set * objects. */ - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); hash_for_each(css_set_table, i, cset, hlist) { link_css_set(&tmp_links, cset, root_cgrp); if (css_set_populated(cset)) cgroup_update_populated(root_cgrp, true); } - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); BUG_ON(!list_empty(&root_cgrp->self.children)); BUG_ON(atomic_read(&root->nr_cgrps) != 1); @@ -2196,7 +2196,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) char *path = NULL; mutex_lock(&cgroup_mutex); - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); @@ -2209,7 +2209,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) path = buf; } - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); mutex_unlock(&cgroup_mutex); return path; } @@ -2258,7 +2258,7 @@ static void cgroup_taskset_add(struct task_struct *task, { struct css_set *cset; - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); /* @task either already exited or can't exit until the end */ if (task->flags & PF_EXITING) @@ -2364,7 +2364,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, * the new cgroup. There are no failure cases after here, so this * is the commit point. */ - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_for_each_entry(cset, &tset->src_csets, mg_node) { list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) { struct css_set *from_cset = task_css_set(task); @@ -2375,7 +2375,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, put_css_set_locked(from_cset); } } - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); /* * Migration is committed, all target tasks are now on dst_csets. @@ -2399,13 +2399,13 @@ out_cancel_attach: css->ss->cancel_attach(css, tset); } out_release_tset: - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_splice_init(&tset->dst_csets, &tset->src_csets); list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) { list_splice_tail_init(&cset->mg_tasks, &cset->tasks); list_del_init(&cset->mg_node); } - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); return ret; } @@ -2422,14 +2422,14 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) lockdep_assert_held(&cgroup_mutex); - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_preload_node); put_css_set_locked(cset); } - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); } /** @@ -2455,7 +2455,7 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *src_cgrp; lockdep_assert_held(&cgroup_mutex); - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); @@ -2571,7 +2571,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup, * already PF_EXITING could be freed from underneath us unless we * take an rcu_read_lock. 
*/ - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); rcu_read_lock(); task = leader; do { @@ -2580,7 +2580,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup, break; } while_each_thread(leader, task); rcu_read_unlock(); - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); return cgroup_taskset_migrate(&tset, cgrp); } @@ -2601,7 +2601,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, int ret; /* look up all src csets */ - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); rcu_read_lock(); task = leader; do { @@ -2611,7 +2611,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, break; } while_each_thread(leader, task); rcu_read_unlock(); - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); /* prepare dst csets and commit */ ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets); @@ -2644,9 +2644,9 @@ static int cgroup_procs_write_permission(struct task_struct *task, struct cgroup *cgrp; struct inode *inode; - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); while (!cgroup_is_descendant(dst_cgrp, cgrp)) cgrp = cgroup_parent(cgrp); @@ -2743,9 +2743,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) if (root == &cgrp_dfl_root) continue; - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); from_cgrp = task_cgroup_from_root(from, root); - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); retval = cgroup_attach_task(from_cgrp, tsk, false); if (retval) @@ -2870,7 +2870,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) percpu_down_write(&cgroup_threadgroup_rwsem); /* look up all csses currently attached to @cgrp's subtree */ - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { struct cgrp_cset_link *link; @@ -2882,14 +2882,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) cgroup_migrate_add_src(link->cset, cgrp, &preloaded_csets); } - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); /* NULL dst indicates self on default hierarchy */ ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets); if (ret) goto out_finish; - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) { struct task_struct *task, *ntask; @@ -2901,7 +2901,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list) cgroup_taskset_add(task, &tset); } - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); ret = cgroup_taskset_migrate(&tset, cgrp); out_finish: @@ -3577,10 +3577,10 @@ static int cgroup_task_count(const struct cgroup *cgrp) int count = 0; struct cgrp_cset_link *link; - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) count += atomic_read(&link->cset->refcount); - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); return count; } @@ -3823,7 +3823,7 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) struct cgrp_cset_link *link; struct css_set *cset; - lockdep_assert_held(&css_set_rwsem); + lockdep_assert_held(&css_set_lock); /* Advance to the next non-empty css_set */ do { @@ -3881,7 +3881,7 @@ static void css_task_iter_advance(struct css_task_iter *it) { struct list_head *l = it->task_pos; - lockdep_assert_held(&css_set_rwsem); + 
lockdep_assert_held(&css_set_lock); WARN_ON_ONCE(!l); /* @@ -3918,7 +3918,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, memset(it, 0, sizeof(*it)); - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); it->ss = css->ss; @@ -3931,7 +3931,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, css_task_iter_advance_css_set(it); - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); } /** @@ -3950,14 +3950,14 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) if (it->cur_task) put_task_struct(it->cur_task); - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); it->cur_task = list_entry(it->task_pos, struct task_struct, cg_list); get_task_struct(it->cur_task); css_task_iter_advance(it); - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); return it->cur_task; } @@ -3971,10 +3971,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) void css_task_iter_end(struct css_task_iter *it) { if (it->cur_cset) { - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_del(&it->iters_node); put_css_set_locked(it->cur_cset); - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); } if (it->cur_task) @@ -4003,10 +4003,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) mutex_lock(&cgroup_mutex); /* all tasks in @from are being moved, all csets are source */ - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_for_each_entry(link, &from->cset_links, cset_link) cgroup_migrate_add_src(link->cset, to, &preloaded_csets); - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); ret = cgroup_migrate_prepare_dst(to, &preloaded_csets); if (ret) @@ -5359,7 +5359,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, goto out; mutex_lock(&cgroup_mutex); - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); for_each_root(root) { struct cgroup_subsys *ss; @@ -5391,7 +5391,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, retval = 0; out_unlock: - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); mutex_unlock(&cgroup_mutex); kfree(buf); out: @@ -5537,7 +5537,7 @@ void cgroup_post_fork(struct task_struct *child, * @child during its iteration. * * If we won the race, @child is associated with %current's - * css_set. Grabbing css_set_rwsem guarantees both that the + * css_set. Grabbing css_set_lock guarantees both that the * association is stable, and, on completion of the parent's * migration, @child is visible in the source of migration or * already in the destination cgroup. 
This guarantee is necessary @@ -5552,13 +5552,13 @@ void cgroup_post_fork(struct task_struct *child, if (use_task_css_set_links) { struct css_set *cset; - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); cset = task_css_set(current); if (list_empty(&child->cg_list)) { get_css_set(cset); css_set_move_task(child, NULL, cset, false); } - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); } /* @@ -5603,9 +5603,9 @@ void cgroup_exit(struct task_struct *tsk) cset = task_css_set(tsk); if (!list_empty(&tsk->cg_list)) { - down_write(&css_set_rwsem); + spin_lock_bh(&css_set_lock); css_set_move_task(tsk, cset, NULL, false); - up_write(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); put_cset = true; } @@ -5823,7 +5823,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) if (!name_buf) return -ENOMEM; - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); rcu_read_lock(); cset = rcu_dereference(current->cgroups); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { @@ -5834,7 +5834,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) c->root->hierarchy_id, name_buf); } rcu_read_unlock(); - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); kfree(name_buf); return 0; } @@ -5845,7 +5845,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) struct cgroup_subsys_state *css = seq_css(seq); struct cgrp_cset_link *link; - down_read(&css_set_rwsem); + spin_lock_bh(&css_set_lock); list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { struct css_set *cset = link->cset; struct task_struct *task; @@ -5868,7 +5868,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) overflow: seq_puts(seq, " ...\n"); } - up_read(&css_set_rwsem); + spin_unlock_bh(&css_set_lock); return 0; } -- cgit v1.2.3 From 2e91fa7f6d451e3ea9fec999065d2fd199691f9d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:53 -0400 Subject: cgroup: keep zombies associated with their original cgroups cgroup_exit() is called when a task exits; it disassociates the exiting task from its cgroups and half-attaches it to the root cgroup. This is unnecessary and undesirable. No controller actually needs an exiting task to be disassociated from non-root cgroups. Both cpu and perf_event controllers update the association to the root cgroup from their exit callbacks just to stay consistent with the cgroup core behavior. Also, this disassociation makes it difficult to track resources held by zombies or determine where the zombies came from. Currently, the pids controller is completely broken as it uncharges on exit and zombies always escape the resource restriction. With cgroup association being reset on exit, fixing it is pretty painful. There's no reason to reset cgroup membership on exit. The zombie can be removed from its css_set so that it doesn't show up on "cgroup.procs" and thus can't be migrated or interfere with cgroup removal. It can still pin and point to the css_set so that its cgroup membership is maintained. This patch makes cgroup core keep zombies associated with their cgroups at the time of exit. * Previous patches decoupled populated_cnt tracking from css_set lifetime, so a dying task can be simply unlinked from its css_set while pinning and pointing to the css_set. This keeps css_set association from the task side alive while hiding it from "cgroup.procs" and populated_cnt tracking. The css_set reference is dropped when the task_struct is freed.
* ->exit() callback no longer needs the css arguments as the associated css never changes once PF_EXITING is set. Removed. * cpu and perf_events controllers no longer need ->exit() callbacks. There's no reason to explicitly switch away on exit. The final schedule out is enough. The callbacks are removed. * On traditional hierarchies, nothing changes. "/proc/PID/cgroup" still reports "/" for all zombies. On the default hierarchy, "/proc/PID/cgroup" keeps reporting the cgroup that the task belonged to at the time of exit. If the cgroup gets removed before the task is reaped, " (deleted)" is appended. v2: Build breakage due to missing dummy cgroup_free() when !CONFIG_CGROUP fixed. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo --- Documentation/cgroups/unified-hierarchy.txt | 4 +++ include/linux/cgroup-defs.h | 4 +-- include/linux/cgroup.h | 2 ++ kernel/cgroup.c | 51 +++++++++++++++++++---------- kernel/cgroup_pids.c | 6 ++-- kernel/events/core.c | 16 --------- kernel/fork.c | 1 + kernel/sched/core.c | 16 --------- 8 files changed, 44 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 176b940f8327..6932453d37a2 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -374,6 +374,10 @@ supported and the interface files "release_agent" and - The "cgroup.clone_children" file is removed. +- /proc/PID/cgroup keeps reporting the cgroup that a zombie belonged + to before exiting. If the cgroup is removed before the zombie is + reaped, " (deleted)" is appended to the path. + 5-3. Controller File Conventions diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 62413c3e2f4b..6a1ab64ee5f9 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -435,9 +435,7 @@ struct cgroup_subsys { int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); - void (*exit)(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task); + void (*exit)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); int early_init; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 46020735bcbb..22e3754f89c5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -102,6 +102,7 @@ extern void cgroup_cancel_fork(struct task_struct *p, extern void cgroup_post_fork(struct task_struct *p, void *old_ss_priv[CGROUP_CANFORK_COUNT]); void cgroup_exit(struct task_struct *p); +void cgroup_free(struct task_struct *p); int cgroup_init_early(void); int cgroup_init(void); @@ -547,6 +548,7 @@ static inline void cgroup_cancel_fork(struct task_struct *p, static inline void cgroup_post_fork(struct task_struct *p, void *ss_priv[CGROUP_CANFORK_COUNT]) {} static inline void cgroup_exit(struct task_struct *p) {} +static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ba7b3284c2e4..918658497625 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5379,14 +5379,34 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "%sname=%s", count ?
"," : "", root->name); seq_putc(m, ':'); + cgrp = task_cgroup_from_root(tsk, root); - path = cgroup_path(cgrp, buf, PATH_MAX); - if (!path) { - retval = -ENAMETOOLONG; - goto out_unlock; + + /* + * On traditional hierarchies, all zombie tasks show up as + * belonging to the root cgroup. On the default hierarchy, + * while a zombie doesn't show up in "cgroup.procs" and + * thus can't be migrated, its /proc/PID/cgroup keeps + * reporting the cgroup it belonged to before exiting. If + * the cgroup is removed before the zombie is reaped, + * " (deleted)" is appended to the cgroup path. + */ + if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { + path = cgroup_path(cgrp, buf, PATH_MAX); + if (!path) { + retval = -ENAMETOOLONG; + goto out_unlock; + } + } else { + path = "/"; } + seq_puts(m, path); - seq_putc(m, '\n'); + + if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp)) + seq_puts(m, " (deleted)\n"); + else + seq_putc(m, '\n'); } retval = 0; @@ -5593,7 +5613,6 @@ void cgroup_exit(struct task_struct *tsk) { struct cgroup_subsys *ss; struct css_set *cset; - bool put_cset = false; int i; /* @@ -5606,22 +5625,20 @@ void cgroup_exit(struct task_struct *tsk) spin_lock_bh(&css_set_lock); css_set_move_task(tsk, cset, NULL, false); spin_unlock_bh(&css_set_lock); - put_cset = true; + } else { + get_css_set(cset); } - /* Reassign the task to the init_css_set. */ - RCU_INIT_POINTER(tsk->cgroups, &init_css_set); - /* see cgroup_post_fork() for details */ - for_each_subsys_which(ss, i, &have_exit_callback) { - struct cgroup_subsys_state *old_css = cset->subsys[i]; - struct cgroup_subsys_state *css = task_css(tsk, i); + for_each_subsys_which(ss, i, &have_exit_callback) + ss->exit(tsk); +} - ss->exit(css, old_css, tsk); - } +void cgroup_free(struct task_struct *task) +{ + struct css_set *cset = task_css_set(task); - if (put_cset) - put_css_set(cset); + put_css_set(cset); } static void check_for_release(struct cgroup *cgrp) diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c index 806cd7693ac8..45f0856a61fe 100644 --- a/kernel/cgroup_pids.c +++ b/kernel/cgroup_pids.c @@ -266,11 +266,9 @@ static void pids_fork(struct task_struct *task, void *priv) css_put(old_css); } -static void pids_exit(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task) +static void pids_exit(struct task_struct *task) { - struct pids_cgroup *pids = css_pids(old_css); + struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); pids_uncharge(pids, 1); } diff --git a/kernel/events/core.c b/kernel/events/core.c index f548f69c4299..e9874949c787 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9293,25 +9293,9 @@ static void perf_cgroup_attach(struct cgroup_subsys_state *css, task_function_call(task, __perf_cgroup_move, task); } -static void perf_cgroup_exit(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task) -{ - /* - * cgroup_exit() is called in the copy_process() failure path. - * Ignore this case since the task hasn't ran yet, this avoids - * trying to poke a half freed task state from generic code. 
- if (!(task->flags & PF_EXITING)) return; - task_function_call(task, __perf_cgroup_move, task); -} - struct cgroup_subsys perf_event_cgrp_subsys = { .css_alloc = perf_cgroup_css_alloc, .css_free = perf_cgroup_css_free, - .exit = perf_cgroup_exit, .attach = perf_cgroup_attach, }; #endif /* CONFIG_CGROUP_PERF */ diff --git a/kernel/fork.c b/kernel/fork.c index 7d5f0f118a63..118743bb5964 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -251,6 +251,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + cgroup_free(tsk); task_numa_free(tsk); security_task_free(tsk); exit_creds(tsk); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3595403921bd..2cad9ba91036 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8163,21 +8163,6 @@ static void cpu_cgroup_attach(struct cgroup_subsys_state *css, sched_move_task(task); } -static void cpu_cgroup_exit(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task) -{ - /* - * cgroup_exit() is called in the copy_process() failure path. - * Ignore this case since the task hasn't ran yet, this avoids - * trying to poke a half freed task state from generic code. - */ - if (!(task->flags & PF_EXITING)) - return; - - sched_move_task(task); -} - #ifdef CONFIG_FAIR_GROUP_SCHED static int cpu_shares_write_u64(struct cgroup_subsys_state *css, struct cftype *cftype, u64 shareval) @@ -8509,7 +8494,6 @@ struct cgroup_subsys cpu_cgrp_subsys = { .fork = cpu_cgroup_fork, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, - .exit = cpu_cgroup_exit, .legacy_cftypes = cpu_files, .early_init = 1, }; -- cgit v1.2.3 From afcf6c8b75444382e0f9996157207ebae34a8848 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:53 -0400 Subject: cgroup: add cgroup_subsys->free() method and use it to fix pids controller The pids controller is completely broken in that it uncharges when a task exits, allowing zombies to escape resource control. With the recent updates, cgroup core now maintains cgroup association till task free and the pids controller can be fixed by uncharging on free instead of exit. This patch adds a cgroup_subsys->free() method and updates the pids controller to use it instead of ->exit() for uncharging. Signed-off-by: Tejun Heo Cc: Aleksa Sarai --- Documentation/cgroups/cgroups.txt | 4 ++++ include/linux/cgroup-defs.h | 1 + kernel/cgroup.c | 7 +++++++ kernel/cgroup_pids.c | 4 ++-- 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index f935fac1e73b..c6256ae9885b 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -637,6 +637,10 @@ void exit(struct task_struct *task) Called during task exit. +void free(struct task_struct *task) + +Called when the task_struct is freed.
+ void bind(struct cgroup *root) (cgroup_mutex held by caller) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6a1ab64ee5f9..60d44b26276d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -436,6 +436,7 @@ struct cgroup_subsys { void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); void (*exit)(struct task_struct *task); + void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); int early_init; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 918658497625..867384369669 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -206,6 +206,7 @@ static u64 css_serial_nr_next = 1; */ static unsigned long have_fork_callback __read_mostly; static unsigned long have_exit_callback __read_mostly; +static unsigned long have_free_callback __read_mostly; /* Ditto for the can_fork callback. */ static unsigned long have_canfork_callback __read_mostly; @@ -5180,6 +5181,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; + have_free_callback |= (bool)ss->free << ss->id; have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been @@ -5637,6 +5639,11 @@ void cgroup_exit(struct task_struct *tsk) void cgroup_free(struct task_struct *task) { struct css_set *cset = task_css_set(task); + struct cgroup_subsys *ss; + int ssid; + + for_each_subsys_which(ss, ssid, &have_free_callback) + ss->free(task); put_css_set(cset); } diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c index 45f0856a61fe..cdd8df4e991c 100644 --- a/kernel/cgroup_pids.c +++ b/kernel/cgroup_pids.c @@ -266,7 +266,7 @@ static void pids_fork(struct task_struct *task, void *priv) css_put(old_css); } -static void pids_exit(struct task_struct *task) +static void pids_free(struct task_struct *task) { struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); @@ -347,7 +347,7 @@ struct cgroup_subsys pids_cgrp_subsys = { .can_fork = pids_can_fork, .cancel_fork = pids_cancel_fork, .fork = pids_fork, - .exit = pids_exit, + .free = pids_free, .legacy_cftypes = pids_files, .dfl_cftypes = pids_files, }; -- cgit v1.2.3 From 11eecf910bd81d36425020743b2df73651c5b466 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Sat, 3 Oct 2015 14:15:13 +0900 Subject: extcon: Modify the id and name of external connector This patch modifies the id and name of each external connector, adding a prefix that clarifies both the attribute and the meaning of the connector, as follows: - EXTCON_CHG_* mean the charger connectors. - EXTCON_JACK_* mean the jack connectors. - EXTCON_DISP_* mean the display port connectors.
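For example, a MUIC driver reporting a dedicated charger moves from the old id to the new one like this (an illustrative sketch; info->edev is the driver's extcon_dev, as in the hunks below):

	/* old naming */
	extcon_set_cable_state_(info->edev, EXTCON_TA, attached);

	/* new naming */
	extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, attached);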
The following table shows the new name of each external connector alongside the old name:

--------------------------------------------------
 Old extcon name          | New extcon name        |
--------------------------------------------------
 EXTCON_TA                | EXTCON_CHG_USB_DCP     |
 EXTCON_CHARGE_DOWNSTREAM | EXTCON_CHG_USB_CDP     |
 EXTCON_FAST_CHARGER      | EXTCON_CHG_USB_FAST    |
 EXTCON_SLOW_CHARGER      | EXTCON_CHG_USB_SLOW    |
--------------------------------------------------
 EXTCON_MICROPHONE        | EXTCON_JACK_MICROPHONE |
 EXTCON_HEADPHONE         | EXTCON_JACK_HEADPHONE  |
 EXTCON_LINE_IN           | EXTCON_JACK_LINE_IN    |
 EXTCON_LINE_OUT          | EXTCON_JACK_LINE_OUT   |
 EXTCON_VIDEO_IN          | EXTCON_JACK_VIDEO_IN   |
 EXTCON_VIDEO_OUT         | EXTCON_JACK_VIDEO_OUT  |
 EXTCON_SPDIF_IN          | EXTCON_JACK_SPDIF_IN   |
 EXTCON_SPDIF_OUT         | EXTCON_JACK_SPDIF_OUT  |
--------------------------------------------------
 EXTCON_HDMI              | EXTCON_DISP_HDMI       |
 EXTCON_MHL               | EXTCON_DISP_MHL        |
 EXTCON_DVI               | EXTCON_DISP_DVI        |
 EXTCON_VGA               | EXTCON_DISP_VGA        |
--------------------------------------------------

And, when altering the name of the USB charger connectors, EXTCON refers to the "Battery Charging v1.2 Spec and Adopters Agreement"[1] to use the standard names of USB charging ports, as follows. The following names of USB charging ports are already used in the power_supply subsystem; we can check them in patch [2].
- EXTCON_CHG_USB_SDP /* Standard Downstream Port */
- EXTCON_CHG_USB_DCP /* Dedicated Charging Port */
- EXTCON_CHG_USB_CDP /* Charging Downstream Port */
- EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */
[1] www.usb.org/developers/docs/devclass_docs/BCv1.2_070312.zip
[2] commit 85efc8a18ced ("power_supply: Add types for USB chargers")
Signed-off-by: Chanwoo Choi [ckeepax: For the Arizona changes] Acked-by: Charles Keepax Reviewed-by: Roger Quadros --- drivers/extcon/extcon-arizona.c | 18 ++++++------ drivers/extcon/extcon-axp288.c | 12 ++++---- drivers/extcon/extcon-max14577.c | 17 +++++------ drivers/extcon/extcon-max77693.c | 32 +++++++++++---------- drivers/extcon/extcon-max77843.c | 27 +++++++++-------- drivers/extcon/extcon-max8997.c | 21 +++++++------- drivers/extcon/extcon-rt8973a.c | 4 +-- drivers/extcon/extcon-sm5502.c | 4 +-- drivers/extcon/extcon.c | 61 ++++++++++++++++++++------------------- include/linux/extcon.h | 62 +++++++++++++++++++++++----------------- 10 files changed, 139 insertions(+), 119 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index a1ab0a56b798..e4890dd4fefd 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -137,9 +137,9 @@ static const int arizona_micd_levels[] = { static const unsigned int arizona_cable[] = { EXTCON_MECHANICAL, - EXTCON_MICROPHONE, - EXTCON_HEADPHONE, - EXTCON_LINE_OUT, + EXTCON_JACK_MICROPHONE, + EXTCON_JACK_HEADPHONE, + EXTCON_JACK_LINE_OUT, EXTCON_NONE, }; @@ -600,7 +600,7 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data) struct arizona_extcon_info *info = data; struct arizona *arizona = info->arizona; int id_gpio = arizona->pdata.hpdet_id_gpio; - unsigned int report = EXTCON_HEADPHONE; + unsigned int report = EXTCON_JACK_HEADPHONE; int ret, reading; bool mic = false; @@ -645,9 +645,9 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data) /* Report high impedence cables as line outputs */ if (reading >= 5000) - report = EXTCON_LINE_OUT; + report = EXTCON_JACK_LINE_OUT; else - report = EXTCON_HEADPHONE; + report = EXTCON_JACK_HEADPHONE; ret = extcon_set_cable_state_(info->edev, report, true); if (ret != 0) @@ -732,7 +732,7 @@ err:
ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC); /* Just report headphone */ - ret = extcon_set_cable_state_(info->edev, EXTCON_HEADPHONE, true); + ret = extcon_set_cable_state_(info->edev, EXTCON_JACK_HEADPHONE, true); if (ret != 0) dev_err(arizona->dev, "Failed to report headphone: %d\n", ret); @@ -789,7 +789,7 @@ err: ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC); /* Just report headphone */ - ret = extcon_set_cable_state_(info->edev, EXTCON_HEADPHONE, true); + ret = extcon_set_cable_state_(info->edev, EXTCON_JACK_HEADPHONE, true); if (ret != 0) dev_err(arizona->dev, "Failed to report headphone: %d\n", ret); @@ -915,7 +915,7 @@ static void arizona_micd_detect(struct work_struct *work) arizona_identify_headphone(info); ret = extcon_set_cable_state_(info->edev, - EXTCON_MICROPHONE, true); + EXTCON_JACK_MICROPHONE, true); if (ret != 0) dev_err(arizona->dev, "Headset report failed: %d\n", ret); diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index 9668d6a94e38..fd55c2f2080a 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -102,9 +102,9 @@ enum axp288_extcon_irq { }; static const unsigned int axp288_extcon_cables[] = { - EXTCON_SLOW_CHARGER, - EXTCON_CHARGE_DOWNSTREAM, - EXTCON_FAST_CHARGER, + EXTCON_CHG_USB_SDP, + EXTCON_CHG_USB_CDP, + EXTCON_CHG_USB_DCP, EXTCON_NONE, }; @@ -192,18 +192,18 @@ static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) dev_dbg(info->dev, "sdp cable is connecetd\n"); notify_otg = true; notify_charger = true; - cable = EXTCON_SLOW_CHARGER; + cable = EXTCON_CHG_USB_SDP; break; case DET_STAT_CDP: dev_dbg(info->dev, "cdp cable is connecetd\n"); notify_otg = true; notify_charger = true; - cable = EXTCON_CHARGE_DOWNSTREAM; + cable = EXTCON_CHG_USB_CDP; break; case DET_STAT_DCP: dev_dbg(info->dev, "dcp cable is connecetd\n"); notify_charger = true; - cable = EXTCON_FAST_CHARGER; + cable = EXTCON_CHG_USB_DCP; break; default: dev_warn(info->dev, diff --git a/drivers/extcon/extcon-max14577.c b/drivers/extcon/extcon-max14577.c index df0659d98e5a..601dbd996487 100644 --- a/drivers/extcon/extcon-max14577.c +++ b/drivers/extcon/extcon-max14577.c @@ -150,10 +150,10 @@ enum max14577_muic_acc_type { static const unsigned int max14577_extcon_cable[] = { EXTCON_USB, - EXTCON_TA, - EXTCON_FAST_CHARGER, - EXTCON_SLOW_CHARGER, - EXTCON_CHARGE_DOWNSTREAM, + EXTCON_CHG_USB_DCP, + EXTCON_CHG_USB_FAST, + EXTCON_CHG_USB_SLOW, + EXTCON_CHG_USB_CDP, EXTCON_JIG, EXTCON_NONE, }; @@ -456,18 +456,19 @@ static int max14577_muic_chg_handler(struct max14577_muic_info *info) extcon_set_cable_state_(info->edev, EXTCON_USB, attached); break; case MAX14577_CHARGER_TYPE_DEDICATED_CHG: - extcon_set_cable_state_(info->edev, EXTCON_TA, attached); + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, + attached); break; case MAX14577_CHARGER_TYPE_DOWNSTREAM_PORT: - extcon_set_cable_state_(info->edev, EXTCON_CHARGE_DOWNSTREAM, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP, attached); break; case MAX14577_CHARGER_TYPE_SPECIAL_500MA: - extcon_set_cable_state_(info->edev, EXTCON_SLOW_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW, attached); break; case MAX14577_CHARGER_TYPE_SPECIAL_1A: - extcon_set_cable_state_(info->edev, EXTCON_FAST_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST, attached); break; case MAX14577_CHARGER_TYPE_NONE: diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index 35b9e118b2fb..44c499e1beee 100644 --- 
a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -204,11 +204,11 @@ enum max77693_muic_acc_type { static const unsigned int max77693_extcon_cable[] = { EXTCON_USB, EXTCON_USB_HOST, - EXTCON_TA, - EXTCON_FAST_CHARGER, - EXTCON_SLOW_CHARGER, - EXTCON_CHARGE_DOWNSTREAM, - EXTCON_MHL, + EXTCON_CHG_USB_DCP, + EXTCON_CHG_USB_FAST, + EXTCON_CHG_USB_SLOW, + EXTCON_CHG_USB_CDP, + EXTCON_DISP_MHL, EXTCON_JIG, EXTCON_DOCK, EXTCON_NONE, @@ -505,7 +505,7 @@ static int max77693_muic_dock_handler(struct max77693_muic_info *info, return ret; extcon_set_cable_state_(info->edev, EXTCON_DOCK, attached); - extcon_set_cable_state_(info->edev, EXTCON_MHL, attached); + extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached); goto out; case MAX77693_MUIC_ADC_AUDIO_MODE_REMOTE: /* Dock-Desk */ dock_id = EXTCON_DOCK; @@ -605,7 +605,7 @@ static int max77693_muic_adc_ground_handler(struct max77693_muic_info *info) case MAX77693_MUIC_GND_MHL: case MAX77693_MUIC_GND_MHL_VB: /* MHL or MHL with USB/TA cable */ - extcon_set_cable_state_(info->edev, EXTCON_MHL, attached); + extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached); break; default: dev_err(info->dev, "failed to detect %s cable of gnd type\n", @@ -801,10 +801,11 @@ static int max77693_muic_chg_handler(struct max77693_muic_info *info) * - Support charging through micro-usb port without * data connection */ - extcon_set_cable_state_(info->edev, EXTCON_TA, attached); + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, + attached); if (!cable_attached) - extcon_set_cable_state_(info->edev, EXTCON_MHL, - cable_attached); + extcon_set_cable_state_(info->edev, + EXTCON_DISP_MHL, cable_attached); break; } @@ -862,7 +863,7 @@ static int max77693_muic_chg_handler(struct max77693_muic_info *info) extcon_set_cable_state_(info->edev, EXTCON_DOCK, attached); - extcon_set_cable_state_(info->edev, EXTCON_MHL, + extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached); break; } @@ -901,20 +902,21 @@ static int max77693_muic_chg_handler(struct max77693_muic_info *info) break; case MAX77693_CHARGER_TYPE_DEDICATED_CHG: /* Only TA cable */ - extcon_set_cable_state_(info->edev, EXTCON_TA, attached); + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, + attached); break; } break; case MAX77693_CHARGER_TYPE_DOWNSTREAM_PORT: - extcon_set_cable_state_(info->edev, EXTCON_CHARGE_DOWNSTREAM, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP, attached); break; case MAX77693_CHARGER_TYPE_APPLE_500MA: - extcon_set_cable_state_(info->edev, EXTCON_SLOW_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW, attached); break; case MAX77693_CHARGER_TYPE_APPLE_1A_2A: - extcon_set_cable_state_(info->edev, EXTCON_FAST_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST, attached); break; case MAX77693_CHARGER_TYPE_DEAD_BATTERY: diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c index fdd928542c19..9f9ea334399c 100644 --- a/drivers/extcon/extcon-max77843.c +++ b/drivers/extcon/extcon-max77843.c @@ -122,11 +122,11 @@ enum max77843_muic_charger_type { static const unsigned int max77843_extcon_cable[] = { EXTCON_USB, EXTCON_USB_HOST, - EXTCON_TA, - EXTCON_CHARGE_DOWNSTREAM, - EXTCON_FAST_CHARGER, - EXTCON_SLOW_CHARGER, - EXTCON_MHL, + EXTCON_CHG_USB_DCP, + EXTCON_CHG_USB_CDP, + EXTCON_CHG_USB_FAST, + EXTCON_CHG_USB_SLOW, + EXTCON_DISP_MHL, EXTCON_JIG, EXTCON_NONE, }; @@ -355,7 +355,7 @@ static int max77843_muic_adc_gnd_handler(struct max77843_muic_info *info) if (ret < 0) return 
ret; - extcon_set_cable_state_(info->edev, EXTCON_MHL, attached); + extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached); break; default: dev_err(info->dev, "failed to detect %s accessory(gnd:0x%x)\n", @@ -494,7 +494,7 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) if (ret < 0) return ret; - extcon_set_cable_state_(info->edev, EXTCON_CHARGE_DOWNSTREAM, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP, attached); break; case MAX77843_MUIC_CHG_DEDICATED: @@ -504,7 +504,8 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) if (ret < 0) return ret; - extcon_set_cable_state_(info->edev, EXTCON_TA, attached); + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, + attached); break; case MAX77843_MUIC_CHG_SPECIAL_500MA: ret = max77843_muic_set_path(info, @@ -513,7 +514,7 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) if (ret < 0) return ret; - extcon_set_cable_state_(info->edev, EXTCON_SLOW_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW, attached); break; case MAX77843_MUIC_CHG_SPECIAL_1A: @@ -523,7 +524,7 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) if (ret < 0) return ret; - extcon_set_cable_state_(info->edev, EXTCON_FAST_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST, attached); break; case MAX77843_MUIC_CHG_GND: @@ -532,9 +533,11 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) /* Charger cable on MHL accessory is attach or detach */ if (gnd_type == MAX77843_MUIC_GND_MHL_VB) - extcon_set_cable_state_(info->edev, EXTCON_TA, true); + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, + true); else if (gnd_type == MAX77843_MUIC_GND_MHL) - extcon_set_cable_state_(info->edev, EXTCON_TA, false); + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, + false); break; case MAX77843_MUIC_CHG_NONE: break; diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c index 7b1ef200b121..b2b13b3dce14 100644 --- a/drivers/extcon/extcon-max8997.c +++ b/drivers/extcon/extcon-max8997.c @@ -148,11 +148,11 @@ struct max8997_muic_info { static const unsigned int max8997_extcon_cable[] = { EXTCON_USB, EXTCON_USB_HOST, - EXTCON_TA, - EXTCON_FAST_CHARGER, - EXTCON_SLOW_CHARGER, - EXTCON_CHARGE_DOWNSTREAM, - EXTCON_MHL, + EXTCON_CHG_USB_DCP, + EXTCON_CHG_USB_FAST, + EXTCON_CHG_USB_SLOW, + EXTCON_CHG_USB_CDP, + EXTCON_DISP_MHL, EXTCON_DOCK, EXTCON_JIG, EXTCON_NONE, @@ -403,7 +403,7 @@ static int max8997_muic_adc_handler(struct max8997_muic_info *info) return ret; break; case MAX8997_MUIC_ADC_MHL: - extcon_set_cable_state_(info->edev, EXTCON_MHL, attached); + extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached); break; case MAX8997_MUIC_ADC_FACTORY_MODE_USB_OFF: case MAX8997_MUIC_ADC_FACTORY_MODE_USB_ON: @@ -486,18 +486,19 @@ static int max8997_muic_chg_handler(struct max8997_muic_info *info) } break; case MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT: - extcon_set_cable_state_(info->edev, EXTCON_CHARGE_DOWNSTREAM, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP, attached); break; case MAX8997_CHARGER_TYPE_DEDICATED_CHG: - extcon_set_cable_state_(info->edev, EXTCON_TA, attached); + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP, + attached); break; case MAX8997_CHARGER_TYPE_500MA: - extcon_set_cable_state_(info->edev, EXTCON_SLOW_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW, attached); break; case MAX8997_CHARGER_TYPE_1A: - extcon_set_cable_state_(info->edev, 
EXTCON_FAST_CHARGER, + extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST, attached); break; default: diff --git a/drivers/extcon/extcon-rt8973a.c b/drivers/extcon/extcon-rt8973a.c index 1bc3737ea01c..36bf1d63791c 100644 --- a/drivers/extcon/extcon-rt8973a.c +++ b/drivers/extcon/extcon-rt8973a.c @@ -93,7 +93,7 @@ static struct reg_data rt8973a_reg_data[] = { static const unsigned int rt8973a_extcon_cable[] = { EXTCON_USB, EXTCON_USB_HOST, - EXTCON_TA, + EXTCON_CHG_USB_DCP, EXTCON_JIG, EXTCON_NONE, }; @@ -333,7 +333,7 @@ static int rt8973a_muic_cable_handler(struct rt8973a_muic_info *info, con_sw = DM_DP_SWITCH_USB; break; case RT8973A_MUIC_ADC_TA: - id = EXTCON_TA; + id = EXTCON_CHG_USB_DCP; con_sw = DM_DP_SWITCH_OPEN; break; case RT8973A_MUIC_ADC_FACTORY_MODE_BOOT_OFF_USB: diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c index 2945091bfd0e..7aac3cc7efd7 100644 --- a/drivers/extcon/extcon-sm5502.c +++ b/drivers/extcon/extcon-sm5502.c @@ -95,7 +95,7 @@ static struct reg_data sm5502_reg_data[] = { static const unsigned int sm5502_extcon_cable[] = { EXTCON_USB, EXTCON_USB_HOST, - EXTCON_TA, + EXTCON_CHG_USB_DCP, EXTCON_NONE, }; @@ -389,7 +389,7 @@ static int sm5502_muic_cable_handler(struct sm5502_muic_info *info, vbus_sw = VBUSIN_SWITCH_VBUSOUT_WITH_USB; break; case SM5502_MUIC_ADC_OPEN_TA: - id = EXTCON_TA; + id = EXTCON_CHG_USB_DCP; con_sw = DM_DP_SWITCH_OPEN; vbus_sw = VBUSIN_SWITCH_VBUSOUT; break; diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 8dd0af1d50bc..21a123cadf78 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -39,37 +39,40 @@ #define CABLE_NAME_MAX 30 static const char *extcon_name[] = { - [EXTCON_NONE] = "NONE", + [EXTCON_NONE] = "NONE", /* USB external connector */ - [EXTCON_USB] = "USB", - [EXTCON_USB_HOST] = "USB-HOST", - - /* Charger external connector */ - [EXTCON_TA] = "TA", - [EXTCON_FAST_CHARGER] = "FAST-CHARGER", - [EXTCON_SLOW_CHARGER] = "SLOW-CHARGER", - [EXTCON_CHARGE_DOWNSTREAM] = "CHARGE-DOWNSTREAM", - - /* Audio/Video external connector */ - [EXTCON_LINE_IN] = "LINE-IN", - [EXTCON_LINE_OUT] = "LINE-OUT", - [EXTCON_MICROPHONE] = "MICROPHONE", - [EXTCON_HEADPHONE] = "HEADPHONE", - - [EXTCON_HDMI] = "HDMI", - [EXTCON_MHL] = "MHL", - [EXTCON_DVI] = "DVI", - [EXTCON_VGA] = "VGA", - [EXTCON_SPDIF_IN] = "SPDIF-IN", - [EXTCON_SPDIF_OUT] = "SPDIF-OUT", - [EXTCON_VIDEO_IN] = "VIDEO-IN", - [EXTCON_VIDEO_OUT] = "VIDEO-OUT", - - /* Etc external connector */ - [EXTCON_DOCK] = "DOCK", - [EXTCON_JIG] = "JIG", - [EXTCON_MECHANICAL] = "MECHANICAL", + [EXTCON_USB] = "USB", + [EXTCON_USB_HOST] = "USB-HOST", + + /* Charging external connector */ + [EXTCON_CHG_USB_SDP] = "SDP", + [EXTCON_CHG_USB_DCP] = "DCP", + [EXTCON_CHG_USB_CDP] = "CDP", + [EXTCON_CHG_USB_ACA] = "ACA", + [EXTCON_CHG_USB_FAST] = "FAST-CHARGER", + [EXTCON_CHG_USB_SLOW] = "SLOW-CHARGER", + + /* Jack external connector */ + [EXTCON_JACK_MICROPHONE] = "MICROPHONE", + [EXTCON_JACK_HEADPHONE] = "HEADPHONE", + [EXTCON_JACK_LINE_IN] = "LINE-IN", + [EXTCON_JACK_LINE_OUT] = "LINE-OUT", + [EXTCON_JACK_VIDEO_IN] = "VIDEO-IN", + [EXTCON_JACK_VIDEO_OUT] = "VIDEO-OUT", + [EXTCON_JACK_SPDIF_IN] = "SPDIF-IN", + [EXTCON_JACK_SPDIF_OUT] = "SPDIF-OUT", + + /* Display external connector */ + [EXTCON_DISP_HDMI] = "HDMI", + [EXTCON_DISP_MHL] = "MHL", + [EXTCON_DISP_DVI] = "DVI", + [EXTCON_DISP_VGA] = "VGA", + + /* Miscellaneous external connector */ + [EXTCON_DOCK] = "DOCK", + [EXTCON_JIG] = "JIG", + [EXTCON_MECHANICAL] = "MECHANICAL", NULL, }; diff --git 
a/include/linux/extcon.h b/include/linux/extcon.h index c0f8c4fc5d45..7abf674c388c 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -31,32 +31,42 @@ /* * Define the unique id of supported external connectors */ -#define EXTCON_NONE 0 - -#define EXTCON_USB 1 /* USB connector */ -#define EXTCON_USB_HOST 2 - -#define EXTCON_TA 3 /* Charger connector */ -#define EXTCON_FAST_CHARGER 4 -#define EXTCON_SLOW_CHARGER 5 -#define EXTCON_CHARGE_DOWNSTREAM 6 - -#define EXTCON_LINE_IN 7 /* Audio/Video connector */ -#define EXTCON_LINE_OUT 8 -#define EXTCON_MICROPHONE 9 -#define EXTCON_HEADPHONE 10 -#define EXTCON_HDMI 11 -#define EXTCON_MHL 12 -#define EXTCON_DVI 13 -#define EXTCON_VGA 14 -#define EXTCON_SPDIF_IN 15 -#define EXTCON_SPDIF_OUT 16 -#define EXTCON_VIDEO_IN 17 -#define EXTCON_VIDEO_OUT 18 - -#define EXTCON_DOCK 19 /* Misc connector */ -#define EXTCON_JIG 20 -#define EXTCON_MECHANICAL 21 +#define EXTCON_NONE 0 + +/* USB external connector */ +#define EXTCON_USB 1 +#define EXTCON_USB_HOST 2 + +/* Charging external connector */ +#define EXTCON_CHG_USB_SDP 5 /* Standard Downstream Port */ +#define EXTCON_CHG_USB_DCP 6 /* Dedicated Charging Port */ +#define EXTCON_CHG_USB_CDP 7 /* Charging Downstream Port */ +#define EXTCON_CHG_USB_ACA 8 /* Accessory Charger Adapter */ +#define EXTCON_CHG_USB_FAST 9 +#define EXTCON_CHG_USB_SLOW 10 + +/* Jack external connector */ +#define EXTCON_JACK_MICROPHONE 20 +#define EXTCON_JACK_HEADPHONE 21 +#define EXTCON_JACK_LINE_IN 22 +#define EXTCON_JACK_LINE_OUT 23 +#define EXTCON_JACK_VIDEO_IN 24 +#define EXTCON_JACK_VIDEO_OUT 25 +#define EXTCON_JACK_SPDIF_IN 26 /* Sony Philips Digital InterFace */ +#define EXTCON_JACK_SPDIF_OUT 27 + +/* Display external connector */ +#define EXTCON_DISP_HDMI 40 /* High-Definition Multimedia Interface */ +#define EXTCON_DISP_MHL 41 /* Mobile High-Definition Link */ +#define EXTCON_DISP_DVI 42 /* Digital Visual Interface */ +#define EXTCON_DISP_VGA 43 /* Video Graphics Array */ + +/* Miscellaneous external connector */ +#define EXTCON_DOCK 60 +#define EXTCON_JIG 61 +#define EXTCON_MECHANICAL 62 + +#define EXTCON_NUM 63 struct extcon_cable; -- cgit v1.2.3 From ba1aefcd6db5536d3eb3ca3ce7bd6786960140ea Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:46 +0300 Subject: kvm/eventfd: factor out kvm_notify_acked_gsi() Factor out kvm_notify_acked_gsi() helper to iterate over EOI listeners and notify those matching the given gsi. It will be reused in the upcoming Hyper-V SynIC implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. 
Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + virt/kvm/eventfd.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9596a2f0977b..b66861c297c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -829,6 +829,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 518421e65b0d..f6b986a41823 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -451,9 +451,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) } EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; + + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ int gsi, idx; trace_kvm_ack_irq(irqchip, pin); @@ -461,10 +470,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3 From c9a5eccac1abf50649949f15754a7635f263a1ff Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:47 +0300 Subject: kvm/eventfd: add arch-specific set_irq Allow for arch-specific interrupt types to be set. For that, add kvm_arch_set_irq() which takes interrupt type-specific action if it recognizes the interrupt type given, and -EWOULDBLOCK otherwise. The default implementation always returns -EWOULDBLOCK. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. 
Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ virt/kvm/eventfd.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b66861c297c4..eba9caebc9c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -828,6 +828,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status); + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f6b986a41823..e29fd2640709 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,6 +171,15 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } +int __attribute__((weak)) kvm_arch_set_irq( + struct kvm_kernel_irq_routing_entry *irq, + struct kvm *kvm, int irq_source_id, + int level, + bool line_status) +{ + return -EWOULDBLOCK; +} + /* * Called with wqh->lock held and interrupts disabled */ @@ -195,7 +204,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) if (irq.type == KVM_IRQ_ROUTING_MSI) kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); - else + else if (kvm_arch_set_irq(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3 From 8db02d8b4089fa8098a170738e8ae7939aa8ae7a Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 8 Oct 2015 15:10:48 -0700 Subject: of/irq: Add new function of_msi_map_rid() The device tree property "msi-map" specifies how to create the PCI requester id used in some MSI controllers. Add a new function of_msi_map_rid() that finds the msi-map property and applies its translation to a given requester id. Reviewed-by: Marc Zyngier Acked-by: Rob Herring Signed-off-by: David Daney Signed-off-by: Marc Zyngier --- drivers/of/irq.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of_irq.h | 7 +++++ 2 files changed, 91 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 55317fa9c9dc..c90bd4ec3183 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -598,3 +598,87 @@ void of_msi_configure(struct device *dev, struct device_node *np) d = irq_find_host(msi_np); dev_set_msi_domain(dev, d); } + +/** + * of_msi_map_rid - Map a MSI requester ID for a device. + * @dev: device for which the mapping is to be done. + * @msi_np: device node of the expected msi controller. + * @rid_in: unmapped MSI requester ID for the device. + * + * Walk up the device hierarchy looking for devices with a "msi-map" + * property. If found, apply the mapping to @rid_in. + * + * Returns the mapped MSI requester ID. 
+ */ +u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in) +{ + struct device *parent_dev; + struct device_node *msi_controller_node; + u32 map_mask, masked_rid, rid_base, msi_base, rid_len, phandle; + int msi_map_len; + bool matched; + u32 rid_out = rid_in; + const __be32 *msi_map = NULL; + + /* + * Walk up the device parent links looking for one with a + * "msi-map" property. + */ + for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) { + if (!parent_dev->of_node) + continue; + + msi_map = of_get_property(parent_dev->of_node, + "msi-map", &msi_map_len); + if (!msi_map) + continue; + + if (msi_map_len % (4 * sizeof(__be32))) { + dev_err(parent_dev, "Error: Bad msi-map length: %d\n", + msi_map_len); + return rid_out; + } + /* We have a good parent_dev and msi_map, let's use them. */ + break; + } + if (!msi_map) + return rid_out; + + /* The default is to select all bits. */ + map_mask = 0xffffffff; + + /* + * Can be overridden by "msi-map-mask" property. If + * of_property_read_u32() fails, the default is used. + */ + of_property_read_u32(parent_dev->of_node, "msi-map-mask", &map_mask); + + masked_rid = map_mask & rid_in; + matched = false; + while (!matched && msi_map_len >= 4 * sizeof(__be32)) { + rid_base = be32_to_cpup(msi_map + 0); + phandle = be32_to_cpup(msi_map + 1); + msi_base = be32_to_cpup(msi_map + 2); + rid_len = be32_to_cpup(msi_map + 3); + + msi_controller_node = of_find_node_by_phandle(phandle); + + matched = masked_rid >= rid_base && + masked_rid < rid_base + rid_len && + msi_np == msi_controller_node; + + of_node_put(msi_controller_node); + msi_map_len -= 4 * sizeof(__be32); + msi_map += 4; + } + if (!matched) + return rid_out; + + rid_out = masked_rid + msi_base; + dev_dbg(dev, + "msi-map at: %s, using mask %08x, rid-base: %08x, msi-base: %08x, length: %08x, rid: %08x -> %08x\n", + dev_name(parent_dev), map_mask, rid_base, msi_base, + rid_len, rid_in, rid_out); + + return rid_out; +} diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 4bcbd586a672..8cd9334e5731 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -75,6 +75,7 @@ static inline int of_irq_to_resource_table(struct device_node *dev, extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); extern struct device_node *of_irq_find_parent(struct device_node *child); extern void of_msi_configure(struct device *dev, struct device_node *np); +u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, @@ -87,6 +88,12 @@ static inline void *of_irq_find_parent(struct device_node *child) { return NULL; } + +static inline u32 of_msi_map_rid(struct device *dev, + struct device_node *msi_np, u32 rid_in) +{ + return rid_in; +} #endif /* !CONFIG_OF */ #endif /* __OF_IRQ_H */ -- cgit v1.2.3 From b6eec9b717d4dcb39ef024b8a3b619a32468b01e Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 8 Oct 2015 15:10:49 -0700 Subject: PCI/MSI: Add helper function pci_msi_domain_get_msi_rid(). Add pci_msi_domain_get_msi_rid() to return the MSI requester id (RID). Initially needed by gic-v3 based systems. It will be used by follow on patch to drivers/irqchip/irq-gic-v3-its-pci-msi.c Initially supports mapping the RID via OF device tree. In the future, this could be extended to use ACPI _IORT tables as well. 
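To make the mapping concrete, here is a sketch (not part of the patch; the msi-map entry and the helper name are invented for the example) of what of_msi_map_rid() computes for a single <rid-base phandle msi-base length> tuple, assuming the default msi-map-mask of 0xffffffff:

static u32 example_msi_map_one_entry(u32 rid_in)
{
	/* Hypothetical msi-map entry: <0x0000 &its 0x10000 0x100>. */
	const u32 rid_base = 0x0000;
	const u32 msi_base = 0x10000;
	const u32 rid_len = 0x100;
	u32 masked_rid = 0xffffffff & rid_in;	/* default msi-map-mask */

	/* Outside the window: the requester ID passes through unchanged. */
	if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
		return rid_in;

	/* Inside the window: offset into the msi-base range, e.g. 0x0010 -> 0x10010. */
	return masked_rid + msi_base;
}

pci_msi_domain_get_msi_rid() then simply feeds the alias-resolved RID obtained from pci_for_each_dma_alias() through this translation.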
Reviewed-by: Marc Zyngier Acked-by: Bjorn Helgaas Signed-off-by: David Daney Signed-off-by: Marc Zyngier --- drivers/pci/msi.c | 32 ++++++++++++++++++++++++++++++++ include/linux/msi.h | 1 + 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index ddd59fe786f8..5ab5c4f8dcb0 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "pci.h" @@ -1327,4 +1328,35 @@ struct irq_domain *pci_msi_create_default_irq_domain(struct fwnode_handle *fwnod return domain; } + +static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data) +{ + u32 *pa = data; + + *pa = alias; + return 0; +} +/** + * pci_msi_domain_get_msi_rid - Get the MSI requester id (RID) + * @domain: The interrupt domain + * @pdev: The PCI device. + * + * The RID for a device is formed from the alias, with a firmware + * supplied mapping applied + * + * Returns: The RID. + */ +u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) +{ + struct device_node *of_node; + u32 rid = 0; + + pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); + + of_node = irq_domain_get_of_node(domain); + if (of_node) + rid = of_msi_map_rid(&pdev->dev, of_node, rid); + + return rid; +} #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ diff --git a/include/linux/msi.h b/include/linux/msi.h index 32a24b9a9556..87723751054d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -294,6 +294,7 @@ irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, struct msi_desc *desc); int pci_msi_domain_check_cap(struct irq_domain *domain, struct msi_domain_info *info, struct device *dev); +u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ #endif /* LINUX_MSI_H */ -- cgit v1.2.3 From 48ae34fb39b0c0cfc76275e844fba5b0b04fa49e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Sep 2015 14:07:40 +0100 Subject: of/irq: Add support code for multi-parent version of "msi-parent" Since 126b16e2ad98 ("Docs: dt: add generic MSI bindings"), the definition of "msi-parent" has evolved, while maintaining some degree of compatibility. It can now express multiple MSI controllers as parents, as well as some sideband data being communicated to the controller. This patch adds the parsing of the property, iterating over the multiple parents until a suitable irqdomain is found. It can also fallback to the original parsing if the old binding is detected. This support code gets used in the subsequent patches. 
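For illustration, a platform bus driver could resolve its MSI domain as in the following sketch (the caller itself is hypothetical; the helper and the bus token are the ones used by this series):

#include <linux/device.h>
#include <linux/of_irq.h>

static void example_configure_msi(struct device *dev)
{
	struct irq_domain *d;

	/* Handles both the simple and the multi-parent "msi-parent" forms. */
	d = of_msi_get_domain(dev, dev->of_node, DOMAIN_BUS_PLATFORM_MSI);
	if (d)
		dev_set_msi_domain(dev, d);
}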
Suggested-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Marc Zyngier --- drivers/of/irq.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of_irq.h | 9 ++++++++ 2 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index c90bd4ec3183..62cfdc2c86ac 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -682,3 +682,62 @@ u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in) return rid_out; } + +static struct irq_domain *__of_get_msi_domain(struct device_node *np, + enum irq_domain_bus_token token) +{ + struct irq_domain *d; + + d = irq_find_matching_host(np, token); + if (!d) + d = irq_find_host(np); + + return d; +} + +/** + * of_msi_get_domain - Use msi-parent to find the relevant MSI domain + * @dev: device for which the domain is requested + * @np: device node for @dev + * @token: bus type for this domain + * + * Parse the msi-parent property (both the simple and the complex + * versions), and returns the corresponding MSI domain. + * + * Returns: the MSI domain for this device (or NULL on failure). + */ +struct irq_domain *of_msi_get_domain(struct device *dev, + struct device_node *np, + enum irq_domain_bus_token token) +{ + struct device_node *msi_np; + struct irq_domain *d; + + /* Check for a single msi-parent property */ + msi_np = of_parse_phandle(np, "msi-parent", 0); + if (msi_np && !of_property_read_bool(msi_np, "#msi-cells")) { + d = __of_get_msi_domain(msi_np, token); + if (!d) + of_node_put(msi_np); + return d; + } + + if (token == DOMAIN_BUS_PLATFORM_MSI) { + /* Check for the complex msi-parent version */ + struct of_phandle_args args; + int index = 0; + + while (!of_parse_phandle_with_args(np, "msi-parent", + "#msi-cells", + index, &args)) { + d = __of_get_msi_domain(args.np, token); + if (d) + return d; + + of_node_put(args.np); + index++; + } + } + + return NULL; +} diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 8cd9334e5731..62ae6edecfd5 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -46,6 +46,9 @@ extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern struct irq_domain *of_msi_get_domain(struct device *dev, + struct device_node *np, + enum irq_domain_bus_token token); #else static inline int of_irq_count(struct device_node *dev) { @@ -64,6 +67,12 @@ static inline int of_irq_to_resource_table(struct device_node *dev, { return 0; } +static inline struct irq_domain *of_msi_get_domain(struct device *dev, + struct device_node *np, + enum irq_domain_bus_token token) +{ + return NULL; +} #endif #if defined(CONFIG_OF) -- cgit v1.2.3 From 82b9b4243c6d99d9e38087fa89183aa7479185e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Oct 2015 14:38:55 +0100 Subject: of/irq: Use the msi-map property to provide device-specific MSI domain While msi-parent is used to point to the MSI controller that works for all the devices behind a root complex, it doesn't allow configurations where each individual device can be routed to a separate MSI controller. The msi-map property provides this flexibility (and much more), so let's add a utility function that parses it, and return the corresponding MSI domain. 
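A sketch of the intended consumer (the next patch in this series adds a PCI helper along exactly these lines; get_msi_id_cb is the trivial alias callback introduced two patches earlier):

static struct irq_domain *example_pci_device_msi_domain(struct pci_dev *pdev)
{
	u32 rid = 0;

	/* Resolve DMA aliases to the requester ID the MSI controller sees. */
	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);

	/* Let the firmware-provided msi-map choose the per-device domain. */
	return of_msi_map_get_device_domain(&pdev->dev, rid);
}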
Acked-by: Rob Herring Signed-off-by: Marc Zyngier --- drivers/of/irq.c | 18 ++++++++++++++++++ include/linux/of_irq.h | 7 +++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index ed64d98807f9..0baf626da56a 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -688,6 +688,24 @@ static struct irq_domain *__of_get_msi_domain(struct device_node *np, return d; } +/** + * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain + * @dev: device for which the mapping is to be done. + * @rid: Requester ID for the device. + * + * Walk up the device hierarchy looking for devices with a "msi-map" + * property. + * + * Returns: the MSI domain for this device (or NULL on failure) + */ +struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid) +{ + struct device_node *np = NULL; + + __of_msi_map_rid(dev, &np, rid); + return __of_get_msi_domain(np, DOMAIN_BUS_PCI_MSI); +} + /** * of_msi_get_domain - Use msi-parent to find the relevant MSI domain * @dev: device for which the domain is requested diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 62ae6edecfd5..65d969246a4d 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -49,6 +49,8 @@ extern int of_irq_to_resource_table(struct device_node *dev, extern struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token); +extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, + u32 rid); #else static inline int of_irq_count(struct device_node *dev) { @@ -73,6 +75,11 @@ static inline struct irq_domain *of_msi_get_domain(struct device *dev, { return NULL; } +static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev, + u32 rid) +{ + return NULL; +} #endif #if defined(CONFIG_OF) -- cgit v1.2.3 From 54fa97eeb9e22b47d68b67ee00987afa7fbc2178 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Oct 2015 14:43:06 +0100 Subject: PCI/MSI: Allow the MSI domain to be device-specific So far, we've always considered that for a given PCI device, its MSI controller was either set by the architecture-specific pcibios hook, or simply inherited from the host bridge. This doesn't cover things like firmware-defined topologies like msi-map (DT) or IORT (ACPI), which can provide information about which MSI controller to use on a per-device basis. This patch adds the necessary hook into the MSI code to allow this feature, and provides the msi-map functionality as a first implementation. Acked-by: Rob Herring Acked-by: Bjorn Helgaas Signed-off-by: Marc Zyngier --- drivers/pci/msi.c | 17 +++++++++++++++++ drivers/pci/probe.c | 8 ++++++++ include/linux/msi.h | 6 ++++++ 3 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5ab5c4f8dcb0..4cd6f3abcecf 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1359,4 +1359,21 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) return rid; } + +/** + * pci_msi_get_device_domain - Get the MSI domain for a given PCI device + * @pdev: The PCI device + * + * Use the firmware data to find a device-specific MSI domain + * (i.e. not one that is set as a default). + * + * Returns: The corresponding MSI domain or NULL if none has been found.
+ */ +struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) +{ + u32 rid = 0; + + pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); + return of_msi_map_get_device_domain(&pdev->dev, rid); +} #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 7c333f8c2327..f14a970b61fa 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1639,6 +1639,14 @@ static struct irq_domain *pci_dev_msi_domain(struct pci_dev *dev) if (d) return d; + /* + * Let's see if we have a firmware interface able to provide + * the domain. + */ + d = pci_msi_get_device_domain(dev); + if (d) + return d; + return NULL; } diff --git a/include/linux/msi.h b/include/linux/msi.h index 87723751054d..0b4460374020 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -295,6 +295,12 @@ irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, int pci_msi_domain_check_cap(struct irq_domain *domain, struct msi_domain_info *info, struct device *dev); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); +struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); +#else +static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) +{ + return NULL; +} #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ #endif /* LINUX_MSI_H */ -- cgit v1.2.3 From 203d027de4d7068c607b60d4310a1599dec8839f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 11:56:26 +0200 Subject: vga_switcheroo: Use enum vga_switcheroo_state instead of int Signed-off-by: Lukas Wunner Reviewed-by: Jani Nikula Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- drivers/gpu/vga/vga_switcheroo.c | 6 +++--- include/linux/vga_switcheroo.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 1acbe20143d4..a7870d23c5ab 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -100,7 +100,7 @@ struct vga_switcheroo_client { struct pci_dev *pdev; struct fb_info *fb_info; - int pwr_state; + enum vga_switcheroo_state pwr_state; const struct vga_switcheroo_client_ops *ops; int id; bool active; @@ -344,7 +344,7 @@ find_active_client(struct list_head *head) * * Return: Power state. 
*/ -int vga_switcheroo_get_client_state(struct pci_dev *pdev) +enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *pdev) { struct vga_switcheroo_client *client; enum vga_switcheroo_state ret; @@ -496,7 +496,7 @@ static int vga_switchoff(struct vga_switcheroo_client *client) return 0; } -static void set_audio_state(int id, int state) +static void set_audio_state(int id, enum vga_switcheroo_state state) { struct vga_switcheroo_client *client; diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 376499197717..e63661757505 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -138,7 +138,7 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); -int vga_switcheroo_get_client_state(struct pci_dev *dev); +enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev); void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic); @@ -157,7 +157,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } -static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } +static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {} -- cgit v1.2.3 From 21c5ba8c1ee02f204e556c26703cebaf9c4019e0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 13:30:32 +0200 Subject: vga_switcheroo: Use VGA_SWITCHEROO_UNKNOWN_ID instead of -1 Signed-off-by: Lukas Wunner Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- drivers/gpu/vga/vga_switcheroo.c | 17 +++++++++-------- include/linux/vga_switcheroo.h | 4 ++++ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index a7870d23c5ab..989630528529 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -84,9 +84,9 @@ * @fb_info: framebuffer to which console is remapped on switching * @pwr_state: current power state * @ops: client callbacks - * @id: client identifier, see enum vga_switcheroo_client_id. - * Determining the id requires the handler, so GPUs are initially - * assigned -1 and later given their true id in vga_switcheroo_enable() + * @id: client identifier. Determining the id requires the handler, + * so gpus are initially assigned VGA_SWITCHEROO_UNKNOWN_ID + * and later given their true id in vga_switcheroo_enable() * @active: whether the outputs are currently switched to this client * @driver_power_control: whether power state is controlled by the driver's * runtime pm. 
If true, writing ON and OFF to the vga_switcheroo debugfs @@ -145,7 +145,8 @@ struct vgasr_priv { #define ID_BIT_AUDIO 0x100 #define client_is_audio(c) ((c)->id & ID_BIT_AUDIO) -#define client_is_vga(c) ((c)->id == -1 || !client_is_audio(c)) +#define client_is_vga(c) ((c)->id == VGA_SWITCHEROO_UNKNOWN_ID || \ + !client_is_audio(c)) #define client_id(c) ((c)->id & ~ID_BIT_AUDIO) static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv); @@ -173,7 +174,7 @@ static void vga_switcheroo_enable(void) vgasr_priv.handler->init(); list_for_each_entry(client, &vgasr_priv.clients, list) { - if (client->id != -1) + if (client->id != VGA_SWITCHEROO_UNKNOWN_ID) continue; ret = vgasr_priv.handler->get_client_id(client->pdev); if (ret < 0) @@ -277,7 +278,7 @@ int vga_switcheroo_register_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, bool driver_power_control) { - return register_client(pdev, ops, -1, + return register_client(pdev, ops, VGA_SWITCHEROO_UNKNOWN_ID, pdev == vga_default_device(), driver_power_control); } @@ -583,7 +584,7 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, int ret; bool delay = false, can_switch; bool just_mux = false; - int client_id = -1; + int client_id = VGA_SWITCHEROO_UNKNOWN_ID; struct vga_switcheroo_client *client = NULL; if (cnt > 63) @@ -652,7 +653,7 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, client_id = VGA_SWITCHEROO_DIS; } - if (client_id == -1) + if (client_id == VGA_SWITCHEROO_UNKNOWN_ID) goto out; client = find_client_from_id(&vgasr_priv.clients, client_id); if (!client) diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index e63661757505..88909a865b72 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -59,6 +59,9 @@ enum vga_switcheroo_state { /** * enum vga_switcheroo_client_id - client identifier + * @VGA_SWITCHEROO_UNKNOWN_ID: initial identifier assigned to vga clients. + * Determining the id requires the handler, so GPUs are given their + * true id in a delayed fashion in vga_switcheroo_enable() * @VGA_SWITCHEROO_IGD: integrated graphics device * @VGA_SWITCHEROO_DIS: discrete graphics device * @VGA_SWITCHEROO_MAX_CLIENTS: currently no more than two GPUs are supported @@ -66,6 +69,7 @@ enum vga_switcheroo_state { * Client identifier. Audio clients use the same identifier & 0x100. 
*/ enum vga_switcheroo_client_id { + VGA_SWITCHEROO_UNKNOWN_ID = -1, VGA_SWITCHEROO_IGD, VGA_SWITCHEROO_DIS, VGA_SWITCHEROO_MAX_CLIENTS, -- cgit v1.2.3 From fa3e967fffaf267ccab7959429722da34e45ad77 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 12:54:07 +0200 Subject: vga_switcheroo: Use enum vga_switcheroo_client_id instead of int Signed-off-by: Lukas Wunner Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- drivers/gpu/vga/vga_switcheroo.c | 17 ++++++++++------- include/linux/vga_switcheroo.h | 6 +++--- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 989630528529..af0d372ff7d4 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -102,7 +102,7 @@ struct vga_switcheroo_client { struct fb_info *fb_info; enum vga_switcheroo_state pwr_state; const struct vga_switcheroo_client_ops *ops; - int id; + enum vga_switcheroo_client_id id; bool active; bool driver_power_control; struct list_head list; @@ -233,7 +233,8 @@ EXPORT_SYMBOL(vga_switcheroo_unregister_handler); static int register_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id, bool active, bool driver_power_control) + enum vga_switcheroo_client_id id, bool active, + bool driver_power_control) { struct vga_switcheroo_client *client; @@ -288,7 +289,7 @@ EXPORT_SYMBOL(vga_switcheroo_register_client); * vga_switcheroo_register_audio_client - register audio client * @pdev: client pci device * @ops: client callbacks - * @id: client identifier, see enum vga_switcheroo_client_id + * @id: client identifier * * Register audio client (audio device on a GPU). The power state of the * client is assumed to be ON. 
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(vga_switcheroo_register_client); */ int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id) + enum vga_switcheroo_client_id id) { return register_client(pdev, ops, id | ID_BIT_AUDIO, false, false); } @@ -315,7 +316,8 @@ find_client_from_pci(struct list_head *head, struct pci_dev *pdev) } static struct vga_switcheroo_client * -find_client_from_id(struct list_head *head, int client_id) +find_client_from_id(struct list_head *head, + enum vga_switcheroo_client_id client_id) { struct vga_switcheroo_client *client; @@ -497,7 +499,8 @@ static int vga_switchoff(struct vga_switcheroo_client *client) return 0; } -static void set_audio_state(int id, enum vga_switcheroo_state state) +static void set_audio_state(enum vga_switcheroo_client_id id, + enum vga_switcheroo_state state) { struct vga_switcheroo_client *client; @@ -584,7 +587,7 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, int ret; bool delay = false, can_switch; bool just_mux = false; - int client_id = VGA_SWITCHEROO_UNKNOWN_ID; + enum vga_switcheroo_client_id client_id = VGA_SWITCHEROO_UNKNOWN_ID; struct vga_switcheroo_client *client = NULL; if (cnt > 63) diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 88909a865b72..c55751155631 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -100,7 +100,7 @@ struct vga_switcheroo_handler { int (*switchto)(enum vga_switcheroo_client_id id); int (*power_state)(enum vga_switcheroo_client_id id, enum vga_switcheroo_state state); - int (*get_client_id)(struct pci_dev *pdev); + enum vga_switcheroo_client_id (*get_client_id)(struct pci_dev *pdev); }; /** @@ -132,7 +132,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev, bool driver_power_control); int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id); + enum vga_switcheroo_client_id id); void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); @@ -158,7 +158,7 @@ static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_i static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id) { return 0; } + enum vga_switcheroo_client_id id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } -- cgit v1.2.3 From 573c7ba006edbecff0714db651dd3602b9d0a6a0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 16 Oct 2015 14:01:22 +0200 Subject: net: introduce pre-change upper device notifier This newly introduced netdevice notifier is called before actual change upper happens. That provides a possibility for notifier handlers to know upper change will happen and react to it, including possibility to forbid the change. That is valuable for drivers which can check if the upper device linkage is supported and forbid that in case it is not. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3374402c1ea..69fdd427c8cb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2106,6 +2106,7 @@ struct pcpu_sw_netstats { #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 #define NETDEV_BONDING_INFO 0x0019 +#define NETDEV_PRECHANGEUPPER 0x001A int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); diff --git a/net/core/dev.c b/net/core/dev.c index a229bf0d649d..1225b4be8ed6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5346,6 +5346,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.master = master; changeupper_info.linking = true; + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + return ret; + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, master); if (ret) @@ -5488,6 +5494,9 @@ void netdev_upper_dev_unlink(struct net_device *dev, changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; changeupper_info.linking = false; + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower -- cgit v1.2.3 From 2ffbceb2b08f8ca0496c54a9ebcd11d25275954e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Oct 2015 14:33:26 +0200 Subject: netfilter: remove hook owner refcounting since commit 8405a8fff3f8 ("netfilter: nf_qeueue: Drop queue entries on nf_unregister_hook") all pending queued entries are discarded. So we can simply remove all of the owner handling -- when module is removed it also needs to unregister all its hooks. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 1 - net/bridge/br_netfilter_hooks.c | 7 ------- net/bridge/netfilter/ebtable_filter.c | 3 --- net/bridge/netfilter/ebtable_nat.c | 3 --- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 -- net/ipv4/netfilter/iptable_nat.c | 4 ---- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 ------ net/ipv4/netfilter/nf_defrag_ipv4.c | 2 -- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 -- net/ipv6/netfilter/ip6table_nat.c | 4 ---- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 ------ net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 -- net/netfilter/ipvs/ip_vs_core.c | 12 ------------ net/netfilter/nf_queue.c | 5 ----- net/netfilter/nf_tables_api.c | 1 - net/netfilter/x_tables.c | 1 - security/selinux/hooks.c | 5 ----- security/smack/smack_netfilter.c | 2 -- 18 files changed, 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index edb3dc32f1da..ef11e1d77699 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -90,7 +90,6 @@ struct nf_hook_ops { /* User fills in from here down. 
*/ nf_hookfn *hook; struct net_device *dev; - struct module *owner; void *priv; u_int8_t pf; unsigned int hooknum; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 18905d4781db..9d3051916a64 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -901,49 +901,42 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable); static struct nf_hook_ops br_nf_ops[] __read_mostly = { { .hook = br_nf_pre_routing, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_local_in, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_forward_ip, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF - 1, }, { .hook = br_nf_forward_arp, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_post_routing, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_LAST, }, { .hook = ip_sabotage_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_FIRST, }, { .hook = ip_sabotage_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index f9242dffa65e..32eccd101f26 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -73,21 +73,18 @@ ebt_out_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { { .hook = ebt_in_hook, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { .hook = ebt_in_hook, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { .hook = ebt_out_hook, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_FILTER_OTHER, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 4bbefe03ab58..ec55358f00c8 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -73,21 +73,18 @@ ebt_nat_out(void *priv, struct sk_buff *skb, static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { { .hook = ebt_nat_out, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { .hook = ebt_nat_out, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { .hook = ebt_nat_in, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 6a6e762ab27f..f105b6ffe351 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -437,14 +437,12 @@ static struct xt_target synproxy_tg4_reg __read_mostly = { static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { { .hook = ipv4_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, { .hook = ipv4_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, diff --git a/net/ipv4/netfilter/iptable_nat.c 
b/net/ipv4/netfilter/iptable_nat.c index 3a2e4d830a0b..ae2cd2752046 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -68,7 +68,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = iptable_nat_ipv4_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, @@ -76,7 +75,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = iptable_nat_ipv4_out, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, @@ -84,7 +82,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = iptable_nat_ipv4_local_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, @@ -92,7 +89,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = iptable_nat_ipv4_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 752fb40adcf8..461ca926fd39 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -166,42 +166,36 @@ static unsigned int ipv4_conntrack_local(void *priv, static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { .hook = ipv4_conntrack_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, { .hook = ipv4_conntrack_local, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, { .hook = ipv4_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_HELPER, }, { .hook = ipv4_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, { .hook = ipv4_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_HELPER, }, { .hook = ipv4_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index b246346ee849..9df3f93269d3 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -94,14 +94,12 @@ static unsigned int ipv4_conntrack_defrag(void *priv, static struct nf_hook_ops ipv4_defrag_ops[] = { { .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 5312e9dcebdb..3426d9df1be7 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -458,14 +458,12 @@ static struct xt_target synproxy_tg6_reg __read_mostly = { static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { { .hook = ipv6_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, 
.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, { .hook = ipv6_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index abea175d5853..de2a10a565f5 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -70,7 +70,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, @@ -78,7 +77,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = ip6table_nat_out, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, @@ -86,7 +84,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_local_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, @@ -94,7 +91,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = ip6table_nat_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index dd83ad42f8f6..1aa5848764a7 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -187,42 +187,36 @@ static unsigned int ipv6_conntrack_local(void *priv, static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { { .hook = ipv6_conntrack_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_conntrack_local, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_HELPER, }, { .hook = ipv6_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, { .hook = ipv6_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_CONNTRACK_HELPER, }, { .hook = ipv6_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_LAST-1, diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index a99baf63eccf..b6ddca746109 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -84,14 +84,12 @@ static unsigned int ipv6_defrag(void *priv, static struct nf_hook_ops ipv6_defrag_ops[] = { { .hook = ipv6_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3773154d9b71..ce37d204fcf1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1923,7 +1923,6 @@ static struct nf_hook_ops 
ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, @@ -1933,7 +1932,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * applied to IPVS. */ { .hook = ip_vs_remote_request4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, @@ -1941,7 +1939,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_local_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, @@ -1949,7 +1946,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After mangle, schedule and forward local requests */ { .hook = ip_vs_local_request4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, @@ -1958,7 +1954,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 99, @@ -1966,7 +1961,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -1975,7 +1969,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, @@ -1985,7 +1978,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * applied to IPVS. */ { .hook = ip_vs_remote_request6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, @@ -1993,7 +1985,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_local_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, @@ -2001,7 +1992,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After mangle, schedule and forward local requests */ { .hook = ip_vs_local_request6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, @@ -2010,7 +2000,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp_v6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, @@ -2018,7 +2007,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 634d18e6ab2c..eef1c50e0e21 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -69,8 +69,6 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(physdev); } #endif - /* Drop reference to owner of hook which queued us. 
*/ - module_put(entry->elem->owner); } EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); @@ -79,9 +77,6 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; - if (!try_module_get(entry->elem->owner)) - return false; - if (state->in) dev_hold(state->in); if (state->out) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4a41eb92bcc0..93cc4737018f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1433,7 +1433,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, for (i = 0; i < afi->nops; i++) { ops = &basechain->ops[i]; ops->pf = family; - ops->owner = afi->owner; ops->hooknum = hooknum; ops->priority = priority; ops->priv = chain; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9b42b5ea6dcd..d4aaad747ea9 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1193,7 +1193,6 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) if (!(hook_mask & 1)) continue; ops[i].hook = fn; - ops[i].owner = table->me; ops[i].pf = table->af; ops[i].hooknum = hooknum; ops[i].priority = table->priority; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 64340160f4ac..659bb50f0232 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6127,21 +6127,18 @@ security_initcall(selinux_init); static struct nf_hook_ops selinux_nf_ops[] = { { .hook = selinux_ipv4_postroute, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_SELINUX_LAST, }, { .hook = selinux_ipv4_forward, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_SELINUX_FIRST, }, { .hook = selinux_ipv4_output, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_SELINUX_FIRST, @@ -6149,14 +6146,12 @@ static struct nf_hook_ops selinux_nf_ops[] = { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) { .hook = selinux_ipv6_postroute, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_SELINUX_LAST, }, { .hook = selinux_ipv6_forward, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_SELINUX_FIRST, diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c index a9e41da05d28..6d1706c9777e 100644 --- a/security/smack/smack_netfilter.c +++ b/security/smack/smack_netfilter.c @@ -57,7 +57,6 @@ static unsigned int smack_ipv4_output(void *priv, static struct nf_hook_ops smack_nf_ops[] = { { .hook = smack_ipv4_output, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_SELINUX_FIRST, @@ -65,7 +64,6 @@ static struct nf_hook_ops smack_nf_ops[] = { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) { .hook = smack_ipv6_output, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_SELINUX_FIRST, -- cgit v1.2.3 From 008027c31d57a22bd80dda5acc95b037634eee0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Oct 2015 20:45:42 +0200 Subject: netfilter: turn NF_HOOK into an inline function A recent change to the dst_output handling caused a new warning when the call to NF_HOOK() is the only user of a local variable passed as 'dev', and CONFIG_NETFILTER is disabled: net/ipv6/ip6_output.c: In function 'ip6_output': net/ipv6/ip6_output.c:135:21: warning: unused variable 'dev' [-Wunused-variable]
The reason for this is that the NF_HOOK macro in this case does not reference the variable at all, and the call to dev_net(dev) got removed from the ip6_output function. To avoid that warning now and in the future, this changes the macro into an equivalent inline function, which tells the compiler that the variable is passed correctly but still unused. The dn_forward function apparently had the same problem in the past and added a local workaround that no longer works with the inline function. In order to avoid a regression, we have to also remove the #ifdef from decnet in the same patch. Fixes: ede2059dbaf9 ("dst: Pass net into dst->output") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 19 +++++++++++++++++-- net/decnet/dn_route.c | 2 -- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ef11e1d77699..0ad556726181 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -346,8 +346,23 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(net, sk, skb) -#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(net, sk, skb) +static inline int +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct net *, struct sock *, struct sk_buff *), + bool cond) +{ + return okfn(net, sk, skb); +} + +static inline int +NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) +{ + return okfn(net, sk, skb); +} + static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e930321e2c1d..0c491fc0e254 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -789,9 +789,7 @@ static int dn_forward(struct sk_buff *skb) struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; int header_len; -#ifdef CONFIG_NETFILTER struct net_device *dev = skb->dev; -#endif if (skb->pkt_type != PACKET_HOST) goto drop; -- cgit v1.2.3 From 5abe4f223ed6b820443d3657bd48600692f61c12 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 13 Oct 2015 12:45:26 +0200 Subject: regulator: introduce min_dropout_uV Many voltage regulators need an input voltage that is higher than the output voltage. Allow specifying a minimum dropout voltage which will be used later to find the best input voltage for regulators. [Changed uv to uV for consistency and legibility -- broonie] Signed-off-by: Sascha Hauer Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 45932228cbf5..9c2903e58adb 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -245,6 +245,7 @@ enum regulator_type { * @linear_min_sel: Minimal selector for starting linear mapping * @fixed_uV: Fixed voltage of rails.
* @ramp_delay: Time to settle down after voltage change (unit: uV/us) + * @min_dropout_uV: The minimum dropout voltage this regulator can handle * @linear_ranges: A constant table of possible voltage ranges. * @n_linear_ranges: Number of entries in the @linear_ranges table. * @volt_table: Voltage mapping table (if table based mapping) @@ -292,6 +293,7 @@ struct regulator_desc { unsigned int linear_min_sel; int fixed_uV; unsigned int ramp_delay; + int min_dropout_uV; const struct regulator_linear_range *linear_ranges; int n_linear_ranges; -- cgit v1.2.3 From b76281cb97761002277730432812b1687de96062 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Oct 2015 14:35:21 +0200 Subject: clk: Make clk input parameter of __clk_get_name() const When calling __clk_get_name() on a const clock: warning: passing argument 1 of '__clk_get_name' discards 'const' qualifier from pointer target type include/linux/clk-provider.h:613:13: note: expected 'struct clk *' but argument is of type 'const struct clk *' __clk_get_name() does not modify the passed clock, hence make it const. Signed-off-by: Geert Uytterhoeven Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 2 +- include/linux/clk-provider.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 63d0bfb29e83..ecb1e5232554 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -272,7 +272,7 @@ late_initcall_sync(clk_disable_unused); /*** helper functions ***/ -const char *__clk_get_name(struct clk *clk) +const char *__clk_get_name(const struct clk *clk) { return !clk ? NULL : clk->core->name; } diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 8ff43eb4b311..bbb8fed11e44 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -607,7 +607,7 @@ void clk_unregister(struct clk *clk); void devm_clk_unregister(struct device *dev, struct clk *clk); /* helper functions */ -const char *__clk_get_name(struct clk *clk); +const char *__clk_get_name(const struct clk *clk); const char *clk_hw_get_name(const struct clk_hw *hw); struct clk_hw *__clk_get_hw(struct clk *clk); unsigned int clk_hw_get_num_parents(const struct clk_hw *hw); -- cgit v1.2.3 From c771c2f484857f3b1fc81d180485e96b7cb67c17 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 11 Oct 2015 17:34:15 +0200 Subject: gpiolib: provide generic request/free implementations Provide generic request/free implementations that pinctrl-aware GPIO drivers can use instead of open-coding them, provided they use a 1:1 pin-to-GPIO signal mapping.
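For illustration, a pin-controller-backed driver with such a 1:1 mapping can point its gpio_chip at the helpers instead of carrying its own wrappers. A minimal sketch, assuming a hypothetical "foo" driver and a made-up pin count:

#include <linux/module.h>
#include <linux/gpio/driver.h>

static struct gpio_chip foo_gpio_chip = {
	.label		= "foo-gpio",			/* hypothetical driver */
	.owner		= THIS_MODULE,
	.request	= gpiochip_generic_request,	/* replaces an open-coded wrapper */
	.free		= gpiochip_generic_free,
	.base		= -1,				/* dynamic GPIO numbering */
	.ngpio		= 32,				/* assumed pin count */
};

The helpers added below simply translate the chip-relative offset into a global number for pinctrl_request_gpio()/pinctrl_free_gpio(), which is why the 1:1 mapping requirement applies.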
Signed-off-by: Jonas Gorski Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 23 +++++++++++++++++++++ include/linux/gpio/driver.h | 3 +++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 8f180775a4fa..8eba02db5608 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -15,6 +15,7 @@ #include <linux/acpi.h> #include <linux/gpio/driver.h> #include <linux/gpio/machine.h> +#include <linux/pinctrl/consumer.h> #include "gpiolib.h" @@ -745,6 +746,28 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) {} #endif /* CONFIG_GPIOLIB_IRQCHIP */ +/** + * gpiochip_generic_request() - request the gpio function for a pin + * @chip: the gpiochip owning the GPIO + * @offset: the offset of the GPIO to request for GPIO function + */ +int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset) +{ + return pinctrl_request_gpio(chip->base + offset); +} +EXPORT_SYMBOL_GPL(gpiochip_generic_request); + +/** + * gpiochip_generic_free() - free the gpio function from a pin + * @chip: the gpiochip to request the gpio function for + * @offset: the offset of the GPIO to free from GPIO function + */ +void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset) +{ + pinctrl_free_gpio(chip->base + offset); +} +EXPORT_SYMBOL_GPL(gpiochip_generic_free); + #ifdef CONFIG_PINCTRL /** diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1aed31c5ffba..d1baebf350d8 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -206,6 +206,9 @@ int _gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIOLIB_IRQCHIP */ +int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset); +void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset); + #ifdef CONFIG_PINCTRL /** -- cgit v1.2.3 From 91236ecc74a25431138f71b6d52e130cd0f774b3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 14 Oct 2015 14:29:36 +0800 Subject: ACPI/PCI: Enhance ACPI core to support sparse IO space Enhance ACPI resource parsing interfaces to support sparse IO space, which will be used to share common code between x86 and IA64 later. Tested-by: Tony Luck Signed-off-by: Jiang Liu Reviewed-by: Hanjun Guo Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/resource.c | 9 ++++++--- include/linux/ioport.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 15d22db05054..cdc5c2599beb 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -119,7 +119,7 @@ bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) EXPORT_SYMBOL_GPL(acpi_dev_resource_memory); static void acpi_dev_ioresource_flags(struct resource *res, u64 len, - u8 io_decode) + u8 io_decode, u8 translation_type) { res->flags = IORESOURCE_IO; @@ -131,6 +131,8 @@ static void acpi_dev_ioresource_flags(struct resource *res, u64 len, if (io_decode == ACPI_DECODE_16) res->flags |= IORESOURCE_IO_16BIT_ADDR; + if (translation_type == ACPI_SPARSE_TRANSLATION) + res->flags |= IORESOURCE_IO_SPARSE; } static void acpi_dev_get_ioresource(struct resource *res, u64 start, u64 len, @@ -138,7 +140,7 @@ static void acpi_dev_get_ioresource(struct resource *res, u64 start, u64 len, { res->start = start; res->end = start + len - 1; - acpi_dev_ioresource_flags(res, len, io_decode); + acpi_dev_ioresource_flags(res, len, io_decode, 0); } /** @@ -231,7 +233,8 @@ static bool acpi_decode_space(struct resource_win *win, acpi_dev_memresource_flags(res, len, wp); break; case ACPI_IO_RANGE: - acpi_dev_ioresource_flags(res, len, iodec); + acpi_dev_ioresource_flags(res, len, iodec, + addr->info.io.translation_type); break; case ACPI_BUS_NUMBER_RANGE: res->flags = IORESOURCE_BUS; diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 388e3ae94f7a..24bea087e7af 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -94,6 +94,7 @@ struct resource { /* PnP I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IO_16BIT_ADDR (1<<0) #define IORESOURCE_IO_FIXED (1<<1) +#define IORESOURCE_IO_SPARSE (1<<2) /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ -- cgit v1.2.3 From 2c204383a2922cd6b79b9d78680a049a2144fbcc Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 14 Oct 2015 14:29:39 +0800 Subject: PCI/ACPI: Add interface acpi_pci_root_create() Introduce common interface acpi_pci_root_create() and related data structures to create PCI root bus for ACPI PCI host bridges. It will be used to kill duplicated arch specific code for IA64 and x86. It may also help ARM64 in future. Reviewed-by: Lorenzo Pieralisi Tested-by: Tony Luck Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_root.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci-acpi.h | 24 ++++++ 2 files changed, 228 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 393706a5261b..850d7bf0c873 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -652,6 +652,210 @@ static void acpi_pci_root_remove(struct acpi_device *device) kfree(root); } +/* + * Following code to support acpi_pci_root_create() is copied from + * arch/x86/pci/acpi.c and modified so it could be reused by x86, IA64 + * and ARM64. + */ +static void acpi_pci_root_validate_resources(struct device *dev, + struct list_head *resources, + unsigned long type) +{ + LIST_HEAD(list); + struct resource *res1, *res2, *root = NULL; + struct resource_entry *tmp, *entry, *entry2; + + BUG_ON((type & (IORESOURCE_MEM | IORESOURCE_IO)) == 0); + root = (type & IORESOURCE_MEM) ? 
&iomem_resource : &ioport_resource; + + list_splice_init(resources, &list); + resource_list_for_each_entry_safe(entry, tmp, &list) { + bool free = false; + resource_size_t end; + + res1 = entry->res; + if (!(res1->flags & type)) + goto next; + + /* Exclude non-addressable range or non-addressable portion */ + end = min(res1->end, root->end); + if (end <= res1->start) { + dev_info(dev, "host bridge window %pR (ignored, not CPU addressable)\n", + res1); + free = true; + goto next; + } else if (res1->end != end) { + dev_info(dev, "host bridge window %pR ([%#llx-%#llx] ignored, not CPU addressable)\n", + res1, (unsigned long long)end + 1, + (unsigned long long)res1->end); + res1->end = end; + } + + resource_list_for_each_entry(entry2, resources) { + res2 = entry2->res; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. + */ + if (resource_overlaps(res1, res2)) { + res2->start = min(res1->start, res2->start); + res2->end = max(res1->end, res2->end); + dev_info(dev, "host bridge window expanded to %pR; %pR ignored\n", + res2, res1); + free = true; + goto next; + } + } + +next: + resource_list_del(entry); + if (free) + resource_list_free_entry(entry); + else + resource_list_add_tail(entry, resources); + } +} + +int acpi_pci_probe_root_resources(struct acpi_pci_root_info *info) +{ + int ret; + struct list_head *list = &info->resources; + struct acpi_device *device = info->bridge; + struct resource_entry *entry, *tmp; + unsigned long flags; + + flags = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_MEM_8AND16BIT; + ret = acpi_dev_get_resources(device, list, + acpi_dev_filter_resource_type_cb, + (void *)flags); + if (ret < 0) + dev_warn(&device->dev, + "failed to parse _CRS method, error code %d\n", ret); + else if (ret == 0) + dev_dbg(&device->dev, + "no IO and memory resources present in _CRS\n"); + else { + resource_list_for_each_entry_safe(entry, tmp, list) { + if (entry->res->flags & IORESOURCE_DISABLED) + resource_list_destroy_entry(entry); + else + entry->res->name = info->name; + } + acpi_pci_root_validate_resources(&device->dev, list, + IORESOURCE_MEM); + acpi_pci_root_validate_resources(&device->dev, list, + IORESOURCE_IO); + } + + return ret; +} + +static void pci_acpi_root_add_resources(struct acpi_pci_root_info *info) +{ + struct resource_entry *entry, *tmp; + struct resource *res, *conflict, *root = NULL; + + resource_list_for_each_entry_safe(entry, tmp, &info->resources) { + res = entry->res; + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + continue; + + conflict = insert_resource_conflict(root, res); + if (conflict) { + dev_info(&info->bridge->dev, + "ignoring host bridge window %pR (conflicts with %s %pR)\n", + res, conflict->name, conflict); + resource_list_destroy_entry(entry); + } + } +} + +static void __acpi_pci_root_release_info(struct acpi_pci_root_info *info) +{ + struct resource *res; + struct resource_entry *entry, *tmp; + + if (!info) + return; + + resource_list_for_each_entry_safe(entry, tmp, &info->resources) { + res = entry->res; + if (res->parent && + (res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) + release_resource(res); + resource_list_destroy_entry(entry); + } + + info->ops->release_info(info); +} + +static void acpi_pci_root_release_info(struct pci_host_bridge *bridge) +{ + struct resource *res; + struct resource_entry *entry; + 
+ resource_list_for_each_entry(entry, &bridge->windows) { + res = entry->res; + if (res->parent && + (res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) + release_resource(res); + } + __acpi_pci_root_release_info(bridge->release_data); +} + +struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, + struct acpi_pci_root_ops *ops, + struct acpi_pci_root_info *info, + void *sysdata) +{ + int ret, busnum = root->secondary.start; + struct acpi_device *device = root->device; + int node = acpi_get_node(device->handle); + struct pci_bus *bus; + + info->root = root; + info->bridge = device; + info->ops = ops; + INIT_LIST_HEAD(&info->resources); + snprintf(info->name, sizeof(info->name), "PCI Bus %04x:%02x", + root->segment, busnum); + + if (ops->init_info && ops->init_info(info)) + goto out_release_info; + if (ops->prepare_resources) + ret = ops->prepare_resources(info); + else + ret = acpi_pci_probe_root_resources(info); + if (ret < 0) + goto out_release_info; + + pci_acpi_root_add_resources(info); + pci_add_resource(&info->resources, &root->secondary); + bus = pci_create_root_bus(NULL, busnum, ops->pci_ops, + sysdata, &info->resources); + if (!bus) + goto out_release_info; + + pci_scan_child_bus(bus); + pci_set_host_bridge_release(to_pci_host_bridge(bus->bridge), + acpi_pci_root_release_info, info); + if (node != NUMA_NO_NODE) + dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); + return bus; + +out_release_info: + __acpi_pci_root_release_info(info); + return NULL; +} + void __init acpi_pci_root_init(void) { acpi_hest_init(); diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index a965efa52152..89ab0572dbc6 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -52,6 +52,30 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) return ACPI_HANDLE(dev); } +struct acpi_pci_root; +struct acpi_pci_root_ops; + +struct acpi_pci_root_info { + struct acpi_pci_root *root; + struct acpi_device *bridge; + struct acpi_pci_root_ops *ops; + struct list_head resources; + char name[16]; +}; + +struct acpi_pci_root_ops { + struct pci_ops *pci_ops; + int (*init_info)(struct acpi_pci_root_info *info); + void (*release_info)(struct acpi_pci_root_info *info); + int (*prepare_resources)(struct acpi_pci_root_info *info); +}; + +extern int acpi_pci_probe_root_resources(struct acpi_pci_root_info *info); +extern struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, + struct acpi_pci_root_ops *ops, + struct acpi_pci_root_info *info, + void *sd); + void acpi_pci_add_bus(struct pci_bus *bus); void acpi_pci_remove_bus(struct pci_bus *bus); -- cgit v1.2.3 From 90b665f627b18822a7bbebeff44ce730ccf74275 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 13 Oct 2015 00:20:21 +0300 Subject: gpiolib: Add and use OF_GPIO_SINGLE_ENDED flag The flag matches the DT GPIO_SINGLE_ENDED flag and allows drivers to parse and use the DT flag to handle single-ended (open-drain or open-source) GPIOs. 
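The decode rule is compact enough to sketch on its own: once the single-ended flag is set, the existing polarity bit doubles as the drive-mode selector, since an open-drain line is inherently active-low and an open-source line active-high. A sketch of that mapping (the helper name is made up; the flag logic mirrors the of_find_gpio() hunk in the diff below):

#include <linux/of_gpio.h>
#include <linux/gpio/machine.h>

static unsigned long foo_lookup_flags(enum of_gpio_flags of_flags)
{
	unsigned long lflags = 0;

	if (of_flags & OF_GPIO_ACTIVE_LOW)
		lflags |= GPIO_ACTIVE_LOW;

	/* Polarity picks the drive mode when the line is single-ended */
	if (of_flags & OF_GPIO_SINGLE_ENDED)
		lflags |= (of_flags & OF_GPIO_ACTIVE_LOW) ?
			  GPIO_OPEN_DRAIN : GPIO_OPEN_SOURCE;

	return lflags;
}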
Signed-off-by: Laurent Pinchart Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 20 ++++++++++++++++++-- include/linux/of_gpio.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 1615cc904702..6798355c61c6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1831,6 +1831,13 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, if (of_flags & OF_GPIO_ACTIVE_LOW) *flags |= GPIO_ACTIVE_LOW; + if (of_flags & OF_GPIO_SINGLE_ENDED) { + if (of_flags & OF_GPIO_ACTIVE_LOW) + *flags |= GPIO_OPEN_DRAIN; + else + *flags |= GPIO_OPEN_SOURCE; + } + return desc; } @@ -2184,6 +2191,7 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, { struct gpio_desc *desc = ERR_PTR(-ENODEV); bool active_low = false; + bool single_ended = false; int ret; if (!fwnode) @@ -2194,8 +2202,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, desc = of_get_named_gpiod_flags(to_of_node(fwnode), propname, 0, &flags); - if (!IS_ERR(desc)) + if (!IS_ERR(desc)) { active_low = flags & OF_GPIO_ACTIVE_LOW; + single_ended = flags & OF_GPIO_SINGLE_ENDED; + } } else if (is_acpi_node(fwnode)) { struct acpi_gpio_info info; @@ -2208,10 +2218,16 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, if (IS_ERR(desc)) return desc; - /* Only value flag can be set from both DT and ACPI is active_low */ if (active_low) set_bit(FLAG_ACTIVE_LOW, &desc->flags); + if (single_ended) { + if (active_low) + set_bit(FLAG_OPEN_DRAIN, &desc->flags); + else + set_bit(FLAG_OPEN_SOURCE, &desc->flags); + } + ret = gpiod_request(desc, NULL); if (ret) return ERR_PTR(ret); diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index f3191828f037..87d6d1632dd4 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -29,6 +29,7 @@ struct device_node; */ enum of_gpio_flags { OF_GPIO_ACTIVE_LOW = 0x1, + OF_GPIO_SINGLE_ENDED = 0x2, }; #ifdef CONFIG_OF_GPIO -- cgit v1.2.3 From 3a341a4c30d427fd05617087db1564a595f65093 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 13 Oct 2015 23:39:50 -0700 Subject: Input: rotary-encoder - add support for quarter-period mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some encoders have both outputs low in stable states, others also have a stable state with both outputs high (half-period mode), and some have a stable state in all steps (quarter-period mode). The driver used to support the former states, and with this change it can also support the latter. This commit also deprecates the 'half-period' property and introduces a new property 'steps-per-period'. This property specifies the number of steps (stable states) produced by the rotary encoder for each GPIO period.
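For boards still using platform data rather than DT, quarter-period mode would be selected through the new field roughly as follows (a sketch with hypothetical GPIO numbers and an assumed 24-step geometry; see the include/linux/rotary_encoder.h hunk below for the field itself):

#include <linux/input.h>
#include <linux/rotary_encoder.h>

static struct rotary_encoder_platform_data foo_encoder_pdata = {
	.steps			= 24,		/* assumed detents per revolution */
	.axis			= ABS_X,
	.relative_axis		= false,
	.rollover		= false,
	.gpio_a			= 42,		/* hypothetical GPIO lines */
	.gpio_b			= 43,
	.steps_per_period	= 4,		/* quarter-period mode */
};

The DT equivalent is rotary-encoder,steps-per-period = <4>; a value of <2> replaces the deprecated rotary-encoder,half-period flag.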
Signed-off-by: Guido Martínez Signed-off-by: Ezequiel Garcia Acked-by: Rob Herring Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/rotary-encoder.txt | 11 ++- Documentation/input/rotary-encoder.txt | 8 +- drivers/input/misc/rotary_encoder.c | 86 ++++++++++++++++++++-- include/linux/rotary_encoder.h | 2 +- 4 files changed, 98 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/input/rotary-encoder.txt b/Documentation/devicetree/bindings/input/rotary-encoder.txt index 891ddba2d792..de99cbbbf6da 100644 --- a/Documentation/devicetree/bindings/input/rotary-encoder.txt +++ b/Documentation/devicetree/bindings/input/rotary-encoder.txt @@ -14,9 +14,18 @@ Optional properties: device, hence no steps need to be passed. - rotary-encoder,rollover: Automatic rollove when the rotary value becomes greater than the specified steps or smaller than 0. For absolute axis only. -- rotary-encoder,half-period: Makes the driver work on half-period mode. +- rotary-encoder,steps-per-period: Number of steps (stable states) per period. + The values have the following meaning: + 1: Full-period mode (default) + 2: Half-period mode + 4: Quarter-period mode - wakeup-source: Boolean, rotary encoder can wake up the system. +Deprecated properties: +- rotary-encoder,half-period: Makes the driver work on half-period mode. + This property is deprecated. Instead, a 'steps-per-period ' value should + be used, such as "rotary-encoder,steps-per-period = <2>". + See Documentation/input/rotary-encoder.txt for more information. Example: diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt index bddbee188624..46a74f0c551a 100644 --- a/Documentation/input/rotary-encoder.txt +++ b/Documentation/input/rotary-encoder.txt @@ -9,8 +9,9 @@ peripherals with two wires. The outputs are phase-shifted by 90 degrees and by triggering on falling and rising edges, the turn direction can be determined. -Some encoders have both outputs low in stable states, whereas others also have -a stable state with both outputs high (half-period mode). +Some encoders have both outputs low in stable states, others also have +a stable state with both outputs high (half-period mode) and some have +a stable state in all steps (quarter-period mode). The phase diagram of these two outputs look like this: @@ -32,6 +33,9 @@ The phase diagram of these two outputs look like this: |<-->| one step (half-period mode) + |<>| + one step (quarter-period mode) + For more information, please see https://en.wikipedia.org/wiki/Rotary_encoder diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index 962f9e86310b..8aee71986430 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -143,6 +143,55 @@ static irqreturn_t rotary_encoder_half_period_irq(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t rotary_encoder_quarter_period_irq(int irq, void *dev_id) +{ + struct rotary_encoder *encoder = dev_id; + unsigned char sum; + int state; + + state = rotary_encoder_get_state(encoder->pdata); + + /* + * We encode the previous and the current state using a byte. + * The previous state in the MSB nibble, the current state in the LSB + * nibble. Then use a table to decide the direction of the turn. 
+ */ + sum = (encoder->last_stable << 4) + state; + switch (sum) { + case 0x31: + case 0x10: + case 0x02: + case 0x23: + encoder->dir = 0; /* clockwise */ + break; + + case 0x13: + case 0x01: + case 0x20: + case 0x32: + encoder->dir = 1; /* counter-clockwise */ + break; + + default: + /* + * Ignore all other values. This covers the case when the + * state didn't change (a spurious interrupt) and the + * cases where the state changed by two steps, making it + * impossible to tell the direction. + * + * In either case, don't report any event and save the + * state for later. + */ + goto out; + } + + rotary_encoder_report_event(encoder); + +out: + encoder->last_stable = state; + return IRQ_HANDLED; +} + #ifdef CONFIG_OF static const struct of_device_id rotary_encoder_of_match[] = { { .compatible = "rotary-encoder", }, @@ -157,6 +206,7 @@ static struct rotary_encoder_platform_data *rotary_encoder_parse_dt(struct devic struct device_node *np = dev->of_node; struct rotary_encoder_platform_data *pdata; enum of_gpio_flags flags; + int error; if (!of_id || !np) return NULL; @@ -178,8 +228,23 @@ static struct rotary_encoder_platform_data *rotary_encoder_parse_dt(struct devic pdata->relative_axis = of_property_read_bool(np, "rotary-encoder,relative-axis"); pdata->rollover = of_property_read_bool(np, "rotary-encoder,rollover"); - pdata->half_period = - of_property_read_bool(np, "rotary-encoder,half-period"); + + error = of_property_read_u32(np, "rotary-encoder,steps-per-period", + &pdata->steps_per_period); + if (error) { + /* + * The 'half-period' property has been deprecated, you must use + * 'steps-per-period' and set an appropriate value, but we still + * need to parse it to maintain compatibility. + */ + if (of_property_read_bool(np, "rotary-encoder,half-period")) { + pdata->steps_per_period = 2; + } else { + /* Fallback to one step per period behavior */ + pdata->steps_per_period = 1; + } + } + pdata->wakeup_source = of_property_read_bool(np, "wakeup-source"); return pdata; @@ -251,12 +316,23 @@ static int rotary_encoder_probe(struct platform_device *pdev) encoder->irq_a = gpio_to_irq(pdata->gpio_a); encoder->irq_b = gpio_to_irq(pdata->gpio_b); - /* request the IRQs */ - if (pdata->half_period) { + switch (pdata->steps_per_period) { + case 4: + handler = &rotary_encoder_quarter_period_irq; + encoder->last_stable = rotary_encoder_get_state(pdata); + break; + case 2: handler = &rotary_encoder_half_period_irq; encoder->last_stable = rotary_encoder_get_state(pdata); - } else { + break; + case 1: handler = &rotary_encoder_irq; + break; + default: + dev_err(dev, "'%d' is not a valid steps-per-period value\n", + pdata->steps_per_period); + err = -EINVAL; + goto exit_free_gpio_b; } err = request_irq(encoder->irq_a, handler, diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h index b33f2d2a708f..fe3dc64e5aeb 100644 --- a/include/linux/rotary_encoder.h +++ b/include/linux/rotary_encoder.h @@ -8,9 +8,9 @@ struct rotary_encoder_platform_data { unsigned int gpio_b; unsigned int inverted_a; unsigned int inverted_b; + unsigned int steps_per_period; bool relative_axis; bool rollover; - bool half_period; bool wakeup_source; }; -- cgit v1.2.3 From 6a797d2737838906f2ea0a31686e87c3151e21ca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 16:16:04 -0400 Subject: ext4: call out CRC and corruption errors with specific error codes Instead of overloading EIO for CRC errors and corrupt structures, return the same error codes that XFS returns for the same issues. 
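The convention is easiest to see in a toy verifier. Only the two defines below come from the patch (see the fs/ext4/ext4.h hunk that follows); the checking function is hypothetical:

#include <linux/types.h>
#include <linux/errno.h>

#define EFSBADCRC	EBADMSG		/* bad CRC detected */
#define EFSCORRUPTED	EUCLEAN		/* filesystem structure corrupted */

/*
 * Hypothetical metadata check: checksum failures and structural damage
 * now return distinct, XFS-compatible codes instead of a blanket -EIO,
 * so callers and error decoders can tell the two cases apart.
 */
static int foo_verify_metadata(bool csum_ok, bool layout_ok)
{
	if (!csum_ok)
		return -EFSBADCRC;
	if (!layout_ok)
		return -EFSCORRUPTED;
	return 0;
}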
Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 2 +- fs/ext4/block_validity.c | 2 +- fs/ext4/dir.c | 4 +-- fs/ext4/ext4.h | 3 ++ fs/ext4/extents.c | 75 ++++++++++++++++++++++++------------------------ fs/ext4/ialloc.c | 1 + fs/ext4/indirect.c | 2 +- fs/ext4/inode.c | 16 +++++------ fs/ext4/mmp.c | 8 ++++-- fs/ext4/namei.c | 28 +++++++++--------- fs/ext4/super.c | 10 ++++++- fs/ext4/symlink.c | 2 +- fs/ext4/xattr.c | 28 +++++++++--------- fs/jbd2/journal.c | 3 +- fs/jbd2/recovery.c | 8 +++--- include/linux/jbd2.h | 3 ++ 16 files changed, 107 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index cd6ea29be645..f831e10a7475 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -203,7 +203,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, count); } set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return -EIO; + return -EFSBADCRC; } memset(bh->b_data, 0, sb->s_blocksize); diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 3522340c7a99..02ddec6d8a7d 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -234,7 +234,7 @@ int ext4_check_blockref(const char *function, unsigned int line, es->s_last_error_block = cpu_to_le64(blk); ext4_error_inode(inode, function, line, blk, "invalid block"); - return -EIO; + return -EFSCORRUPTED; } } return 0; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index f9e14911918c..b29cb707539f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -621,14 +621,14 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, while ((char *) de < top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) - return -EIO; + return -EFSCORRUPTED; nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; } if ((char *) de > top) - return -EIO; + return -EFSCORRUPTED; return 0; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cd832b99f68c..6c0797346da1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3064,4 +3064,7 @@ extern void ext4_resize_end(struct super_block *sb); #endif /* __KERNEL__ */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _EXT4_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7f486e350d15..faeaba8582d7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -442,7 +442,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, int depth, ext4_fsblk_t pblk) { const char *error_msg; - int max = 0; + int max = 0, err = -EFSCORRUPTED; if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { error_msg = "invalid magic"; @@ -473,6 +473,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, if (ext_depth(inode) != depth && !ext4_extent_block_csum_verify(inode, eh)) { error_msg = "extent tree corrupted"; + err = -EFSBADCRC; goto corrupted; } return 0; @@ -485,7 +486,7 @@ corrupted: le16_to_cpu(eh->eh_magic), le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), max, le16_to_cpu(eh->eh_depth), depth); - return -EIO; + return err; } #define ext4_ext_check(inode, eh, depth, pblk) \ @@ -910,7 +911,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, put_bh(bh); EXT4_ERROR_INODE(inode, "ppos %d > depth %d", ppos, depth); - ret = -EIO; + ret = -EFSCORRUPTED; goto err; } path[ppos].p_bh = bh; @@ -959,7 +960,7 @@ static int ext4_ext_insert_index(handle_t *handle, 
struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d == ei_block %d!", logical, le32_to_cpu(curp->p_idx->ei_block)); - return -EIO; + return -EFSCORRUPTED; } if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) @@ -968,7 +969,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, "eh_entries %d >= eh_max %d!", le16_to_cpu(curp->p_hdr->eh_entries), le16_to_cpu(curp->p_hdr->eh_max)); - return -EIO; + return -EFSCORRUPTED; } if (logical > le32_to_cpu(curp->p_idx->ei_block)) { @@ -992,7 +993,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); - return -EIO; + return -EFSCORRUPTED; } ix->ei_block = cpu_to_le32(logical); @@ -1001,7 +1002,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); - return -EIO; + return -EFSCORRUPTED; } err = ext4_ext_dirty(handle, inode, curp); @@ -1042,7 +1043,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, * border from split point */ if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); - return -EIO; + return -EFSCORRUPTED; } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; @@ -1086,7 +1087,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, newblock = ablocks[--a]; if (unlikely(newblock == 0)) { EXT4_ERROR_INODE(inode, "newblock == 0!"); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); @@ -1112,7 +1113,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", path[depth].p_hdr->eh_entries, path[depth].p_hdr->eh_max); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } /* start copy from next extent */ @@ -1151,7 +1152,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, k = depth - at - 1; if (unlikely(k < 0)) { EXT4_ERROR_INODE(inode, "k %d < 0!", k); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } if (k) @@ -1191,7 +1192,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", le32_to_cpu(path[i].p_ext->ee_block)); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } /* start copy indexes */ @@ -1425,7 +1426,7 @@ static int ext4_ext_search_left(struct inode *inode, if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; @@ -1444,7 +1445,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", *logical, le32_to_cpu(ex->ee_block)); - return -EIO; + return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; @@ -1455,7 +1456,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 
le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, depth); - return -EIO; + return -EFSCORRUPTED; } } return 0; @@ -1465,7 +1466,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); - return -EIO; + return -EFSCORRUPTED; } *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; @@ -1495,7 +1496,7 @@ static int ext4_ext_search_right(struct inode *inode, if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; @@ -1514,7 +1515,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "first_extent(path[%d].p_hdr) != ex", depth); - return -EIO; + return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; @@ -1522,7 +1523,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "ix != EXT_FIRST_INDEX *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } } goto found_extent; @@ -1532,7 +1533,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); - return -EIO; + return -EFSCORRUPTED; } if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { @@ -1670,7 +1671,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, if (unlikely(ex == NULL || eh == NULL)) { EXT4_ERROR_INODE(inode, "ex %p == NULL or eh %p == NULL", ex, eh); - return -EIO; + return -EFSCORRUPTED; } if (depth == 0) { @@ -1938,14 +1939,14 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); - return -EIO; + return -EFSCORRUPTED; } depth = ext_depth(inode); ex = path[depth].p_ext; eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EIO; + return -EFSCORRUPTED; } /* try to insert block into found extent and return */ @@ -2172,7 +2173,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, if (unlikely(path[depth].p_hdr == NULL)) { up_read(&EXT4_I(inode)->i_data_sem); EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EIO; + err = -EFSCORRUPTED; break; } ex = path[depth].p_ext; @@ -2241,7 +2242,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, if (unlikely(es.es_len == 0)) { EXT4_ERROR_INODE(inode, "es.es_len == 0"); - err = -EIO; + err = -EFSCORRUPTED; break; } @@ -2264,7 +2265,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, "next extent == %u, next " "delalloc extent = %u", next, next_del); - err = -EIO; + err = -EFSCORRUPTED; break; } } @@ -2363,7 +2364,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); - return -EIO; + return -EFSCORRUPTED; } err = ext4_ext_get_access(handle, inode, path); if (err) @@ -2612,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EIO; + return -EFSCORRUPTED; } /* find where to start removing */ ex = path[depth].p_ext; @@ -2666,7 +2667,7 @@ ext4_ext_rm_leaf(handle_t 
*handle, struct inode *inode, "on extent %u:%u", start, end, ex_ee_block, ex_ee_block + ex_ee_len - 1); - err = -EIO; + err = -EFSCORRUPTED; goto out; } else if (a != ex_ee_block) { /* remove tail of the extent */ @@ -2841,7 +2842,7 @@ again: EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EIO; + err = -EFSCORRUPTED; } goto out; } @@ -2920,7 +2921,7 @@ again: i = 0; if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) { - err = -EIO; + err = -EFSCORRUPTED; goto out; } } @@ -2978,7 +2979,7 @@ again: * Should be a no-op if we did IO above. */ cond_resched(); if (WARN_ON(i + 1 > depth)) { - err = -EIO; + err = -EFSCORRUPTED; break; } path[i + 1].p_bh = bh; @@ -3345,7 +3346,7 @@ static int ext4_split_extent(handle_t *handle, if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); - return -EIO; + return -EFSCORRUPTED; } unwritten = ext4_ext_is_unwritten(ex); split_flag1 = 0; @@ -3973,7 +3974,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); - return -EIO; + return -EFSCORRUPTED; } } @@ -4311,7 +4312,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, "lblock: %lu, depth: %d pblock %lld", (unsigned long) map->m_lblk, depth, path[depth].p_block); - err = -EIO; + err = -EFSCORRUPTED; goto out2; } @@ -5274,7 +5275,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, if (depth == path->p_depth) { ex_start = path[depth].p_ext; if (!ex_start) - return -EIO; + return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); @@ -5414,7 +5415,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if (!extent) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) *iterator); - return -EIO; + return -EFSCORRUPTED; } if (SHIFT == SHIFT_LEFT && *iterator > le32_to_cpu(extent->ee_block)) { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 619bfc1fda8c..f34b1aa8bbb5 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1116,6 +1116,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext4_warning(sb, "bad orphan ino %lu! 
e2fsck was run?", ino); + err = -EFSCORRUPTED; goto error; } diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 2468261748b2..d96ea53bbece 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -566,7 +566,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); - return -EUCLEAN; + return -EFSCORRUPTED; } /* Set up for the direct block allocation */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f205ac3c4e41..2b9278867caf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -378,7 +378,7 @@ static int __check_block_validity(struct inode *inode, const char *func, "lblock %lu mapped to illegal pblock " "(length %d)", (unsigned long) map->m_lblk, map->m_len); - return -EIO; + return -EFSCORRUPTED; } return 0; } @@ -480,7 +480,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, /* We can handle the block number less than EXT_MAX_BLOCKS */ if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) - return -EIO; + return -EFSCORRUPTED; /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { @@ -3863,7 +3863,7 @@ static int __ext4_get_inode_loc(struct inode *inode, iloc->bh = NULL; if (!ext4_valid_inum(sb, inode->i_ino)) - return -EIO; + return -EFSCORRUPTED; iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); @@ -4111,7 +4111,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, EXT4_INODE_SIZE(inode->i_sb)); - ret = -EIO; + ret = -EFSCORRUPTED; goto bad_inode; } } else @@ -4131,7 +4131,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { EXT4_ERROR_INODE(inode, "checksum invalid"); - ret = -EIO; + ret = -EFSBADCRC; goto bad_inode; } @@ -4246,7 +4246,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", ei->i_file_acl); - ret = -EIO; + ret = -EFSCORRUPTED; goto bad_inode; } else if (!ext4_has_inline_data(inode)) { if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { @@ -4297,7 +4297,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } else if (ino == EXT4_BOOT_LOADER_INO) { make_bad_inode(inode); } else { - ret = -EIO; + ret = -EFSCORRUPTED; EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); goto bad_inode; } @@ -4315,7 +4315,7 @@ bad_inode: struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) { if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); return ext4_iget(sb, ino); } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 6eb1a619890c..0a512aa81bf7 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -98,10 +98,12 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, } mmp = (struct mmp_struct *)((*bh)->b_data); - if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC && - ext4_mmp_csum_verify(sb, mmp)) + if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) + ret = -EFSCORRUPTED; + else if (!ext4_mmp_csum_verify(sb, mmp)) + ret = -EFSBADCRC; + else return 0; - ret = -EINVAL; warn_exit: ext4_warning(sb, "Error %d while reading MMP block %llu", diff --git a/fs/ext4/namei.c 
b/fs/ext4/namei.c index 9f61e7679a6d..8fd8e0d4c75e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -109,7 +109,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!bh) { ext4_error_inode(inode, func, line, block, "Directory hole found"); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ @@ -124,7 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!is_dx_block && type == INDEX) { ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || buffer_verified(bh)) @@ -142,7 +142,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, ext4_error_inode(inode, func, line, block, "Directory index failed checksum"); brelse(bh); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSBADCRC); } } if (!is_dx_block) { @@ -152,7 +152,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, ext4_error_inode(inode, func, line, block, "Directory block failed checksum"); brelse(bh); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSBADCRC); } } return bh; @@ -1570,19 +1570,19 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (unlikely(ino == dir->i_ino)) { EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir", dentry); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } inode = ext4_iget_normal(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || @@ -1619,7 +1619,7 @@ struct dentry *ext4_get_parent(struct dentry *child) if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) { EXT4_ERROR_INODE(d_inode(child), "bad parent inode number: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino)); @@ -1807,7 +1807,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) { - res = -EIO; + res = -EFSCORRUPTED; goto return_result; } /* Provide crypto context and crypto buffer to ext4 match */ @@ -1967,7 +1967,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, if ((char *) de >= (((char *) root) + blocksize)) { EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); brelse(bh); - return -EIO; + return -EFSCORRUPTED; } len = ((char *) root) + (blocksize - csum_size) - (char *) de; @@ -2315,7 +2315,7 @@ int ext4_generic_delete_entry(handle_t *handle, while (i < buf_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, bh->b_size, i)) - return -EIO; + return -EFSCORRUPTED; if (de == de_del) { if (pde) pde->rec_len = ext4_rec_len_to_disk( @@ -2934,7 +2934,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) inode = d_inode(dentry); - retval = -EIO; + retval = -EFSCORRUPTED; if (le32_to_cpu(de->inode) != inode->i_ino) goto end_rmdir; @@ -3008,7 +3008,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) inode = d_inode(dentry); 
- retval = -EIO; + retval = -EFSCORRUPTED; if (le32_to_cpu(de->inode) != inode->i_ino) goto end_unlink; @@ -3310,7 +3310,7 @@ static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) if (!ent->dir_bh) return retval; if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) - return -EIO; + return -EFSCORRUPTED; BUFFER_TRACE(ent->dir_bh, "get_write_access"); return ext4_journal_get_write_access(handle, ent->dir_bh); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 43c0cc802189..f7519ad25cce 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -490,6 +490,12 @@ const char *ext4_decode_error(struct super_block *sb, int errno, char *errstr = NULL; switch (errno) { + case -EFSCORRUPTED: + errstr = "Corrupt filesystem"; + break; + case -EFSBADCRC: + errstr = "Filesystem failed CRC"; + break; case -EIO: errstr = "IO failure"; break; @@ -3192,6 +3198,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); silent = 1; + ret = -EFSBADCRC; goto cantfind_ext4; } @@ -3612,6 +3619,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (!ext4_check_descriptors(sb, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); + ret = -EFSCORRUPTED; goto failed_mount2; } @@ -4664,7 +4672,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) "ext4_remount: Checksum for group %u failed (%u!=%u)", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), le16_to_cpu(gdp->bg_checksum)); - err = -EINVAL; + err = -EFSBADCRC; goto restore_opts; } } diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index c677f2c1044b..abe2401ce405 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -57,7 +57,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook sizeof(struct ext4_encrypted_symlink_data) - 1) > max_size) { /* Symlink data on the disk is corrupted */ - res = -EIO; + res = -EFSCORRUPTED; goto errout; } plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ? 
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 16e28c08d1e8..7649422ed7a1 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -195,7 +195,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, while (!IS_LAST_ENTRY(e)) { struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) - return -EIO; + return -EFSCORRUPTED; e = next; } @@ -205,7 +205,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, (void *)e + sizeof(__u32) || value_start + le16_to_cpu(entry->e_value_offs) + le32_to_cpu(entry->e_value_size) > end)) - return -EIO; + return -EFSCORRUPTED; entry = EXT4_XATTR_NEXT(entry); } @@ -222,9 +222,9 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) - return -EIO; + return -EFSCORRUPTED; if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) - return -EIO; + return -EFSBADCRC; error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, bh->b_data); if (!error) @@ -239,7 +239,7 @@ ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) if (entry->e_value_block != 0 || value_size > size || le16_to_cpu(entry->e_value_offs) + value_size > size) - return -EIO; + return -EFSCORRUPTED; return 0; } @@ -266,7 +266,7 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, } *pentry = entry; if (!cmp && ext4_xattr_check_entry(entry, size)) - return -EIO; + return -EFSCORRUPTED; return cmp ? -ENODATA : 0; } @@ -297,13 +297,13 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, bad_block: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } ext4_xattr_cache_insert(ext4_mb_cache, bh); entry = BFIRST(bh); error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (error) goto cleanup; @@ -445,7 +445,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } ext4_xattr_cache_insert(ext4_mb_cache, bh); @@ -751,7 +751,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, if (ext4_xattr_check_block(inode, bs->bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } /* Find the named attribute. 
*/ @@ -811,7 +811,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, bs->bh); } unlock_buffer(bs->bh); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (!error) error = ext4_handle_dirty_xattr_block(handle, @@ -855,7 +855,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } error = ext4_xattr_set_entry(i, s); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (error) goto cleanup; @@ -1314,7 +1314,7 @@ retry: if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } base = BHDR(bh); @@ -1579,7 +1579,7 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) return 1; if (entry1->e_value_block != 0 || entry2->e_value_block != 0) - return -EIO; + return -EFSCORRUPTED; if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), (char *)header2 + le16_to_cpu(entry2->e_value_offs), le32_to_cpu(entry1->e_value_size))) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 00f7dbdd64b0..474c1781c0ce 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1558,6 +1558,7 @@ static int journal_get_superblock(journal_t *journal) /* Check superblock checksum */ if (!jbd2_superblock_csum_verify(journal, sb)) { printk(KERN_ERR "JBD2: journal checksum error\n"); + err = -EFSBADCRC; goto out; } @@ -1649,7 +1650,7 @@ int jbd2_journal_load(journal_t *journal) printk(KERN_ERR "JBD2: journal transaction %u on %s " "is corrupt.\n", journal->j_failed_commit, journal->j_devname); - return -EIO; + return -EFSCORRUPTED; } /* OK, we've finished with the dynamic journal bits: diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index a9079d035ae5..5c836d78af3b 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -140,7 +140,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, if (offset >= journal->j_maxlen) { printk(KERN_ERR "JBD2: corrupted journal superblock\n"); - return -EIO; + return -EFSCORRUPTED; } err = jbd2_journal_bmap(journal, offset, &blocknr); @@ -527,7 +527,7 @@ static int do_one_pass(journal_t *journal, printk(KERN_ERR "JBD2: Invalid checksum " "recovering block %lu in log\n", next_log_block); - err = -EIO; + err = -EFSBADCRC; brelse(bh); goto failed; } @@ -602,7 +602,7 @@ static int do_one_pass(journal_t *journal, journal, tag, obh->b_data, be32_to_cpu(tmp->h_sequence))) { brelse(obh); - success = -EIO; + success = -EFSBADCRC; printk(KERN_ERR "JBD2: Invalid " "checksum recovering " "block %llu in log\n", @@ -851,7 +851,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, rcount = be32_to_cpu(header->r_count); if (!jbd2_revoke_block_csum_verify(journal, header)) - return -EINVAL; + return -EFSBADCRC; if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 6da6f89722e1..f2a4b07a6ce6 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1449,4 +1449,7 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) #endif /* __KERNEL__ */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _LINUX_JBD2_H */ -- cgit v1.2.3 From 56316a0d28f251dae6a3bc2b6d50e7c25389871f Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sat, 17 Oct 2015 16:18:45 -0400 Subject: jbd2: clean up feature test macros with predicate functions Create separate predicate functions to test/set/clear feature flags, thereby replacing the wordy old macros. Furthermore, clean out the places where we open-coded feature tests. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 22 ++++++---------- fs/jbd2/journal.c | 12 ++++----- fs/jbd2/recovery.c | 18 ++++++------- fs/jbd2/revoke.c | 4 +-- include/linux/jbd2.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 91 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 362e5f614450..36345fefa3ff 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -142,8 +142,7 @@ static int journal_submit_commit_record(journal_t *journal, tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); - if (JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { + if (jbd2_has_feature_checksum(journal)) { tmp->h_chksum_type = JBD2_CRC32_CHKSUM; tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; tmp->h_chksum[0] = cpu_to_be32(crc32_sum); @@ -157,8 +156,7 @@ static int journal_submit_commit_record(journal_t *journal, bh->b_end_io = journal_end_buffer_io_sync; if (journal->j_flags & JBD2_BARRIER && - !JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) + !jbd2_has_feature_async_commit(journal)) ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh); else ret = submit_bh(WRITE_SYNC, bh); @@ -317,7 +315,7 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(j)) tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } @@ -356,7 +354,7 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, bh->b_size); kunmap_atomic(addr); - if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + if (jbd2_has_feature_csum3(j)) tag3->t_checksum = cpu_to_be32(csum32); else tag->t_checksum = cpu_to_be16(csum32); @@ -730,8 +728,7 @@ start_journal_io: /* * Compute checksum. */ - if (JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { + if (jbd2_has_feature_checksum(journal)) { crc32_sum = jbd2_checksum_data(crc32_sum, bh); } @@ -797,8 +794,7 @@ start_journal_io: blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); /* Done it all: now write the commit record asynchronously. 
*/ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (jbd2_has_feature_async_commit(journal)) { err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) @@ -889,8 +885,7 @@ start_journal_io: commit_transaction->t_state = T_COMMIT_JFLUSH; write_unlock(&journal->j_state_lock); - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (!jbd2_has_feature_async_commit(journal)) { err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) @@ -898,8 +893,7 @@ start_journal_io: } if (cbh) err = journal_wait_on_commit_record(journal, cbh); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && + if (jbd2_has_feature_async_commit(journal) && journal->j_flags & JBD2_BARRIER) { blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 474c1781c0ce..eb8a7546e8a4 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1523,8 +1523,8 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { + if (jbd2_has_feature_csum2(journal) && + jbd2_has_feature_csum3(journal)) { /* Can't have checksum v2 and v3 at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " "at the same time!\n"); @@ -1532,7 +1532,7 @@ static int journal_get_superblock(journal_t *journal) } if (jbd2_journal_has_csum_v2or3_feature(journal) && - JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { + jbd2_has_feature_checksum(journal)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " "at the same time!\n"); @@ -2198,15 +2198,15 @@ size_t journal_tag_bytes(journal_t *journal) { size_t sz; - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + if (jbd2_has_feature_csum3(journal)) return sizeof(journal_block_tag3_t); sz = sizeof(journal_block_tag_t); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_has_feature_csum2(journal)) sz += sizeof(__u16); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) return sz; else return sz - sizeof(__u32); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 5c836d78af3b..7f277e49fe88 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -342,7 +342,7 @@ static inline unsigned long long read_tag_block(journal_t *journal, journal_block_tag_t *tag) { unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; } @@ -411,7 +411,7 @@ static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + if (jbd2_has_feature_csum3(j)) return tag3->t_checksum == cpu_to_be32(csum32); else return tag->t_checksum == cpu_to_be16(csum32); @@ -538,8 +538,7 @@ static int do_one_pass(journal_t *journal, * just skip over the blocks it describes. 
*/ if (pass != PASS_REPLAY) { if (pass == PASS_SCAN && - JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM) && + jbd2_has_feature_checksum(journal) && !info->end_transaction) { if (calc_chksums(journal, bh, &next_log_block, @@ -694,8 +693,7 @@ static int do_one_pass(journal_t *journal, * much to do other than move on to the next sequence * number. */ if (pass == PASS_SCAN && - JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { + jbd2_has_feature_checksum(journal)) { int chksum_err, chksum_seen; struct commit_header *cbh = (struct commit_header *)bh->b_data; @@ -735,8 +733,7 @@ static int do_one_pass(journal_t *journal, if (chksum_err) { info->end_transaction = next_commit_ID; - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ + if (!jbd2_has_feature_async_commit(journal)) { journal->j_failed_commit = next_commit_ID; brelse(bh); @@ -750,8 +747,7 @@ static int do_one_pass(journal_t *journal, bh->b_data)) { info->end_transaction = next_commit_ID; - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (!jbd2_has_feature_async_commit(journal)) { journal->j_failed_commit = next_commit_ID; brelse(bh); @@ -859,7 +855,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, return -EINVAL; max = rcount; - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) record_len = 8; while (offset + record_len <= max) { diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 0abf2e7f725b..705ae577882b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -589,7 +589,7 @@ static void write_one_revoke_record(journal_t *journal, if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) sz = 8; else sz = 4; @@ -619,7 +619,7 @@ static void write_one_revoke_record(journal_t *journal, *descriptorp = descriptor; } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); else diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f2a4b07a6ce6..b5ca2314cace 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -278,6 +278,7 @@ typedef struct journal_superblock_s /* 0x0400 */ } journal_superblock_t; +/* Use the jbd2_{has,set,clear}_feature_* helpers; these will be removed */ #define JBD2_HAS_COMPAT_FEATURE(j,mask) \ ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) @@ -288,7 +289,7 @@ typedef struct journal_superblock_s ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) -#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 +#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 @@ -296,6 +297,8 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 +/* See "journal feature predicate functions" below */ + /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 @@ -1034,6 +1037,69 @@ struct journal_s __u32 j_csum_seed; }; +/* journal feature predicate functions */ +#define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \ +static 
inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_compat & \ + cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_compat |= \ + cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_compat &= \ + ~cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ +} + +#define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_ro_compat & \ + cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_ro_compat |= \ + cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_ro_compat &= \ + ~cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ +} + +#define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_incompat & \ + cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_incompat |= \ + cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_incompat &= \ + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ +} + +JBD2_FEATURE_COMPAT_FUNCS(checksum, CHECKSUM) + +JBD2_FEATURE_INCOMPAT_FUNCS(revoke, REVOKE) +JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) +JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) +JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) +JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) + /* * Journal flag definitions */ @@ -1340,8 +1406,7 @@ extern size_t journal_tag_bytes(journal_t *journal); static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) { - return JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2) || - JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3); + return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); } static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) -- cgit v1.2.3 From 79c1faa4511e78380cd643dac88a775062a08bc0 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:51 -0400 Subject: tty: Remove tty_wait_until_sent_from_close() tty_wait_until_sent_from_close() drops the tty lock while waiting for the tty driver to finish sending previously accepted data (i.e., data remaining in its write buffer and transmit fifo). tty_wait_until_sent_from_close() was added by commit a57a7bf3fc7e ("TTY: define tty_wait_until_sent_from_close") so that, while one tty waited on close for its output to drain, the rest of the tty subsystem was not blocked from opening new ttys. However, since commit 0911261d4cb6 ("tty: Don't take tty_mutex for tty count changes"), holding a tty lock while closing does not prevent other ttys from being opened/closed/hung up, but only prevents lifetime event changes for the tty under lock. Holding the tty lock while waiting for output to drain does prevent parallel non-blocking opens (O_NONBLOCK) from advancing or returning while the tty lock is held.
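Before that rationale continues, the JBD2_FEATURE_*_FUNCS generators above deserve a quick standalone illustration. Each invocation pastes the feature name into a has/set/clear helper triple, which is what lets the rest of the series write jbd2_has_feature_64bit(j) instead of JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT). A minimal userspace sketch of the same token-pasting pattern; the struct, flag value, and names are illustrative stand-ins, and the kernel's cpu_to_be32() conversion and j_format_version check are omitted:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct journal { uint32_t feature_incompat; };

#define FEATURE_INCOMPAT_64BIT 0x00000002

/* Generate has/set/clear helpers for one feature flag. */
#define FEATURE_INCOMPAT_FUNCS(name, flagname) \
static inline bool has_feature_##name(struct journal *j) \
{ \
        return (j->feature_incompat & FEATURE_INCOMPAT_##flagname) != 0; \
} \
static inline void set_feature_##name(struct journal *j) \
{ \
        j->feature_incompat |= FEATURE_INCOMPAT_##flagname; \
} \
static inline void clear_feature_##name(struct journal *j) \
{ \
        j->feature_incompat &= ~FEATURE_INCOMPAT_##flagname; \
}

FEATURE_INCOMPAT_FUNCS(64bit, 64BIT)

int main(void)
{
        struct journal j = { 0 };

        set_feature_64bit(&j);
        printf("64bit? %d\n", has_feature_64bit(&j));   /* prints 1 */
        clear_feature_64bit(&j);
        printf("64bit? %d\n", has_feature_64bit(&j));   /* prints 0 */
        return 0;
}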
However, all parallel opens _already_ block even if the tty lock is dropped while closing and the parallel open advances. Blocking in open has been in mainline since at least 2.6.29 (see tty_port_block_til_ready(); note the test for O_NONBLOCK is _after_ the wait while ASYNC_CLOSING). IOW, before this patch a non-blocking open will sleep anyway for the _entire_ duration of a parallel hardware shutdown, and when it wakes, the error return will cause a release of its tty, and it will restart with a fresh attempt to open. Similarly with a blocking open that is already waiting; when it's woken, the hardware shutdown has already completed, so ASYNC_INITIALIZED is not set, which forces a release and restart as well. So, holding the tty lock across the _entire_ close (which is what this patch does), even while waiting for output to drain, is equivalent to the current outcome wrt parallel opens. Cc: Alan Cox Cc: David Laight CC: Arnd Bergmann CC: Karsten Keil CC: linuxppc-dev@lists.ozlabs.org Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_tty.c | 2 +- drivers/tty/hvc/hvc_console.c | 2 +- drivers/tty/hvc/hvcs.c | 2 +- drivers/tty/tty_port.c | 11 ++--------- include/linux/tty.h | 18 ------------------ 5 files changed, 5 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index bc912611fe09..2175225af742 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -1582,7 +1582,7 @@ isdn_tty_close(struct tty_struct *tty, struct file *filp) * line status register. */ if (port->flags & ASYNC_INITIALIZED) { - tty_wait_until_sent_from_close(tty, 3000); /* 30 seconds timeout */ + tty_wait_until_sent(tty, 3000); /* 30 seconds timeout */ /* * Before we drop DTR, make sure the UART transmitter * has completely drained; this is especially diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 9c30f67c802a..e46d628998f5 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -418,7 +418,7 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) * there is no buffered data otherwise sleeps on a wait queue * waking periodically to check chars_in_buffer(). */ - tty_wait_until_sent_from_close(tty, HVC_CLOSE_WAIT); + tty_wait_until_sent(tty, HVC_CLOSE_WAIT); } else { if (hp->port.count < 0) printk(KERN_ERR "hvc_close %X: oops, count is %d\n", diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c index f7ff97c0ad34..5997b1731111 100644 --- a/drivers/tty/hvc/hvcs.c +++ b/drivers/tty/hvc/hvcs.c @@ -1230,7 +1230,7 @@ static void hvcs_close(struct tty_struct *tty, struct file *filp) irq = hvcsd->vdev->irq; spin_unlock_irqrestore(&hvcsd->lock, flags); - tty_wait_until_sent_from_close(tty, HVCS_CLOSE_WAIT); + tty_wait_until_sent(tty, HVCS_CLOSE_WAIT); /* * This line is important because it tells hvcs_open that this diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 40b31835f80b..d7d9f9cde964 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -463,10 +463,7 @@ static void tty_port_drain_delay(struct tty_port *port, struct tty_struct *tty) schedule_timeout_interruptible(timeout); } -/* Caller holds tty lock. - * NB: may drop and reacquire tty lock (in tty_wait_until_sent_from_close()) - * so tty and tty port may have changed state (but not hung up or reopened). - */ +/* Caller holds tty lock.
*/ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp) { @@ -502,7 +499,7 @@ int tty_port_close_start(struct tty_port *port, if (tty->flow_stopped) tty_driver_flush_buffer(tty); if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent_from_close(tty, port->closing_wait); + tty_wait_until_sent(tty, port->closing_wait); if (port->drain_delay) tty_port_drain_delay(port, tty); } @@ -543,10 +540,6 @@ EXPORT_SYMBOL(tty_port_close_end); * tty_port_close * * Caller holds tty lock - * - * NB: may drop and reacquire tty lock (in tty_port_close_start()-> - * tty_wait_until_sent_from_close()) so tty and tty_port may have changed - * state (but not hung up or reopened). */ void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp) diff --git a/include/linux/tty.h b/include/linux/tty.h index d072ded41678..614c8224c32f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -656,24 +656,6 @@ extern void __lockfunc tty_unlock(struct tty_struct *tty); extern void __lockfunc tty_lock_slave(struct tty_struct *tty); extern void __lockfunc tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); -/* - * this shall be called only from where BTM is held (like close) - * - * We need this to ensure nobody waits for us to finish while we are waiting. - * Without this we were encountering system stalls. - * - * This should be indeed removed with BTM removal later. - * - * Locking: BTM required. Nobody is allowed to hold port->mutex. - */ -static inline void tty_wait_until_sent_from_close(struct tty_struct *tty, - long timeout) -{ - tty_unlock(tty); /* tty->ops->close holds the BTM, drop it while waiting */ - tty_wait_until_sent(tty, timeout); - tty_lock(tty); -} - /* * wait_event_interruptible_tty -- wait for a condition with the tty lock held * -- cgit v1.2.3 From cc2aaabfd6d6335e2156781ca67715b4de17f993 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:54 -0400 Subject: tty: Remove tty_port::close_wait With the removal of tty_wait_until_sent_from_close(), tty drivers no longer wait during open for parallel closes to complete (instead, the tty core waits before calling the driver open() method). Thus, the close_wait waitqueue is no longer used for waiting. Remove struct tty_port::close_wait. 
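The tty.h hunk above deleted the classic "unlock around a long wait" helper. As a rough userspace model of the trade-off the two logs discuss, consider a pthread mutex standing in for the tty lock (all names here are illustrative, not kernel API): whether close drops the lock around the drain wait or holds it, a parallel open makes no progress until close is finished.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t tty_lock_m = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for tty_wait_until_sent(): pretend output takes 1s to drain. */
static void wait_until_sent(void) { sleep(1); }

/* Old pattern: drop the "tty lock" around the drain wait. */
static void close_dropping_lock(void)
{
        pthread_mutex_lock(&tty_lock_m);
        /* ... shutdown work under the lock ... */
        pthread_mutex_unlock(&tty_lock_m);   /* tty_unlock() */
        wait_until_sent();                   /* others may run now */
        pthread_mutex_lock(&tty_lock_m);     /* tty_lock() */
        /* ... tty/port state may have changed while unlocked ... */
        pthread_mutex_unlock(&tty_lock_m);
}

/* New pattern: hold the lock for the entire close, including the wait. */
static void close_holding_lock(void)
{
        pthread_mutex_lock(&tty_lock_m);
        wait_until_sent();                   /* parallel opens just sleep */
        pthread_mutex_unlock(&tty_lock_m);
}

static void *opener(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&tty_lock_m);     /* blocks while close holds the lock */
        puts("open: got tty lock");
        pthread_mutex_unlock(&tty_lock_m);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, opener, NULL);
        close_dropping_lock();
        close_holding_lock();
        pthread_join(t, NULL);
        return 0;
}

Either way the opener sleeps until the close path releases the lock; that equivalence is the argument for holding the lock across the entire close.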
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/rocket.c | 1 - drivers/tty/serial/68328serial.c | 1 - drivers/tty/serial/crisv10.c | 1 - drivers/tty/serial/serial_core.c | 1 - drivers/tty/tty_port.c | 2 -- include/linux/tty.h | 1 - 6 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 69c72d1aa627..802eac7e561b 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -1049,7 +1049,6 @@ static void rp_close(struct tty_struct *tty, struct file *filp) mutex_unlock(&port->mutex); tty_port_tty_set(port, NULL); - wake_up_interruptible(&port->close_wait); complete_all(&info->close_wait); atomic_dec(&rp_num_ports_open); diff --git a/drivers/tty/serial/68328serial.c b/drivers/tty/serial/68328serial.c index 9ba0c9333078..0140ba4aacde 100644 --- a/drivers/tty/serial/68328serial.c +++ b/drivers/tty/serial/68328serial.c @@ -1071,7 +1071,6 @@ static void rs_close(struct tty_struct *tty, struct file * filp) wake_up_interruptible(&port->open_wait); } port->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&port->close_wait); local_irq_restore(flags); } diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c index 06f4fe928dcd..f13f2ebd215b 100644 --- a/drivers/tty/serial/crisv10.c +++ b/drivers/tty/serial/crisv10.c @@ -3655,7 +3655,6 @@ rs_close(struct tty_struct *tty, struct file * filp) wake_up_interruptible(&info->port.open_wait); } info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&info->port.close_wait); local_irq_restore(flags); /* port closed */ diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index df4271ae7414..def5199ca004 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1437,7 +1437,6 @@ static void uart_close(struct tty_struct *tty, struct file *filp) clear_bit(ASYNCB_CLOSING, &port->flags); spin_unlock_irq(&port->lock); wake_up_interruptible(&port->open_wait); - wake_up_interruptible(&port->close_wait); mutex_unlock(&port->mutex); diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 0e1cf0495fb9..e04a8cfb16f7 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -22,7 +22,6 @@ void tty_port_init(struct tty_port *port) memset(port, 0, sizeof(*port)); tty_buffer_init(port); init_waitqueue_head(&port->open_wait); - init_waitqueue_head(&port->close_wait); init_waitqueue_head(&port->delta_msr_wait); mutex_init(&port->mutex); mutex_init(&port->buf_mutex); @@ -520,7 +519,6 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty) wake_up_interruptible(&port->open_wait); } port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING); - wake_up_interruptible(&port->close_wait); spin_unlock_irqrestore(&port->lock, flags); } EXPORT_SYMBOL(tty_port_close_end); diff --git a/include/linux/tty.h b/include/linux/tty.h index 614c8224c32f..090ce2a52262 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -227,7 +227,6 @@ struct tty_port { int blocked_open; /* Waiting to open */ int count; /* Usage count */ wait_queue_head_t open_wait; /* Open waiters */ - wait_queue_head_t close_wait; /* Close waiters */ wait_queue_head_t delta_msr_wait; /* Modem status change */ unsigned long flags; /* TTY flags ASY_*/ unsigned char console:1, /* port is a console */ -- cgit v1.2.3 From 9b9ab1b3f0860138681862cf6e4c48be59377ef1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:55 -0400 Subject: tty: r3964: Use 
tty->read_wait waitqueue The tty core provides the read_wait waitqueue specifically for line-discipline readers to wait on; if a line discipline parks its readers on a private waitqueue instead, it may miss wakeups generated by the tty core. NB: The tty core already provides serialization for the line discipline's close() method, and guarantees no readers or writers will be using the closing instance of the line discipline, so the wakeup in r3964_close() can have no waiters; remove that wakeup completely. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_r3964.c | 10 ++++------ include/linux/n_r3964.h | 3 --- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 8b157d68a03e..6fdef921cdb7 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -276,7 +276,7 @@ static void remove_from_tx_queue(struct r3964_info *pInfo, int error_code) add_msg(pHeader->owner, R3964_MSG_ACK, pHeader->length, error_code, NULL); } - wake_up_interruptible(&pInfo->read_wait); + wake_up_interruptible(&pInfo->tty->read_wait); } spin_lock_irqsave(&pInfo->lock, flags); @@ -542,7 +542,7 @@ static void on_receive_block(struct r3964_info *pInfo) pBlock); } } - wake_up_interruptible(&pInfo->read_wait); + wake_up_interruptible(&pInfo->tty->read_wait); pInfo->state = R3964_IDLE; @@ -979,7 +979,6 @@ static int r3964_open(struct tty_struct *tty) spin_lock_init(&pInfo->lock); pInfo->tty = tty; - init_waitqueue_head(&pInfo->read_wait); pInfo->priority = R3964_MASTER; pInfo->rx_first = pInfo->rx_last = NULL; pInfo->tx_first = pInfo->tx_last = NULL; @@ -1045,7 +1044,6 @@ static void r3964_close(struct tty_struct *tty) } /* Free buffers: */ - wake_up_interruptible(&pInfo->read_wait); kfree(pInfo->rx_buf); TRACE_M("r3964_close - rx_buf kfree %p", pInfo->rx_buf); kfree(pInfo->tx_buf); @@ -1077,7 +1075,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, goto unlock; } /* block until there is a message: */ - wait_event_interruptible_tty(tty, pInfo->read_wait, + wait_event_interruptible_tty(tty, tty->read_wait, (pMsg = remove_msg(pInfo, pClient))); } @@ -1227,7 +1225,7 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, pClient = findClient(pInfo, task_pid(current)); if (pClient) { - poll_wait(file, &pInfo->read_wait, wait); + poll_wait(file, &tty->read_wait, wait); spin_lock_irqsave(&pInfo->lock, flags); pMsg = pClient->first_msg; spin_unlock_irqrestore(&pInfo->lock, flags); diff --git a/include/linux/n_r3964.h b/include/linux/n_r3964.h index 5d0b2a1dee69..e9adb4226224 100644 --- a/include/linux/n_r3964.h +++ b/include/linux/n_r3964.h @@ -152,9 +152,6 @@ struct r3964_info { unsigned char *rx_buf; /* ring buffer */ unsigned char *tx_buf; - wait_queue_head_t read_wait; - //struct wait_queue *read_wait; - struct r3964_block_header *rx_first; struct r3964_block_header *rx_last; struct r3964_block_header *tx_first; -- cgit v1.2.3 From aba24888d9af19e0bd1c7e7344fd27841611d7a8 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:56 -0400 Subject: tty: r3964: Replace/remove bogus tty lock use The tty lock is strictly for serializing tty lifetime events (open/close/hangup), and not for line discipline serialization. The tty core already provides serialization of concurrent writes to the same tty, and line discipline lifetime management (by ldisc references), so pinning the tty via tty_lock() is unnecessary and counter-productive; remove tty lock use.
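The switch to tty->read_wait above closes a lost-wakeup window: the tty core directs its wakeups at tty->read_wait, so a reader parked on the ldisc's private pInfo->read_wait could sleep through them. A userspace model of the shared-queue pattern, with a condition variable playing the waitqueue (illustrative names only; not kernel API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* One shared waitqueue, as with tty->read_wait: wakers and sleepers agree
 * on where wakeups are delivered. Signaling a different, private condvar
 * here would leave the reader asleep forever. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t read_wait = PTHREAD_COND_INITIALIZER;
static bool msg_ready;

static void *producer(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        msg_ready = true;                     /* on_receive_block() */
        pthread_cond_broadcast(&read_wait);   /* wake_up_interruptible() */
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, producer, NULL);

        pthread_mutex_lock(&lock);
        while (!msg_ready)                    /* wait_event(): recheck predicate */
                pthread_cond_wait(&read_wait, &lock);
        pthread_mutex_unlock(&lock);

        puts("reader: woken on the shared read_wait queue");
        pthread_join(t, NULL);
        return 0;
}

That covers the wakeups; the tty lock itself is a separate question, which the log takes up next.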
However, the line discipline is responsible for serializing reads (if required by the line discipline); add read_lock mutex to serialize calls of r3964_read(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_r3964.c | 20 +++++++++++++------- include/linux/n_r3964.h | 5 +++-- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 6fdef921cdb7..345111467b85 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -978,6 +978,7 @@ static int r3964_open(struct tty_struct *tty) } spin_lock_init(&pInfo->lock); + mutex_init(&pInfo->read_lock); pInfo->tty = tty; pInfo->priority = R3964_MASTER; pInfo->rx_first = pInfo->rx_last = NULL; @@ -1063,7 +1064,16 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, TRACE_L("read()"); - tty_lock(tty); + /* + * Internal serialization of reads. + */ + if (file->f_flags & O_NONBLOCK) { + if (!mutex_trylock(&pInfo->read_lock)) + return -EAGAIN; + } else { + if (mutex_lock_interruptible(&pInfo->read_lock)) + return -ERESTARTSYS; + } pClient = findClient(pInfo, task_pid(current)); if (pClient) { @@ -1075,7 +1085,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, goto unlock; } /* block until there is a message: */ - wait_event_interruptible_tty(tty, tty->read_wait, + wait_event_interruptible(tty->read_wait, (pMsg = remove_msg(pInfo, pClient))); } @@ -1105,7 +1115,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, } ret = -EPERM; unlock: - tty_unlock(tty); + mutex_unlock(&pInfo->read_lock); return ret; } @@ -1154,8 +1164,6 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file, pHeader->locks = 0; pHeader->owner = NULL; - tty_lock(tty); - pClient = findClient(pInfo, task_pid(current)); if (pClient) { pHeader->owner = pClient; @@ -1173,8 +1181,6 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file, add_tx_queue(pInfo, pHeader); trigger_transmit(pInfo); - tty_unlock(tty); - return 0; } diff --git a/include/linux/n_r3964.h b/include/linux/n_r3964.h index e9adb4226224..90a803aa42e8 100644 --- a/include/linux/n_r3964.h +++ b/include/linux/n_r3964.h @@ -161,8 +161,9 @@ struct r3964_info { unsigned char last_rx; unsigned char bcc; unsigned int blocks_in_rx_queue; - - + + struct mutex read_lock; /* serialize r3964_read */ + struct r3964_client_info *firstClient; unsigned int state; unsigned int flags; -- cgit v1.2.3 From f148d6d7b79adb42a8e7fa95bf6be5b607015f26 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:57 -0400 Subject: tty: Remove wait_event_interruptible_tty() In-tree users of wait_event_interruptible_tty() have been removed; remove. 
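The O_NONBLOCK branch added to r3964_read() above is a stock idiom worth isolating: nonblocking callers try the lock and bail out with -EAGAIN, blocking callers take it interruptibly. A userspace rendition with a pthread mutex (the -ERESTARTSYS half has no exact pthread analogue, so it is only approximated here):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t read_lock = PTHREAD_MUTEX_INITIALIZER;

/* Serialize readers; nonblocking callers must not sleep on the lock. */
static int acquire_read_lock(int nonblock)
{
        if (nonblock) {
                if (pthread_mutex_trylock(&read_lock) != 0)
                        return -EAGAIN;   /* mutex_trylock() failed */
                return 0;
        }
        /* Kernel code uses mutex_lock_interruptible() and returns
         * -ERESTARTSYS on a signal; pthreads has no direct analogue. */
        return pthread_mutex_lock(&read_lock) ? -EINTR : 0;
}

int main(void)
{
        if (acquire_read_lock(1) == 0) {
                puts("first nonblocking acquire: ok");
                /* A second nonblocking acquire fails while the lock is held. */
                printf("second acquire: %d (EAGAIN is %d)\n",
                       acquire_read_lock(1), -EAGAIN);
                pthread_mutex_unlock(&read_lock);
        }
        return 0;
}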
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 090ce2a52262..c2889f4331e1 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -655,32 +655,6 @@ extern void __lockfunc tty_unlock(struct tty_struct *tty); extern void __lockfunc tty_lock_slave(struct tty_struct *tty); extern void __lockfunc tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); -/* - * wait_event_interruptible_tty -- wait for a condition with the tty lock held - * - * The condition we are waiting for might take a long time to - * become true, or might depend on another thread taking the - * BTM. In either case, we need to drop the BTM to guarantee - * forward progress. This is a leftover from the conversion - * from the BKL and should eventually get removed as the BTM - * falls out of use. - * - * Do not use in new code. - */ -#define wait_event_interruptible_tty(tty, wq, condition) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __ret = __wait_event_interruptible_tty(tty, wq, \ - condition); \ - __ret; \ -}) - -#define __wait_event_interruptible_tty(tty, wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ - tty_unlock(tty); \ - schedule(); \ - tty_lock(tty)) #ifdef CONFIG_PROC_FS extern void proc_tty_register_driver(struct tty_driver *); -- cgit v1.2.3 From 47f82f1adf701b31d1816bf45118f8e83c02588e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 13 Oct 2015 13:29:04 +0300 Subject: dmaengine: hsu: introduce stubs for the exported functions This allows UART drivers to register HSU DMA Engine without being forced to use ifdefs. Signed-off-by: Heikki Krogerus Acked-by: Vinod Koul Acked-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/dma/hsu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index 234393a6997b..765b414eef98 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -38,11 +38,21 @@ struct hsu_dma_chip { struct hsu_dma_platform_data *pdata; }; +#if IS_ENABLED(CONFIG_HSU_DMA) /* Export to the internal users */ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr); /* Export to the platform drivers */ int hsu_dma_probe(struct hsu_dma_chip *chip); int hsu_dma_remove(struct hsu_dma_chip *chip); +#else +static inline irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, + unsigned short nr) +{ + return IRQ_NONE; +} +static inline int hsu_dma_probe(struct hsu_dma_chip *chip) { return -ENODEV; } +static inline int hsu_dma_remove(struct hsu_dma_chip *chip) { return 0; } +#endif /* CONFIG_HSU_DMA */ #endif /* _DMA_HSU_H */ -- cgit v1.2.3 From 4c97ad993d763904fc1c9e0bdc3a6dba062802a2 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 13 Oct 2015 13:29:05 +0300 Subject: dmaengine: hsu: remove platform data The number of channels can always be calculated from the IO space length, and since supplying that number is the only purpose of struct hsu_dma_platform_data, remove it.
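The hsu.h stub block in the first patch above is the standard recipe for sparing callers any ifdefs: real declarations when the driver is configured in, static inline no-ops otherwise. A self-contained sketch of the shape (CONFIG_FOO and the foo_* names are made up for illustration; the kernel's IS_ENABLED() additionally covers the =m module case, which a bare #ifdef does not):

#include <errno.h>
#include <stdio.h>

/* #define CONFIG_FOO 1 */   /* enable to use the real implementation */

struct foo_chip { int dummy; };

#ifdef CONFIG_FOO
/* Real prototypes; the implementation would live in the driver. */
int foo_probe(struct foo_chip *chip);
int foo_remove(struct foo_chip *chip);
#else
/* Stubs: callers compile, link, and run with the driver configured out. */
static inline int foo_probe(struct foo_chip *chip)
{
        (void)chip;
        return -ENODEV;
}
static inline int foo_remove(struct foo_chip *chip)
{
        (void)chip;
        return 0;
}
#endif

int main(void)
{
        struct foo_chip chip = { 0 };

        /* No #ifdef here: exactly the point of the stubs. */
        if (foo_probe(&chip) < 0)
                puts("foo DMA not available, carrying on without it");
        foo_remove(&chip);
        return 0;
}

With the stubs in place a caller invokes the functions unconditionally and simply handles the error return.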
Suggested-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Acked-by: Vinod Koul Acked-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/dma/hsu/hsu.c | 24 +++++++----------------- drivers/dma/hsu/hsu.h | 1 + drivers/dma/hsu/pci.c | 2 +- include/linux/dma/hsu.h | 1 - include/linux/platform_data/dma-hsu.h | 4 ---- 5 files changed, 9 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index 7669c7dd1e34..823ad728aecf 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -146,7 +146,7 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) u32 sr; /* Sanity check */ - if (nr >= chip->pdata->nr_channels) + if (nr >= chip->hsu->nr_channels) return IRQ_NONE; hsuc = &chip->hsu->chan[nr]; @@ -375,7 +375,6 @@ static void hsu_dma_free_chan_resources(struct dma_chan *chan) int hsu_dma_probe(struct hsu_dma_chip *chip) { struct hsu_dma *hsu; - struct hsu_dma_platform_data *pdata = chip->pdata; void __iomem *addr = chip->regs + chip->offset; unsigned short i; int ret; @@ -386,25 +385,16 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) chip->hsu = hsu; - if (!pdata) { - pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return -ENOMEM; + /* Calculate nr_channels from the IO space length */ + hsu->nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH; - chip->pdata = pdata; - - /* Guess nr_channels from the IO space length */ - pdata->nr_channels = (chip->length - chip->offset) / - HSU_DMA_CHAN_LENGTH; - } - - hsu->chan = devm_kcalloc(chip->dev, pdata->nr_channels, + hsu->chan = devm_kcalloc(chip->dev, hsu->nr_channels, sizeof(*hsu->chan), GFP_KERNEL); if (!hsu->chan) return -ENOMEM; INIT_LIST_HEAD(&hsu->dma.channels); - for (i = 0; i < pdata->nr_channels; i++) { + for (i = 0; i < hsu->nr_channels; i++) { struct hsu_dma_chan *hsuc = &hsu->chan[i]; hsuc->vchan.desc_free = hsu_dma_desc_free; @@ -440,7 +430,7 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) if (ret) return ret; - dev_info(chip->dev, "Found HSU DMA, %d channels\n", pdata->nr_channels); + dev_info(chip->dev, "Found HSU DMA, %d channels\n", hsu->nr_channels); return 0; } EXPORT_SYMBOL_GPL(hsu_dma_probe); @@ -452,7 +442,7 @@ int hsu_dma_remove(struct hsu_dma_chip *chip) dma_async_device_unregister(&hsu->dma); - for (i = 0; i < chip->pdata->nr_channels; i++) { + for (i = 0; i < hsu->nr_channels; i++) { struct hsu_dma_chan *hsuc = &hsu->chan[i]; tasklet_kill(&hsuc->vchan.task); diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index eeb9fff66967..f06579c6d548 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -107,6 +107,7 @@ struct hsu_dma { /* channels */ struct hsu_dma_chan *chan; + unsigned short nr_channels; }; static inline struct hsu_dma *to_hsu_dma(struct dma_device *ddev) diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c index 77879e6ddc4c..e2db76bd56d8 100644 --- a/drivers/dma/hsu/pci.c +++ b/drivers/dma/hsu/pci.c @@ -31,7 +31,7 @@ static irqreturn_t hsu_pci_irq(int irq, void *dev) irqreturn_t ret = IRQ_NONE; dmaisr = readl(chip->regs + HSU_PCI_DMAISR); - for (i = 0; i < chip->pdata->nr_channels; i++) { + for (i = 0; i < chip->hsu->nr_channels; i++) { if (dmaisr & 0x1) ret |= hsu_dma_irq(chip, i); dmaisr >>= 1; diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index 765b414eef98..79df69dc629c 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -35,7 +35,6 @@ struct hsu_dma_chip { unsigned int length; unsigned int offset; struct 
hsu_dma *hsu; - struct hsu_dma_platform_data *pdata; }; #if IS_ENABLED(CONFIG_HSU_DMA) diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h index 8a1f6a4920b2..3453fa655502 100644 --- a/include/linux/platform_data/dma-hsu.h +++ b/include/linux/platform_data/dma-hsu.h @@ -18,8 +18,4 @@ struct hsu_dma_slave { int chan_id; }; -struct hsu_dma_platform_data { - unsigned short nr_channels; -}; - #endif /* _PLATFORM_DATA_DMA_HSU_H */ -- cgit v1.2.3 From 2812d9e9fd94c54b0482215f579e6aa04452a322 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 20:28:42 -0400 Subject: tty: Combine SIGTTOU/SIGTTIN handling The job_control() check in n_tty_read() has nearly the same purpose and results as tty_check_change(). Both functions' purpose is to determine if the current task's pgrp is the foreground pgrp for the tty, and if not, to signal the current pgrp. Introduce __tty_check_change() which takes the signal to send and performs the shared operations for job_control() and tty_check_change(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 26 ++------------------------ drivers/tty/tty_io.c | 46 ++++++++++++++++++++++++---------------------- include/linux/tty.h | 1 + 3 files changed, 27 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index ff728d32cb53..fb8ccbfdbb30 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2138,37 +2138,15 @@ extern ssize_t redirected_tty_write(struct file *, const char __user *, static int job_control(struct tty_struct *tty, struct file *file) { - struct pid *pgrp; - /* Job control check -- must be done at start and after every sleep (POSIX.1 7.1.1.4). */ /* NOTE: not yet done after every sleep pending a thorough check of the logic of this change.
-- jlc */ /* don't stop on /dev/console */ - if (file->f_op->write == redirected_tty_write || - current->signal->tty != tty) + if (file->f_op->write == redirected_tty_write) return 0; - rcu_read_lock(); - pgrp = task_pgrp(current); - - spin_lock_irq(&tty->ctrl_lock); - if (!tty->pgrp) - printk(KERN_ERR "n_tty_read: no tty->pgrp!\n"); - else if (pgrp != tty->pgrp) { - spin_unlock_irq(&tty->ctrl_lock); - if (is_ignored(SIGTTIN) || is_current_pgrp_orphaned()) { - rcu_read_unlock(); - return -EIO; - } - kill_pgrp(pgrp, SIGTTIN, 1); - rcu_read_unlock(); - set_thread_flag(TIF_SIGPENDING); - return -ERESTARTSYS; - } - spin_unlock_irq(&tty->ctrl_lock); - rcu_read_unlock(); - return 0; + return __tty_check_change(tty, SIGTTIN); } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 2eefaa6e3e3a..aa48367a0c79 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -390,10 +390,10 @@ EXPORT_SYMBOL_GPL(tty_find_polling_driver); * Locking: ctrl_lock */ -int tty_check_change(struct tty_struct *tty) +int __tty_check_change(struct tty_struct *tty, int sig) { unsigned long flags; - struct pid *pgrp; + struct pid *pgrp, *tty_pgrp; int ret = 0; if (current->signal->tty != tty) @@ -403,33 +403,35 @@ int tty_check_change(struct tty_struct *tty) pgrp = task_pgrp(current); spin_lock_irqsave(&tty->ctrl_lock, flags); - - if (!tty->pgrp) { - printk(KERN_WARNING "tty_check_change: tty->pgrp == NULL!\n"); - goto out_unlock; - } - if (pgrp == tty->pgrp) - goto out_unlock; + tty_pgrp = tty->pgrp; spin_unlock_irqrestore(&tty->ctrl_lock, flags); - if (is_ignored(SIGTTOU)) - goto out_rcuunlock; - if (is_current_pgrp_orphaned()) { - ret = -EIO; - goto out_rcuunlock; + if (tty_pgrp && pgrp != tty->pgrp) { + if (is_ignored(sig)) { + if (sig == SIGTTIN) + ret = -EIO; + } else if (is_current_pgrp_orphaned()) + ret = -EIO; + else { + kill_pgrp(pgrp, sig, 1); + set_thread_flag(TIF_SIGPENDING); + ret = -ERESTARTSYS; + } } - kill_pgrp(pgrp, SIGTTOU, 1); - rcu_read_unlock(); - set_thread_flag(TIF_SIGPENDING); - ret = -ERESTARTSYS; - return ret; -out_unlock: - spin_unlock_irqrestore(&tty->ctrl_lock, flags); -out_rcuunlock: rcu_read_unlock(); + + if (!tty_pgrp) { + pr_warn("%s: tty_check_change: sig=%d, tty->pgrp == NULL!\n", + tty_name(tty), sig); + } + return ret; } +int tty_check_change(struct tty_struct *tty) +{ + return __tty_check_change(tty, SIGTTOU); +} EXPORT_SYMBOL(tty_check_change); static ssize_t hung_up_tty_read(struct file *file, char __user *buf, diff --git a/include/linux/tty.h b/include/linux/tty.h index c2889f4331e1..533d7f6e2481 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -423,6 +423,7 @@ extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode, const char *routine); extern const char *tty_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); +extern int __tty_check_change(struct tty_struct *tty, int sig); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); extern void stop_tty(struct tty_struct *tty); -- cgit v1.2.3 From e176058f0de53c2346734e5254835e0045364001 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 17 Oct 2015 16:36:23 -0400 Subject: tty: Abstract tty buffer work Introduce API functions to restart and cancel tty buffer work, rather than manipulate buffer work directly. 
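Before the buffer-work patch's diff, a footnote on the __tty_check_change() consolidation just shown: the decision it centralizes, whether the caller's process group is the tty's foreground group and which of SIGTTIN/SIGTTOU applies, is observable from userspace. A small POSIX demo (not kernel code); run it in the foreground and again with a trailing & to see both branches:

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Userspace view of the check __tty_check_change() performs: compare the
 * caller's process group with the tty's foreground process group. The
 * kernel sends SIGTTIN/SIGTTOU when they differ (unless the signal is
 * ignored or the pgrp is orphaned); here we only report the state. */
int main(void)
{
        pid_t fg = tcgetpgrp(STDIN_FILENO);
        pid_t mine = getpgrp();

        if (fg < 0) {
                perror("tcgetpgrp");
                return 1;
        }
        if (fg == mine)
                printf("foreground pgrp (%d): reads/writes proceed\n",
                       (int)mine);
        else
                printf("background pgrp %d (fg is %d): a read would raise "
                       "SIGTTIN, a termios change SIGTTOU\n",
                       (int)mine, (int)fg);
        return 0;
}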
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 2 +- drivers/tty/tty_buffer.c | 10 ++++++++++ drivers/tty/tty_io.c | 2 +- drivers/tty/tty_port.c | 2 +- include/linux/tty.h | 2 ++ 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index fb8ccbfdbb30..13844261cd5f 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -201,7 +201,7 @@ static void n_tty_kick_worker(struct tty_struct *tty) */ WARN_RATELIMIT(test_bit(TTY_LDISC_HALTED, &tty->flags), "scheduling buffer work for halted ldisc\n"); - queue_work(system_unbound_wq, &tty->port->buf.work); + tty_buffer_restart_work(tty->port); } } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index a660ab181cca..7cc16db37e0e 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -587,3 +587,13 @@ void tty_buffer_set_lock_subclass(struct tty_port *port) { lockdep_set_subclass(&port->buf.lock, TTY_LOCK_SLAVE); } + +bool tty_buffer_restart_work(struct tty_port *port) +{ + return queue_work(system_unbound_wq, &port->buf.work); +} + +bool tty_buffer_cancel_work(struct tty_port *port) +{ + return cancel_work_sync(&port->buf.work); +} diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 173fdeba0987..0c41dbcb90b8 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1689,7 +1689,7 @@ static void release_tty(struct tty_struct *tty, int idx) tty->port->itty = NULL; if (tty->link) tty->link->port->itty = NULL; - cancel_work_sync(&tty->port->buf.work); + tty_buffer_cancel_work(tty->port); tty_kref_put(tty->link); tty_kref_put(tty); diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index e04a8cfb16f7..482f33f20043 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -130,7 +130,7 @@ EXPORT_SYMBOL(tty_port_free_xmit_buf); */ void tty_port_destroy(struct tty_port *port) { - cancel_work_sync(&port->buf.work); + tty_buffer_cancel_work(port); tty_buffer_free_all(port); } EXPORT_SYMBOL(tty_port_destroy); diff --git a/include/linux/tty.h b/include/linux/tty.h index 533d7f6e2481..5b04b0a5375b 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -467,6 +467,8 @@ extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); extern void tty_buffer_init(struct tty_port *port); extern void tty_buffer_set_lock_subclass(struct tty_port *port); +extern bool tty_buffer_restart_work(struct tty_port *port); +extern bool tty_buffer_cancel_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, -- cgit v1.2.3 From 52d63671c15bd3dfcd72f4ea324b338d1309db97 Mon Sep 17 00:00:00 2001 From: Gabriel Laskar Date: Tue, 6 Oct 2015 16:27:36 +0200 Subject: msm: remove unused header Signed-off-by: Gabriel Laskar Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/msm_mdp.h | 79 ------------------------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 include/linux/msm_mdp.h (limited to 'include/linux') diff --git a/include/linux/msm_mdp.h b/include/linux/msm_mdp.h deleted file mode 100644 index fe722c1fb61d..000000000000 --- a/include/linux/msm_mdp.h +++ /dev/null @@ -1,79 +0,0 @@ -/* include/linux/msm_mdp.h - * - * Copyright (C) 2007 Google Incorporated - * - * This software is licensed 
under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -#ifndef _MSM_MDP_H_ -#define _MSM_MDP_H_ - -#include - -#define MSMFB_IOCTL_MAGIC 'm' -#define MSMFB_GRP_DISP _IOW(MSMFB_IOCTL_MAGIC, 1, unsigned int) -#define MSMFB_BLIT _IOW(MSMFB_IOCTL_MAGIC, 2, unsigned int) - -enum { - MDP_RGB_565, /* RGB 565 planar */ - MDP_XRGB_8888, /* RGB 888 padded */ - MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planar w/ Cb is in MSB */ - MDP_ARGB_8888, /* ARGB 888 */ - MDP_RGB_888, /* RGB 888 planar */ - MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planar w/ Cr is in MSB */ - MDP_YCRYCB_H2V1, /* YCrYCb interleave */ - MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ - MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ - MDP_RGBA_8888, /* ARGB 888 */ - MDP_BGRA_8888, /* ABGR 888 */ - MDP_RGBX_8888, /* RGBX 888 */ - MDP_IMGTYPE_LIMIT /* Non valid image type after this enum */ -}; - -enum { - PMEM_IMG, - FB_IMG, -}; - -/* flag values */ -#define MDP_ROT_NOP 0 -#define MDP_FLIP_LR 0x1 -#define MDP_FLIP_UD 0x2 -#define MDP_ROT_90 0x4 -#define MDP_ROT_180 (MDP_FLIP_UD|MDP_FLIP_LR) -#define MDP_ROT_270 (MDP_ROT_90|MDP_FLIP_UD|MDP_FLIP_LR) -#define MDP_DITHER 0x8 -#define MDP_BLUR 0x10 - -#define MDP_TRANSP_NOP 0xffffffff -#define MDP_ALPHA_NOP 0xff - -struct mdp_rect { - u32 x, y, w, h; -}; - -struct mdp_img { - u32 width, height, format, offset; - int memory_id; /* the file descriptor */ -}; - -struct mdp_blit_req { - struct mdp_img src; - struct mdp_img dst; - struct mdp_rect src_rect; - struct mdp_rect dst_rect; - u32 alpha, transp_mask, flags; -}; - -struct mdp_blit_req_list { - u32 count; - struct mdp_blit_req req[]; -}; - -#endif /* _MSM_MDP_H_ */ -- cgit v1.2.3 From ad1bfe410e91189522514ea784668dc75a4e64c4 Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Sun, 11 Oct 2015 01:00:58 +0300 Subject: vme: 8-bit status/id takes 256 values, not 255 Fixes an off by one array size. 
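Spelling the fix out: a u8 status/id spans 0..255, i.e. 256 distinct values, so an array indexed by it needs 256 elements; with callback[255], id 255 writes one element past the end. A tiny demonstration of the corrected sizing:

#include <stdint.h>
#include <stdio.h>

#define VME_NUM_STATUSID 256   /* a u8 status/id takes 2^8 = 256 values */

static int callback[VME_NUM_STATUSID];

int main(void)
{
        /* Every possible 8-bit status/id must be a valid index. */
        for (unsigned int id = 0; id <= UINT8_MAX; id++)
                callback[id] = 1;   /* with a [255] array, id == 255 overflows */

        printf("%zu slots cover ids 0..%u\n",
               sizeof(callback) / sizeof(callback[0]), UINT8_MAX);
        return 0;
}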
Signed-off-by: Dmitry Kalinkin Signed-off-by: Greg Kroah-Hartman --- drivers/vme/vme_bridge.h | 4 +++- include/linux/vme.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/vme/vme_bridge.h b/drivers/vme/vme_bridge.h index 934949abd745..a3ef63b336e4 100644 --- a/drivers/vme/vme_bridge.h +++ b/drivers/vme/vme_bridge.h @@ -1,6 +1,8 @@ #ifndef _VME_BRIDGE_H_ #define _VME_BRIDGE_H_ +#include + #define VME_CRCSR_BUF_SIZE (508*1024) /* * Resource structures @@ -88,7 +90,7 @@ struct vme_callback { struct vme_irq { int count; - struct vme_callback callback[255]; + struct vme_callback callback[VME_NUM_STATUSID]; }; /* Allow 16 characters for name (including null character) */ diff --git a/include/linux/vme.h b/include/linux/vme.h index c0131358f351..71e4a6dec5ac 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -81,6 +81,9 @@ struct vme_resource { extern struct bus_type vme_bus_type; +/* Number of VME interrupt vectors */ +#define VME_NUM_STATUSID 256 + /* VME_MAX_BRIDGES comes from the type of vme_bus_numbers */ #define VME_MAX_BRIDGES (sizeof(unsigned int)*8) #define VME_MAX_SLOTS 32 -- cgit v1.2.3 From c23fe83138ed7b11ad763cbe8bf98e5378c04bd6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sun, 18 Oct 2015 22:43:19 +0530 Subject: debugfs: Add debugfs_create_ulong() Add debugfs_create_ulong() for the users of type 'unsigned long'. These will be 32 bits long on a 32 bit machine and 64 bits long on a 64 bit machine. Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/debugfs.h | 2 ++ 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 8450549d54a9..d2ba12e23ed9 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -243,6 +243,54 @@ struct dentry *debugfs_create_u64(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u64); +static int debugfs_ulong_set(void *data, u64 val) +{ + *(unsigned long *)data = val; + return 0; +} + +static int debugfs_ulong_get(void *data, u64 *val) +{ + *val = *(unsigned long *)data; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n"); + +/** + * debugfs_create_ulong - create a debugfs file that is used to read and write + * an unsigned long value. + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + * + * This function creates a file in debugfs with the given name that + * contains the value of the variable @value. If the @mode variable is so + * set, it can be read from, and written to. + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the debugfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, %NULL will be returned. 
+ * + * If debugfs is not enabled in the kernel, the value -%ENODEV will be + * returned. It is not wise to check for this value, but rather, check for + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling + * code. + */ +struct dentry *debugfs_create_ulong(const char *name, umode_t mode, + struct dentry *parent, unsigned long *value) +{ + return debugfs_create_mode(name, mode, parent, value, &fops_ulong, + &fops_ulong_ro, &fops_ulong_wo); +} +EXPORT_SYMBOL_GPL(debugfs_create_ulong); + DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n"); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 8321fe3058d6..19c066dce1da 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -79,6 +79,8 @@ struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value); struct dentry *debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value); +struct dentry *debugfs_create_ulong(const char *name, umode_t mode, + struct dentry *parent, unsigned long *value); struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value); struct dentry *debugfs_create_x16(const char *name, umode_t mode, -- cgit v1.2.3 From 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sun, 18 Oct 2015 17:02:56 -0400 Subject: ext4, jbd2: ensure entering into panic after recording an error in superblock If an EXT4 filesystem uses JBD2 journaling and an error occurs, the journaling is aborted first, the error number is recorded in the JBD2 superblock and, finally, the system enters the panic state under the "errors=panic" option. But in rare cases this sequence gets twisted as in the figure below, and the system enters the panic state (which, in a mobile environment, means a system reset) before the error has been recorded in the journal superblock. In that case, e2fsck cannot recognize that the filesystem failure occurred in the previous run, and the corruption is never fixed.
Task A                                Task B
ext4_handle_error()
-> jbd2_journal_abort()
  -> __journal_abort_soft()
    -> __jbd2_journal_abort_hard()
    | -> journal->j_flags |= JBD2_ABORT;
    |
    |                                 __ext4_abort()
    |                                 -> jbd2_journal_abort()
    |                                 |  -> __journal_abort_soft()
    |                                 |     -> if (journal->j_flags & JBD2_ABORT)
    |                                 |             return;
    |                                 -> panic()
    |
    -> jbd2_journal_update_sb_errno()

Tested-by: Hobin Woo Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 12 ++++++++++-- fs/jbd2/journal.c | 6 +++++- include/linux/jbd2.h | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 49fa4d48b6a4..639613fe20aa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -388,9 +388,13 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= MS_RDONLY; } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); + } } #define ext4_error_ratelimit(sb) \ @@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs panic from previous error\n"); + } } void __ext4_msg(struct super_block *sb, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eb8a7546e8a4..81e622681c82 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2072,8 +2072,12 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) + if (errno) { jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); + } } /** diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b5ca2314cace..65407f6c9120 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1112,6 +1112,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer -- cgit v1.2.3 From 37c1c04cca920de8a68285751b2c7b3d937ad50c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 22 Apr 2015 19:36:06 +0300 Subject: sysfs: added __compat_only_sysfs_link_entry_to_kobj() Added a new function __compat_only_sysfs_link_entry_to_kobj() that adds a symlink from an attribute or group to a kobject. This is needed for maintaining backwards compatibility with PPI attributes in the TPM driver.
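A sketch of how a driver might use the new call, modeled on the TPM PPI case the log mentions (hypothetical function and variable names, not a verbatim excerpt from the TPM driver):

/* Hypothetical caller: create, under the old parent directory, a symlink
 * "ppi" pointing at the group of that name now registered on the chip's
 * own kobject, so the legacy sysfs path keeps working. */
static int chip_add_compat_ppi_link(struct kobject *old_parent,
                                    struct kobject *chip_kobj)
{
        return __compat_only_sysfs_link_entry_to_kobj(old_parent, chip_kobj,
                                                      "ppi");
}

The link preserves the old sysfs path while the attributes themselves live on the new kobject.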
Signed-off-by: Jarkko Sakkinen Signed-off-by: Peter Huewe --- fs/sysfs/group.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 11 +++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 39a019936768..e1236594fffe 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -352,3 +352,47 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, } } EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); + +/** + * __compat_only_sysfs_link_entry_to_kobj - add a symlink to a kobject pointing + * to a group or an attribute + * @kobj: The kobject containing the group. + * @target_kobj: The target kobject. + * @target_name: The name of the target group or attribute. + */ +int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name) +{ + struct kernfs_node *target; + struct kernfs_node *entry; + struct kernfs_node *link; + + /* + * We don't own @target_kobj and it may be removed at any time. + * Synchronize using sysfs_symlink_target_lock. See sysfs_remove_dir() + * for details. + */ + spin_lock(&sysfs_symlink_target_lock); + target = target_kobj->sd; + if (target) + kernfs_get(target); + spin_unlock(&sysfs_symlink_target_lock); + if (!target) + return -ENOENT; + + entry = kernfs_find_and_get(target_kobj->sd, target_name); + if (!entry) { + kernfs_put(target); + return -ENOENT; + } + + link = kernfs_create_link(kobj->sd, target_name, entry); + if (IS_ERR(link) && PTR_ERR(link) == -EEXIST) + sysfs_warn_dup(kobj->sd, target_name); + + kernfs_put(entry); + kernfs_put(target); + return IS_ERR(link) ? PTR_ERR(link) : 0; +} +EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9f65758311a4..ea090eaf468c 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -268,6 +268,9 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name); void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name); +int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); @@ -451,6 +454,14 @@ static inline void sysfs_remove_link_from_group(struct kobject *kobj, { } +static inline int __compat_only_sysfs_link_entry_to_kobj( + struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name) +{ + return 0; +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { -- cgit v1.2.3 From 954650efb79f99d5c817c121bb0a7c6c53362048 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 30 May 2015 08:09:04 +0300 Subject: tpm: seal/unseal for TPM 2.0 Added a tpm_seal_trusted() and tpm_unseal_trusted() API for sealing trusted keys. This patch implements basic sealing and unsealing functionality for TPM 2.0: * Seal with a parent key using a 20 byte auth value. * Unseal with a parent key using a 20 byte auth value.
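In the implementation that follows, every command body is assembled with the tpm_buf_append_u16()/tpm_buf_append_u32() helpers because TPM 2.0 marshals all integers big-endian on the wire. A standalone userspace model of the TPMS_AUTH_COMMAND layout produced by tpm2_buf_append_auth() below (the TPM2_RS_PW handle value is taken from the TPM 2.0 specification; the fixed-size buffer and missing overflow check are simplifications of the kernel's tpm_buf):

#include <arpa/inet.h>   /* htons/htonl for big-endian marshaling */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct buf { uint8_t data[512]; size_t len; };

static void append(struct buf *b, const void *p, size_t n)
{
        memcpy(b->data + b->len, p, n);  /* no overflow check in this model */
        b->len += n;
}
static void append_u16(struct buf *b, uint16_t v) { v = htons(v); append(b, &v, 2); }
static void append_u32(struct buf *b, uint32_t v) { v = htonl(v); append(b, &v, 4); }

int main(void)
{
        struct buf b = { .len = 0 };
        const uint32_t TPM2_RS_PW = 0x40000009;   /* password session handle */
        const uint16_t hmac_len = 20;             /* 20-byte auth value */
        uint8_t hmac[20] = { 0 };

        /* TPMS_AUTH_COMMAND as tpm2_buf_append_auth() lays it out:
         * u32 size, u32 handle, u16 nonce_len, u8 attrs, u16 hmac_len, hmac */
        append_u32(&b, 9 + 0 + hmac_len);  /* 9 fixed bytes + nonce + hmac */
        append_u32(&b, TPM2_RS_PW);
        append_u16(&b, 0);                 /* no nonce */
        b.data[b.len++] = 0;               /* session attributes */
        append_u16(&b, hmac_len);
        append(&b, hmac, hmac_len);

        printf("auth area: %zu bytes (expect %u)\n",
               b.len, (unsigned)(4 + 9 + hmac_len));
        return 0;
}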
Signed-off-by: Jarkko Sakkinen Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-interface.c | 76 ++++++++++++ drivers/char/tpm/tpm.h | 15 ++- drivers/char/tpm/tpm2-cmd.c | 250 ++++++++++++++++++++++++++++++++++++++- include/keys/trusted-type.h | 2 +- include/linux/tpm.h | 26 ++++ 5 files changed, 366 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index e85d3416d899..c50637db3a8a 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -665,6 +665,30 @@ int tpm_pcr_read_dev(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) return rc; } +/** + * tpm_is_tpm2 - is the chip a TPM2 chip? + * @chip_num: tpm idx # or ANY + * + * Returns < 0 on error, and 1 or 0 on success depending whether the chip + * is a TPM2 chip. + */ +int tpm_is_tpm2(u32 chip_num) +{ + struct tpm_chip *chip; + int rc; + + chip = tpm_chip_find_get(chip_num); + if (chip == NULL) + return -ENODEV; + + rc = (chip->flags & TPM_CHIP_FLAG_TPM2) != 0; + + tpm_chip_put(chip); + + return rc; +} +EXPORT_SYMBOL_GPL(tpm_is_tpm2); + /** * tpm_pcr_read - read a pcr value * @chip_num: tpm idx # or ANY @@ -1021,6 +1045,58 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) } EXPORT_SYMBOL_GPL(tpm_get_random); +/** + * tpm_seal_trusted() - seal a trusted key + * @chip_num: A specific chip number for the request or TPM_ANY_NUM + * @options: authentication values and other options + * @payload: the key data in clear and encrypted form + * + * Returns < 0 on error and 0 on success. At the moment, only TPM 2.0 chips + * are supported. + */ +int tpm_seal_trusted(u32 chip_num, struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + struct tpm_chip *chip; + int rc; + + chip = tpm_chip_find_get(chip_num); + if (chip == NULL || !(chip->flags & TPM_CHIP_FLAG_TPM2)) + return -ENODEV; + + rc = tpm2_seal_trusted(chip, payload, options); + + tpm_chip_put(chip); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_seal_trusted); + +/** + * tpm_unseal_trusted() - unseal a trusted key + * @chip_num: A specific chip number for the request or TPM_ANY_NUM + * @options: authentication values and other options + * @payload: the key data in clear and encrypted form + * + * Returns < 0 on error and 0 on success. At the moment, only TPM 2.0 chips + * are supported. 
+ */ +int tpm_unseal_trusted(u32 chip_num, struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + struct tpm_chip *chip; + int rc; + + chip = tpm_chip_find_get(chip_num); + if (chip == NULL || !(chip->flags & TPM_CHIP_FLAG_TPM2)) + return -ENODEV; + + rc = tpm2_unseal_trusted(chip, payload, options); + + tpm_chip_put(chip); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_unseal_trusted); + static int __init tpm_init(void) { int rc; diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index cb46f6267af2..a4257a32964f 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -90,6 +90,9 @@ enum tpm2_return_codes { enum tpm2_algorithms { TPM2_ALG_SHA1 = 0x0004, + TPM2_ALG_KEYEDHASH = 0x0008, + TPM2_ALG_SHA256 = 0x000B, + TPM2_ALG_NULL = 0x0010 }; enum tpm2_command_codes { @@ -97,6 +100,10 @@ enum tpm2_command_codes { TPM2_CC_SELF_TEST = 0x0143, TPM2_CC_STARTUP = 0x0144, TPM2_CC_SHUTDOWN = 0x0145, + TPM2_CC_CREATE = 0x0153, + TPM2_CC_LOAD = 0x0157, + TPM2_CC_UNSEAL = 0x015E, + TPM2_CC_FLUSH_CONTEXT = 0x0165, TPM2_CC_GET_CAPABILITY = 0x017A, TPM2_CC_GET_RANDOM = 0x017B, TPM2_CC_PCR_READ = 0x017E, @@ -407,7 +414,7 @@ struct tpm_buf { u8 *data; }; -static inline void tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal) +static inline int tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal) { struct tpm_input_header *head; @@ -527,6 +534,12 @@ static inline void tpm_add_ppi(struct tpm_chip *chip) int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf); int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash); int tpm2_get_random(struct tpm_chip *chip, u8 *out, size_t max); +int tpm2_seal_trusted(struct tpm_chip *chip, + struct trusted_key_payload *payload, + struct trusted_key_options *options); +int tpm2_unseal_trusted(struct tpm_chip *chip, + struct trusted_key_payload *payload, + struct trusted_key_options *options); ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, const char *desc); diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 011909a9be96..bd7039fafa8a 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Intel Corporation + * Copyright (C) 2014, 2015 Intel Corporation * * Authors: * Jarkko Sakkinen @@ -16,6 +16,11 @@ */ #include "tpm.h" +#include + +enum tpm2_object_attributes { + TPM2_ATTR_USER_WITH_AUTH = BIT(6), +}; struct tpm2_startup_in { __be16 startup_type; @@ -380,6 +385,249 @@ static const struct tpm_input_header tpm2_get_tpm_pt_header = { .ordinal = cpu_to_be32(TPM2_CC_GET_CAPABILITY) }; +/** + * Append TPMS_AUTH_COMMAND to the buffer. The buffer must be allocated with + * tpm_buf_alloc(). 
+ * + * @param buf: an allocated tpm_buf instance + * @param session_handle: the session handle + * @param nonce: the session nonce, may be NULL if not used + * @param nonce_len: the session nonce length, may be 0 if not used + * @param attributes: the session attributes + * @param hmac: the session HMAC or password, may be NULL if not used + * @param hmac_len: the session HMAC or password length, may be 0 if not used + */ +static void tpm2_buf_append_auth(struct tpm_buf *buf, u32 session_handle, + const u8 *nonce, u16 nonce_len, + u8 attributes, + const u8 *hmac, u16 hmac_len) +{ + tpm_buf_append_u32(buf, 9 + nonce_len + hmac_len); + tpm_buf_append_u32(buf, session_handle); + tpm_buf_append_u16(buf, nonce_len); + + if (nonce && nonce_len) + tpm_buf_append(buf, nonce, nonce_len); + + tpm_buf_append_u8(buf, attributes); + tpm_buf_append_u16(buf, hmac_len); + + if (hmac && hmac_len) + tpm_buf_append(buf, hmac, hmac_len); +} + +/** + * tpm2_seal_trusted() - seal a trusted key + * @chip: the TPM chip to use + * @options: authentication values and other options + * @payload: the key data in clear and encrypted form + * + * Returns < 0 on error and 0 on success. + */ +int tpm2_seal_trusted(struct tpm_chip *chip, + struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + unsigned int blob_len; + struct tpm_buf buf; + int rc; + + rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE); + if (rc) + return rc; + + tpm_buf_append_u32(&buf, options->keyhandle); + tpm2_buf_append_auth(&buf, TPM2_RS_PW, + NULL /* nonce */, 0, + 0 /* session_attributes */, + options->keyauth /* hmac */, + TPM_DIGEST_SIZE); + + /* sensitive */ + tpm_buf_append_u16(&buf, 4 + TPM_DIGEST_SIZE + payload->key_len); + + tpm_buf_append_u16(&buf, TPM_DIGEST_SIZE); + tpm_buf_append(&buf, options->blobauth, TPM_DIGEST_SIZE); + tpm_buf_append_u16(&buf, payload->key_len); + tpm_buf_append(&buf, payload->key, payload->key_len); + + /* public */ + tpm_buf_append_u16(&buf, 14); + + tpm_buf_append_u16(&buf, TPM2_ALG_KEYEDHASH); + tpm_buf_append_u16(&buf, TPM2_ALG_SHA256); + tpm_buf_append_u32(&buf, TPM2_ATTR_USER_WITH_AUTH); + tpm_buf_append_u16(&buf, 0); /* policy digest size */ + tpm_buf_append_u16(&buf, TPM2_ALG_NULL); + tpm_buf_append_u16(&buf, 0); + + /* outside info */ + tpm_buf_append_u16(&buf, 0); + + /* creation PCR */ + tpm_buf_append_u32(&buf, 0); + + if (buf.flags & TPM_BUF_OVERFLOW) { + rc = -E2BIG; + goto out; + } + + rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "sealing data"); + if (rc) + goto out; + + blob_len = be32_to_cpup((__be32 *) &buf.data[TPM_HEADER_SIZE]); + if (blob_len > MAX_BLOB_SIZE) { + rc = -E2BIG; + goto out; + } + + memcpy(payload->blob, &buf.data[TPM_HEADER_SIZE + 4], blob_len); + payload->blob_len = blob_len; + +out: + tpm_buf_destroy(&buf); + + if (rc > 0) + rc = -EPERM; + + return rc; +} + +static int tpm2_load(struct tpm_chip *chip, + struct trusted_key_payload *payload, + struct trusted_key_options *options, + u32 *blob_handle) +{ + struct tpm_buf buf; + unsigned int private_len; + unsigned int public_len; + unsigned int blob_len; + int rc; + + private_len = be16_to_cpup((__be16 *) &payload->blob[0]); + if (private_len > (payload->blob_len - 2)) + return -E2BIG; + + public_len = be16_to_cpup((__be16 *) &payload->blob[2 + private_len]); + blob_len = private_len + public_len + 4; + if (blob_len > payload->blob_len) + return -E2BIG; + + rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_LOAD); + if (rc) + return rc; + + tpm_buf_append_u32(&buf, options->keyhandle); +
tpm2_buf_append_auth(&buf, TPM2_RS_PW, + NULL /* nonce */, 0, + 0 /* session_attributes */, + options->keyauth /* hmac */, + TPM_DIGEST_SIZE); + + tpm_buf_append(&buf, payload->blob, blob_len); + + if (buf.flags & TPM_BUF_OVERFLOW) { + rc = -E2BIG; + goto out; + } + + rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "loading blob"); + if (!rc) + *blob_handle = be32_to_cpup( + (__be32 *) &buf.data[TPM_HEADER_SIZE]); + +out: + tpm_buf_destroy(&buf); + + if (rc > 0) + rc = -EPERM; + + return rc; +} + +static void tpm2_flush_context(struct tpm_chip *chip, u32 handle) +{ + struct tpm_buf buf; + int rc; + + rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_FLUSH_CONTEXT); + if (rc) { + dev_warn(chip->pdev, "0x%08x was not flushed, out of memory\n", + handle); + return; + } + + tpm_buf_append_u32(&buf, handle); + + rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "flushing context"); + if (rc) + dev_warn(chip->pdev, "0x%08x was not flushed, rc=%d\n", handle, + rc); + + tpm_buf_destroy(&buf); +} + +static int tpm2_unseal(struct tpm_chip *chip, + struct trusted_key_payload *payload, + struct trusted_key_options *options, + u32 blob_handle) +{ + struct tpm_buf buf; + int rc; + + rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_UNSEAL); + if (rc) + return rc; + + tpm_buf_append_u32(&buf, blob_handle); + tpm2_buf_append_auth(&buf, TPM2_RS_PW, + NULL /* nonce */, 0, + 0 /* session_attributes */, + options->blobauth /* hmac */, + TPM_DIGEST_SIZE); + + rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "unsealing"); + if (rc > 0) + rc = -EPERM; + + if (!rc) { + payload->key_len = be16_to_cpup( + (__be16 *) &buf.data[TPM_HEADER_SIZE + 4]); + + memcpy(payload->key, &buf.data[TPM_HEADER_SIZE + 6], + payload->key_len); + } + + tpm_buf_destroy(&buf); + return rc; +} + +/** + * tpm2_unseal_trusted() - unseal a trusted key + * @chip: the TPM chip to use + * @options: authentication values and other options + * @payload: the key data in clear and encrypted form + * + * Returns < 0 on error and 0 on success. + */ +int tpm2_unseal_trusted(struct tpm_chip *chip, + struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + u32 blob_handle; + int rc; + + rc = tpm2_load(chip, payload, options, &blob_handle); + if (rc) + return rc; + + rc = tpm2_unseal(chip, payload, options, blob_handle); + + tpm2_flush_context(chip, blob_handle); + + return rc; +} + /** * tpm2_get_tpm_pt() - get value of a TPM_CAP_TPM_PROPERTIES type property * @chip: TPM chip to use.
diff --git a/include/keys/trusted-type.h b/include/keys/trusted-type.h index c91651f91687..f91ecd9d1bb1 100644 --- a/include/keys/trusted-type.h +++ b/include/keys/trusted-type.h @@ -16,7 +16,7 @@ #define MIN_KEY_SIZE 32 #define MAX_KEY_SIZE 128 -#define MAX_BLOB_SIZE 320 +#define MAX_BLOB_SIZE 512 #define MAX_PCRINFO_SIZE 64 struct trusted_key_payload { diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8350c538b486..706e63eea080 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -30,6 +30,8 @@ #define TPM_ANY_NUM 0xFFFF struct tpm_chip; +struct trusted_key_payload; +struct trusted_key_options; struct tpm_class_ops { const u8 req_complete_mask; @@ -46,11 +48,22 @@ struct tpm_class_ops { #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) +extern int tpm_is_tpm2(u32 chip_num); extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); extern int tpm_send(u32 chip_num, void *cmd, size_t buflen); extern int tpm_get_random(u32 chip_num, u8 *data, size_t max); +extern int tpm_seal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options); +extern int tpm_unseal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options); #else +static inline int tpm_is_tpm2(u32 chip_num) +{ + return -ENODEV; +} static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { return -ENODEV; } @@ -63,5 +76,18 @@ static inline int tpm_send(u32 chip_num, void *cmd, size_t buflen) { static inline int tpm_get_random(u32 chip_num, u8 *data, size_t max) { return -ENODEV; } + +static inline int tpm_seal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + return -ENODEV; +} +static inline int tpm_unseal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + return -ENODEV; +} #endif #endif -- cgit v1.2.3 From c8d46ece44458a2088896d6fcae123a72bdfd429 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 19 Oct 2015 09:17:40 +0100 Subject: ARM: 8446/1: amba: Remove unused callbacks for legacy system PM Signed-off-by: Ulf Hansson Signed-off-by: Russell King --- include/linux/amba/bus.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 50fc66868402..9006c4e75cf7 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -41,8 +41,6 @@ struct amba_driver { int (*probe)(struct amba_device *, const struct amba_id *); int (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); - int (*suspend)(struct amba_device *, pm_message_t); - int (*resume)(struct amba_device *); const struct amba_id *id_table; }; -- cgit v1.2.3 From da23ac1e40ce844d1a9553906bdacce160af76f6 Mon Sep 17 00:00:00 2001 From: "Subhransu S. Prusty" Date: Tue, 29 Sep 2015 13:56:10 +0530 Subject: ALSA: hda - Add hdaudio support to DEVTABLE For generating modalias entries automatically, move the definition of struct hda_device_id to linux/mod_devicetable.h and add the handling of this record in the file2alias helper. The new modalias is represented as a combination of the vendor id, revision id, and api version, as "hdaudio:vNrNaN". This patch itself doesn't convert the existing modaliases. Since they were added manually, this patch won't give any regression by itself at this point.
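As an illustration of the new record (not part of this patch; the codec id, revision, and name below are made up), a driver built on struct hda_device_id would declare a match table like this and let MODULE_DEVICE_TABLE() emit the modalias entries:

/* Hypothetical example table for the "hdaudio" bus */
static const struct hda_device_id example_hda_ids[] = {
	{ .vendor_id = 0x10ec0262, .rev_id = 0x100302,
	  .api_version = 0, .name = "Example codec" },
	{ } /* terminator */
};
MODULE_DEVICE_TABLE(hdaudio, example_hda_ids);

With the file2alias handling added below, zero-valued fields become "*" wildcards, so the entry above should yield a modalias along the lines of "hdaudio:v10EC0262r00100302a*".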
[Modified the modalias format to adapt the api_version field, and drop invalid ANY_ID definition by tiwai] Signed-off-by: Subhransu S. Prusty Reviewed-by: Vinod Koul Tested-by: Subhransu S Prusty Signed-off-by: Takashi Iwai --- include/linux/mod_devicetable.h | 8 ++++++++ include/sound/hdaudio.h | 12 +----------- scripts/mod/devicetable-offsets.c | 5 +++++ scripts/mod/file2alias.c | 17 +++++++++++++++++ sound/hda/hda_bus_type.c | 1 + 5 files changed, 32 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 688997a24aad..00825672d256 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -219,6 +219,14 @@ struct serio_device_id { __u8 proto; }; +struct hda_device_id { + __u32 vendor_id; + __u32 rev_id; + __u8 api_version; + const char *name; + unsigned long driver_data; +}; + /* * Struct used for matching a device */ diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index b35bf59a1ecc..ddca48eb02e0 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -21,23 +21,13 @@ struct hdac_stream; struct hdac_device; struct hdac_driver; struct hdac_widget_tree; +struct hda_device_id; /* * exported bus type */ extern struct bus_type snd_hda_bus_type; -/* - * HDA device table - */ -struct hda_device_id { - __u32 vendor_id; - __u32 rev_id; - __u8 api_version; - const char *name; - unsigned long driver_data; -}; - /* * generic arrays */ diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e70fcd12eeeb..e1a5110bd63b 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -196,5 +196,10 @@ int main(void) DEVID_FIELD(ulpi_device_id, vendor); DEVID_FIELD(ulpi_device_id, product); + DEVID(hda_device_id); + DEVID_FIELD(hda_device_id, vendor_id); + DEVID_FIELD(hda_device_id, rev_id); + DEVID_FIELD(hda_device_id, api_version); + return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 5f2088209132..fc51d4bff3f8 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1250,6 +1250,23 @@ static int do_ulpi_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("ulpi", ulpi_device_id, do_ulpi_entry); +/* Looks like: hdaudio:vNrNaN */ +static int do_hda_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, hda_device_id, vendor_id); + DEF_FIELD(symval, hda_device_id, rev_id); + DEF_FIELD(symval, hda_device_id, api_version); + + strcpy(alias, "hdaudio:"); + ADD(alias, "v", vendor_id != 0, vendor_id); + ADD(alias, "r", rev_id != 0, rev_id); + ADD(alias, "a", api_version != 0, api_version); + + add_wildcard(alias); + return 1; +} +ADD_TO_DEVTABLE("hdaudio", hda_device_id, do_hda_entry); + /* Does namelen bytes of name exactly match the symbol? 
*/ static bool sym_is(const char *name, unsigned namelen, const char *symbol) { diff --git a/sound/hda/hda_bus_type.c b/sound/hda/hda_bus_type.c index 89c2711baaaf..bcb1a79eec38 100644 --- a/sound/hda/hda_bus_type.c +++ b/sound/hda/hda_bus_type.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 233e7f267e580fefdeb36628b7efe8bfe056d27c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Oct 2015 16:51:31 +0200 Subject: stop_machine: Ensure that a queued callback will be called before cpu_stop_park() cpu_stop_queue_work() checks stopper->enabled before it queues the work, but ->enabled == T can only guarantee cpu_stop_signal_done() if we race with cpu_down(). This is not enough for stop_two_cpus() or stop_machine(); they will deadlock if multi_cpu_stop() is not called by one of the target CPUs. stop_machine()/stop_cpus() are fine; they rely on stop_cpus_mutex. But stop_two_cpus() has to check cpu_active() to avoid the same race with hotplug, and this check is very unobvious and probably not even correct if we race with cpu_up(). Change the cpu_down() pass to clear ->enabled before cpu_stopper_thread() flushes the pending ->works and returns with KTHREAD_SHOULD_PARK set. Note also that smpboot_thread_call() calls cpu_stop_unpark(), which sets enabled == T at the CPU_ONLINE stage, so this CPU can't go away until cpu_stopper_thread() is called at least once. This all means that if cpu_stop_queue_work() succeeds, we know that work->fn() will be called. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20151008145131.GA18139@redhat.com Signed-off-by: Ingo Molnar --- include/linux/stop_machine.h | 1 + kernel/cpu.c | 2 +- kernel/stop_machine.c | 23 +++++++++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 414d924318ce..7b76362b381c 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -33,6 +33,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); +void stop_machine_park(int cpu); #else /* CONFIG_SMP */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 050c63472f03..c85df2775b73 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -344,7 +344,7 @@ static int take_cpu_down(void *_param) /* Give up timekeeping duties */ tick_handover_do_timer(); /* Park the stopper thread */ - kthread_park(current); + stop_machine_park((long)param->hcpu); return 0; } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 12484e5d5c88..6a402098d4ab 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -452,6 +452,18 @@ repeat: } } +void stop_machine_park(int cpu) +{ + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + /* + * Lockless. cpu_stopper_thread() will take stopper->lock and flush + * the pending works before it parks, until then it is fine to queue + * the new works.
+ */ + stopper->enabled = false; + kthread_park(stopper->thread); +} + extern void sched_set_stop_task(int cpu, struct task_struct *stop); static void cpu_stop_create(unsigned int cpu) @@ -462,17 +474,8 @@ static void cpu_stop_create(unsigned int cpu) static void cpu_stop_park(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - struct cpu_stop_work *work, *tmp; - unsigned long flags; - /* drain remaining works */ - spin_lock_irqsave(&stopper->lock, flags); - list_for_each_entry_safe(work, tmp, &stopper->works, list) { - list_del_init(&work->list); - cpu_stop_signal_done(work->done, false); - } - stopper->enabled = false; - spin_unlock_irqrestore(&stopper->lock, flags); + WARN_ON(!list_empty(&stopper->works)); } static void cpu_stop_unpark(unsigned int cpu) -- cgit v1.2.3 From c00166d87e730088d919814020e96ffed129d0d1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 9 Oct 2015 18:00:49 +0200 Subject: stop_machine: Kill smp_hotplug_thread->pre_unpark, introduce stop_machine_unpark() 1. Change smpboot_unpark_thread() to check ->selfparking, just like smpboot_park_thread() does. 2. Introduce stop_machine_unpark() which sets ->enabled and calls kthread_unpark(). 3. Change smpboot_thread_call() and cpu_stop_init() to call stop_machine_unpark() by hand. This way: - IMO the ->selfparking logic becomes more consistent. - We can kill the smp_hotplug_thread->pre_unpark() method. - We can easily unpark the stopper thread earlier. Say, we can move stop_machine_unpark() from smpboot_thread_call() to sched_cpu_active() as Peter suggests. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20151009160049.GA10166@redhat.com Signed-off-by: Ingo Molnar --- include/linux/smpboot.h | 4 ---- include/linux/stop_machine.h | 1 + kernel/cpu.c | 1 + kernel/smpboot.c | 5 ++--- kernel/stop_machine.c | 10 +++++++++- 5 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index e6109a6cd8f6..12910cf19869 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -24,9 +24,6 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) - * @pre_unpark: Optional unpark function, called before the thread is - * unparked (cpu online). This is not guaranteed to be - * called on the target cpu of the thread. Careful! * @cpumask: Internal state. To update which threads are unparked, * call smpboot_update_cpumask_percpu_thread(). * @selfparking: Thread is not parked by the park function. 
@@ -42,7 +39,6 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); - void (*pre_unpark)(unsigned int cpu); cpumask_var_t cpumask; bool selfparking; const char *thread_comm; diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 7b76362b381c..0adedca24c5b 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -34,6 +34,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); void stop_machine_park(int cpu); +void stop_machine_unpark(int cpu); #else /* CONFIG_SMP */ diff --git a/kernel/cpu.c b/kernel/cpu.c index c85df2775b73..6467521e1e15 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -475,6 +475,7 @@ static int smpboot_thread_call(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: + stop_machine_unpark(cpu); smpboot_unpark_threads(cpu); break; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index a818cbc73e14..d264f59bff56 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -222,9 +222,8 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); - if (ht->pre_unpark) - ht->pre_unpark(cpu); - kthread_unpark(tsk); + if (!ht->selfparking) + kthread_unpark(tsk); } void smpboot_unpark_threads(unsigned int cpu) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 91fbb109de6c..59096a55089f 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -513,6 +513,14 @@ static void cpu_stop_unpark(unsigned int cpu) spin_unlock_irq(&stopper->lock); } +void stop_machine_unpark(int cpu) +{ + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + + cpu_stop_unpark(cpu); + kthread_unpark(stopper->thread); +} + static struct smp_hotplug_thread cpu_stop_threads = { .store = &cpu_stopper.thread, .thread_should_run = cpu_stop_should_run, @@ -521,7 +529,6 @@ static struct smp_hotplug_thread cpu_stop_threads = { .create = cpu_stop_create, .setup = cpu_stop_unpark, .park = cpu_stop_park, - .pre_unpark = cpu_stop_unpark, .selfparking = true, }; @@ -537,6 +544,7 @@ static int __init cpu_stop_init(void) } BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); + stop_machine_unpark(raw_smp_processor_id()); stop_machine_initialized = true; return 0; } -- cgit v1.2.3 From d976441f44bc5d48635d081d277aa76556ffbf8b Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 19 Oct 2015 11:37:17 +0300 Subject: compiler, atomics, kasan: Provide READ_ONCE_NOCHECK() Some code may perform racy-by-design memory reads. This could be harmless, yet such code may produce KASAN warnings. To hide such accesses from KASAN this patch introduces the READ_ONCE_NOCHECK() macro. KASAN will not check the memory accessed by READ_ONCE_NOCHECK(). The KernelThreadSanitizer (KTSAN) is going to ignore it as well. This patch creates __read_once_size_nocheck(), a clone of __read_once_size(). The only difference between them is the 'no_sanitize_address' attribute appended to the '*_nocheck' function. This attribute tells the compiler that instrumentation of memory accesses should not be applied to that function. We declare it as static '__maybe_unused' because GCC is not able to inline such a function: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 With KASAN=n, READ_ONCE_NOCHECK() is just a clone of READ_ONCE().
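To make the intended use concrete, here is a minimal, hypothetical sketch of the kind of racy-by-design read the macro is meant for, e.g. peeking at a stack that another CPU may be using (the helper name is made up and not part of this patch):

/* Hypothetical example: the word may be written concurrently by
 * another CPU; the race is acceptable here, so hide the access
 * from KASAN instead of annotating it as a bug. */
static unsigned long peek_stack_word(const unsigned long *sp)
{
	return READ_ONCE_NOCHECK(*sp);
}

A plain READ_ONCE() would have the same single-access semantics but would keep the load visible to KASAN.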
Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrew Morton Cc: Andrey Konovalov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Kostya Serebryany Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Sasha Levin Cc: Thomas Gleixner Cc: Wolfram Gloger Cc: kasan-dev Link: http://lkml.kernel.org/r/1445243838-17763-2-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 13 +++++++++ include/linux/compiler.h | 66 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index dfaa7b3e9ae9..8efb40e61d6e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,12 +237,25 @@ #define KASAN_ABI_VERSION 3 #endif +#if GCC_VERSION >= 40902 +/* + * Tell the compiler that address safety instrumentation (KASAN) + * should not be applied to that function. + * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 + */ +#define __no_sanitize_address __attribute__((no_sanitize_address)) +#endif + #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) #define __noclone /* not needed */ #endif +#if !defined(__no_sanitize_address) +#define __no_sanitize_address +#endif + /* * A trick to suppress uninitialized variable warning without generating any * code diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c836eb2dc44d..3d7810341b57 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -198,19 +198,45 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #include -static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +#define __READ_ONCE_SIZE \ +({ \ + switch (size) { \ + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; \ + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; \ + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; \ + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +}) + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) { - switch (size) { - case 1: *(__u8 *)res = *(volatile __u8 *)p; break; - case 2: *(__u16 *)res = *(volatile __u16 *)p; break; - case 4: *(__u32 *)res = *(volatile __u32 *)p; break; - case 8: *(__u64 *)res = *(volatile __u64 *)p; break; - default: - barrier(); - __builtin_memcpy((void *)res, (const void *)p, size); - barrier(); - } + __READ_ONCE_SIZE; +} + +#ifdef CONFIG_KASAN +/* + * This function is not 'inline' because __no_sanitize_address conflicts + * with inlining. Attempting to inline it may cause a build failure. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 + * '__maybe_unused' allows us to avoid defined-but-not-used warnings. + */ +static __no_sanitize_address __maybe_unused +void __read_once_size_nocheck(const volatile void *p, void *res, int size) +{ + __READ_ONCE_SIZE; +} +#else +static __always_inline +void __read_once_size_nocheck(const volatile void *p, void *res, int size) +{ + __READ_ONCE_SIZE; } +#endif static __always_inline void __write_once_size(volatile void *p, void *res, int size) { @@ -248,8 +274,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * required ordering.
*/ -#define READ_ONCE(x) \ - ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define __READ_ONCE(x, check) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + if (check) \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + else \ + __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) +#define READ_ONCE(x) __READ_ONCE(x, 1) + +/* + * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need + * to hide memory access from KASAN. + */ +#define READ_ONCE_NOCHECK(x) __READ_ONCE(x, 0) #define WRITE_ONCE(x, val) \ ({ \ -- cgit v1.2.3 From 839a42af9441982311f33241529f711f23c857c8 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 15 Oct 2015 20:34:53 +0200 Subject: lib/mpi: clean unused SHA1_DIGEST_LENGTH The define SHA1_DIGEST_LENGTH is not used anywhere, so remove it. Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu --- include/linux/mpi.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 8b8269fa8907..3a5abe95affd 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -33,12 +33,6 @@ #include #include -/* DSI defines */ - -#define SHA1_DIGEST_LENGTH 20 - -/*end of DSI defines */ - #define BYTES_PER_MPI_LIMB (BITS_PER_LONG / 8) #define BITS_PER_MPI_LIMB BITS_PER_LONG -- cgit v1.2.3 From b299167652fe58f1ebadb3e3ac84a5a0b74e534e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 7 Oct 2015 02:45:11 +0300 Subject: i2c: ocores: support big-endian register layout This allows using the OpenCores I2C controller attached to its host in native-endian mode with bi-endian CPUs. An example of such a system is the Xtensa XTFPGA platform. Acked-by: Peter Korsgaard Signed-off-by: Max Filippov Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ocores.c | 31 +++++++++++++++++++++++++++---- include/linux/i2c-ocores.h | 1 + 2 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index abf5db7e441e..11b7b87311ed 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -92,6 +92,16 @@ static void oc_setreg_32(struct ocores_i2c *i2c, int reg, u8 value) iowrite32(value, i2c->base + (reg << i2c->reg_shift)); } +static void oc_setreg_16be(struct ocores_i2c *i2c, int reg, u8 value) +{ + iowrite16be(value, i2c->base + (reg << i2c->reg_shift)); +} + +static void oc_setreg_32be(struct ocores_i2c *i2c, int reg, u8 value) +{ + iowrite32be(value, i2c->base + (reg << i2c->reg_shift)); +} + static inline u8 oc_getreg_8(struct ocores_i2c *i2c, int reg) { return ioread8(i2c->base + (reg << i2c->reg_shift)); @@ -107,6 +117,16 @@ static inline u8 oc_getreg_32(struct ocores_i2c *i2c, int reg) return ioread32(i2c->base + (reg << i2c->reg_shift)); } +static inline u8 oc_getreg_16be(struct ocores_i2c *i2c, int reg) +{ + return ioread16be(i2c->base + (reg << i2c->reg_shift)); +} + +static inline u8 oc_getreg_32be(struct ocores_i2c *i2c, int reg) +{ + return ioread32be(i2c->base + (reg << i2c->reg_shift)); +} + static inline void oc_setreg(struct ocores_i2c *i2c, int reg, u8 value) { i2c->setreg(i2c, reg, value); @@ -428,6 +448,9 @@ static int ocores_i2c_probe(struct platform_device *pdev) i2c->reg_io_width = 1; /* Set to default value */ if (!i2c->setreg || !i2c->getreg) { + bool be = pdata ?
pdata->big_endian : + of_device_is_big_endian(pdev->dev.of_node); + switch (i2c->reg_io_width) { case 1: i2c->setreg = oc_setreg_8; @@ -435,13 +458,13 @@ static int ocores_i2c_probe(struct platform_device *pdev) break; case 2: - i2c->setreg = oc_setreg_16; - i2c->getreg = oc_getreg_16; + i2c->setreg = be ? oc_setreg_16be : oc_setreg_16; + i2c->getreg = be ? oc_getreg_16be : oc_getreg_16; break; case 4: - i2c->setreg = oc_setreg_32; - i2c->getreg = oc_getreg_32; + i2c->setreg = be ? oc_setreg_32be : oc_setreg_32; + i2c->getreg = be ? oc_getreg_32be : oc_getreg_32; break; default: diff --git a/include/linux/i2c-ocores.h b/include/linux/i2c-ocores.h index 1c06b5c7c308..01edd96fe1f7 100644 --- a/include/linux/i2c-ocores.h +++ b/include/linux/i2c-ocores.h @@ -15,6 +15,7 @@ struct ocores_i2c_platform_data { u32 reg_shift; /* register offset shift value */ u32 reg_io_width; /* register io read/write width */ u32 clock_khz; /* input clock in kHz */ + bool big_endian; /* registers are big endian */ u8 num_devices; /* number of devices in the devices list */ struct i2c_board_info const *devices; /* devices connected to the bus */ }; -- cgit v1.2.3 From c6f1891323e6a259c0b0f516a3a3e0f6b0ee2c5f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 7 Oct 2015 10:16:31 +0200 Subject: i2c: rcar: Remove obsolete platform data support Since commit 4baadb9e05c68962 ("ARM: shmobile: r8a7778: remove obsolete setup code"), Renesas R-Car SoCs are only supported in generic DT-only ARM multi-platform builds. The driver doesn't need to use platform data anymore, hence remove platform data configuration. Signed-off-by: Geert Uytterhoeven [wsa: removed now unused ret value and cast to proper enum type] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 21 ++------------------- include/linux/i2c/i2c-rcar.h | 10 ---------- 2 files changed, 2 insertions(+), 29 deletions(-) delete mode 100644 include/linux/i2c/i2c-rcar.h (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 1921294afc87..bbf3b2505aaf 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -582,7 +581,6 @@ MODULE_DEVICE_TABLE(of, rcar_i2c_dt_ids); static int rcar_i2c_probe(struct platform_device *pdev) { - struct i2c_rcar_platform_data *pdata = dev_get_platdata(&pdev->dev); struct rcar_i2c_priv *priv; struct i2c_adapter *adap; struct resource *res; @@ -601,15 +599,9 @@ static int rcar_i2c_probe(struct platform_device *pdev) } bus_speed = 100000; /* default 100 kHz */ - ret = of_property_read_u32(dev->of_node, "clock-frequency", &bus_speed); - if (ret < 0 && pdata && pdata->bus_speed) - bus_speed = pdata->bus_speed; + of_property_read_u32(dev->of_node, "clock-frequency", &bus_speed); - if (pdev->dev.of_node) - priv->devtype = (long)of_match_device(rcar_i2c_dt_ids, - dev)->data; - else - priv->devtype = platform_get_device_id(pdev)->driver_data; + priv->devtype = (enum rcar_i2c_type)of_match_device(rcar_i2c_dt_ids, dev)->data; ret = rcar_i2c_clock_calculate(priv, bus_speed, dev); if (ret < 0) @@ -667,14 +659,6 @@ static int rcar_i2c_remove(struct platform_device *pdev) return 0; } -static const struct platform_device_id rcar_i2c_id_table[] = { - { "i2c-rcar", I2C_RCAR_GEN1 }, - { "i2c-rcar_gen1", I2C_RCAR_GEN1 }, - { "i2c-rcar_gen2", I2C_RCAR_GEN2 }, - {}, -}; -MODULE_DEVICE_TABLE(platform, rcar_i2c_id_table); - static struct platform_driver rcar_i2c_driver = { .driver = { 
.name = "i2c-rcar", @@ -682,7 +666,6 @@ static struct platform_driver rcar_i2c_driver = { }, .probe = rcar_i2c_probe, .remove = rcar_i2c_remove, - .id_table = rcar_i2c_id_table, }; module_platform_driver(rcar_i2c_driver); diff --git a/include/linux/i2c/i2c-rcar.h b/include/linux/i2c/i2c-rcar.h deleted file mode 100644 index 496f5c2b23c9..000000000000 --- a/include/linux/i2c/i2c-rcar.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __I2C_R_CAR_H__ -#define __I2C_R_CAR_H__ - -#include - -struct i2c_rcar_platform_data { - u32 bus_speed; -}; - -#endif /* __I2C_R_CAR_H__ */ -- cgit v1.2.3 From 5d170139eb10ae12e1bd076245c42b35453d8324 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 18 Oct 2015 13:05:40 +0200 Subject: vga_switcheroo: Constify vga_switcheroo_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vga_switcheroo_client_ops has always been declared const since its introduction with 26ec685ff9d9 ("vga_switcheroo: Introduce struct vga_switcheroo_client_ops"). Do so for vga_switcheroo_handler as well. drivers/gpu/drm/amd/amdgpu/amdgpu.ko: 6 .rodata 00009888 - 19 .data 00001f00 + 19 .data 00001ee0 drivers/gpu/drm/nouveau/nouveau.ko: 6 .rodata 000460b8 17 .data 00018fe0 drivers/gpu/drm/radeon/radeon.ko: - 7 .rodata 00030944 + 7 .rodata 00030964 - 21 .data 0000d6a0 + 21 .data 0000d678 drivers/platform/x86/apple-gmux.ko: - 7 .rodata 00000140 + 7 .rodata 00000160 - 11 .data 000000e0 + 11 .data 000000b8 Cc: Ben Skeggs Cc: Darren Hart Cc: Alex Deucher Signed-off-by: Lukas Wunner Reviewed-by: Christian König . Signed-off-by: Daniel Vetter --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 2 +- drivers/gpu/drm/nouveau/nouveau_acpi.c | 2 +- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 2 +- drivers/gpu/vga/vga_switcheroo.c | 4 ++-- drivers/platform/x86/apple-gmux.c | 2 +- include/linux/vga_switcheroo.h | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 1a6b239baab9..5a8fbadbd27b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -501,7 +501,7 @@ static int amdgpu_atpx_get_client_id(struct pci_dev *pdev) return VGA_SWITCHEROO_DIS; } -static struct vga_switcheroo_handler amdgpu_atpx_handler = { +static const struct vga_switcheroo_handler amdgpu_atpx_handler = { .switchto = amdgpu_atpx_switchto, .power_state = amdgpu_atpx_power_state, .init = amdgpu_atpx_init, diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index df2d9818aba3..8b8332e46f24 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -206,7 +206,7 @@ static int nouveau_dsm_get_client_id(struct pci_dev *pdev) return VGA_SWITCHEROO_DIS; } -static struct vga_switcheroo_handler nouveau_dsm_handler = { +static const struct vga_switcheroo_handler nouveau_dsm_handler = { .switchto = nouveau_dsm_switchto, .power_state = nouveau_dsm_power_state, .get_client_id = nouveau_dsm_get_client_id, diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index a771b9f0bf98..c4b4f298a283 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -499,7 +499,7 @@ static int radeon_atpx_get_client_id(struct pci_dev *pdev) return VGA_SWITCHEROO_DIS; } -static struct vga_switcheroo_handler radeon_atpx_handler 
= { +static const struct vga_switcheroo_handler radeon_atpx_handler = { .switchto = radeon_atpx_switchto, .power_state = radeon_atpx_power_state, .init = radeon_atpx_init, diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index af0d372ff7d4..56bbbd65ae8a 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -140,7 +140,7 @@ struct vgasr_priv { int registered_clients; struct list_head clients; - struct vga_switcheroo_handler *handler; + const struct vga_switcheroo_handler *handler; }; #define ID_BIT_AUDIO 0x100 @@ -195,7 +195,7 @@ static void vga_switcheroo_enable(void) * * Return: 0 on success, -EINVAL if a handler was already registered. */ -int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) +int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler) { mutex_lock(&vgasr_mutex); if (vgasr_priv.handler) { diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 0dec3f59917a..976efeb3f2ba 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -346,7 +346,7 @@ gmux_active_client(struct apple_gmux_data *gmux_data) return VGA_SWITCHEROO_DIS; } -static struct vga_switcheroo_handler gmux_handler = { +static const struct vga_switcheroo_handler gmux_handler = { .switchto = gmux_switchto, .power_state = gmux_set_power_state, .get_client_id = gmux_get_client_id, diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index c55751155631..786bc931dbd1 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -137,7 +137,7 @@ int vga_switcheroo_register_audio_client(struct pci_dev *pdev, void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); -int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler); +int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler); void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); @@ -155,7 +155,7 @@ static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} static inline int vga_switcheroo_register_client(struct pci_dev *dev, const struct vga_switcheroo_client_ops *ops, bool driver_power_control) { return 0; } static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {} -static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } +static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, enum vga_switcheroo_client_id id) { return 0; } -- cgit v1.2.3 From c63b7682b6d90530d3a071ff75b81bfddcce8598 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Sat, 1 Aug 2015 15:27:57 +0300 Subject: tracing: Allow disabling compilation of specific trace systems Allow a trace events header file to disable compilation of its trace events by defining the preprocessor macro NOTRACE. This could be done, for example, according to a Kconfig option. 
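To sketch how a subsystem could wire this up (a hypothetical example; the CONFIG_EXAMPLE_TRACING symbol is made up and not part of this patch), its trace header would define NOTRACE from its own Kconfig option before pulling in the tracepoint machinery:

/* At the top of the subsystem's trace header, before any
 * tracepoint headers are included: */
#ifndef CONFIG_EXAMPLE_TRACING
#define NOTRACE
#endif

#include <linux/tracepoint.h>

With NOTRACE defined, the tracepoint declarations below compile into inline no-op stubs, exactly as if CONFIG_TRACEPOINTS were disabled for that header alone.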
Link: http://lkml.kernel.org/r/1438432079-11704-3-git-send-email-tal.shorer@gmail.com Signed-off-by: Tal Shorer Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 17 ++++++++++++++--- include/trace/define_trace.h | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a5f7f3ecafa3..afada369c5b7 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -111,7 +111,18 @@ extern void syscall_unregfunc(void); #define TP_ARGS(args...) args #define TP_CONDITION(args...) args -#ifdef CONFIG_TRACEPOINTS +/* + * Individual subsystems may have a separate configuration to + * enable their tracepoints. By default, this file will create + * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem + * wants to be able to disable its tracepoints from being created, + * it can define NOTRACE before including the tracepoint headers. + */ +#if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE) +#define TRACEPOINTS_ENABLED +#endif + +#ifdef TRACEPOINTS_ENABLED /* * it_func[0] is never NULL because there is at least one element in the array @@ -234,7 +245,7 @@ extern void syscall_unregfunc(void); #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name) -#else /* !CONFIG_TRACEPOINTS */ +#else /* !TRACEPOINTS_ENABLED */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ static inline void trace_##name(proto) \ { } \ @@ -266,7 +277,7 @@ extern void syscall_unregfunc(void); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -#endif /* CONFIG_TRACEPOINTS */ +#endif /* TRACEPOINTS_ENABLED */ #ifdef CONFIG_TRACING /** diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 09b3880105a9..2d8639ea64d5 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -86,7 +86,7 @@ #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) -#ifdef CONFIG_EVENT_TRACING +#ifdef TRACEPOINTS_ENABLED #include #include #endif -- cgit v1.2.3 From f672258391b42a5c7cc2732c9c063e56a85c8dbe Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:42 -0700 Subject: tcp: track min RTT using windowed min-filter Kathleen Nichols' algorithm for tracking the minimum RTT of a data stream over some measurement window. It uses constant space and constant time per update. Yet it almost always delivers the same minimum as an implementation that has to keep all the data in the window. The measurement window is tunable via the net.ipv4.tcp_min_rtt_wlen sysctl, with a default value of 5 minutes. The algorithm keeps track of the best, 2nd best & 3rd best min values, maintaining an invariant that the measurement time of the n'th best >= that of the n-1'th best. It also makes sure that the three values are widely separated in the time window, since that bounds the worst-case error when the data is monotonically increasing over the window. Upon getting a new min, we can forget everything earlier because it has no value - the new min is less than everything else in the window by definition and it's the most recent. So we restart fresh on every new min and overwrite the 2nd & 3rd choices. The same property holds for the 2nd & 3rd best. Therefore we have to maintain two invariants to maximize the information in the samples, one on values (1st.v <= 2nd.v <= 3rd.v) and the other on times (now-win <= 1st.t <= 2nd.t <= 3rd.t <= now).
These invariants determine the structure of the code. The RTT input to the windowed filter is the minimum RTT measured from ACK or SACK, or, as a last resort, from TCP timestamps. The accessor tcp_min_rtt() returns the minimum RTT seen in the window. ~0U indicates it is not available. The minimum is 1 usec even if the true RTT is below that. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++ include/linux/tcp.h | 3 ++ include/net/tcp.h | 7 +++ net/ipv4/sysctl_net_ipv4.c | 7 +++ net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 78 +++++++++++++++++++++++++++++++--- net/ipv4/tcp_minisocks.c | 1 + 7 files changed, 100 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ebe94f2cab98..502d6a572b4f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -384,6 +384,14 @@ tcp_mem - vector of 3 INTEGERs: min, pressure, max Defaults are calculated at boot time from amount of available memory. +tcp_min_rtt_wlen - INTEGER + The window length of the windowed min filter to track the minimum RTT. + A shorter window lets a flow more quickly pick up new (higher) + minimum RTT when it is moved to a longer path (e.g., due to traffic + engineering). A longer window makes the filter more resistant to RTT + inflations such as transient congestion. The unit is seconds. + Default: 300 + tcp_moderate_rcvbuf - BOOLEAN If set, TCP performs receive buffer auto-tuning, attempting to automatically size the buffer (no greater than tcp_rmem[2]) to diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 86a7edaa6797..90edef5508f9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -217,6 +217,9 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ + struct rtt_meas { + u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ + } rtt_min[3]; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ diff --git a/include/net/tcp.h b/include/net/tcp.h index eed94fc355c1..4a43152229ea 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -279,6 +279,7 @@ extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; +extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; @@ -671,6 +672,12 @@ static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) return dst_metric_locked(dst, RTAX_CC_ALGO); } +/* Minimum RTT in usec. ~0 means not available. */ +static inline u32 tcp_min_rtt(const struct tcp_sock *tp) +{ + return tp->rtt_min[0].rtt; +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window.
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 894da3a70aff..13ab434c2909 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -576,6 +576,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_min_rtt_wlen", + .data = &sysctl_tcp_min_rtt_wlen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ac1bdbb50352..0cfa7c0c1e80 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -388,6 +388,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); + tp->rtt_min[0].rtt = ~0U; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 62ee71efd1ce..eedb25db3947 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -95,6 +95,7 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; +int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_thin_dupack __read_mostly; @@ -2915,8 +2916,69 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, tcp_xmit_retransmit_queue(sk); } +/* Kathleen Nichols' algorithm for tracking the minimum value of + * a data stream over some fixed time interval. (E.g., the minimum + * RTT over the past five minutes.) It uses constant space and constant + * time per update yet almost always delivers the same minimum as an + * implementation that has to keep all the data in the window. + * + * The algorithm keeps track of the best, 2nd best & 3rd best min + * values, maintaining an invariant that the measurement time of the + * n'th best >= n-1'th best. It also makes sure that the three values + * are widely separated in the time window since that bounds the worse + * case error when that data is monotonically increasing over the window. + * + * Upon getting a new min, we can forget everything earlier because it + * has no value - the new min is <= everything else in the window by + * definition and it's the most recent. So we restart fresh on every new min + * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd + * best. + */ +static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) +{ + const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; + struct rtt_meas *m = tcp_sk(sk)->rtt_min; + struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now }; + u32 elapsed; + + /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ + if (unlikely(rttm.rtt <= m[0].rtt)) + m[0] = m[1] = m[2] = rttm; + else if (rttm.rtt <= m[1].rtt) + m[1] = m[2] = rttm; + else if (rttm.rtt <= m[2].rtt) + m[2] = rttm; + + elapsed = now - m[0].ts; + if (unlikely(elapsed > wlen)) { + /* Passed entire window without a new min so make 2nd choice + * the new min & 3rd choice the new 2nd. So forth and so on. + */ + m[0] = m[1]; + m[1] = m[2]; + m[2] = rttm; + if (now - m[0].ts > wlen) { + m[0] = m[1]; + m[1] = rttm; + if (now - m[0].ts > wlen) + m[0] = rttm; + } + } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { + /* Passed a quarter of the window without a new min so + * take 2nd choice from the 2nd quarter of the window. 
+ */ + m[2] = m[1] = rttm; + } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { + /* Passed half the window without a new min so take the 3rd + * choice from the last half of the window. + */ + m[2] = rttm; + } +} + static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, - long seq_rtt_us, long sack_rtt_us) + long seq_rtt_us, long sack_rtt_us, + long ca_rtt_us) { const struct tcp_sock *tp = tcp_sk(sk); @@ -2936,11 +2998,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, */ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) - seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr); - + seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp - + tp->rx_opt.rcv_tsecr); if (seq_rtt_us < 0) return false; + /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is + * always taken together with ACK, SACK, or TS-opts. Any negative + * values will be skipped with the seq_rtt_us < 0 check above. + */ + tcp_update_rtt_min(sk, ca_rtt_us); tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); @@ -2961,7 +3028,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); } - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L); + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us); } @@ -3175,7 +3242,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); } - rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, + ca_rtt_us); if (flag & FLAG_ACKED) { tcp_rearm_rto(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 41828bdc5d32..b875c288daaa 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -470,6 +470,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); + newtp->rtt_min[0].rtt = ~0U; newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; -- cgit v1.2.3 From af82f4e84866ecd360a53f770d6217637116e6c1 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:43 -0700 Subject: tcp: remove tcp_mark_lost_retrans() Remove the existing lost retransmit detection because RACK subsumes it completely. This also stops overloading the ack_seq field of the skb control block. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/tcp.h | 2 -- net/ipv4/tcp_input.c | 65 --------------------------------------------------- net/ipv4/tcp_output.c | 6 ----- 3 files changed, 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 90edef5508f9..8c54863dfc38 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -283,8 +283,6 @@ struct tcp_sock { int lost_cnt_hint; u32 retransmit_high; /* L-bits may be on up to this seqno */ - u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ - u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eedb25db3947..5a776897a8c7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1048,70 +1048,6 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, return !before(start_seq, end_seq - tp->max_window); } -/* Check for lost retransmit. This superb idea is borrowed from "ratehalving". - * Event "B". Later note: FACK people cheated me again 8), we have to account - * for reordering! Ugly, but should help. - * - * Search retransmitted skbs from write_queue that were sent when snd_nxt was - * less than what is now known to be received by the other end (derived from - * highest SACK block). Also calculate the lowest snd_nxt among the remaining - * retransmitted skbs to avoid some costly processing per ACKs. - */ -static void tcp_mark_lost_retrans(struct sock *sk, int *flag) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - int cnt = 0; - u32 new_low_seq = tp->snd_nxt; - u32 received_upto = tcp_highest_sack_seq(tp); - - if (!tcp_is_fack(tp) || !tp->retrans_out || - !after(received_upto, tp->lost_retrans_low) || - icsk->icsk_ca_state != TCP_CA_Recovery) - return; - - tcp_for_write_queue(skb, sk) { - u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; - - if (skb == tcp_send_head(sk)) - break; - if (cnt == tp->retrans_out) - break; - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - continue; - - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) - continue; - - /* TODO: We would like to get rid of tcp_is_fack(tp) only - * constraint here (see above) but figuring out that at - * least tp->reordering SACK blocks reside between ack_seq - * and received_upto is not easy task to do cheaply with - * the available datastructures. - * - * Whether FACK should check here for tp->reordering segs - * in-between one could argue for either way (it would be - * rather simple to implement as we could count fack_count - * during the walk and do tp->fackets_out - fack_count). 
- */ - if (after(received_upto, ack_seq)) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - *flag |= FLAG_LOST_RETRANS; - tcp_skb_mark_lost_uncond_verify(tp, skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); - } else { - if (before(ack_seq, new_low_seq)) - new_low_seq = ack_seq; - cnt += tcp_skb_pcount(skb); - } - } - - if (tp->retrans_out) - tp->lost_retrans_low = new_low_seq; -} - static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) @@ -1838,7 +1774,6 @@ advance_sp: ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); - tcp_mark_lost_retrans(sk, &state->flag); tcp_verify_left_out(tp); out: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 19adedb8c5cc..f6f7f9b4901b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2655,8 +2655,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) net_dbg_ratelimited("retrans_out leaked\n"); } #endif - if (!tp->retrans_out) - tp->lost_retrans_low = tp->snd_nxt; TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); @@ -2664,10 +2662,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = tcp_skb_timestamp(skb); - /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). */ - TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; } else if (err != -EBUSY) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } -- cgit v1.2.3 From 625a5e109a3ed6f36a1008a43069a3462b44a424 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:45 -0700 Subject: tcp: skb_mstamp_after helper A helper to prepare for the first main RACK patch. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4398411236f1..24f4dfd94c51 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -463,6 +463,15 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, return delta_us; } +static inline bool skb_mstamp_after(const struct skb_mstamp *t1, + const struct skb_mstamp *t0) +{ + s32 diff = t1->stamp_jiffies - t0->stamp_jiffies; + + if (!diff) + diff = t1->stamp_us - t0->stamp_us; + return diff > 0; +} /** * struct sk_buff - socket buffer -- cgit v1.2.3 From 659a8ad56f490279f0efee43a62ffa1ac914a4e0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:46 -0700 Subject: tcp: track the packet timings in RACK This patch is the first half of the RACK loss recovery. RACK loss recovery uses the notion of time instead of packet sequence (FACK) or counts (dupthresh). It's inspired by the previous FACK heuristic in tcp_mark_lost_retrans(): when a limited transmit (new data packet) is sacked, then the current retransmitted sequence below the newly sacked sequence must have been lost, since at least one round trip time has elapsed.
But the FACK-based heuristic has several limitations: 1) it can't detect tail drops, since it depends on limited transmit; 2) it is disabled upon reordering (it assumes no reordering); 3) it is only enabled in fast recovery, but not in timeout recovery. RACK (Recent ACK) addresses these limitations with the notion of time instead: a packet P1 is lost if a later packet P2 is s/acked, as at least one round trip has passed. Since RACK cares about the time sequence instead of the data sequence of packets, it can detect tail drops when a later retransmission is s/acked, while FACK or dupthresh can't. For reordering, RACK uses a dynamically adjusted reordering window ("reo_wnd") to reduce false positives on every (small) degree of reordering. This patch implements tcp_rack_advance(), which tracks the most recent transmission time among the packets that have been delivered (ACKed or SACKed) in tp->rack.mstamp. This timestamp is the key to determining which packets have been lost. Consider an example where the sender sends four packets: T1: P1 (lost) T2: P2 T3: P3 T4: P4 T100: sack of P2. rack.mstamp = T2 T101: retransmit P1 T102: sack of P2,P3,P4. rack.mstamp = T4 T205: ACK of P4 since the hole is repaired. rack.mstamp = T101 We need to be careful about spurious retransmission because it may falsely advance tp->rack.mstamp by an RTT or an RTO, causing RACK to falsely mark all packets lost, just like a spurious timeout. We identify spurious retransmission by the ACK's TS echo value. If the TS option is not applicable but the retransmission is acknowledged less than min-RTT ago, it is likely to be spurious. We refrain from using the transmission time of these spurious retransmissions. The second half is implemented in the next patch, which marks packets lost using the RACK timestamp. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 5 +++++ net/ipv4/Makefile | 1 + net/ipv4/tcp_input.c | 14 ++++++++++++++ net/ipv4/tcp_minisocks.c | 2 ++ net/ipv4/tcp_recovery.c | 32 ++++++++++++++++++++++++++++++++ 6 files changed, 60 insertions(+) create mode 100644 net/ipv4/tcp_recovery.c (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8c54863dfc38..5dce9705fe84 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -194,6 +194,12 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ + /* Information of the most recently (s)acked skb */ + struct tcp_rack { + struct skb_mstamp mstamp; /* (Re)sent time of the skb */ + u8 advanced; /* mstamp advanced since last lost marking */ + u8 reord; /* reordering detected */ + } rack; u16 advmss; /* Advertised MSS */ u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm?
*/ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4a43152229ea..3c3a9fe057d3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1750,6 +1750,11 @@ int tcpv4_offload_init(void); void tcp_v4_init(void); void tcp_init(void); +/* tcp_recovery.c */ + +extern void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked); + /* * Save and compile IPv4 options, return a pointer to it */ diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 89aacb630a53..c29809f765dc 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,6 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ + tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1e97e73e5ecf..ce8370525832 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1173,6 +1173,8 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { + tcp_rack_advance(tp, xmit_time, sacked); + if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, * we do not clear RETRANS, believing @@ -2256,6 +2258,16 @@ static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) before(tp->rx_opt.rcv_tsecr, when); } +/* skb is spurious retransmitted if the returned timestamp echo + * reply is prior to the skb transmission time + */ +static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, + const struct sk_buff *skb) +{ + return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb)); +} + /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ @@ -3135,6 +3147,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= acked_pcount; + else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) + tcp_rack_advance(tp, &skb->skb_mstamp, sacked); if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b875c288daaa..1fd5d413a664 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -548,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_ecn_openreq_child(newtp, req); newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->rack.mstamp.v64 = 0; + newtp->rack.advanced = 0; newtp->saved_syn = req->saved_syn; req->saved_syn = NULL; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c new file mode 100644 index 000000000000..8f66a6584845 --- /dev/null +++ b/net/ipv4/tcp_recovery.c @@ -0,0 +1,32 @@ +#include <linux/tcp.h> +#include <net/tcp.h> + +/* Record the most recently (re)sent time among the (s)acked packets */ +void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked) +{ + if (tp->rack.mstamp.v64 && + !skb_mstamp_after(xmit_time, &tp->rack.mstamp)) + return; + + if (sacked & TCPCB_RETRANS) { + struct skb_mstamp now; + + /* If the sacked packet was retransmitted, it's ambiguous + * whether the retransmission or the original (or the prior + * retransmission) was sacked. + * + * If the original is lost, there is no ambiguity. Otherwise + * we assume the original can be delayed up to aRTT + min_rtt.
+ * the aRTT term is bounded by the fast recovery or timeout, + * so it's at least one RTT (i.e., retransmission is at least + * an RTT later). + */ + skb_mstamp_get(&now); + if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp)) + return; + } + + tp->rack.mstamp = *xmit_time; + tp->rack.advanced = 1; +} -- cgit v1.2.3 From 146aa8b1453bd8f1ff2304ffb71b4ee0eb9acdcc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Oct 2015 14:04:48 +0100 Subject: KEYS: Merge the type-specific data with the payload data Merge the type-specific data with the payload data into one four-word chunk as it seems pointless to keep them separate. Use user_key_payload() for accessing the payloads of overloaded user-defined keys. Signed-off-by: David Howells cc: linux-cifs@vger.kernel.org cc: ecryptfs@vger.kernel.org cc: linux-ext4@vger.kernel.org cc: linux-f2fs-devel@lists.sourceforge.net cc: linux-nfs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-ima-devel@lists.sourceforge.net --- Documentation/crypto/asymmetric-keys.txt | 27 +++++++------- Documentation/security/keys.txt | 41 +++++++++++++-------- crypto/asymmetric_keys/asymmetric_keys.h | 5 --- crypto/asymmetric_keys/asymmetric_type.c | 44 +++++++++++++--------- crypto/asymmetric_keys/public_key.c | 4 +- crypto/asymmetric_keys/signature.c | 2 +- crypto/asymmetric_keys/x509_parser.h | 1 + crypto/asymmetric_keys/x509_public_key.c | 9 +++-- fs/cifs/cifs_spnego.c | 6 +-- fs/cifs/cifsacl.c | 25 ++++++------- fs/cifs/connect.c | 9 +++-- fs/cifs/sess.c | 2 +- fs/cifs/smb2pdu.c | 2 +- fs/ecryptfs/ecryptfs_kernel.h | 5 +-- fs/ext4/crypto_key.c | 4 +- fs/f2fs/crypto_key.c | 4 +- fs/fscache/object-list.c | 4 +- fs/nfs/nfs4idmap.c | 4 +- include/crypto/public_key.h | 1 - include/keys/asymmetric-subtype.h | 2 +- include/keys/asymmetric-type.h | 15 ++++++++ include/keys/user-type.h | 8 ++++ include/linux/key-type.h | 3 +- include/linux/key.h | 33 +++++++---------- kernel/module_signing.c | 1 + lib/digsig.c | 7 ++-- net/ceph/ceph_common.c | 2 +- net/ceph/crypto.c | 6 +-- net/dns_resolver/dns_key.c | 20 +++++----- net/dns_resolver/dns_query.c | 7 ++-- net/dns_resolver/internal.h | 8 ++++ net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-key.c | 32 ++++++++-------- net/rxrpc/ar-output.c | 2 +- net/rxrpc/ar-security.c | 4 +- net/rxrpc/rxkad.c | 16 ++++---- security/integrity/evm/evm_crypto.c | 2 +- security/keys/big_key.c | 47 ++++++++++++++++-------- security/keys/encrypted-keys/encrypted.c | 18 ++++----- security/keys/encrypted-keys/encrypted.h | 4 +- security/keys/encrypted-keys/masterkey_trusted.c | 4 +- security/keys/key.c | 18 ++++----- security/keys/keyctl.c | 4 +- security/keys/keyring.c | 12 +++--- security/keys/process_keys.c | 4 +- security/keys/request_key.c | 4 +- security/keys/request_key_auth.c | 12 +++--- security/keys/trusted.c | 6 +-- security/keys/user_defined.c | 14 +++---- 49 files changed, 286 insertions(+), 230 deletions(-) (limited to 'include/linux') diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt index b7675904a747..8c07e0ea6bc0 100644 --- a/Documentation/crypto/asymmetric-keys.txt +++ b/Documentation/crypto/asymmetric-keys.txt @@ -186,7 +186,7 @@ and looks like the following: const struct public_key_signature *sig); }; -Asymmetric keys point to this with their type_data[0] member. +Asymmetric keys point to this with their payload[asym_subtype] member. The owner and name fields should be set to the owning module and the name of the subtype. 
Currently, the name is only used for print statements. @@ -269,8 +269,7 @@ mandatory: struct key_preparsed_payload { char *description; - void *type_data[2]; - void *payload; + void *payload[4]; const void *data; size_t datalen; size_t quotalen; @@ -283,16 +282,18 @@ mandatory: not theirs. If the parser is happy with the blob, it should propose a description for - the key and attach it to ->description, ->type_data[0] should be set to - point to the subtype to be used, ->payload should be set to point to the - initialised data for that subtype, ->type_data[1] should point to a hex - fingerprint and quotalen should be updated to indicate how much quota this - key should account for. - - When clearing up, the data attached to ->type_data[1] and ->description - will be kfree()'d and the data attached to ->payload will be passed to the - subtype's ->destroy() method to be disposed of. A module reference for - the subtype pointed to by ->type_data[0] will be put. + the key and attach it to ->description, ->payload[asym_subtype] should be + set to point to the subtype to be used, ->payload[asym_crypto] should be + set to point to the initialised data for that subtype, + ->payload[asym_key_ids] should point to one or more hex fingerprints and + quotalen should be updated to indicate how much quota this key should + account for. + + When clearing up, the data attached to ->payload[asym_key_ids] and + ->description will be kfree()'d and the data attached to + ->payload[asym_crypto] will be passed to the subtype's ->destroy() method + to be disposed of. A module reference for the subtype pointed to by + ->payload[asym_subtype] will be put. If the data format is not recognised, -EBADMSG should be returned. If it diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index c9e7f4f223a5..8c183873b2b7 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -1049,12 +1049,12 @@ search a specific keyring, so using keyrings in this way is of limited utility. NOTES ON ACCESSING PAYLOAD CONTENTS =================================== -The simplest payload is just a number in key->payload.value. In this case, -there's no need to indulge in RCU or locking when accessing the payload. +The simplest payload is just data stored in key->payload directly. In this +case, there's no need to indulge in RCU or locking when accessing the payload. -More complex payload contents must be allocated and a pointer to them set in -key->payload.data. One of the following ways must be selected to access the -data: +More complex payload contents must be allocated and pointers to them set in the +key->payload.data[] array. One of the following ways must be selected to +access the data: (1) Unmodifiable key type. @@ -1092,6 +1092,13 @@ data: the payload. key->datalen cannot be relied upon to be consistent with the payload just dereferenced if the key's semaphore is not held. + Note that key->payload.data[0] has a shadow that is marked for __rcu + usage. This is called key->payload.rcu_data0.
The following accessors + wrap the RCU calls to this element: + + rcu_assign_keypointer(struct key *key, void *data); + void *rcu_dereference_key(struct key *key); + =================== DEFINING A KEY TYPE @@ -1143,8 +1150,7 @@ The structure has a number of fields, some of which are mandatory: struct key_preparsed_payload { char *description; - void *type_data[2]; - void *payload; + union key_payload payload; const void *data; size_t datalen; size_t quotalen; @@ -1160,10 +1166,9 @@ The structure has a number of fields, some of which are mandatory: attached as a string to the description field. This will be used for the key description if the caller of add_key() passes NULL or "". - The method can attach anything it likes to type_data[] and payload. These - are merely passed along to the instantiate() or update() operations. If - set, the expiry time will be applied to the key if it is instantiated from - this data. + The method can attach anything it likes to payload. This is merely passed + along to the instantiate() or update() operations. If set, the expiry + time will be applied to the key if it is instantiated from this data. The method should return 0 if successful or a negative error code otherwise. @@ -1172,11 +1177,10 @@ The structure has a number of fields, some of which are mandatory: (*) void (*free_preparse)(struct key_preparsed_payload *prep); This method is only required if the preparse() method is provided, - otherwise it is unused. It cleans up anything attached to the - description, type_data and payload fields of the key_preparsed_payload - struct as filled in by the preparse() method. It will always be called - after preparse() returns successfully, even if instantiate() or update() - succeed. + otherwise it is unused. It cleans up anything attached to the description + and payload fields of the key_preparsed_payload struct as filled in by the + preparse() method. It will always be called after preparse() returns + successfully, even if instantiate() or update() succeed. (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); @@ -1197,6 +1201,11 @@ The structure has a number of fields, some of which are mandatory: It is safe to sleep in this method. + generic_key_instantiate() is provided to simply copy the data from + prep->payload.data[] to key->payload.data[], with RCU-safe assignment on + the first element. It will then clear prep->payload.data[] so that the + free_preparse method doesn't release the data. 
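[Editor's note: as a minimal sketch of how a key type might combine the four-word payload with generic_key_instantiate(), here is a hypothetical "demo" type; it is an illustration written against the new API, not text from the patch:]

    #include <linux/key-type.h>
    #include <linux/slab.h>

    /* Stash a copy of the raw data in the RCU-managed first payload word. */
    static int demo_preparse(struct key_preparsed_payload *prep)
    {
        void *copy = kmemdup(prep->data, prep->datalen, GFP_KERNEL);

        if (!copy)
            return -ENOMEM;
        prep->payload.data[0] = copy;
        prep->quotalen = prep->datalen;
        return 0;
    }

    /* Always called after a successful preparse.  If instantiation
     * succeeded, generic_key_instantiate() has already cleared
     * prep->payload.data[], so this kfree() harmlessly sees NULL. */
    static void demo_free_preparse(struct key_preparsed_payload *prep)
    {
        kfree(prep->payload.data[0]);
    }

    /* Release the copy when the key itself is destroyed. */
    static void demo_destroy(struct key *key)
    {
        kfree(key->payload.data[0]);
    }

    static struct key_type key_type_demo = {
        .name          = "demo",
        .preparse      = demo_preparse,
        .free_preparse = demo_free_preparse,
        .instantiate   = generic_key_instantiate,
        .destroy       = demo_destroy,
    };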
+ (*) int (*update)(struct key *key, const void *data, size_t datalen); diff --git a/crypto/asymmetric_keys/asymmetric_keys.h b/crypto/asymmetric_keys/asymmetric_keys.h index 3f5b537ab33e..1d450b580245 100644 --- a/crypto/asymmetric_keys/asymmetric_keys.h +++ b/crypto/asymmetric_keys/asymmetric_keys.h @@ -14,8 +14,3 @@ extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id); extern int __asymmetric_key_hex_to_key_id(const char *id, struct asymmetric_key_id *match_id, size_t hexlen); -static inline -const struct asymmetric_key_ids *asymmetric_key_ids(const struct key *key) -{ - return key->type_data.p[1]; -} diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index 1916680ad81b..9f2165b27d52 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -306,26 +306,35 @@ static int asymmetric_key_preparse(struct key_preparsed_payload *prep) return ret; } +/* + * Clean up the key ID list + */ +static void asymmetric_key_free_kids(struct asymmetric_key_ids *kids) +{ + int i; + + if (kids) { + for (i = 0; i < ARRAY_SIZE(kids->id); i++) + kfree(kids->id[i]); + kfree(kids); + } +} + /* * Clean up the preparse data */ static void asymmetric_key_free_preparse(struct key_preparsed_payload *prep) { - struct asymmetric_key_subtype *subtype = prep->type_data[0]; - struct asymmetric_key_ids *kids = prep->type_data[1]; - int i; + struct asymmetric_key_subtype *subtype = prep->payload.data[asym_subtype]; + struct asymmetric_key_ids *kids = prep->payload.data[asym_key_ids]; pr_devel("==>%s()\n", __func__); if (subtype) { - subtype->destroy(prep->payload[0]); + subtype->destroy(prep->payload.data[asym_crypto]); module_put(subtype->owner); } - if (kids) { - for (i = 0; i < ARRAY_SIZE(kids->id); i++) - kfree(kids->id[i]); - kfree(kids); - } + asymmetric_key_free_kids(kids); kfree(prep->description); } @@ -335,20 +344,19 @@ static void asymmetric_key_free_preparse(struct key_preparsed_payload *prep) static void asymmetric_key_destroy(struct key *key) { struct asymmetric_key_subtype *subtype = asymmetric_key_subtype(key); - struct asymmetric_key_ids *kids = key->type_data.p[1]; + struct asymmetric_key_ids *kids = key->payload.data[asym_key_ids]; + void *data = key->payload.data[asym_crypto]; + + key->payload.data[asym_crypto] = NULL; + key->payload.data[asym_subtype] = NULL; + key->payload.data[asym_key_ids] = NULL; if (subtype) { - subtype->destroy(key->payload.data); + subtype->destroy(data); module_put(subtype->owner); - key->type_data.p[0] = NULL; } - if (kids) { - kfree(kids->id[0]); - kfree(kids->id[1]); - kfree(kids); - key->type_data.p[1] = NULL; - } + asymmetric_key_free_kids(kids); } struct key_type key_type_asymmetric = { diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 81efccbe22d5..6db4c01c6503 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -49,7 +49,7 @@ EXPORT_SYMBOL_GPL(pkey_id_type_name); static void public_key_describe(const struct key *asymmetric_key, struct seq_file *m) { - struct public_key *key = asymmetric_key->payload.data; + struct public_key *key = asymmetric_key->payload.data[asym_crypto]; if (key) seq_printf(m, "%s.%s", @@ -112,7 +112,7 @@ EXPORT_SYMBOL_GPL(public_key_verify_signature); static int public_key_verify_signature_2(const struct key *key, const struct public_key_signature *sig) { - const struct public_key *pk = key->payload.data; + const struct public_key *pk = 
key->payload.data[asym_crypto]; return public_key_verify_signature(pk, sig); } diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c index 7525fd183574..9441240f7d2a 100644 --- a/crypto/asymmetric_keys/signature.c +++ b/crypto/asymmetric_keys/signature.c @@ -37,7 +37,7 @@ int verify_signature(const struct key *key, return -EINVAL; subtype = asymmetric_key_subtype(key); if (!subtype || - !key->payload.data) + !key->payload.data[0]) return -EINVAL; if (!subtype->verify_signature) return -ENOTSUPP; diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index 1de01eaec884..dbeed6018e63 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -11,6 +11,7 @@ #include <linux/time.h> #include <crypto/public_key.h> +#include <keys/asymmetric-type.h> struct x509_certificate { struct x509_certificate *next; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 197096632412..64d42981a8d7 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -266,7 +266,8 @@ static int x509_validate_trust(struct x509_certificate *cert, if (!IS_ERR(key)) { if (!use_builtin_keys || test_bit(KEY_FLAG_BUILTIN, &key->flags)) - ret = x509_check_signature(key->payload.data, cert); + ret = x509_check_signature(key->payload.data[asym_crypto], + cert); key_put(key); } return ret; @@ -352,9 +353,9 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) /* We're pinning the module by being linked against it */ __module_get(public_key_subtype.owner); - prep->type_data[0] = &public_key_subtype; - prep->type_data[1] = kids; - prep->payload[0] = cert->pub; + prep->payload.data[asym_subtype] = &public_key_subtype; + prep->payload.data[asym_key_ids] = kids; + prep->payload.data[asym_crypto] = cert->pub; prep->description = desc; prep->quotalen = 100; diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index f4cf200b3c76..6908080e9b6d 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -42,7 +42,7 @@ cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) goto error; /* attach the data */ - key->payload.data = payload; + key->payload.data[0] = payload; ret = 0; error: @@ -52,7 +52,7 @@ error: static void cifs_spnego_key_destroy(struct key *key) { - kfree(key->payload.data); + kfree(key->payload.data[0]); } @@ -167,7 +167,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) #ifdef CONFIG_CIFS_DEBUG2 if (cifsFYI && !IS_ERR(spnego_key)) { - struct cifs_spnego_msg *msg = spnego_key->payload.data; + struct cifs_spnego_msg *msg = spnego_key->payload.data[0]; cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024U, msg->secblob_len + msg->sesskey_len)); } diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1ea780bc6376..3f93125916bf 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -58,16 +58,15 @@ cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) * dereference payload.data!
*/ if (prep->datalen <= sizeof(key->payload)) { - key->payload.value = 0; - memcpy(&key->payload.value, prep->data, prep->datalen); - key->datalen = prep->datalen; - return 0; + key->payload.data[0] = NULL; + memcpy(&key->payload, prep->data, prep->datalen); + } else { + payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL); + if (!payload) + return -ENOMEM; + key->payload.data[0] = payload; } - payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL); - if (!payload) - return -ENOMEM; - key->payload.data = payload; key->datalen = prep->datalen; return 0; } @@ -76,7 +75,7 @@ static inline void cifs_idmap_key_destroy(struct key *key) { if (key->datalen > sizeof(key->payload)) - kfree(key->payload.data); + kfree(key->payload.data[0]); } static struct key_type cifs_idmap_key_type = { @@ -233,8 +232,8 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) * it could be. */ ksid = sidkey->datalen <= sizeof(sidkey->payload) ? - (struct cifs_sid *)&sidkey->payload.value : - (struct cifs_sid *)sidkey->payload.data; + (struct cifs_sid *)&sidkey->payload : + (struct cifs_sid *)sidkey->payload.data[0]; ksid_size = CIFS_SID_BASE_SIZE + (ksid->num_subauth * sizeof(__le32)); if (ksid_size > sidkey->datalen) { @@ -307,14 +306,14 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, if (sidtype == SIDOWNER) { kuid_t uid; uid_t id; - memcpy(&id, &sidkey->payload.value, sizeof(uid_t)); + memcpy(&id, &sidkey->payload.data[0], sizeof(uid_t)); uid = make_kuid(&init_user_ns, id); if (uid_valid(uid)) fuid = uid; } else { kgid_t gid; gid_t id; - memcpy(&id, &sidkey->payload.value, sizeof(gid_t)); + memcpy(&id, &sidkey->payload.data[0], sizeof(gid_t)); gid = make_kgid(&init_user_ns, id); if (gid_valid(gid)) fgid = gid; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 773f4dc77630..3f2228570d44 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2325,13 +2325,14 @@ static int cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) { int rc = 0; - char *desc, *delim, *payload; + const char *delim, *payload; + char *desc; ssize_t len; struct key *key; struct TCP_Server_Info *server = ses->server; struct sockaddr_in *sa; struct sockaddr_in6 *sa6; - struct user_key_payload *upayload; + const struct user_key_payload *upayload; desc = kmalloc(CIFSCREDS_DESC_SIZE, GFP_KERNEL); if (!desc) @@ -2374,14 +2375,14 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) } down_read(&key->sem); - upayload = key->payload.data; + upayload = user_key_payload(key); if (IS_ERR_OR_NULL(upayload)) { rc = upayload ? 
PTR_ERR(upayload) : -EINVAL; goto out_key_put; } /* find first : in payload */ - payload = (char *)upayload->data; + payload = upayload->data; delim = strnchr(payload, upayload->datalen, ':'); cifs_dbg(FYI, "payload=%s\n", payload); if (!delim) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index bce6fdcd5d48..59727e32ed0f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -988,7 +988,7 @@ sess_auth_kerberos(struct sess_data *sess_data) goto out; } - msg = spnego_key->payload.data; + msg = spnego_key->payload.data[0]; /* * check version field to make sure that cifs.upcall is * sending us a response in an expected form diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ce83e2edbe0a..52d14c9cf052 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -660,7 +660,7 @@ ssetup_ntlmssp_authenticate: goto ssetup_exit; } - msg = spnego_key->payload.data; + msg = spnego_key->payload.data[0]; /* * check version field to make sure that cifs.upcall is * sending us a response in an expected form diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 5ba029e627cc..7b39260c7bba 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -86,7 +86,7 @@ ecryptfs_get_encrypted_key_payload_data(struct key *key) { if (key->type == &key_type_encrypted) return (struct ecryptfs_auth_tok *) - (&((struct encrypted_key_payload *)key->payload.data)->payload_data); + (&((struct encrypted_key_payload *)key->payload.data[0])->payload_data); else return NULL; } @@ -117,8 +117,7 @@ ecryptfs_get_key_payload_data(struct key *key) auth_tok = ecryptfs_get_encrypted_key_payload_data(key); if (!auth_tok) - return (struct ecryptfs_auth_tok *) - (((struct user_key_payload *)key->payload.data)->data); + return (struct ecryptfs_auth_tok *)user_key_payload(key)->data; else return auth_tok; } diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 1d510c11b100..5c52c79dea46 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -121,7 +121,7 @@ int _ext4_get_encryption_info(struct inode *inode) struct key *keyring_key = NULL; struct ext4_encryption_key *master_key; struct ext4_encryption_context ctx; - struct user_key_payload *ukp; + const struct user_key_payload *ukp; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct crypto_ablkcipher *ctfm; const char *cipher_str; @@ -209,7 +209,7 @@ retry: } crypt_info->ci_keyring_key = keyring_key; BUG_ON(keyring_key->type != &key_type_logon); - ukp = ((struct user_key_payload *)keyring_key->payload.data); + ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct ext4_encryption_key)) { res = -EINVAL; goto out; diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 9f77de2ef317..5de2d866a25c 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -122,7 +122,7 @@ int _f2fs_get_encryption_info(struct inode *inode) struct key *keyring_key = NULL; struct f2fs_encryption_key *master_key; struct f2fs_encryption_context ctx; - struct user_key_payload *ukp; + const struct user_key_payload *ukp; struct crypto_ablkcipher *ctfm; const char *cipher_str; char raw_key[F2FS_MAX_KEY_SIZE]; @@ -199,7 +199,7 @@ retry: } crypt_info->ci_keyring_key = keyring_key; BUG_ON(keyring_key->type != &key_type_logon); - ukp = ((struct user_key_payload *)keyring_key->payload.data); + ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { res = -EINVAL; goto out; diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 51dde817e1f2..6b028b7c4250 100644 --- 
a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -316,7 +316,7 @@ static const struct seq_operations fscache_objlist_ops = { static void fscache_objlist_config(struct fscache_objlist_data *data) { #ifdef CONFIG_KEYS - struct user_key_payload *confkey; + const struct user_key_payload *confkey; unsigned long config; struct key *key; const char *buf; @@ -329,7 +329,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) config = 0; rcu_read_lock(); - confkey = key->payload.data; + confkey = user_key_payload(key); buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 2e4902203c35..5ba22c6b0ffa 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -297,7 +297,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, { const struct cred *saved_cred; struct key *rkey; - struct user_key_payload *payload; + const struct user_key_payload *payload; ssize_t ret; saved_cred = override_creds(id_resolver_cache); @@ -316,7 +316,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, if (ret < 0) goto out_up; - payload = rcu_dereference(rkey->payload.rcudata); + payload = user_key_payload(rkey); if (IS_ERR_OR_NULL(payload)) { ret = PTR_ERR(payload); goto out_up; diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index 067c242b1e15..cc2516df0efa 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -15,7 +15,6 @@ #define _LINUX_PUBLIC_KEY_H #include -#include #include enum pkey_algo { diff --git a/include/keys/asymmetric-subtype.h b/include/keys/asymmetric-subtype.h index 4b840e822209..4915d40d3c3c 100644 --- a/include/keys/asymmetric-subtype.h +++ b/include/keys/asymmetric-subtype.h @@ -49,7 +49,7 @@ struct asymmetric_key_subtype { static inline struct asymmetric_key_subtype *asymmetric_key_subtype(const struct key *key) { - return key->type_data.p[0]; + return key->payload.data[asym_subtype]; } #endif /* _KEYS_ASYMMETRIC_SUBTYPE_H */ diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h index c0754abb2f56..59c1df9cf922 100644 --- a/include/keys/asymmetric-type.h +++ b/include/keys/asymmetric-type.h @@ -18,6 +18,16 @@ extern struct key_type key_type_asymmetric; +/* + * The key payload is four words. The asymmetric-type key uses them as + * follows: + */ +enum asymmetric_payload_bits { + asym_crypto, + asym_subtype, + asym_key_ids, +}; + /* * Identifiers for an asymmetric key ID. We have three ways of looking up a * key derived from an X.509 certificate: @@ -58,6 +68,11 @@ extern struct asymmetric_key_id *asymmetric_key_generate_id(const void *val_1, size_t len_1, const void *val_2, size_t len_2); +static inline +const struct asymmetric_key_ids *asymmetric_key_ids(const struct key *key) +{ + return key->payload.data[asym_key_ids]; +} /* * The payload is at the discretion of the subtype. 
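[Editor's note: for illustration, a hypothetical helper (not from the patch) showing how the enum-indexed payload slots of an asymmetric key read after this change:]

    #include <linux/key.h>
    #include <linux/printk.h>
    #include <keys/asymmetric-type.h>
    #include <keys/asymmetric-subtype.h>

    /* Walk the three asymmetric-key payload slots by their enum names
     * instead of the old type_data/payload fields. */
    static void demo_dump_asym_key(const struct key *key)
    {
        const struct asymmetric_key_ids *kids = asymmetric_key_ids(key);
        struct asymmetric_key_subtype *subtype = asymmetric_key_subtype(key);

        if (subtype)
            pr_info("subtype: %s\n", subtype->name);
        if (kids && kids->id[0])
            pr_info("primary key ID present\n");
        if (key->payload.data[asym_crypto])
            pr_info("crypto payload attached\n");
    }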
diff --git a/include/keys/user-type.h b/include/keys/user-type.h index cebefb069c44..c56fef40f53e 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -15,6 +15,8 @@ #include #include +#ifdef CONFIG_KEYS + /*****************************************************************************/ /* * the payload for a key of type "user" or "logon" @@ -46,5 +48,11 @@ extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char __user *buffer, size_t buflen); +static inline const struct user_key_payload *user_key_payload(const struct key *key) +{ + return (struct user_key_payload *)rcu_dereference_key(key); +} + +#endif /* CONFIG_KEYS */ #endif /* _KEYS_USER_TYPE_H */ diff --git a/include/linux/key-type.h b/include/linux/key-type.h index ff9f1d394235..7463355a198b 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -40,8 +40,7 @@ struct key_construction { */ struct key_preparsed_payload { char *description; /* Proposed key description (or NULL) */ - void *type_data[2]; /* Private key-type data */ - void *payload[2]; /* Proposed payload */ + union key_payload payload; /* Proposed payload */ const void *data; /* Raw data */ size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ diff --git a/include/linux/key.h b/include/linux/key.h index e1d4715f3222..66f705243985 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -89,6 +89,11 @@ struct keyring_index_key { size_t desc_len; }; +union key_payload { + void __rcu *rcu_data0; + void *data[4]; +}; + /*****************************************************************************/ /* * key reference with possession attribute handling @@ -186,28 +191,18 @@ struct key { }; }; - /* type specific data - * - this is used by the keyring type to index the name - */ - union { - struct list_head link; - unsigned long x[2]; - void *p[2]; - int reject_error; - } type_data; - /* key data * - this is used to hold the data actually used in cryptography or * whatever */ union { - union { - unsigned long value; - void __rcu *rcudata; - void *data; - void *data2[2]; - } payload; - struct assoc_array keys; + union key_payload payload; + struct { + /* Keyring bits */ + struct list_head name_link; + struct assoc_array keys; + }; + int reject_error; }; }; @@ -336,12 +331,12 @@ static inline bool key_is_instantiated(const struct key *key) } #define rcu_dereference_key(KEY) \ - (rcu_dereference_protected((KEY)->payload.rcudata, \ + (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) #define rcu_assign_keypointer(KEY, PAYLOAD) \ do { \ - rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \ + rcu_assign_pointer((KEY)->payload.rcu_data0, (PAYLOAD)); \ } while (0) #ifdef CONFIG_SYSCTL diff --git a/kernel/module_signing.c b/kernel/module_signing.c index bd62f5cda746..6528a79d998d 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include "module-internal.h" diff --git a/lib/digsig.c b/lib/digsig.c index ae05ea393fc8..07be6c1ef4e2 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -79,12 +79,13 @@ static int digsig_verify_rsa(struct key *key, unsigned char *out1 = NULL; const char *m; MPI in = NULL, res = NULL, pkey[2]; - uint8_t *p, *datap, *endp; - struct user_key_payload *ukp; + uint8_t *p, *datap; + const uint8_t *endp; + const struct user_key_payload *ukp; struct pubkey_hdr *pkh; down_read(&key->sem); - ukp = 
key->payload.data; + ukp = user_key_payload(key); if (ukp->datalen < sizeof(*pkh)) goto err1; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 54a00d66509e..78f098a20796 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -318,7 +318,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) { goto out; } - ckey = ukey->payload.data; + ckey = ukey->payload.data[0]; err = ceph_crypto_key_clone(dst, ckey); if (err) goto out_key; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 4440edcce0d6..42e8649c6e79 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -537,7 +537,7 @@ static int ceph_key_preparse(struct key_preparsed_payload *prep) if (ret < 0) goto err_ckey; - prep->payload[0] = ckey; + prep->payload.data[0] = ckey; prep->quotalen = datalen; return 0; @@ -549,14 +549,14 @@ err: static void ceph_key_free_preparse(struct key_preparsed_payload *prep) { - struct ceph_crypto_key *ckey = prep->payload[0]; + struct ceph_crypto_key *ckey = prep->payload.data[0]; ceph_crypto_key_destroy(ckey); kfree(ckey); } static void ceph_key_destroy(struct key *key) { - struct ceph_crypto_key *ckey = key->payload.data; + struct ceph_crypto_key *ckey = key->payload.data[0]; ceph_crypto_key_destroy(ckey); kfree(ckey); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 31cd4fd75486..c79b85eb4d4c 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -122,7 +122,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) goto bad_option_value; kdebug("dns error no. = %lu", derrno); - prep->type_data[0] = ERR_PTR(-derrno); + prep->payload.data[dns_key_error] = ERR_PTR(-derrno); continue; } @@ -137,8 +137,8 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) /* don't cache the result if we're caching an error saying there's no * result */ - if (prep->type_data[0]) { - kleave(" = 0 [h_error %ld]", PTR_ERR(prep->type_data[0])); + if (prep->payload.data[dns_key_error]) { + kleave(" = 0 [h_error %ld]", PTR_ERR(prep->payload.data[dns_key_error])); return 0; } @@ -155,7 +155,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) memcpy(upayload->data, data, result_len); upayload->data[result_len] = '\0'; - prep->payload[0] = upayload; + prep->payload.data[dns_key_data] = upayload; kleave(" = 0"); return 0; } @@ -167,7 +167,7 @@ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep) { pr_devel("==>%s()\n", __func__); - kfree(prep->payload[0]); + kfree(prep->payload.data[dns_key_data]); } /* @@ -223,10 +223,10 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data) */ static void dns_resolver_describe(const struct key *key, struct seq_file *m) { - int err = key->type_data.x[0]; - seq_puts(m, key->description); if (key_is_instantiated(key)) { + int err = PTR_ERR(key->payload.data[dns_key_error]); + if (err) seq_printf(m, ": %d", err); else @@ -241,8 +241,10 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) static long dns_resolver_read(const struct key *key, char __user *buffer, size_t buflen) { - if (key->type_data.x[0]) - return key->type_data.x[0]; + int err = PTR_ERR(key->payload.data[dns_key_error]); + + if (err) + return err; return user_read(key, buffer, buflen); } diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 39d2c39bdf87..4677b6fa6dda 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -70,7 +70,7 @@ int dns_query(const char *type, const char *name, size_t 
namelen, const char *options, char **_result, time_t *_expiry) { struct key *rkey; - struct user_key_payload *upayload; + const struct user_key_payload *upayload; const struct cred *saved_cred; size_t typelen, desclen; char *desc, *cp; @@ -137,12 +137,11 @@ int dns_query(const char *type, const char *name, size_t namelen, goto put; /* If the DNS server gave an error, return that to the caller */ - ret = rkey->type_data.x[0]; + ret = PTR_ERR(rkey->payload.data[dns_key_error]); if (ret) goto put; - upayload = rcu_dereference_protected(rkey->payload.data, - lockdep_is_held(&rkey->sem)); + upayload = user_key_payload(rkey); len = upayload->datalen; ret = -ENOMEM; diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h index 7af1ed39c009..0c570d40e4d6 100644 --- a/net/dns_resolver/internal.h +++ b/net/dns_resolver/internal.h @@ -22,6 +22,14 @@ #include #include +/* + * Layout of key payload words. + */ +enum { + dns_key_data, + dns_key_error, +}; + /* * dns_key.c */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 25d60ed15284..1f8a144a5dc2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -305,7 +305,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, if (!key) key = rx->key; - if (key && !key->payload.data) + if (key && !key->payload.data[0]) key = NULL; /* a no-security key */ bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp); diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index db0f39f5ef96..da3cc09f683e 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -148,10 +148,10 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, token->kad->ticket[6], token->kad->ticket[7]); /* count the number of tokens attached */ - prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); + prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1); /* attach the data */ - for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; + for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; *pptoken; pptoken = &(*pptoken)->next) continue; @@ -522,7 +522,7 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, goto inval; /* attach the payload */ - for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; + for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; *pptoken; pptoken = &(*pptoken)->next) continue; @@ -764,10 +764,10 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep) memcpy(&token->kad->ticket, v1->ticket, v1->ticket_length); /* count the number of tokens attached */ - prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); + prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1); /* attach the data */ - pp = (struct rxrpc_key_token **)&prep->payload[0]; + pp = (struct rxrpc_key_token **)&prep->payload.data[0]; while (*pp) pp = &(*pp)->next; *pp = token; @@ -814,7 +814,7 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token) */ static void rxrpc_free_preparse(struct key_preparsed_payload *prep) { - rxrpc_free_token_list(prep->payload[0]); + rxrpc_free_token_list(prep->payload.data[0]); } /* @@ -831,7 +831,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) if (prep->datalen != 8) return -EINVAL; - memcpy(&prep->type_data, prep->data, 8); + memcpy(&prep->payload.data[2], prep->data, 8); ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(ci)) { @@ -842,7 +842,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) if 
(crypto_blkcipher_setkey(ci, prep->data, 8) < 0) BUG(); - prep->payload[0] = ci; + prep->payload.data[0] = ci; _leave(" = 0"); return 0; } @@ -852,8 +852,8 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) */ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) { - if (prep->payload[0]) - crypto_free_blkcipher(prep->payload[0]); + if (prep->payload.data[0]) + crypto_free_blkcipher(prep->payload.data[0]); } /* @@ -861,7 +861,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) */ static void rxrpc_destroy(struct key *key) { - rxrpc_free_token_list(key->payload.data); + rxrpc_free_token_list(key->payload.data[0]); } /* @@ -869,9 +869,9 @@ static void rxrpc_destroy(struct key *key) */ static void rxrpc_destroy_s(struct key *key) { - if (key->payload.data) { - crypto_free_blkcipher(key->payload.data); - key->payload.data = NULL; + if (key->payload.data[0]) { + crypto_free_blkcipher(key->payload.data[0]); + key->payload.data[0] = NULL; } } @@ -1070,7 +1070,7 @@ static long rxrpc_read(const struct key *key, size += 1 * 4; /* token count */ ntoks = 0; - for (token = key->payload.data; token; token = token->next) { + for (token = key->payload.data[0]; token; token = token->next) { toksize = 4; /* sec index */ switch (token->security_index) { @@ -1163,7 +1163,7 @@ static long rxrpc_read(const struct key *key, ENCODE(ntoks); tok = 0; - for (token = key->payload.data; token; token = token->next) { + for (token = key->payload.data[0]; token; token = token->next) { toksize = toksizes[tok++]; ENCODE(toksize); oldxdr = xdr; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index c0042807bfc6..a40d3afe93b7 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -158,7 +158,7 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans, service_id = htons(srx->srx_service); } key = rx->key; - if (key && !rx->key->payload.data) + if (key && !rx->key->payload.data[0]) key = NULL; bundle = rxrpc_get_bundle(rx, trans, key, service_id, GFP_KERNEL); diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c index 49b3cc31ee1f..8334474eb26c 100644 --- a/net/rxrpc/ar-security.c +++ b/net/rxrpc/ar-security.c @@ -137,9 +137,9 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) if (ret < 0) return ret; - if (!key->payload.data) + token = key->payload.data[0]; + if (!token) return -EKEYREJECTED; - token = key->payload.data; sec = rxrpc_security_lookup(token->security_index); if (!sec) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f226709ebd8f..d7a9ab5a9d9c 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -67,7 +67,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key)); - token = conn->key->payload.data; + token = conn->key->payload.data[0]; conn->security_ix = token->security_index; ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); @@ -125,7 +125,7 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn) if (!conn->key) return; - token = conn->key->payload.data; + token = conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); desc.tfm = conn->cipher; @@ -221,7 +221,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, rxkhdr.checksum = 0; /* encrypt from the session key */ - token = call->conn->key->payload.data; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); desc.tfm = 
call->conn->cipher; desc.info = iv.x; @@ -433,7 +433,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, skb_to_sgvec(skb, sg, 0, skb->len); /* decrypt from the session key */ - token = call->conn->key->payload.data; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); desc.tfm = call->conn->cipher; desc.info = iv.x; @@ -780,7 +780,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, if (conn->security_level < min_level) goto protocol_error; - token = conn->key->payload.data; + token = conn->key->payload.data[0]; /* build the response packet */ memset(&resp, 0, sizeof(resp)); @@ -848,12 +848,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, } } - ASSERT(conn->server_key->payload.data != NULL); + ASSERT(conn->server_key->payload.data[0] != NULL); ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); - memcpy(&iv, &conn->server_key->type_data, sizeof(iv)); + memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); - desc.tfm = conn->server_key->payload.data; + desc.tfm = conn->server_key->payload.data[0]; desc.info = iv.x; desc.flags = 0; diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 159ef3ea4130..461f8d891579 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -247,7 +247,7 @@ int evm_init_key(void) return -ENOENT; down_read(&evm_key->sem); - ekp = evm_key->payload.data; + ekp = evm_key->payload.data[0]; if (ekp->decrypted_datalen > MAX_KEY_SIZE) { rc = -EINVAL; goto out; diff --git a/security/keys/big_key.c b/security/keys/big_key.c index b6adb94f6d52..907c1522ee46 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -20,6 +20,16 @@ MODULE_LICENSE("GPL"); +/* + * Layout of key payload words. + */ +enum { + big_key_data, + big_key_path, + big_key_path_2nd_part, + big_key_len, +}; + /* * If the data is under this limit, there's no point creating a shm file to * hold it as the permanently resident metadata for the shmem fs will be at @@ -47,7 +57,7 @@ struct key_type key_type_big_key = { */ int big_key_preparse(struct key_preparsed_payload *prep) { - struct path *path = (struct path *)&prep->payload; + struct path *path = (struct path *)&prep->payload.data[big_key_path]; struct file *file; ssize_t written; size_t datalen = prep->datalen; @@ -60,7 +70,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) /* Set an arbitrary quota */ prep->quotalen = 16; - prep->type_data[1] = (void *)(unsigned long)datalen; + prep->payload.data[big_key_len] = (void *)(unsigned long)datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { /* Create a shmem file to store the data in. 
This will permit the data @@ -94,7 +104,8 @@ int big_key_preparse(struct key_preparsed_payload *prep) if (!data) return -ENOMEM; - prep->payload[0] = memcpy(data, prep->data, prep->datalen); + prep->payload.data[big_key_data] = data; + memcpy(data, prep->data, prep->datalen); } return 0; @@ -110,10 +121,10 @@ error: void big_key_free_preparse(struct key_preparsed_payload *prep) { if (prep->datalen > BIG_KEY_FILE_THRESHOLD) { - struct path *path = (struct path *)&prep->payload; + struct path *path = (struct path *)&prep->payload.data[big_key_path]; path_put(path); } else { - kfree(prep->payload[0]); + kfree(prep->payload.data[big_key_data]); } } @@ -123,11 +134,12 @@ void big_key_free_preparse(struct key_preparsed_payload *prep) */ void big_key_revoke(struct key *key) { - struct path *path = (struct path *)&key->payload.data2; + struct path *path = (struct path *)&key->payload.data[big_key_path]; /* clear the quota */ key_payload_reserve(key, 0); - if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) + if (key_is_instantiated(key) && + (size_t)key->payload.data[big_key_len] > BIG_KEY_FILE_THRESHOLD) vfs_truncate(path, 0); } @@ -136,14 +148,16 @@ void big_key_revoke(struct key *key) */ void big_key_destroy(struct key *key) { - if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) { - struct path *path = (struct path *)&key->payload.data2; + size_t datalen = (size_t)key->payload.data[big_key_len]; + + if (datalen) { + struct path *path = (struct path *)&key->payload.data[big_key_path]; path_put(path); path->mnt = NULL; path->dentry = NULL; } else { - kfree(key->payload.data); - key->payload.data = NULL; + kfree(key->payload.data[big_key_data]); + key->payload.data[big_key_data] = NULL; } } @@ -152,12 +166,12 @@ void big_key_destroy(struct key *key) */ void big_key_describe(const struct key *key, struct seq_file *m) { - unsigned long datalen = key->type_data.x[1]; + size_t datalen = (size_t)key->payload.data[big_key_len]; seq_puts(m, key->description); if (key_is_instantiated(key)) - seq_printf(m, ": %lu [%s]", + seq_printf(m, ": %zu [%s]", datalen, datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); } @@ -168,14 +182,14 @@ void big_key_describe(const struct key *key, struct seq_file *m) */ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) { - unsigned long datalen = key->type_data.x[1]; + size_t datalen = (size_t)key->payload.data[big_key_len]; long ret; if (!buffer || buflen < datalen) return datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { - struct path *path = (struct path *)&key->payload.data2; + struct path *path = (struct path *)&key->payload.data[big_key_path]; struct file *file; loff_t pos; @@ -190,7 +204,8 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) ret = -EIO; } else { ret = datalen; - if (copy_to_user(buffer, key->payload.data, datalen) != 0) + if (copy_to_user(buffer, key->payload.data[big_key_data], + datalen) != 0) ret = -EFAULT; } diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 7bed4ad7cd76..927db9f35ad6 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -303,10 +303,10 @@ out: * * Use a user provided key to encrypt/decrypt an encrypted-key. 
*/ -static struct key *request_user_key(const char *master_desc, u8 **master_key, +static struct key *request_user_key(const char *master_desc, const u8 **master_key, size_t *master_keylen) { - struct user_key_payload *upayload; + const struct user_key_payload *upayload; struct key *ukey; ukey = request_key(&key_type_user, master_desc, NULL); @@ -314,7 +314,7 @@ static struct key *request_user_key(const char *master_desc, u8 **master_key, goto error; down_read(&ukey->sem); - upayload = ukey->payload.data; + upayload = user_key_payload(ukey); *master_key = upayload->data; *master_keylen = upayload->datalen; error: @@ -426,7 +426,7 @@ static int init_blkcipher_desc(struct blkcipher_desc *desc, const u8 *key, } static struct key *request_master_key(struct encrypted_key_payload *epayload, - u8 **master_key, size_t *master_keylen) + const u8 **master_key, size_t *master_keylen) { struct key *mkey = NULL; @@ -653,7 +653,7 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload, { struct key *mkey; u8 derived_key[HASH_SIZE]; - u8 *master_key; + const u8 *master_key; u8 *hmac; const char *hex_encoded_data; unsigned int encrypted_datalen; @@ -837,7 +837,7 @@ static void encrypted_rcu_free(struct rcu_head *rcu) */ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) { - struct encrypted_key_payload *epayload = key->payload.data; + struct encrypted_key_payload *epayload = key->payload.data[0]; struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; @@ -896,7 +896,7 @@ static long encrypted_read(const struct key *key, char __user *buffer, { struct encrypted_key_payload *epayload; struct key *mkey; - u8 *master_key; + const u8 *master_key; size_t master_keylen; char derived_key[HASH_SIZE]; char *ascii_buf; @@ -957,13 +957,13 @@ out: */ static void encrypted_destroy(struct key *key) { - struct encrypted_key_payload *epayload = key->payload.data; + struct encrypted_key_payload *epayload = key->payload.data[0]; if (!epayload) return; memset(epayload->decrypted_data, 0, epayload->decrypted_datalen); - kfree(key->payload.data); + kfree(key->payload.data[0]); } struct key_type key_type_encrypted = { diff --git a/security/keys/encrypted-keys/encrypted.h b/security/keys/encrypted-keys/encrypted.h index 8136a2d44c63..47802c0de735 100644 --- a/security/keys/encrypted-keys/encrypted.h +++ b/security/keys/encrypted-keys/encrypted.h @@ -5,10 +5,10 @@ #if defined(CONFIG_TRUSTED_KEYS) || \ (defined(CONFIG_TRUSTED_KEYS_MODULE) && defined(CONFIG_ENCRYPTED_KEYS_MODULE)) extern struct key *request_trusted_key(const char *trusted_desc, - u8 **master_key, size_t *master_keylen); + const u8 **master_key, size_t *master_keylen); #else static inline struct key *request_trusted_key(const char *trusted_desc, - u8 **master_key, + const u8 **master_key, size_t *master_keylen) { return ERR_PTR(-EOPNOTSUPP); diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c index 013f7e5d3a2f..b5b4812dbc87 100644 --- a/security/keys/encrypted-keys/masterkey_trusted.c +++ b/security/keys/encrypted-keys/masterkey_trusted.c @@ -29,7 +29,7 @@ * data, trusted key type data is not visible decrypted from userspace. 
*/ struct key *request_trusted_key(const char *trusted_desc, - u8 **master_key, size_t *master_keylen) + const u8 **master_key, size_t *master_keylen) { struct trusted_key_payload *tpayload; struct key *tkey; @@ -39,7 +39,7 @@ struct key *request_trusted_key(const char *trusted_desc, goto error; down_read(&tkey->sem); - tpayload = tkey->payload.data; + tpayload = tkey->payload.data[0]; *master_key = tpayload->key; *master_keylen = tpayload->key_len; error: diff --git a/security/keys/key.c b/security/keys/key.c index c0478465d1ac..ab7997ded725 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -554,7 +554,7 @@ int key_reject_and_link(struct key *key, if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); - key->type_data.reject_error = -error; + key->reject_error = -error; smp_wmb(); set_bit(KEY_FLAG_NEGATIVE, &key->flags); set_bit(KEY_FLAG_INSTANTIATED, &key->flags); @@ -1046,14 +1046,14 @@ int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep) ret = key_payload_reserve(key, prep->quotalen); if (ret == 0) { - key->type_data.p[0] = prep->type_data[0]; - key->type_data.p[1] = prep->type_data[1]; - rcu_assign_keypointer(key, prep->payload[0]); - key->payload.data2[1] = prep->payload[1]; - prep->type_data[0] = NULL; - prep->type_data[1] = NULL; - prep->payload[0] = NULL; - prep->payload[1] = NULL; + rcu_assign_keypointer(key, prep->payload.data[0]); + key->payload.data[1] = prep->payload.data[1]; + key->payload.data[2] = prep->payload.data[2]; + key->payload.data[3] = prep->payload.data[3]; + prep->payload.data[0] = NULL; + prep->payload.data[1] = NULL; + prep->payload.data[2] = NULL; + prep->payload.data[3] = NULL; } pr_devel("<==%s() = %d\n", __func__, ret); return ret; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 6110fa498494..fb111eafcb89 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1027,7 +1027,7 @@ long keyctl_instantiate_key_common(key_serial_t id, if (!instkey) goto error; - rka = instkey->payload.data; + rka = instkey->payload.data[0]; if (rka->target_key->serial != id) goto error; @@ -1194,7 +1194,7 @@ long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error, if (!instkey) goto error; - rka = instkey->payload.data; + rka = instkey->payload.data[0]; if (rka->target_key->serial != id) goto error; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index d33437007ad2..f931ccfeefb0 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -118,7 +118,7 @@ static void keyring_publish_name(struct key *keyring) if (!keyring_name_hash[bucket].next) INIT_LIST_HEAD(&keyring_name_hash[bucket]); - list_add_tail(&keyring->type_data.link, + list_add_tail(&keyring->name_link, &keyring_name_hash[bucket]); write_unlock(&keyring_name_lock); @@ -387,9 +387,9 @@ static void keyring_destroy(struct key *keyring) if (keyring->description) { write_lock(&keyring_name_lock); - if (keyring->type_data.link.next != NULL && - !list_empty(&keyring->type_data.link)) - list_del(&keyring->type_data.link); + if (keyring->name_link.next != NULL && + !list_empty(&keyring->name_link)) + list_del(&keyring->name_link); write_unlock(&keyring_name_lock); } @@ -572,7 +572,7 @@ static int keyring_search_iterator(const void *object, void *iterator_data) /* we set a different error code if we pass a negative key */ if (kflags & (1 << KEY_FLAG_NEGATIVE)) { smp_rmb(); - ctx->result = ERR_PTR(key->type_data.reject_error); + 
ctx->result = ERR_PTR(key->reject_error); kleave(" = %d [neg]", ctx->skipped_ret); goto skipped; } @@ -990,7 +990,7 @@ struct key *find_keyring_by_name(const char *name, bool skip_perm_check) * that's readable and that hasn't been revoked */ list_for_each_entry(keyring, &keyring_name_hash[bucket], - type_data.link + name_link ) { if (!kuid_has_mapping(current_user_ns(), keyring->user->uid)) continue; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 43b4cddbf2b3..a3f85d2a00bb 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -457,7 +457,7 @@ key_ref_t search_process_keyrings(struct keyring_search_context *ctx) down_read(&cred->request_key_auth->sem); if (key_validate(ctx->cred->request_key_auth) == 0) { - rka = ctx->cred->request_key_auth->payload.data; + rka = ctx->cred->request_key_auth->payload.data[0]; ctx->cred = rka->cred; key_ref = search_process_keyrings(ctx); @@ -647,7 +647,7 @@ try_again: key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { - rka = ctx.cred->request_key_auth->payload.data; + rka = ctx.cred->request_key_auth->payload.data[0]; key = rka->dest_keyring; __key_get(key); } diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 486ef6fa393b..95d5cfc172c6 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -271,7 +271,7 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) if (cred->request_key_auth) { authkey = cred->request_key_auth; down_read(&authkey->sem); - rka = authkey->payload.data; + rka = authkey->payload.data[0]; if (!test_bit(KEY_FLAG_REVOKED, &authkey->flags)) dest_keyring = @@ -593,7 +593,7 @@ int wait_for_key_construction(struct key *key, bool intr) return -ERESTARTSYS; if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { smp_rmb(); - return key->type_data.reject_error; + return key->reject_error; } return key_validate(key); } diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 5d672f7580dd..4f0f112fe276 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -59,7 +59,7 @@ static void request_key_auth_free_preparse(struct key_preparsed_payload *prep) static int request_key_auth_instantiate(struct key *key, struct key_preparsed_payload *prep) { - key->payload.data = (struct request_key_auth *)prep->data; + key->payload.data[0] = (struct request_key_auth *)prep->data; return 0; } @@ -69,7 +69,7 @@ static int request_key_auth_instantiate(struct key *key, static void request_key_auth_describe(const struct key *key, struct seq_file *m) { - struct request_key_auth *rka = key->payload.data; + struct request_key_auth *rka = key->payload.data[0]; seq_puts(m, "key:"); seq_puts(m, key->description); @@ -84,7 +84,7 @@ static void request_key_auth_describe(const struct key *key, static long request_key_auth_read(const struct key *key, char __user *buffer, size_t buflen) { - struct request_key_auth *rka = key->payload.data; + struct request_key_auth *rka = key->payload.data[0]; size_t datalen; long ret; @@ -110,7 +110,7 @@ static long request_key_auth_read(const struct key *key, */ static void request_key_auth_revoke(struct key *key) { - struct request_key_auth *rka = key->payload.data; + struct request_key_auth *rka = key->payload.data[0]; kenter("{%d}", key->serial); @@ -125,7 +125,7 @@ static void request_key_auth_revoke(struct key *key) */ static void request_key_auth_destroy(struct key *key) { - struct request_key_auth *rka = key->payload.data; + struct request_key_auth *rka = 
key->payload.data[0]; kenter("{%d}", key->serial); @@ -179,7 +179,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) goto auth_key_revoked; - irka = cred->request_key_auth->payload.data; + irka = cred->request_key_auth->payload.data[0]; rka->cred = get_cred(irka->cred); rka->pid = irka->pid; diff --git a/security/keys/trusted.c b/security/keys/trusted.c index d3633cf17c7d..903dace648a1 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1007,7 +1007,7 @@ static void trusted_rcu_free(struct rcu_head *rcu) */ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) { - struct trusted_key_payload *p = key->payload.data; + struct trusted_key_payload *p = key->payload.data[0]; struct trusted_key_payload *new_p; struct trusted_key_options *new_o; size_t datalen = prep->datalen; @@ -1114,12 +1114,12 @@ static long trusted_read(const struct key *key, char __user *buffer, */ static void trusted_destroy(struct key *key) { - struct trusted_key_payload *p = key->payload.data; + struct trusted_key_payload *p = key->payload.data[0]; if (!p) return; memset(p->key, 0, p->key_len); - kfree(key->payload.data); + kfree(key->payload.data[0]); } struct key_type key_type_trusted = { diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 36b47bbd3d8c..28cb30f80256 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -74,7 +74,7 @@ int user_preparse(struct key_preparsed_payload *prep) /* attach the data */ prep->quotalen = datalen; - prep->payload[0] = upayload; + prep->payload.data[0] = upayload; upayload->datalen = datalen; memcpy(upayload->data, prep->data, datalen); return 0; @@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(user_preparse); */ void user_free_preparse(struct key_preparsed_payload *prep) { - kfree(prep->payload[0]); + kfree(prep->payload.data[0]); } EXPORT_SYMBOL_GPL(user_free_preparse); @@ -120,7 +120,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) if (ret == 0) { /* attach the new data, displacing the old */ - zap = key->payload.data; + zap = key->payload.data[0]; rcu_assign_keypointer(key, upayload); key->expiry = 0; } @@ -140,7 +140,7 @@ EXPORT_SYMBOL_GPL(user_update); */ void user_revoke(struct key *key) { - struct user_key_payload *upayload = key->payload.data; + struct user_key_payload *upayload = key->payload.data[0]; /* clear the quota */ key_payload_reserve(key, 0); @@ -158,7 +158,7 @@ EXPORT_SYMBOL(user_revoke); */ void user_destroy(struct key *key) { - struct user_key_payload *upayload = key->payload.data; + struct user_key_payload *upayload = key->payload.data[0]; kfree(upayload); } @@ -183,10 +183,10 @@ EXPORT_SYMBOL_GPL(user_describe); */ long user_read(const struct key *key, char __user *buffer, size_t buflen) { - struct user_key_payload *upayload; + const struct user_key_payload *upayload; long ret; - upayload = rcu_dereference_key(key); + upayload = user_key_payload(key); ret = upayload->datalen; /* we can return the data as is */ -- cgit v1.2.3 From f2e0a53271a439a2ab142645867f0cde45b2b3cd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 19 May 2015 22:19:33 +0200 Subject: clk: Add a basic multiplier clock Some clocks are using a multiplier component, however, unlike their mux, gate or divider counterpart, these factors don't have a basic clock implementation. 
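As an illustration of what that means in practice, each platform today has to open-code the multiplier handling itself, along the lines of the following minimal sketch (the foo_pll naming and the register layout are invented for the example, not taken from any in-tree driver):

#include <linux/clk-provider.h>
#include <linux/io.h>

/* Hypothetical driver-private clock; names and bitfield layout invented. */
struct foo_pll {
	struct clk_hw hw;
	void __iomem *reg;
};

#define to_foo_pll(_hw) container_of(_hw, struct foo_pll, hw)

/* Every platform re-implements "read a bitfield, scale the parent rate". */
static unsigned long foo_pll_recalc_rate(struct clk_hw *hw,
					 unsigned long parent_rate)
{
	struct foo_pll *pll = to_foo_pll(hw);
	u32 mult;

	/* assumed layout: 4-bit multiplier in bits [7:4] of the control reg */
	mult = (readl(pll->reg) >> 4) & 0xf;

	return parent_rate * mult;
}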
This leads to code duplication across platforms that want to use that kind of clock, and makes it impossible to use the composite clocks with such a multiplier without defining your own rate operations. Create such a driver in order to remove these issues and, hopefully, consolidate the various implementations, reducing code size across platforms. Signed-off-by: Maxime Ripard Reviewed-by: Chen-Yu Tsai --- drivers/clk/Makefile | 1 + drivers/clk/clk-multiplier.c | 181 +++++++++++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 42 ++++++++++ 3 files changed, 224 insertions(+) create mode 100644 drivers/clk/clk-multiplier.c (limited to 'include/linux') diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index d08b3e5985be..0b2101039508 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_COMMON_CLK) += clk-divider.o obj-$(CONFIG_COMMON_CLK) += clk-fixed-factor.o obj-$(CONFIG_COMMON_CLK) += clk-fixed-rate.o obj-$(CONFIG_COMMON_CLK) += clk-gate.o +obj-$(CONFIG_COMMON_CLK) += clk-multiplier.o obj-$(CONFIG_COMMON_CLK) += clk-mux.o obj-$(CONFIG_COMMON_CLK) += clk-composite.o obj-$(CONFIG_COMMON_CLK) += clk-fractional-divider.o diff --git a/drivers/clk/clk-multiplier.c b/drivers/clk/clk-multiplier.c new file mode 100644 index 000000000000..43ec269fcbff --- /dev/null +++ b/drivers/clk/clk-multiplier.c @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2015 Maxime Ripard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/clk-provider.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/slab.h> + +#define to_clk_multiplier(_hw) container_of(_hw, struct clk_multiplier, hw) + +static unsigned long __get_mult(struct clk_multiplier *mult, + unsigned long rate, + unsigned long parent_rate) +{ + if (mult->flags & CLK_MULTIPLIER_ROUND_CLOSEST) + return DIV_ROUND_CLOSEST(rate, parent_rate); + + return rate / parent_rate; +} + +static unsigned long clk_multiplier_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_multiplier *mult = to_clk_multiplier(hw); + unsigned long val; + + val = clk_readl(mult->reg) >> mult->shift; + val &= GENMASK(mult->width - 1, 0); + + if (!val && mult->flags & CLK_MULTIPLIER_ZERO_BYPASS) + val = 1; + + return parent_rate * val; +} + +static bool __is_best_rate(unsigned long rate, unsigned long new, + unsigned long best, unsigned long flags) +{ + if (flags & CLK_MULTIPLIER_ROUND_CLOSEST) + return abs(rate - new) < abs(rate - best); + + return new >= rate && new < best; +} + +static unsigned long __bestmult(struct clk_hw *hw, unsigned long rate, + unsigned long *best_parent_rate, + u8 width, unsigned long flags) +{ + unsigned long orig_parent_rate = *best_parent_rate; + unsigned long parent_rate, current_rate, best_rate = ~0; + unsigned int i, bestmult = 0; + + if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) + return rate / *best_parent_rate; + + for (i = 1; i < ((1 << width) - 1); i++) { + if (rate == orig_parent_rate * i) { + /* + * This is the best case for us if we have a + * perfect match without changing the parent + * rate.
+ */ + *best_parent_rate = orig_parent_rate; + return i; + } + + parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), + rate / i); + current_rate = parent_rate * i; + + if (__is_best_rate(rate, current_rate, best_rate, flags)) { + bestmult = i; + best_rate = current_rate; + *best_parent_rate = parent_rate; + } + } + + return bestmult; +} + +static long clk_multiplier_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct clk_multiplier *mult = to_clk_multiplier(hw); + unsigned long factor = __bestmult(hw, rate, parent_rate, + mult->width, mult->flags); + + return *parent_rate * factor; +} + +static int clk_multiplier_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_multiplier *mult = to_clk_multiplier(hw); + unsigned long factor = __get_mult(mult, rate, parent_rate); + unsigned long flags = 0; + unsigned long val; + + if (mult->lock) + spin_lock_irqsave(mult->lock, flags); + else + __acquire(mult->lock); + + val = clk_readl(mult->reg); + val &= ~GENMASK(mult->width + mult->shift - 1, mult->shift); + val |= factor << mult->shift; + clk_writel(val, mult->reg); + + if (mult->lock) + spin_unlock_irqrestore(mult->lock, flags); + else + __release(mult->lock); + + return 0; +} + +const struct clk_ops clk_multiplier_ops = { + .recalc_rate = clk_multiplier_recalc_rate, + .round_rate = clk_multiplier_round_rate, + .set_rate = clk_multiplier_set_rate, +}; +EXPORT_SYMBOL_GPL(clk_multiplier_ops); + +struct clk *clk_register_multiplier(struct device *dev, const char *name, + const char *parent_name, + unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_mult_flags, spinlock_t *lock) +{ + struct clk_init_data init; + struct clk_multiplier *mult; + struct clk *clk; + + mult = kmalloc(sizeof(*mult), GFP_KERNEL); + if (!mult) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &clk_multiplier_ops; + init.flags = flags | CLK_IS_BASIC; + init.parent_names = &parent_name; + init.num_parents = 1; + + mult->reg = reg; + mult->shift = shift; + mult->width = width; + mult->flags = clk_mult_flags; + mult->lock = lock; + mult->hw.init = &init; + + clk = clk_register(dev, &mult->hw); + if (IS_ERR(clk)) + kfree(mult); + + return clk; +} +EXPORT_SYMBOL_GPL(clk_register_multiplier); + +void clk_unregister_multiplier(struct clk *clk) +{ + struct clk_multiplier *mult; + struct clk_hw *hw; + + hw = __clk_get_hw(clk); + if (!hw) + return; + + mult = to_clk_multiplier(hw); + + clk_unregister(clk); + kfree(mult); +} +EXPORT_SYMBOL_GPL(clk_unregister_multiplier); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3ecc07d0da77..6a7dfe33a317 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -518,6 +518,48 @@ struct clk *clk_register_fractional_divider(struct device *dev, void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, u8 clk_divider_flags, spinlock_t *lock); +/** + * struct clk_multiplier - adjustable multiplier clock + * + * @hw: handle between common and hardware-specific interfaces + * @reg: register containing the multiplier + * @shift: shift to the multiplier bit field + * @width: width of the multiplier bit field + * @lock: register lock + * + * Clock with an adjustable multiplier affecting its output frequency. 
+ * Implements .recalc_rate, .set_rate and .round_rate + * + * Flags: + * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read + * from the register, with 0 being a valid value effectively + * zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is + * set, then a null multiplier will be considered as a bypass, + * leaving the parent rate unmodified. + * CLK_MULTIPLIER_ROUND_CLOSEST - Makes the best calculated divider to be + * rounded to the closest integer instead of the down one. + */ +struct clk_multiplier { + struct clk_hw hw; + void __iomem *reg; + u8 shift; + u8 width; + u8 flags; + spinlock_t *lock; +}; + +#define CLK_MULTIPLIER_ZERO_BYPASS BIT(0) +#define CLK_MULTIPLIER_ROUND_CLOSEST BIT(1) + +extern const struct clk_ops clk_multiplier_ops; + +struct clk *clk_register_multiplier(struct device *dev, const char *name, + const char *parent_name, + unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_mult_flags, spinlock_t *lock); +void clk_unregister_multiplier(struct clk *clk); + /*** * struct clk_composite - aggregate clock of mux, divider and gate clocks * -- cgit v1.2.3 From aff34e192e4eeacfb8b5ffc68e10a240f2c0c6d7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:27 -0400 Subject: block: Move integrity kobject to struct gendisk The integrity kobject purely exists to support the integrity subdirectory in sysfs and doesn't really have anything to do with the blk_integrity data structure. Move the kobject to struct gendisk where it belongs. Signed-off-by: Martin K. Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- block/blk-integrity.c | 22 +++++++++++----------- include/linux/blkdev.h | 2 -- include/linux/genhd.h | 1 + 3 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 75f29cf70188..182bfd2383ea 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -249,8 +249,8 @@ struct integrity_sysfs_entry { static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { - struct blk_integrity *bi = - container_of(kobj, struct blk_integrity, kobj); + struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); + struct blk_integrity *bi = blk_get_integrity(disk); struct integrity_sysfs_entry *entry = container_of(attr, struct integrity_sysfs_entry, attr); @@ -261,8 +261,8 @@ static ssize_t integrity_attr_store(struct kobject *kobj, struct attribute *attr, const char *page, size_t count) { - struct blk_integrity *bi = - container_of(kobj, struct blk_integrity, kobj); + struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); + struct blk_integrity *bi = blk_get_integrity(disk); struct integrity_sysfs_entry *entry = container_of(attr, struct integrity_sysfs_entry, attr); ssize_t ret = 0; @@ -385,8 +385,8 @@ subsys_initcall(blk_dev_integrity_init); static void blk_integrity_release(struct kobject *kobj) { - struct blk_integrity *bi = - container_of(kobj, struct blk_integrity, kobj); + struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); + struct blk_integrity *bi = blk_get_integrity(disk); kmem_cache_free(integrity_cachep, bi); } @@ -429,14 +429,14 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) if (!bi) return -1; - if (kobject_init_and_add(&bi->kobj, &integrity_ktype, + if (kobject_init_and_add(&disk->integrity_kobj, 
&integrity_ktype, &disk_to_dev(disk)->kobj, "%s", "integrity")) { kmem_cache_free(integrity_cachep, bi); return -1; } - kobject_uevent(&bi->kobj, KOBJ_ADD); + kobject_uevent(&disk->integrity_kobj, KOBJ_ADD); bi->flags |= BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE; bi->interval = queue_logical_block_size(disk->queue); @@ -479,9 +479,9 @@ void blk_integrity_unregister(struct gendisk *disk) bi = disk->integrity; - kobject_uevent(&bi->kobj, KOBJ_REMOVE); - kobject_del(&bi->kobj); - kobject_put(&bi->kobj); + kobject_uevent(&disk->integrity_kobj, KOBJ_REMOVE); + kobject_del(&disk->integrity_kobj); + kobject_put(&disk->integrity_kobj); disk->integrity = NULL; } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 19c2e947d4d1..830f9c07d4bb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1472,8 +1472,6 @@ struct blk_integrity { unsigned short tag_size; const char *name; - - struct kobject kobj; }; extern bool blk_integrity_is_initialized(struct gendisk *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2adbfa6d02bc..9e6e0dfa97ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -199,6 +199,7 @@ struct gendisk { struct disk_events *ev; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; + struct kobject integrity_kobj; #endif int node_id; }; -- cgit v1.2.3 From 0f8087ecdeac921fc4920f1328f55c15080bc6aa Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:33 -0400 Subject: block: Consolidate static integrity profile properties We previously made a complete copy of a device's data integrity profile even though several of the fields inside the blk_integrity struct are pointers to fixed template entries in t10-pi.c. Split the static and per-device portions so that we can reference the template directly. Signed-off-by: Martin K. 
Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- block/bio-integrity.c | 8 ++++---- block/blk-integrity.c | 17 ++++++++--------- block/t10-pi.c | 16 ++++------------ drivers/nvdimm/core.c | 11 +++++++---- drivers/nvme/host/pci.c | 8 ++++---- drivers/scsi/sd_dif.c | 31 ++++++++++++++++++------------- drivers/target/target_core_iblock.c | 10 +++++----- include/linux/blkdev.h | 20 +++++++++++--------- include/linux/t10-pi.h | 8 ++++---- 9 files changed, 65 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 14b8faf8b09d..a10ffe19a8dd 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -177,11 +177,11 @@ bool bio_integrity_enabled(struct bio *bio) if (bi == NULL) return false; - if (bio_data_dir(bio) == READ && bi->verify_fn != NULL && + if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL && (bi->flags & BLK_INTEGRITY_VERIFY)) return true; - if (bio_data_dir(bio) == WRITE && bi->generate_fn != NULL && + if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL && (bi->flags & BLK_INTEGRITY_GENERATE)) return true; @@ -340,7 +340,7 @@ int bio_integrity_prep(struct bio *bio) /* Auto-generate integrity metadata if this is a write */ if (bio_data_dir(bio) == WRITE) - bio_integrity_process(bio, bi->generate_fn); + bio_integrity_process(bio, bi->profile->generate_fn); return 0; } @@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio *bio = bip->bip_bio; struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - bio->bi_error = bio_integrity_process(bio, bi->verify_fn); + bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn); /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 182bfd2383ea..daf590ab3b46 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -176,10 +176,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) return -1; } - if (strcmp(b1->name, b2->name)) { + if (b1->profile != b2->profile) { printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, gd1->disk_name, gd2->disk_name, - b1->name, b2->name); + b1->profile->name, b2->profile->name); return -1; } @@ -275,8 +275,8 @@ static ssize_t integrity_attr_store(struct kobject *kobj, static ssize_t integrity_format_show(struct blk_integrity *bi, char *page) { - if (bi != NULL && bi->name != NULL) - return sprintf(page, "%s\n", bi->name); + if (bi != NULL && bi->profile->name != NULL) + return sprintf(page, "%s\n", bi->profile->name); else return sprintf(page, "none\n"); } @@ -401,7 +401,8 @@ bool blk_integrity_is_initialized(struct gendisk *disk) { struct blk_integrity *bi = blk_get_integrity(disk); - return (bi && bi->name && strcmp(bi->name, bi_unsupported_name) != 0); + return (bi && bi->profile->name && strcmp(bi->profile->name, + bi_unsupported_name) != 0); } EXPORT_SYMBOL(blk_integrity_is_initialized); @@ -446,14 +447,12 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) /* Use the provided profile as template */ if (template != NULL) { - bi->name = template->name; - bi->generate_fn = template->generate_fn; - bi->verify_fn = template->verify_fn; + bi->profile = template->profile; bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; bi->flags |= template->flags; } else - bi->name = bi_unsupported_name; + 
bi->profile->name = bi_unsupported_name; disk->queue->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; diff --git a/block/t10-pi.c b/block/t10-pi.c index 24d6e9715318..2c97912335a9 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -160,38 +160,30 @@ static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) return t10_pi_verify(iter, t10_pi_ip_fn, 3); } -struct blk_integrity t10_pi_type1_crc = { +struct blk_integrity_profile t10_pi_type1_crc = { .name = "T10-DIF-TYPE1-CRC", .generate_fn = t10_pi_type1_generate_crc, .verify_fn = t10_pi_type1_verify_crc, - .tuple_size = sizeof(struct t10_pi_tuple), - .tag_size = 0, }; EXPORT_SYMBOL(t10_pi_type1_crc); -struct blk_integrity t10_pi_type1_ip = { +struct blk_integrity_profile t10_pi_type1_ip = { .name = "T10-DIF-TYPE1-IP", .generate_fn = t10_pi_type1_generate_ip, .verify_fn = t10_pi_type1_verify_ip, - .tuple_size = sizeof(struct t10_pi_tuple), - .tag_size = 0, }; EXPORT_SYMBOL(t10_pi_type1_ip); -struct blk_integrity t10_pi_type3_crc = { +struct blk_integrity_profile t10_pi_type3_crc = { .name = "T10-DIF-TYPE3-CRC", .generate_fn = t10_pi_type3_generate_crc, .verify_fn = t10_pi_type3_verify_crc, - .tuple_size = sizeof(struct t10_pi_tuple), - .tag_size = 0, }; EXPORT_SYMBOL(t10_pi_type3_crc); -struct blk_integrity t10_pi_type3_ip = { +struct blk_integrity_profile t10_pi_type3_ip = { .name = "T10-DIF-TYPE3-IP", .generate_fn = t10_pi_type3_generate_ip, .verify_fn = t10_pi_type3_verify_ip, - .tuple_size = sizeof(struct t10_pi_tuple), - .tag_size = 0, }; EXPORT_SYMBOL(t10_pi_type3_ip); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index cb62ec6a12d0..7df89b547ae1 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -399,19 +399,22 @@ static int nd_pi_nop_generate_verify(struct blk_integrity_iter *iter) int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) { - struct blk_integrity integrity = { + struct blk_integrity bi; + static struct blk_integrity_profile profile = { .name = "ND-PI-NOP", .generate_fn = nd_pi_nop_generate_verify, .verify_fn = nd_pi_nop_generate_verify, - .tuple_size = meta_size, - .tag_size = meta_size, }; int ret; if (meta_size == 0) return 0; - ret = blk_integrity_register(disk, &integrity); + bi.profile = &profile; + bi.tuple_size = meta_size; + bi.tag_size = meta_size; + + ret = blk_integrity_register(disk, &bi); if (ret) return ret; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 22d83752ae87..04e3d60a1e45 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -558,7 +558,7 @@ static int nvme_noop_generate(struct blk_integrity_iter *iter) return 0; } -struct blk_integrity nvme_meta_noop = { +struct blk_integrity_profile nvme_meta_noop = { .name = "NVME_META_NOOP", .generate_fn = nvme_noop_generate, .verify_fn = nvme_noop_verify, @@ -570,14 +570,14 @@ static void nvme_init_integrity(struct nvme_ns *ns) switch (ns->pi_type) { case NVME_NS_DPS_PI_TYPE3: - integrity = t10_pi_type3_crc; + integrity.profile = &t10_pi_type3_crc; break; case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: - integrity = t10_pi_type1_crc; + integrity.profile = &t10_pi_type1_crc; break; default: - integrity = nvme_meta_noop; + integrity.profile = &nvme_meta_noop; break; } integrity.tuple_size = ns->ms; diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 5c06d292b94c..987bf392c336 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -43,6 +43,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp) struct scsi_device *sdp = sdkp->device; struct 
gendisk *disk = sdkp->disk; u8 type = sdkp->protection_type; + struct blk_integrity bi; int dif, dix; dif = scsi_host_dif_capable(sdp->host, type); @@ -55,39 +56,43 @@ void sd_dif_config_host(struct scsi_disk *sdkp) if (!dix) return; + memset(&bi, 0, sizeof(bi)); + /* Enable DMA of protection information */ if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) { if (type == SD_DIF_TYPE3_PROTECTION) - blk_integrity_register(disk, &t10_pi_type3_ip); + bi.profile = &t10_pi_type3_ip; else - blk_integrity_register(disk, &t10_pi_type1_ip); + bi.profile = &t10_pi_type1_ip; - disk->integrity->flags |= BLK_INTEGRITY_IP_CHECKSUM; + bi.flags |= BLK_INTEGRITY_IP_CHECKSUM; } else if (type == SD_DIF_TYPE3_PROTECTION) - blk_integrity_register(disk, &t10_pi_type3_crc); + bi.profile = &t10_pi_type3_crc; else - blk_integrity_register(disk, &t10_pi_type1_crc); + bi.profile = &t10_pi_type1_crc; + bi.tuple_size = sizeof(struct t10_pi_tuple); sd_printk(KERN_NOTICE, sdkp, - "Enabling DIX %s protection\n", disk->integrity->name); + "Enabling DIX %s protection\n", bi.profile->name); - /* Signal to block layer that we support sector tagging */ if (dif && type) { - - disk->integrity->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + bi.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; if (!sdkp->ATO) - return; + goto out; if (type == SD_DIF_TYPE3_PROTECTION) - disk->integrity->tag_size = sizeof(u16) + sizeof(u32); + bi.tag_size = sizeof(u16) + sizeof(u32); else - disk->integrity->tag_size = sizeof(u16); + bi.tag_size = sizeof(u16); sd_printk(KERN_NOTICE, sdkp, "DIF application tag size %u\n", - disk->integrity->tag_size); + bi.tag_size); } + +out: + blk_integrity_register(disk, &bi); } /* diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 0f19e11acac2..f29c69120054 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -155,17 +155,17 @@ static int iblock_configure_device(struct se_device *dev) if (bi) { struct bio_set *bs = ib_dev->ibd_bio_set; - if (!strcmp(bi->name, "T10-DIF-TYPE3-IP") || - !strcmp(bi->name, "T10-DIF-TYPE1-IP")) { + if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") || + !strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) { pr_err("IBLOCK export of blk_integrity: %s not" - " supported\n", bi->name); + " supported\n", bi->profile->name); ret = -ENOSYS; goto out_blkdev_put; } - if (!strcmp(bi->name, "T10-DIF-TYPE3-CRC")) { + if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-CRC")) { dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT; - } else if (!strcmp(bi->name, "T10-DIF-TYPE1-CRC")) { + } else if (!strcmp(bi->profile->name, "T10-DIF-TYPE1-CRC")) { dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE1_PROT; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 830f9c07d4bb..f36c6476f1c7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1462,16 +1462,18 @@ struct blk_integrity_iter { typedef int (integrity_processing_fn) (struct blk_integrity_iter *); -struct blk_integrity { - integrity_processing_fn *generate_fn; - integrity_processing_fn *verify_fn; - - unsigned short flags; - unsigned short tuple_size; - unsigned short interval; - unsigned short tag_size; +struct blk_integrity_profile { + integrity_processing_fn *generate_fn; + integrity_processing_fn *verify_fn; + const char *name; +}; - const char *name; +struct blk_integrity { + struct blk_integrity_profile *profile; + unsigned short flags; + unsigned short tuple_size; + unsigned short interval; + unsigned short tag_size; }; extern bool 
blk_integrity_is_initialized(struct gendisk *); diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 6a8b9942632d..dd8de82cf5b5 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -14,9 +14,9 @@ struct t10_pi_tuple { }; -extern struct blk_integrity t10_pi_type1_crc; -extern struct blk_integrity t10_pi_type1_ip; -extern struct blk_integrity t10_pi_type3_crc; -extern struct blk_integrity t10_pi_type3_ip; +extern struct blk_integrity_profile t10_pi_type1_crc; +extern struct blk_integrity_profile t10_pi_type1_ip; +extern struct blk_integrity_profile t10_pi_type3_crc; +extern struct blk_integrity_profile t10_pi_type3_ip; #endif -- cgit v1.2.3 From a48f041d91bf1aee599fa2adb53b780ed20c2ee5 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:38 -0400 Subject: block: Reduce the size of struct blk_integrity The per-device properties in the blk_integrity structure were previously unsigned short. However, most of the values fit inside a char. The only exception is the data interval size and we can work around that by storing it as a power of two. This cuts the size of the dynamic portion of blk_integrity in half. Signed-off-by: Martin K. Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- block/bio-integrity.c | 4 ++-- block/blk-integrity.c | 6 +++--- include/linux/blkdev.h | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index a10ffe19a8dd..6a90eca9cea1 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -202,7 +202,7 @@ EXPORT_SYMBOL(bio_integrity_enabled); static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, unsigned int sectors) { - return sectors >> (ilog2(bi->interval) - 9); + return sectors >> (bi->interval_exp - 9); } static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, @@ -229,7 +229,7 @@ static int bio_integrity_process(struct bio *bio, bip->bip_vec->bv_offset; iter.disk_name = bio->bi_bdev->bd_disk->disk_name; - iter.interval = bi->interval; + iter.interval = 1 << bi->interval_exp; iter.seed = bip_get_seed(bip); iter.prot_buf = prot_buf; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index daf590ab3b46..c7508654faff 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -155,10 +155,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) if (!b1 || !b2) return -1; - if (b1->interval != b2->interval) { + if (b1->interval_exp != b2->interval_exp) { pr_err("%s: %s/%s protection interval %u != %u\n", __func__, gd1->disk_name, gd2->disk_name, - b1->interval, b2->interval); + 1 << b1->interval_exp, 1 << b2->interval_exp); return -1; } @@ -440,7 +440,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) kobject_uevent(&disk->integrity_kobj, KOBJ_ADD); bi->flags |= BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE; - bi->interval = queue_logical_block_size(disk->queue); + bi->interval_exp = ilog2(queue_logical_block_size(disk->queue)); disk->integrity = bi; } else bi = disk->integrity; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f36c6476f1c7..4f1968f15e30 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1470,10 +1470,10 @@ struct blk_integrity_profile { struct blk_integrity { struct blk_integrity_profile *profile; - unsigned short flags; - unsigned short tuple_size; - unsigned short interval; - unsigned 
short tag_size; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; }; extern bool blk_integrity_is_initialized(struct gendisk *); -- cgit v1.2.3 From 25520d55cdb6ee289abc68f553d364d22478ff54 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:49 -0400 Subject: block: Inline blk_integrity in struct gendisk Up until now the integrity profile has been dynamically allocated and attached to struct gendisk after the disk has been made active. This causes problems because NVMe devices need to register the profile prior to the partition table being read due to a mandatory metadata buffer requirement. In addition, DM goes through hoops to deal with preallocating, but not initializing integrity profiles. Since the integrity profile is small (4 bytes + a pointer), Christoph suggested moving it to struct gendisk proper. This requires several changes: - Moving the blk_integrity definition to genhd.h. - Inlining blk_integrity in struct gendisk. - Removing the dynamic allocation code. - Adding helper functions which allow gendisk to set up and tear down the integrity sysfs dir when a disk is added/deleted. - Adding a blk_integrity_revalidate() callback for updating the stable pages bdi setting. - The calls that depend on whether a device has an integrity profile or not now key off of the bi->profile pointer. - Simplifying the integrity support routines in DM (Mike Snitzer). Signed-off-by: Martin K. Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Mike Snitzer Cc: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- block/blk-integrity.c | 160 +++++++++++++++++----------------------------- block/genhd.c | 2 + block/partition-generic.c | 1 + drivers/md/dm-table.c | 88 +++++++++++++------------ drivers/md/md.c | 9 +-- drivers/nvdimm/core.c | 6 +- drivers/nvme/host/pci.c | 5 +- fs/block_dev.c | 2 +- include/linux/blkdev.h | 34 ++++------ include/linux/genhd.h | 26 +++++++- 10 files changed, 152 insertions(+), 181 deletions(-) (limited to 'include/linux') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 7a96f57ed195..4615a3386798 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -30,10 +30,6 @@ #include "blk.h" -static struct kmem_cache *integrity_cachep; - -static const char *bi_unsupported_name = "unsupported"; - /** * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements * @q: request queue @@ -146,13 +142,13 @@ EXPORT_SYMBOL(blk_rq_map_integrity_sg); */ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) { - struct blk_integrity *b1 = gd1->integrity; - struct blk_integrity *b2 = gd2->integrity; + struct blk_integrity *b1 = &gd1->integrity; + struct blk_integrity *b2 = &gd2->integrity; - if (!b1 && !b2) + if (!b1->profile && !b2->profile) return 0; - if (!b1 || !b2) + if (!b1->profile || !b2->profile) return -1; if (b1->interval_exp != b2->interval_exp) { @@ -163,21 +159,21 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) } if (b1->tuple_size != b2->tuple_size) { - printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__, + pr_err("%s: %s/%s tuple sz %u != %u\n", __func__, gd1->disk_name, gd2->disk_name, b1->tuple_size, b2->tuple_size); return -1; } if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { - printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__, + pr_err("%s: %s/%s tag sz %u != %u\n", __func__, gd1->disk_name, gd2->disk_name, b1->tag_size, 
b2->tag_size); return -1; } if (b1->profile != b2->profile) { - printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, + pr_err("%s: %s/%s type %s != %s\n", __func__, gd1->disk_name, gd2->disk_name, b1->profile->name, b2->profile->name); return -1; @@ -250,7 +246,7 @@ static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct blk_integrity *bi = blk_get_integrity(disk); + struct blk_integrity *bi = &disk->integrity; struct integrity_sysfs_entry *entry = container_of(attr, struct integrity_sysfs_entry, attr); @@ -262,7 +258,7 @@ static ssize_t integrity_attr_store(struct kobject *kobj, size_t count) { struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct blk_integrity *bi = blk_get_integrity(disk); + struct blk_integrity *bi = &disk->integrity; struct integrity_sysfs_entry *entry = container_of(attr, struct integrity_sysfs_entry, attr); ssize_t ret = 0; @@ -275,7 +271,7 @@ static ssize_t integrity_attr_store(struct kobject *kobj, static ssize_t integrity_format_show(struct blk_integrity *bi, char *page) { - if (bi != NULL && bi->profile->name != NULL) + if (bi->profile && bi->profile->name) return sprintf(page, "%s\n", bi->profile->name); else return sprintf(page, "none\n"); @@ -283,18 +279,13 @@ static ssize_t integrity_format_show(struct blk_integrity *bi, char *page) static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page) { - if (bi != NULL) - return sprintf(page, "%u\n", bi->tag_size); - else - return sprintf(page, "0\n"); + return sprintf(page, "%u\n", bi->tag_size); } static ssize_t integrity_interval_show(struct blk_integrity *bi, char *page) { - if (bi != NULL) - return sprintf(page, "%u\n", 1 << bi->interval_exp); - else - return sprintf(page, "0\n"); + return sprintf(page, "%u\n", + bi->interval_exp ? 1 << bi->interval_exp : 0); } static ssize_t integrity_verify_store(struct blk_integrity *bi, @@ -388,113 +379,78 @@ static const struct sysfs_ops integrity_ops = { .store = &integrity_attr_store, }; -static int __init blk_dev_integrity_init(void) -{ - integrity_cachep = kmem_cache_create("blkdev_integrity", - sizeof(struct blk_integrity), - 0, SLAB_PANIC, NULL); - return 0; -} -subsys_initcall(blk_dev_integrity_init); - -static void blk_integrity_release(struct kobject *kobj) -{ - struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct blk_integrity *bi = blk_get_integrity(disk); - - kmem_cache_free(integrity_cachep, bi); -} - static struct kobj_type integrity_ktype = { .default_attrs = integrity_attrs, .sysfs_ops = &integrity_ops, - .release = blk_integrity_release, }; -bool blk_integrity_is_initialized(struct gendisk *disk) -{ - struct blk_integrity *bi = blk_get_integrity(disk); - - return (bi && bi->profile->name && strcmp(bi->profile->name, - bi_unsupported_name) != 0); -} -EXPORT_SYMBOL(blk_integrity_is_initialized); - /** * blk_integrity_register - Register a gendisk as being integrity-capable * @disk: struct gendisk pointer to make integrity-aware - * @template: optional integrity profile to register + * @template: block integrity profile to register * - * Description: When a device needs to advertise itself as being able - * to send/receive integrity metadata it must use this function to - * register the capability with the block layer. The template is a - * blk_integrity struct with values appropriate for the underlying - * hardware. 
If template is NULL the new profile is allocated but - * not filled out. See Documentation/block/data-integrity.txt. + * Description: When a device needs to advertise itself as being able to + * send/receive integrity metadata it must use this function to register + * the capability with the block layer. The template is a blk_integrity + * struct with values appropriate for the underlying hardware. See + * Documentation/block/data-integrity.txt. */ -int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) +void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) { - struct blk_integrity *bi; - - BUG_ON(disk == NULL); + struct blk_integrity *bi = &disk->integrity; - if (disk->integrity == NULL) { - bi = kmem_cache_alloc(integrity_cachep, - GFP_KERNEL | __GFP_ZERO); - if (!bi) - return -1; + bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE | + template->flags; + bi->interval_exp = ilog2(queue_logical_block_size(disk->queue)); + bi->profile = template->profile; + bi->tuple_size = template->tuple_size; + bi->tag_size = template->tag_size; - if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype, - &disk_to_dev(disk)->kobj, - "%s", "integrity")) { - kmem_cache_free(integrity_cachep, bi); - return -1; - } - - kobject_uevent(&disk->integrity_kobj, KOBJ_ADD); - - bi->flags |= BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE; - bi->interval_exp = ilog2(queue_logical_block_size(disk->queue)); - disk->integrity = bi; - } else - bi = disk->integrity; - - /* Use the provided profile as template */ - if (template != NULL) { - bi->profile = template->profile; - bi->tuple_size = template->tuple_size; - bi->tag_size = template->tag_size; - bi->flags |= template->flags; - } else - bi->profile->name = bi_unsupported_name; - - disk->queue->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; - - return 0; + blk_integrity_revalidate(disk); } EXPORT_SYMBOL(blk_integrity_register); /** - * blk_integrity_unregister - Remove block integrity profile - * @disk: disk whose integrity profile to deallocate + * blk_integrity_unregister - Unregister block integrity profile + * @disk: disk whose integrity profile to unregister * - * Description: This function frees all memory used by the block - * integrity profile. To be called at device teardown. + * Description: This function unregisters the integrity capability from + * a block device. 
*/ void blk_integrity_unregister(struct gendisk *disk) { - struct blk_integrity *bi; + blk_integrity_revalidate(disk); + memset(&disk->integrity, 0, sizeof(struct blk_integrity)); +} +EXPORT_SYMBOL(blk_integrity_unregister); - if (!disk || !disk->integrity) +void blk_integrity_revalidate(struct gendisk *disk) +{ + struct blk_integrity *bi = &disk->integrity; + + if (!(disk->flags & GENHD_FL_UP)) return; - disk->queue->backing_dev_info.capabilities &= ~BDI_CAP_STABLE_WRITES; + if (bi->profile) + disk->queue->backing_dev_info.capabilities |= + BDI_CAP_STABLE_WRITES; + else + disk->queue->backing_dev_info.capabilities &= + ~BDI_CAP_STABLE_WRITES; +} - bi = disk->integrity; +void blk_integrity_add(struct gendisk *disk) +{ + if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype, + &disk_to_dev(disk)->kobj, "%s", "integrity")) + return; + kobject_uevent(&disk->integrity_kobj, KOBJ_ADD); +} + +void blk_integrity_del(struct gendisk *disk) +{ kobject_uevent(&disk->integrity_kobj, KOBJ_REMOVE); kobject_del(&disk->integrity_kobj); kobject_put(&disk->integrity_kobj); - disk->integrity = NULL; } -EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/genhd.c b/block/genhd.c index 0c706f33a599..e5cafa51567c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -630,6 +630,7 @@ void add_disk(struct gendisk *disk) WARN_ON(retval); disk_add_events(disk); + blk_integrity_add(disk); } EXPORT_SYMBOL(add_disk); @@ -638,6 +639,7 @@ void del_gendisk(struct gendisk *disk) struct disk_part_iter piter; struct hd_struct *part; + blk_integrity_del(disk); disk_del_events(disk); /* invalidate stuff */ diff --git a/block/partition-generic.c b/block/partition-generic.c index e7711133284e..3b030157ec85 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -428,6 +428,7 @@ rescan: if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); + blk_integrity_revalidate(disk); check_disk_size_change(disk, bdev); bdev->bd_invalidated = 0; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e76ed003769e..061152a43730 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1014,15 +1014,16 @@ static int dm_table_build_index(struct dm_table *t) return r; } +static bool integrity_profile_exists(struct gendisk *disk) +{ + return !!blk_get_integrity(disk); +} + /* * Get a disk whose integrity profile reflects the table's profile. - * If %match_all is true, all devices' profiles must match. - * If %match_all is false, all devices must at least have an - * allocated integrity profile; but uninitialized is ok. * Returns NULL if integrity support was inconsistent or unavailable. 
*/ -static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, - bool match_all) +static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t) { struct list_head *devices = dm_table_get_devices(t); struct dm_dev_internal *dd = NULL; @@ -1030,10 +1031,8 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, list_for_each_entry(dd, devices, list) { template_disk = dd->dm_dev->bdev->bd_disk; - if (!blk_get_integrity(template_disk)) + if (!integrity_profile_exists(template_disk)) goto no_integrity; - if (!match_all && !blk_integrity_is_initialized(template_disk)) - continue; /* skip uninitialized profiles */ else if (prev_disk && blk_integrity_compare(prev_disk, template_disk) < 0) goto no_integrity; @@ -1052,34 +1051,40 @@ no_integrity: } /* - * Register the mapped device for blk_integrity support if - * the underlying devices have an integrity profile. But all devices - * may not have matching profiles (checking all devices isn't reliable + * Register the mapped device for blk_integrity support if the + * underlying devices have an integrity profile. But all devices may + * not have matching profiles (checking all devices isn't reliable * during table load because this table may use other DM device(s) which - * must be resumed before they will have an initialized integity profile). - * Stacked DM devices force a 2 stage integrity profile validation: - * 1 - during load, validate all initialized integrity profiles match - * 2 - during resume, validate all integrity profiles match + * must be resumed before they will have an initialized integity + * profile). Consequently, stacked DM devices force a 2 stage integrity + * profile validation: First pass during table load, final pass during + * resume. */ -static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) +static int dm_table_register_integrity(struct dm_table *t) { + struct mapped_device *md = t->md; struct gendisk *template_disk = NULL; - template_disk = dm_table_get_integrity_disk(t, false); + template_disk = dm_table_get_integrity_disk(t); if (!template_disk) return 0; - if (!blk_integrity_is_initialized(dm_disk(md))) { + if (!integrity_profile_exists(dm_disk(md))) { t->integrity_supported = 1; - return blk_integrity_register(dm_disk(md), NULL); + /* + * Register integrity profile during table load; we can do + * this because the final profile must match during resume. + */ + blk_integrity_register(dm_disk(md), + blk_get_integrity(template_disk)); + return 0; } /* - * If DM device already has an initalized integrity + * If DM device already has an initialized integrity * profile the new profile should not conflict. 
*/ - if (blk_integrity_is_initialized(template_disk) && - blk_integrity_compare(dm_disk(md), template_disk) < 0) { + if (blk_integrity_compare(dm_disk(md), template_disk) < 0) { DMWARN("%s: conflict with existing integrity profile: " "%s profile mismatch", dm_device_name(t->md), @@ -1087,7 +1092,7 @@ static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device return 1; } - /* Preserve existing initialized integrity profile */ + /* Preserve existing integrity profile */ t->integrity_supported = 1; return 0; } @@ -1112,7 +1117,7 @@ int dm_table_complete(struct dm_table *t) return r; } - r = dm_table_prealloc_integrity(t, t->md); + r = dm_table_register_integrity(t); if (r) { DMERR("could not register integrity profile."); return r; @@ -1278,29 +1283,30 @@ combine_limits: } /* - * Set the integrity profile for this device if all devices used have - * matching profiles. We're quite deep in the resume path but still - * don't know if all devices (particularly DM devices this device - * may be stacked on) have matching profiles. Even if the profiles - * don't match we have no way to fail (to resume) at this point. + * Verify that all devices have an integrity profile that matches the + * DM device's registered integrity profile. If the profiles don't + * match then unregister the DM device's integrity profile. */ -static void dm_table_set_integrity(struct dm_table *t) +static void dm_table_verify_integrity(struct dm_table *t) { struct gendisk *template_disk = NULL; - if (!blk_get_integrity(dm_disk(t->md))) - return; + if (t->integrity_supported) { + /* + * Verify that the original integrity profile + * matches all the devices in this table. + */ + template_disk = dm_table_get_integrity_disk(t); + if (template_disk && + blk_integrity_compare(dm_disk(t->md), template_disk) >= 0) + return; + } - template_disk = dm_table_get_integrity_disk(t, true); - if (template_disk) - blk_integrity_register(dm_disk(t->md), - blk_get_integrity(template_disk)); - else if (blk_integrity_is_initialized(dm_disk(t->md))) - DMWARN("%s: device no longer has a valid integrity profile", - dm_device_name(t->md)); - else + if (integrity_profile_exists(dm_disk(t->md))) { DMWARN("%s: unable to establish an integrity profile", dm_device_name(t->md)); + blk_integrity_unregister(dm_disk(t->md)); + } } static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, @@ -1500,7 +1506,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); - dm_table_set_integrity(t); + dm_table_verify_integrity(t); /* * Determine whether or not this queue's I/O timings contribute diff --git a/drivers/md/md.c b/drivers/md/md.c index c702de18207a..2af9d590e1a0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1962,12 +1962,9 @@ int md_integrity_register(struct mddev *mddev) * All component devices are integrity capable and have matching * profiles, register the common profile for the md device. 
*/ - if (blk_integrity_register(mddev->gendisk, - bdev_get_integrity(reference->bdev)) != 0) { - printk(KERN_ERR "md: failed to register integrity for %s\n", - mdname(mddev)); - return -EINVAL; - } + blk_integrity_register(mddev->gendisk, + bdev_get_integrity(reference->bdev)); + printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev)); if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) { printk(KERN_ERR "md: failed to create integrity pool for %s\n", diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 7df89b547ae1..e85848caf8d2 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -405,7 +405,6 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) .generate_fn = nd_pi_nop_generate_verify, .verify_fn = nd_pi_nop_generate_verify, }; - int ret; if (meta_size == 0) return 0; @@ -414,10 +413,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) bi.tuple_size = meta_size; bi.tag_size = meta_size; - ret = blk_integrity_register(disk, &bi); - if (ret) - return ret; - + blk_integrity_register(disk, &bi); blk_queue_max_integrity_segments(disk->queue, 1); return 0; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 04e3d60a1e45..8d2aeaaa3895 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -538,7 +538,7 @@ static void nvme_dif_remap(struct request *req, virt = bip_get_seed(bip); phys = nvme_block_nr(ns, blk_rq_pos(req)); nlb = (blk_rq_bytes(req) >> ns->lba_shift); - ts = ns->disk->integrity->tuple_size; + ts = ns->disk->integrity.tuple_size; for (i = 0; i < nlb; i++, virt++, phys++) { pi = (struct t10_pi_tuple *)p; @@ -2044,8 +2044,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) ns->pi_type = pi_type; blk_queue_logical_block_size(ns->queue, bs); - if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) && - !ns->ext) + if (ns->ms && !ns->ext) nvme_init_integrity(ns); if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) diff --git a/fs/block_dev.c b/fs/block_dev.c index 073bb57adab1..0a793c7930eb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1075,7 +1075,7 @@ int revalidate_disk(struct gendisk *disk) if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - + blk_integrity_revalidate(disk); bdev = bdget_disk(disk, 0); if (!bdev) return ret; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4f1968f15e30..60669c20190f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1468,16 +1468,7 @@ struct blk_integrity_profile { const char *name; }; -struct blk_integrity { - struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; -}; - -extern bool blk_integrity_is_initialized(struct gendisk *); -extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); +extern void blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, @@ -1488,15 +1479,20 @@ extern bool blk_integrity_merge_rq(struct request_queue *, struct request *, extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, struct bio *); -static inline -struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { - 
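Put differently, the submission path now brackets every ->make_request_fn() invocation with a queue reference, roughly as follows (a distilled sketch of the generic_make_request() hunk below; the submit_one() wrapper is invented for illustration and the bio-list bookkeeping is omitted):

/* Sketch of the new submission rule: hold a live queue reference
 * across the driver's make_request_fn, or fail the bio if the
 * queue is dying. */
static void submit_one(struct request_queue *q, struct bio *bio)
{
	if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) {
		/* the queue cannot be torn down while the reference is held */
		q->make_request_fn(q, bio);
		blk_queue_exit(q);
	} else {
		/* the queue is dying: fail the bio rather than touch it */
		bio_io_error(bio);
	}
}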
return bdev->bd_disk->integrity; + struct blk_integrity *bi = &disk->integrity; + + if (!bi->profile) + return NULL; + + return bi; } -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +static inline +struct blk_integrity *bdev_get_integrity(struct block_device *bdev) { - return disk->integrity; + return blk_get_integrity(bdev->bd_disk); } static inline bool blk_integrity_rq(struct request *rq) @@ -1570,10 +1566,9 @@ static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) { return 0; } -static inline int blk_integrity_register(struct gendisk *d, +static inline void blk_integrity_register(struct gendisk *d, struct blk_integrity *b) { - return 0; } static inline void blk_integrity_unregister(struct gendisk *d) { @@ -1598,10 +1593,7 @@ static inline bool blk_integrity_merge_bio(struct request_queue *rq, { return true; } -static inline bool blk_integrity_is_initialized(struct gendisk *g) -{ - return 0; -} + static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9e6e0dfa97ad..82f4911e0ad8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -163,6 +163,18 @@ struct disk_part_tbl { struct disk_events; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +struct blk_integrity { + struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; +}; + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + struct gendisk { /* major, first_minor and minors are input parameters only, * don't use directly. Use disk_devt() and disk_max_parts(). @@ -198,9 +210,9 @@ struct gendisk { atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity *integrity; + struct blk_integrity integrity; struct kobject integrity_kobj; -#endif +#endif /* CONFIG_BLK_DEV_INTEGRITY */ int node_id; }; @@ -728,6 +740,16 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) #endif } +#if defined(CONFIG_BLK_DEV_INTEGRITY) +extern void blk_integrity_add(struct gendisk *); +extern void blk_integrity_del(struct gendisk *); +extern void blk_integrity_revalidate(struct gendisk *); +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline void blk_integrity_add(struct gendisk *disk) { } +static inline void blk_integrity_del(struct gendisk *disk) { } +static inline void blk_integrity_revalidate(struct gendisk *disk) { } +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From 3ef28e83ab15799742e55fd13243a5f678b04242 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Oct 2015 13:20:12 -0400 Subject: block: generic request_queue reference counting Allow pmem, and other synchronous/bio-based block drivers, to fallback on a per-cpu reference count managed by the core for tracking queue live/dead state. The existing per-cpu reference count for the blk_mq case is promoted to be used in all block i/o scenarios. This involves initializing it by default, waiting for it to drop to zero at exit, and holding a live reference over the invocation of q->make_request_fn() in generic_make_request(). The blk_mq code continues to take its own reference per blk_mq request and retains the ability to freeze the queue, but the check that the queue is frozen is moved to generic_make_request(). 
This fixes crash signatures like the following: BUG: unable to handle kernel paging request at ffff880140000000 [..] Call Trace: [] ? copy_user_handle_tail+0x5f/0x70 [] pmem_do_bvec.isra.11+0x70/0xf0 [nd_pmem] [] pmem_make_request+0xd1/0x200 [nd_pmem] [] ? mempool_alloc+0x72/0x1a0 [] generic_make_request+0xd6/0x110 [] submit_bio+0x76/0x170 [] submit_bh_wbc+0x12f/0x160 [] submit_bh+0x12/0x20 [] jbd2_write_superblock+0x8d/0x170 [] jbd2_mark_journal_empty+0x5d/0x90 [] jbd2_journal_destroy+0x24b/0x270 [] ? put_pwq_unlocked+0x2a/0x30 [] ? destroy_workqueue+0x225/0x250 [] ext4_put_super+0x64/0x360 [] generic_shutdown_super+0x6a/0xf0 Cc: Jens Axboe Cc: Keith Busch Cc: Ross Zwisler Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Tested-by: Ross Zwisler Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- block/blk-core.c | 71 ++++++++++++++++++++++++++++++++++++++------ block/blk-mq-sysfs.c | 6 ---- block/blk-mq.c | 80 +++++++++++++++----------------------------------- block/blk-sysfs.c | 3 +- block/blk.h | 14 +++++++++ include/linux/blk-mq.h | 1 - include/linux/blkdev.h | 2 +- 7 files changed, 102 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 2eb722d48773..9b4d735cb5b8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -554,13 +554,10 @@ void blk_cleanup_queue(struct request_queue *q) * Drain all requests queued before DYING marking. Set DEAD flag to * prevent that q->request_fn() gets invoked after draining finished. */ - if (q->mq_ops) { - blk_mq_freeze_queue(q); - spin_lock_irq(lock); - } else { - spin_lock_irq(lock); + blk_freeze_queue(q); + spin_lock_irq(lock); + if (!q->mq_ops) __blk_drain_queue(q, true); - } queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); @@ -570,6 +567,7 @@ void blk_cleanup_queue(struct request_queue *q) if (q->mq_ops) blk_mq_free_queue(q); + percpu_ref_exit(&q->q_usage_counter); spin_lock_irq(lock); if (q->queue_lock != &q->__queue_lock) @@ -629,6 +627,40 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask) } EXPORT_SYMBOL(blk_alloc_queue); +int blk_queue_enter(struct request_queue *q, gfp_t gfp) +{ + while (true) { + int ret; + + if (percpu_ref_tryget_live(&q->q_usage_counter)) + return 0; + + if (!(gfp & __GFP_WAIT)) + return -EBUSY; + + ret = wait_event_interruptible(q->mq_freeze_wq, + !atomic_read(&q->mq_freeze_depth) || + blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; + if (ret) + return ret; + } +} + +void blk_queue_exit(struct request_queue *q) +{ + percpu_ref_put(&q->q_usage_counter); +} + +static void blk_queue_usage_counter_release(struct percpu_ref *ref) +{ + struct request_queue *q = + container_of(ref, struct request_queue, q_usage_counter); + + wake_up_all(&q->mq_freeze_wq); +} + struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { struct request_queue *q; @@ -690,11 +722,22 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_waitqueue_head(&q->mq_freeze_wq); - if (blkcg_init_queue(q)) + /* + * Init percpu_ref in atomic mode so that it's faster to shutdown. + * See blk_register_queue() for details. 
+ */ + if (percpu_ref_init(&q->q_usage_counter, + blk_queue_usage_counter_release, + PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) goto fail_bdi; + if (blkcg_init_queue(q)) + goto fail_ref; + return q; +fail_ref: + percpu_ref_exit(&q->q_usage_counter); fail_bdi: bdi_destroy(&q->backing_dev_info); fail_split: @@ -1966,9 +2009,19 @@ void generic_make_request(struct bio *bio) do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - q->make_request_fn(q, bio); + if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) { + + q->make_request_fn(q, bio); + + blk_queue_exit(q); - bio = bio_list_pop(current->bio_list); + bio = bio_list_pop(current->bio_list); + } else { + struct bio *bio_next = bio_list_pop(current->bio_list); + + bio_io_error(bio); + bio = bio_next; + } } while (bio); current->bio_list = NULL; /* deactivate */ } diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 788fffd9b409..6f57a110289c 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -413,12 +413,6 @@ static void blk_mq_sysfs_init(struct request_queue *q) kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); } -/* see blk_register_queue() */ -void blk_mq_finish_init(struct request_queue *q) -{ - percpu_ref_switch_to_percpu(&q->mq_usage_counter); -} - int blk_mq_register_disk(struct gendisk *disk) { struct device *dev = disk_to_dev(disk); diff --git a/block/blk-mq.c b/block/blk-mq.c index d921cd5177f5..6c240712553a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -78,47 +78,13 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word); } -static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp) -{ - while (true) { - int ret; - - if (percpu_ref_tryget_live(&q->mq_usage_counter)) - return 0; - - if (!(gfp & __GFP_WAIT)) - return -EBUSY; - - ret = wait_event_interruptible(q->mq_freeze_wq, - !atomic_read(&q->mq_freeze_depth) || - blk_queue_dying(q)); - if (blk_queue_dying(q)) - return -ENODEV; - if (ret) - return ret; - } -} - -static void blk_mq_queue_exit(struct request_queue *q) -{ - percpu_ref_put(&q->mq_usage_counter); -} - -static void blk_mq_usage_counter_release(struct percpu_ref *ref) -{ - struct request_queue *q = - container_of(ref, struct request_queue, mq_usage_counter); - - wake_up_all(&q->mq_freeze_wq); -} - void blk_mq_freeze_queue_start(struct request_queue *q) { int freeze_depth; freeze_depth = atomic_inc_return(&q->mq_freeze_depth); if (freeze_depth == 1) { - percpu_ref_kill(&q->mq_usage_counter); + percpu_ref_kill(&q->q_usage_counter); blk_mq_run_hw_queues(q, false); } } @@ -126,18 +92,34 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); static void blk_mq_freeze_queue_wait(struct request_queue *q) { - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); + wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); } /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. */ -void blk_mq_freeze_queue(struct request_queue *q) +void blk_freeze_queue(struct request_queue *q) { + /* + * In the !blk_mq case we are only calling this to kill the + * q_usage_counter, otherwise this increases the freeze depth + * and waits for it to return to zero. For this reason there is + * no blk_unfreeze_queue(), and blk_freeze_queue() is not + * exported to drivers as the only user for unfreeze is blk_mq. 
+ */ blk_mq_freeze_queue_start(q); blk_mq_freeze_queue_wait(q); } + +void blk_mq_freeze_queue(struct request_queue *q) +{ + /* + * ...just an alias to keep freeze and unfreeze actions balanced + * in the blk_mq_* namespace + */ + blk_freeze_queue(q); +} EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); void blk_mq_unfreeze_queue(struct request_queue *q) @@ -147,7 +129,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) freeze_depth = atomic_dec_return(&q->mq_freeze_depth); WARN_ON_ONCE(freeze_depth < 0); if (!freeze_depth) { - percpu_ref_reinit(&q->mq_usage_counter); + percpu_ref_reinit(&q->q_usage_counter); wake_up_all(&q->mq_freeze_wq); } } @@ -256,7 +238,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, struct blk_mq_alloc_data alloc_data; int ret; - ret = blk_mq_queue_enter(q, gfp); + ret = blk_queue_enter(q, gfp); if (ret) return ERR_PTR(ret); @@ -279,7 +261,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, } blk_mq_put_ctx(ctx); if (!rq) { - blk_mq_queue_exit(q); + blk_queue_exit(q); return ERR_PTR(-EWOULDBLOCK); } return rq; @@ -298,7 +280,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); blk_mq_put_tag(hctx, tag, &ctx->last_tag); - blk_mq_queue_exit(q); + blk_queue_exit(q); } void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq) @@ -1177,11 +1159,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, int rw = bio_data_dir(bio); struct blk_mq_alloc_data alloc_data; - if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) { - bio_io_error(bio); - return NULL; - } - + blk_queue_enter_live(q); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); @@ -2000,14 +1978,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, hctxs[i]->queue_num = i; } - /* - * Init percpu_ref in atomic mode so that it's faster to shutdown. - * See blk_register_queue() for details. - */ - if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, - PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) - goto err_hctxs; - setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); blk_queue_rq_timeout(q, set->timeout ? 
set->timeout : 30 * HZ); @@ -2088,8 +2058,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_free_hw_queues(q, set); - - percpu_ref_exit(&q->mq_usage_counter); } /* Basically redo blk_mq_init_queue with queue frozen */ diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3e44a9da2a13..61fc2633bbea 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -599,9 +599,8 @@ int blk_register_queue(struct gendisk *disk) */ if (!blk_queue_init_done(q)) { queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); + percpu_ref_switch_to_percpu(&q->q_usage_counter); blk_queue_bypass_end(q); - if (q->mq_ops) - blk_mq_finish_init(q); } ret = blk_trace_init_sysfs(dev); diff --git a/block/blk.h b/block/blk.h index 98614ad37c81..5b2cd393afbe 100644 --- a/block/blk.h +++ b/block/blk.h @@ -72,6 +72,20 @@ void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); +int blk_queue_enter(struct request_queue *q, gfp_t gfp); +void blk_queue_exit(struct request_queue *q); +void blk_freeze_queue(struct request_queue *q); + +static inline void blk_queue_enter_live(struct request_queue *q) +{ + /* + * Given that running in generic_make_request() context + * guarantees that a live reference against q_usage_counter has + * been established, further references under that same context + * need not check that the queue has been frozen (marked dead). + */ + percpu_ref_get(&q->q_usage_counter); +} void blk_rq_timed_out_timer(unsigned long data); unsigned long blk_rq_timeout(unsigned long timeout); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5e7d43ab61c0..83cc9d4e5455 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -166,7 +166,6 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -void blk_mq_finish_init(struct request_queue *q); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 60669c20190f..3e0465257d68 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -450,7 +450,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; - struct percpu_ref mq_usage_counter; + struct percpu_ref q_usage_counter; struct list_head all_q_node; struct blk_mq_tag_set *tag_set; -- cgit v1.2.3 From ac6fc48c9fb7d3220ec4e0be0c29bb314ea75f9f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Oct 2015 13:20:18 -0400 Subject: block: move blk_integrity to request_queue A trace like the following precedes a crash in bio_integrity_process() when it goes to use an already freed blk_integrity profile. BUG: unable to handle kernel paging request at ffff8800d31b10d8 IP: [] 0xffff8800d31b10d8 PGD 2f65067 PUD 21fffd067 PMD 80000000d30001e3 Oops: 0011 [#1] SMP Dumping ftrace buffer: --------------------------------- ndctl-2222 2.... 44526245us : disk_release: pmem1s systemd--2223 4.... 44573945us : bio_integrity_endio: pmem1s <...>-409 4.... 44574005us : bio_integrity_process: pmem1s --------------------------------- [..] Call Trace: [] ?
bio_integrity_process+0x159/0x2d0 [] bio_integrity_verify_fn+0x36/0x60 [] process_one_work+0x1cc/0x4e0 Given that a request_queue is pinned while i/o is in flight and that a gendisk is allowed to have a shorter lifetime, move blk_integrity to request_queue to satisfy requests arriving after the gendisk has been torn down. Cc: Christoph Hellwig Cc: Martin K. Petersen [martin: fix the CONFIG_BLK_DEV_INTEGRITY=n case] Tested-by: Ross Zwisler Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- block/blk-integrity.c | 14 +++++++------- drivers/nvme/host/pci.c | 2 +- include/linux/blkdev.h | 6 +++++- include/linux/genhd.h | 1 - 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 4615a3386798..5d339ae64d56 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -142,8 +142,8 @@ EXPORT_SYMBOL(blk_rq_map_integrity_sg); */ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) { - struct blk_integrity *b1 = &gd1->integrity; - struct blk_integrity *b2 = &gd2->integrity; + struct blk_integrity *b1 = &gd1->queue->integrity; + struct blk_integrity *b2 = &gd2->queue->integrity; if (!b1->profile && !b2->profile) return 0; @@ -246,7 +246,7 @@ static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct blk_integrity *bi = &disk->integrity; + struct blk_integrity *bi = &disk->queue->integrity; struct integrity_sysfs_entry *entry = container_of(attr, struct integrity_sysfs_entry, attr); @@ -258,7 +258,7 @@ static ssize_t integrity_attr_store(struct kobject *kobj, size_t count) { struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct blk_integrity *bi = &disk->integrity; + struct blk_integrity *bi = &disk->queue->integrity; struct integrity_sysfs_entry *entry = container_of(attr, struct integrity_sysfs_entry, attr); ssize_t ret = 0; @@ -397,7 +397,7 @@ static struct kobj_type integrity_ktype = { */ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) { - struct blk_integrity *bi = &disk->integrity; + struct blk_integrity *bi = &disk->queue->integrity; bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE | template->flags; @@ -420,13 +420,13 @@ EXPORT_SYMBOL(blk_integrity_register); void blk_integrity_unregister(struct gendisk *disk) { blk_integrity_revalidate(disk); - memset(&disk->integrity, 0, sizeof(struct blk_integrity)); + memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); } EXPORT_SYMBOL(blk_integrity_unregister); void blk_integrity_revalidate(struct gendisk *disk) { - struct blk_integrity *bi = &disk->integrity; + struct blk_integrity *bi = &disk->queue->integrity; if (!(disk->flags & GENHD_FL_UP)) return; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4757a2c9366e..2fa28680ad0f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -538,7 +538,7 @@ static void nvme_dif_remap(struct request *req, virt = bip_get_seed(bip); phys = nvme_block_nr(ns, blk_rq_pos(req)); nlb = (blk_rq_bytes(req) >> ns->lba_shift); - ts = ns->disk->integrity.tuple_size; + ts = ns->disk->queue->integrity.tuple_size; for (i = 0; i < nlb; i++, virt++, phys++) { pi = (struct t10_pi_tuple *)p; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3e0465257d68..cf57884db4b7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -369,6 +369,10 @@ struct 
request_queue { */ struct kobject mq_kobj; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct blk_integrity integrity; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + #ifdef CONFIG_PM struct device *dev; int rpm_status; @@ -1481,7 +1485,7 @@ extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { - struct blk_integrity *bi = &disk->integrity; + struct blk_integrity *bi = &disk->queue->integrity; if (!bi->profile) return NULL; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 82f4911e0ad8..847cc1d91634 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -210,7 +210,6 @@ struct gendisk { atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity integrity; struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ int node_id; -- cgit v1.2.3 From bbd3e064362e5057cc4799ba2e4d68c7593e490b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Oct 2015 14:10:48 +0200 Subject: block: add an API for Persistent Reservations This commit adds a driver API and ioctls for controlling Persistent Reservations generically at the block layer. Persistent Reservations are supported by SCSI and NVMe and allow controlling who gets access to a device in a shared storage setup. Note that we add a pr_ops structure to struct block_device_operations instead of adding the members directly to avoid bloating all instances of devices that will never support Persistent Reservations. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/pr.txt | 119 +++++++++++++++++++++++++++++++++++++++++++++ block/ioctl.c | 103 +++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 2 + include/linux/pr.h | 18 +++++++ include/uapi/linux/pr.h | 48 ++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 Documentation/block/pr.txt create mode 100644 include/linux/pr.h create mode 100644 include/uapi/linux/pr.h (limited to 'include/linux') diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt new file mode 100644 index 000000000000..d3eb1ca65051 --- /dev/null +++ b/Documentation/block/pr.txt @@ -0,0 +1,119 @@ + +Block layer support for Persistent Reservations +=============================================== + +The Linux kernel supports a user space interface for simplified +Persistent Reservations which map to block devices that support +these (like SCSI). Persistent Reservations allow restricting +access to block devices to specific initiators in a shared storage +setup. + +This document gives a general overview of the support ioctl commands. +For a more detailed reference please refer to the SCSI Primary +Commands standard, specifically the section on Reservations and the +"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands. + +All implementations are expected to ensure the reservations survive +a power loss and cover all connections in a multi path environment. +These behaviors are optional in SPC but will be automatically applied +by Linux. + + +The following types of reservations are supported: +-------------------------------------------------- + + - PR_WRITE_EXCLUSIVE + + Only the initiator that owns the reservation can write to the + device. Any initiator can read from the device. + + - PR_EXCLUSIVE_ACCESS + + Only the initiator that owns the reservation can access the + device.
+ + - PR_WRITE_EXCLUSIVE_REG_ONLY + + Only initiators with a registered key can write to the device. + Any initiator can read from the device. + + - PR_EXCLUSIVE_ACCESS_REG_ONLY + + Only initiators with a registered key can access the device. + + - PR_WRITE_EXCLUSIVE_ALL_REGS + + Only initiators with a registered key can write to the device. + Any initiator can read from the device. + All initiators with a registered key are considered reservation + holders. + Please reference the SPC spec on the meaning of a reservation + holder if you want to use this type. + + - PR_EXCLUSIVE_ACCESS_ALL_REGS + + Only initiators with a registered key can access the device. + All initiators with a registered key are considered reservation + holders. + Please reference the SPC spec on the meaning of a reservation + holder if you want to use this type. + + +The following ioctls are supported: +---------------------------------- + +1. IOC_PR_REGISTER + +This ioctl command registers a new reservation if the new_key argument +is non-null. If no existing reservation exists old_key must be zero; +if an existing reservation should be replaced old_key must contain +the old reservation key. + +If the new_key argument is 0 it unregisters the existing reservation passed +in old_key. + + +2. IOC_PR_RESERVE + +This ioctl command reserves the device and thus restricts access for other +devices based on the type argument. The key argument must be the existing +reservation key for the device as acquired by the IOC_PR_REGISTER, +IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands. + + +3. IOC_PR_RELEASE + +This ioctl command releases the reservation specified by key and flags +and thus removes any access restriction implied by it. + + +4. IOC_PR_PREEMPT + +This ioctl command releases the existing reservation referred to by +old_key and replaces it with a new reservation of type for the +reservation key new_key. + + +5. IOC_PR_PREEMPT_ABORT + +This ioctl command works like IOC_PR_PREEMPT except that it also aborts +any outstanding command sent over a connection identified by old_key. + +6. IOC_PR_CLEAR + +This ioctl command unregisters both key and any other reservation key +registered with the device and drops any existing reservation. + + +Flags +----- + +All the ioctls have a flag field. Currently only one flag is supported: + + - PR_FL_IGNORE_KEY + + Ignore the existing reservation key. This is commonly supported for + IOC_PR_REGISTER, and some implementations may support the flag for + IOC_PR_RESERVE. + +For all unknown flags the kernel will return -EOPNOTSUPP.
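For illustration only, a minimal user-space sketch of the register/reserve flow documented above; the device path /dev/sdX and the key value are placeholders, and PR_WRITE_EXCLUSIVE is just one example type.

/* Hypothetical usage sketch, not part of the patch: register a key,
 * then take a write-exclusive reservation on a device whose driver
 * implements pr_ops. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/pr.h>

int main(void)
{
	struct pr_registration reg = { .old_key = 0, .new_key = 0xabc123 };
	struct pr_reservation rsv = { .key = 0xabc123, .type = PR_WRITE_EXCLUSIVE };
	int fd = open("/dev/sdX", O_RDWR);		/* placeholder device */

	if (fd < 0)
		return 1;
	if (ioctl(fd, IOC_PR_REGISTER, &reg))		/* register our key */
		perror("IOC_PR_REGISTER");
	else if (ioctl(fd, IOC_PR_RESERVE, &rsv))	/* acquire the reservation */
		perror("IOC_PR_RESERVE");
	return 0;
}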
diff --git a/block/ioctl.c b/block/ioctl.c index df62b47d2379..0918aed2d847 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) @@ -295,6 +296,96 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, */ EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); +static int blkdev_pr_register(struct block_device *bdev, + struct pr_registration __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_registration reg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_register) + return -EOPNOTSUPP; + if (copy_from_user(®, arg, sizeof(reg))) + return -EFAULT; + + if (reg.flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); +} + +static int blkdev_pr_reserve(struct block_device *bdev, + struct pr_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_reservation rsv; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_reserve) + return -EOPNOTSUPP; + if (copy_from_user(&rsv, arg, sizeof(rsv))) + return -EFAULT; + + if (rsv.flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); +} + +static int blkdev_pr_release(struct block_device *bdev, + struct pr_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_reservation rsv; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_release) + return -EOPNOTSUPP; + if (copy_from_user(&rsv, arg, sizeof(rsv))) + return -EFAULT; + + if (rsv.flags) + return -EOPNOTSUPP; + return ops->pr_release(bdev, rsv.key, rsv.type); +} + +static int blkdev_pr_preempt(struct block_device *bdev, + struct pr_preempt __user *arg, bool abort) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_preempt p; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_preempt) + return -EOPNOTSUPP; + if (copy_from_user(&p, arg, sizeof(p))) + return -EFAULT; + + if (p.flags) + return -EOPNOTSUPP; + return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); +} + +static int blkdev_pr_clear(struct block_device *bdev, + struct pr_clear __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_clear c; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_clear) + return -EOPNOTSUPP; + if (copy_from_user(&c, arg, sizeof(c))) + return -EFAULT; + + if (c.flags) + return -EOPNOTSUPP; + return ops->pr_clear(bdev, c.key); +} + /* * Is it an unrecognized ioctl? 
The correct returns are either * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a @@ -477,6 +568,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKTRACESETUP: case BLKTRACETEARDOWN: return blk_trace_ioctl(bdev, cmd, argp); + case IOC_PR_REGISTER: + return blkdev_pr_register(bdev, argp); + case IOC_PR_RESERVE: + return blkdev_pr_reserve(bdev, argp); + case IOC_PR_RELEASE: + return blkdev_pr_release(bdev, argp); + case IOC_PR_PREEMPT: + return blkdev_pr_preempt(bdev, argp, false); + case IOC_PR_PREEMPT_ABORT: + return blkdev_pr_preempt(bdev, argp, true); + case IOC_PR_CLEAR: + return blkdev_pr_clear(bdev, argp); default: return __blkdev_driver_ioctl(bdev, mode, cmd, arg); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 19c2e947d4d1..fe25da05e823 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -35,6 +35,7 @@ struct sg_io_hdr; struct bsg_job; struct blkcg_gq; struct blk_flush_queue; +struct pr_ops; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -1633,6 +1634,7 @@ struct block_device_operations { /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); struct module *owner; + const struct pr_ops *pr_ops; }; extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, diff --git a/include/linux/pr.h b/include/linux/pr.h new file mode 100644 index 000000000000..65c01c10b335 --- /dev/null +++ b/include/linux/pr.h @@ -0,0 +1,18 @@ +#ifndef LINUX_PR_H +#define LINUX_PR_H + +#include + +struct pr_ops { + int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, + u32 flags); + int (*pr_reserve)(struct block_device *bdev, u64 key, + enum pr_type type, u32 flags); + int (*pr_release)(struct block_device *bdev, u64 key, + enum pr_type type); + int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, + enum pr_type type, bool abort); + int (*pr_clear)(struct block_device *bdev, u64 key); +}; + +#endif /* LINUX_PR_H */ diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h new file mode 100644 index 000000000000..57d7c0f916b6 --- /dev/null +++ b/include/uapi/linux/pr.h @@ -0,0 +1,48 @@ +#ifndef _UAPI_PR_H +#define _UAPI_PR_H + +enum pr_type { + PR_WRITE_EXCLUSIVE = 1, + PR_EXCLUSIVE_ACCESS = 2, + PR_WRITE_EXCLUSIVE_REG_ONLY = 3, + PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, + PR_WRITE_EXCLUSIVE_ALL_REGS = 5, + PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, +}; + +struct pr_reservation { + __u64 key; + __u32 type; + __u32 flags; +}; + +struct pr_registration { + __u64 old_key; + __u64 new_key; + __u32 flags; + __u32 __pad; +}; + +struct pr_preempt { + __u64 old_key; + __u64 new_key; + __u32 type; + __u32 flags; +}; + +struct pr_clear { + __u64 key; + __u32 flags; + __u32 __pad; +}; + +#define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ + +#define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) +#define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation) +#define IOC_PR_RELEASE _IOW('p', 202, struct pr_reservation) +#define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt) +#define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) +#define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear) + +#endif /* _UAPI_PR_H */ -- cgit v1.2.3 From 46c6b2bc88a729366605d0dedb6a35b8cf7cc4f0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:36 +0200 Subject: iommu: Revive device_group iommu-ops call-back That call-back is currently unused, change it into a 
call-back function for finding the right IOMMU group for a device. This is a first step to remove the hard-coded PCI dependency in the iommu-group code. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 ++++++--- include/linux/iommu.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 049df495c274..563746383973 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -814,6 +814,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) */ struct iommu_group *iommu_group_get_for_dev(struct device *dev) { + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; int ret; @@ -821,10 +822,12 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - if (!dev_is_pci(dev)) - return ERR_PTR(-EINVAL); + group = ERR_PTR(-EINVAL); - group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + if (ops && ops->device_group) + group = ops->device_group(dev); + else if (dev_is_pci(dev)) + group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); if (IS_ERR(group)) return group; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f174506a5343..a1e6e8914c17 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -168,7 +168,7 @@ struct iommu_ops { phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); - int (*device_group)(struct device *dev, unsigned int *groupid); + struct iommu_group *(*device_group)(struct device *dev); int (*domain_get_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); int (*domain_set_attr)(struct iommu_domain *domain, -- cgit v1.2.3 From 5e62292bad10cff25ff75d136c54e62b43bfb0fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:37 +0200 Subject: iommu: Export and rename iommu_group_get_for_pci_dev() Rename that function to pci_device_group() and export it, so that IOMMU drivers can use it as their device_group call-back. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++-- include/linux/iommu.h | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 563746383973..fdea700ca12c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -731,13 +731,17 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. */ -static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +struct iommu_group *pci_device_group(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct group_for_pci_data data; struct pci_bus *bus; struct iommu_group *group = NULL; u64 devfns[4] = { 0 }; + if (WARN_ON(!dev_is_pci(dev))) + return ERR_PTR(-EINVAL); + /* * Find the upstream DMA alias for the device. A device must not * be aliased due to topology in order to have its own IOMMU group. 
@@ -827,7 +831,7 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (ops && ops->device_group) group = ops->device_group(dev); else if (dev_is_pci(dev)) - group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + group = pci_device_group(dev); if (IS_ERR(group)) return group; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1e6e8914c17..a1a06639a64a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -317,6 +317,9 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, return domain->ops->map_sg(domain, iova, sg, nents, prot); } +/* PCI device grouping function */ +extern struct iommu_group *pci_device_group(struct device *dev); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; -- cgit v1.2.3 From 6eab556a40384de94c2d03c8d9d632e5154367f5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:38 +0200 Subject: iommu: Add generic_device_group() function This function can be used as a device_group call-back and just allocates one iommu-group per device. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 15 +++++++++++++++ include/linux/iommu.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index fdea700ca12c..a80c9c5c2650 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -727,6 +727,21 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) return data->group != NULL; } +/* + * Generic device_group call-back function. It just allocates one + * iommu-group per device. + */ +struct iommu_group *generic_device_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return NULL; + + return group; +} + /* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1a06639a64a..f28dff313b07 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -319,6 +319,8 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); +/* Generic device grouping function */ +extern struct iommu_group *generic_device_group(struct device *dev); #else /* CONFIG_IOMMU_API */ -- cgit v1.2.3 From 948486544713492f00ac8a9572909101ea892cb0 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Sat, 19 Sep 2015 23:29:53 -0500 Subject: powerpc/fsl: Move fsl_guts.h out of arch/powerpc Freescale's Layerscape ARM chips use the same structure. 
Signed-off-by: Scott Wood --- arch/powerpc/include/asm/fsl_guts.h | 192 ----------------------------- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 2 +- arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 2 +- arch/powerpc/platforms/85xx/p1022_ds.c | 2 +- arch/powerpc/platforms/85xx/p1022_rdk.c | 2 +- arch/powerpc/platforms/85xx/smp.c | 2 +- arch/powerpc/platforms/85xx/twr_p102x.c | 2 +- arch/powerpc/platforms/86xx/mpc8610_hpcd.c | 2 +- drivers/iommu/fsl_pamu.c | 2 +- include/linux/fsl/guts.h | 192 +++++++++++++++++++++++++++++ sound/soc/fsl/mpc8610_hpcd.c | 2 +- sound/soc/fsl/p1022_ds.c | 2 +- sound/soc/fsl/p1022_rdk.c | 2 +- 13 files changed, 203 insertions(+), 203 deletions(-) delete mode 100644 arch/powerpc/include/asm/fsl_guts.h create mode 100644 include/linux/fsl/guts.h (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h deleted file mode 100644 index 43b6bb1a4a9c..000000000000 --- a/arch/powerpc/include/asm/fsl_guts.h +++ /dev/null @@ -1,192 +0,0 @@ -/** - * Freecale 85xx and 86xx Global Utilties register set - * - * Authors: Jeff Brown - * Timur Tabi - * - * Copyright 2004,2007,2012 Freescale Semiconductor, Inc - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef __ASM_POWERPC_FSL_GUTS_H__ -#define __ASM_POWERPC_FSL_GUTS_H__ -#ifdef __KERNEL__ - -/** - * Global Utility Registers. - * - * Not all registers defined in this structure are available on all chips, so - * you are expected to know whether a given register actually exists on your - * chip before you access it. - * - * Also, some registers are similar on different chips but have slightly - * different names. In these cases, one name is chosen to avoid extraneous - * #ifdefs. 
- */ -struct ccsr_guts { - __be32 porpllsr; /* 0x.0000 - POR PLL Ratio Status Register */ - __be32 porbmsr; /* 0x.0004 - POR Boot Mode Status Register */ - __be32 porimpscr; /* 0x.0008 - POR I/O Impedance Status and Control Register */ - __be32 pordevsr; /* 0x.000c - POR I/O Device Status Register */ - __be32 pordbgmsr; /* 0x.0010 - POR Debug Mode Status Register */ - __be32 pordevsr2; /* 0x.0014 - POR device status register 2 */ - u8 res018[0x20 - 0x18]; - __be32 porcir; /* 0x.0020 - POR Configuration Information Register */ - u8 res024[0x30 - 0x24]; - __be32 gpiocr; /* 0x.0030 - GPIO Control Register */ - u8 res034[0x40 - 0x34]; - __be32 gpoutdr; /* 0x.0040 - General-Purpose Output Data Register */ - u8 res044[0x50 - 0x44]; - __be32 gpindr; /* 0x.0050 - General-Purpose Input Data Register */ - u8 res054[0x60 - 0x54]; - __be32 pmuxcr; /* 0x.0060 - Alternate Function Signal Multiplex Control */ - __be32 pmuxcr2; /* 0x.0064 - Alternate function signal multiplex control 2 */ - __be32 dmuxcr; /* 0x.0068 - DMA Mux Control Register */ - u8 res06c[0x70 - 0x6c]; - __be32 devdisr; /* 0x.0070 - Device Disable Control */ -#define CCSR_GUTS_DEVDISR_TB1 0x00001000 -#define CCSR_GUTS_DEVDISR_TB0 0x00004000 - __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */ - u8 res078[0x7c - 0x78]; - __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control Register */ - __be32 powmgtcsr; /* 0x.0080 - Power Management Status and Control Register */ - __be32 pmrccr; /* 0x.0084 - Power Management Reset Counter Configuration Register */ - __be32 pmpdccr; /* 0x.0088 - Power Management Power Down Counter Configuration Register */ - __be32 pmcdr; /* 0x.008c - 4Power management clock disable register */ - __be32 mcpsumr; /* 0x.0090 - Machine Check Summary Register */ - __be32 rstrscr; /* 0x.0094 - Reset Request Status and Control Register */ - __be32 ectrstcr; /* 0x.0098 - Exception reset control register */ - __be32 autorstsr; /* 0x.009c - Automatic reset status register */ - __be32 pvr; /* 0x.00a0 - Processor Version Register */ - __be32 svr; /* 0x.00a4 - System Version Register */ - u8 res0a8[0xb0 - 0xa8]; - __be32 rstcr; /* 0x.00b0 - Reset Control Register */ - u8 res0b4[0xc0 - 0xb4]; - __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register - Called 'elbcvselcr' on 86xx SOCs */ - u8 res0c4[0x100 - 0xc4]; - __be32 rcwsr[16]; /* 0x.0100 - Reset Control Word Status registers - There are 16 registers */ - u8 res140[0x224 - 0x140]; - __be32 iodelay1; /* 0x.0224 - IO delay control register 1 */ - __be32 iodelay2; /* 0x.0228 - IO delay control register 2 */ - u8 res22c[0x604 - 0x22c]; - __be32 pamubypenr; /* 0x.604 - PAMU bypass enable register */ - u8 res608[0x800 - 0x608]; - __be32 clkdvdr; /* 0x.0800 - Clock Divide Register */ - u8 res804[0x900 - 0x804]; - __be32 ircr; /* 0x.0900 - Infrared Control Register */ - u8 res904[0x908 - 0x904]; - __be32 dmacr; /* 0x.0908 - DMA Control Register */ - u8 res90c[0x914 - 0x90c]; - __be32 elbccr; /* 0x.0914 - eLBC Control Register */ - u8 res918[0xb20 - 0x918]; - __be32 ddr1clkdr; /* 0x.0b20 - DDR1 Clock Disable Register */ - __be32 ddr2clkdr; /* 0x.0b24 - DDR2 Clock Disable Register */ - __be32 ddrclkdr; /* 0x.0b28 - DDR Clock Disable Register */ - u8 resb2c[0xe00 - 0xb2c]; - __be32 clkocr; /* 0x.0e00 - Clock Out Select Register */ - u8 rese04[0xe10 - 0xe04]; - __be32 ddrdllcr; /* 0x.0e10 - DDR DLL Control Register */ - u8 rese14[0xe20 - 0xe14]; - __be32 lbcdllcr; /* 0x.0e20 - LBC DLL Control Register */ - __be32 cpfor; /* 0x.0e24 - L2 charge pump fuse override 
register */ - u8 rese28[0xf04 - 0xe28]; - __be32 srds1cr0; /* 0x.0f04 - SerDes1 Control Register 0 */ - __be32 srds1cr1; /* 0x.0f08 - SerDes1 Control Register 0 */ - u8 resf0c[0xf2c - 0xf0c]; - __be32 itcr; /* 0x.0f2c - Internal transaction control register */ - u8 resf30[0xf40 - 0xf30]; - __be32 srds2cr0; /* 0x.0f40 - SerDes2 Control Register 0 */ - __be32 srds2cr1; /* 0x.0f44 - SerDes2 Control Register 0 */ -} __attribute__ ((packed)); - - -/* Alternate function signal multiplex control */ -#define MPC85xx_PMUXCR_QE(x) (0x8000 >> (x)) - -#ifdef CONFIG_PPC_86xx - -#define CCSR_GUTS_DMACR_DEV_SSI 0 /* DMA controller/channel set to SSI */ -#define CCSR_GUTS_DMACR_DEV_IR 1 /* DMA controller/channel set to IR */ - -/* - * Set the DMACR register in the GUTS - * - * The DMACR register determines the source of initiated transfers for each - * channel on each DMA controller. Rather than have a bunch of repetitive - * macros for the bit patterns, we just have a function that calculates - * them. - * - * guts: Pointer to GUTS structure - * co: The DMA controller (0 or 1) - * ch: The channel on the DMA controller (0, 1, 2, or 3) - * device: The device to set as the source (CCSR_GUTS_DMACR_DEV_xx) - */ -static inline void guts_set_dmacr(struct ccsr_guts __iomem *guts, - unsigned int co, unsigned int ch, unsigned int device) -{ - unsigned int shift = 16 + (8 * (1 - co) + 2 * (3 - ch)); - - clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift); -} - -#define CCSR_GUTS_PMUXCR_LDPSEL 0x00010000 -#define CCSR_GUTS_PMUXCR_SSI1_MASK 0x0000C000 /* Bitmask for SSI1 */ -#define CCSR_GUTS_PMUXCR_SSI1_LA 0x00000000 /* Latched address */ -#define CCSR_GUTS_PMUXCR_SSI1_HI 0x00004000 /* High impedance */ -#define CCSR_GUTS_PMUXCR_SSI1_SSI 0x00008000 /* Used for SSI1 */ -#define CCSR_GUTS_PMUXCR_SSI2_MASK 0x00003000 /* Bitmask for SSI2 */ -#define CCSR_GUTS_PMUXCR_SSI2_LA 0x00000000 /* Latched address */ -#define CCSR_GUTS_PMUXCR_SSI2_HI 0x00001000 /* High impedance */ -#define CCSR_GUTS_PMUXCR_SSI2_SSI 0x00002000 /* Used for SSI2 */ -#define CCSR_GUTS_PMUXCR_LA_22_25_LA 0x00000000 /* Latched Address */ -#define CCSR_GUTS_PMUXCR_LA_22_25_HI 0x00000400 /* High impedance */ -#define CCSR_GUTS_PMUXCR_DBGDRV 0x00000200 /* Signals not driven */ -#define CCSR_GUTS_PMUXCR_DMA2_0 0x00000008 -#define CCSR_GUTS_PMUXCR_DMA2_3 0x00000004 -#define CCSR_GUTS_PMUXCR_DMA1_0 0x00000002 -#define CCSR_GUTS_PMUXCR_DMA1_3 0x00000001 - -/* - * Set the DMA external control bits in the GUTS - * - * The DMA external control bits in the PMUXCR are only meaningful for - * channels 0 and 3. Any other channels are ignored. 
- * - * guts: Pointer to GUTS structure - * co: The DMA controller (0 or 1) - * ch: The channel on the DMA controller (0, 1, 2, or 3) - * value: the new value for the bit (0 or 1) - */ -static inline void guts_set_pmuxcr_dma(struct ccsr_guts __iomem *guts, - unsigned int co, unsigned int ch, unsigned int value) -{ - if ((ch == 0) || (ch == 3)) { - unsigned int shift = 2 * (co + 1) - (ch & 1) - 1; - - clrsetbits_be32(&guts->pmuxcr, 1 << shift, value << shift); - } -} - -#define CCSR_GUTS_CLKDVDR_PXCKEN 0x80000000 -#define CCSR_GUTS_CLKDVDR_SSICKEN 0x20000000 -#define CCSR_GUTS_CLKDVDR_PXCKINV 0x10000000 -#define CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT 25 -#define CCSR_GUTS_CLKDVDR_PXCKDLY_MASK 0x06000000 -#define CCSR_GUTS_CLKDVDR_PXCKDLY(x) \ - (((x) & 3) << CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT) -#define CCSR_GUTS_CLKDVDR_PXCLK_SHIFT 16 -#define CCSR_GUTS_CLKDVDR_PXCLK_MASK 0x001F0000 -#define CCSR_GUTS_CLKDVDR_PXCLK(x) (((x) & 31) << CCSR_GUTS_CLKDVDR_PXCLK_SHIFT) -#define CCSR_GUTS_CLKDVDR_SSICLK_MASK 0x000000FF -#define CCSR_GUTS_CLKDVDR_SSICLK(x) ((x) & CCSR_GUTS_CLKDVDR_SSICLK_MASK) - -#endif - -#endif -#endif diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index a392e94a07fa..f0be439ceaaa 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -51,7 +52,6 @@ #include #include #include -#include #include "smp.h" #include "mpc85xx.h" diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index e358bed66d01..50dcc00a0f5a 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 6ac986d3f8a3..371df822e88e 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -16,6 +16,7 @@ * kind, whether express or implied. */ +#include #include #include #include @@ -25,7 +26,6 @@ #include #include #include -#include #include #include "smp.h" diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index 680232d6ba48..5087becaa8bc 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -12,6 +12,7 @@ * kind, whether express or implied. 
*/ +#include #include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include "smp.h" #include "mpc85xx.h" diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index b8b821697910..6ac7786e076a 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index 30e002f4648c..892e613519cc 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -23,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 55413a547ea8..437a9c372ae1 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ #include #include #include -#include #include "mpc86xx.h" diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 2570f2a25dc4..a34355fca37a 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -20,11 +20,11 @@ #include "fsl_pamu.h" +#include #include #include #include -#include /* define indexes for each operation mapping scenario */ #define OMI_QMAN 0x00 diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h new file mode 100644 index 000000000000..84d971ff3fba --- /dev/null +++ b/include/linux/fsl/guts.h @@ -0,0 +1,192 @@ +/** + * Freecale 85xx and 86xx Global Utilties register set + * + * Authors: Jeff Brown + * Timur Tabi + * + * Copyright 2004,2007,2012 Freescale Semiconductor, Inc + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __FSL_GUTS_H__ +#define __FSL_GUTS_H__ + +#include + +/** + * Global Utility Registers. + * + * Not all registers defined in this structure are available on all chips, so + * you are expected to know whether a given register actually exists on your + * chip before you access it. + * + * Also, some registers are similar on different chips but have slightly + * different names. In these cases, one name is chosen to avoid extraneous + * #ifdefs. 
+ */ +struct ccsr_guts { + __be32 porpllsr; /* 0x.0000 - POR PLL Ratio Status Register */ + __be32 porbmsr; /* 0x.0004 - POR Boot Mode Status Register */ + __be32 porimpscr; /* 0x.0008 - POR I/O Impedance Status and Control Register */ + __be32 pordevsr; /* 0x.000c - POR I/O Device Status Register */ + __be32 pordbgmsr; /* 0x.0010 - POR Debug Mode Status Register */ + __be32 pordevsr2; /* 0x.0014 - POR device status register 2 */ + u8 res018[0x20 - 0x18]; + __be32 porcir; /* 0x.0020 - POR Configuration Information Register */ + u8 res024[0x30 - 0x24]; + __be32 gpiocr; /* 0x.0030 - GPIO Control Register */ + u8 res034[0x40 - 0x34]; + __be32 gpoutdr; /* 0x.0040 - General-Purpose Output Data Register */ + u8 res044[0x50 - 0x44]; + __be32 gpindr; /* 0x.0050 - General-Purpose Input Data Register */ + u8 res054[0x60 - 0x54]; + __be32 pmuxcr; /* 0x.0060 - Alternate Function Signal Multiplex Control */ + __be32 pmuxcr2; /* 0x.0064 - Alternate function signal multiplex control 2 */ + __be32 dmuxcr; /* 0x.0068 - DMA Mux Control Register */ + u8 res06c[0x70 - 0x6c]; + __be32 devdisr; /* 0x.0070 - Device Disable Control */ +#define CCSR_GUTS_DEVDISR_TB1 0x00001000 +#define CCSR_GUTS_DEVDISR_TB0 0x00004000 + __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */ + u8 res078[0x7c - 0x78]; + __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control Register */ + __be32 powmgtcsr; /* 0x.0080 - Power Management Status and Control Register */ + __be32 pmrccr; /* 0x.0084 - Power Management Reset Counter Configuration Register */ + __be32 pmpdccr; /* 0x.0088 - Power Management Power Down Counter Configuration Register */ + __be32 pmcdr; /* 0x.008c - 4Power management clock disable register */ + __be32 mcpsumr; /* 0x.0090 - Machine Check Summary Register */ + __be32 rstrscr; /* 0x.0094 - Reset Request Status and Control Register */ + __be32 ectrstcr; /* 0x.0098 - Exception reset control register */ + __be32 autorstsr; /* 0x.009c - Automatic reset status register */ + __be32 pvr; /* 0x.00a0 - Processor Version Register */ + __be32 svr; /* 0x.00a4 - System Version Register */ + u8 res0a8[0xb0 - 0xa8]; + __be32 rstcr; /* 0x.00b0 - Reset Control Register */ + u8 res0b4[0xc0 - 0xb4]; + __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register + Called 'elbcvselcr' on 86xx SOCs */ + u8 res0c4[0x100 - 0xc4]; + __be32 rcwsr[16]; /* 0x.0100 - Reset Control Word Status registers + There are 16 registers */ + u8 res140[0x224 - 0x140]; + __be32 iodelay1; /* 0x.0224 - IO delay control register 1 */ + __be32 iodelay2; /* 0x.0228 - IO delay control register 2 */ + u8 res22c[0x604 - 0x22c]; + __be32 pamubypenr; /* 0x.604 - PAMU bypass enable register */ + u8 res608[0x800 - 0x608]; + __be32 clkdvdr; /* 0x.0800 - Clock Divide Register */ + u8 res804[0x900 - 0x804]; + __be32 ircr; /* 0x.0900 - Infrared Control Register */ + u8 res904[0x908 - 0x904]; + __be32 dmacr; /* 0x.0908 - DMA Control Register */ + u8 res90c[0x914 - 0x90c]; + __be32 elbccr; /* 0x.0914 - eLBC Control Register */ + u8 res918[0xb20 - 0x918]; + __be32 ddr1clkdr; /* 0x.0b20 - DDR1 Clock Disable Register */ + __be32 ddr2clkdr; /* 0x.0b24 - DDR2 Clock Disable Register */ + __be32 ddrclkdr; /* 0x.0b28 - DDR Clock Disable Register */ + u8 resb2c[0xe00 - 0xb2c]; + __be32 clkocr; /* 0x.0e00 - Clock Out Select Register */ + u8 rese04[0xe10 - 0xe04]; + __be32 ddrdllcr; /* 0x.0e10 - DDR DLL Control Register */ + u8 rese14[0xe20 - 0xe14]; + __be32 lbcdllcr; /* 0x.0e20 - LBC DLL Control Register */ + __be32 cpfor; /* 0x.0e24 - L2 charge pump fuse override 
register */ + u8 rese28[0xf04 - 0xe28]; + __be32 srds1cr0; /* 0x.0f04 - SerDes1 Control Register 0 */ + __be32 srds1cr1; /* 0x.0f08 - SerDes1 Control Register 0 */ + u8 resf0c[0xf2c - 0xf0c]; + __be32 itcr; /* 0x.0f2c - Internal transaction control register */ + u8 resf30[0xf40 - 0xf30]; + __be32 srds2cr0; /* 0x.0f40 - SerDes2 Control Register 0 */ + __be32 srds2cr1; /* 0x.0f44 - SerDes2 Control Register 0 */ +} __attribute__ ((packed)); + + +/* Alternate function signal multiplex control */ +#define MPC85xx_PMUXCR_QE(x) (0x8000 >> (x)) + +#ifdef CONFIG_PPC_86xx + +#define CCSR_GUTS_DMACR_DEV_SSI 0 /* DMA controller/channel set to SSI */ +#define CCSR_GUTS_DMACR_DEV_IR 1 /* DMA controller/channel set to IR */ + +/* + * Set the DMACR register in the GUTS + * + * The DMACR register determines the source of initiated transfers for each + * channel on each DMA controller. Rather than have a bunch of repetitive + * macros for the bit patterns, we just have a function that calculates + * them. + * + * guts: Pointer to GUTS structure + * co: The DMA controller (0 or 1) + * ch: The channel on the DMA controller (0, 1, 2, or 3) + * device: The device to set as the source (CCSR_GUTS_DMACR_DEV_xx) + */ +static inline void guts_set_dmacr(struct ccsr_guts __iomem *guts, + unsigned int co, unsigned int ch, unsigned int device) +{ + unsigned int shift = 16 + (8 * (1 - co) + 2 * (3 - ch)); + + clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift); +} + +#define CCSR_GUTS_PMUXCR_LDPSEL 0x00010000 +#define CCSR_GUTS_PMUXCR_SSI1_MASK 0x0000C000 /* Bitmask for SSI1 */ +#define CCSR_GUTS_PMUXCR_SSI1_LA 0x00000000 /* Latched address */ +#define CCSR_GUTS_PMUXCR_SSI1_HI 0x00004000 /* High impedance */ +#define CCSR_GUTS_PMUXCR_SSI1_SSI 0x00008000 /* Used for SSI1 */ +#define CCSR_GUTS_PMUXCR_SSI2_MASK 0x00003000 /* Bitmask for SSI2 */ +#define CCSR_GUTS_PMUXCR_SSI2_LA 0x00000000 /* Latched address */ +#define CCSR_GUTS_PMUXCR_SSI2_HI 0x00001000 /* High impedance */ +#define CCSR_GUTS_PMUXCR_SSI2_SSI 0x00002000 /* Used for SSI2 */ +#define CCSR_GUTS_PMUXCR_LA_22_25_LA 0x00000000 /* Latched Address */ +#define CCSR_GUTS_PMUXCR_LA_22_25_HI 0x00000400 /* High impedance */ +#define CCSR_GUTS_PMUXCR_DBGDRV 0x00000200 /* Signals not driven */ +#define CCSR_GUTS_PMUXCR_DMA2_0 0x00000008 +#define CCSR_GUTS_PMUXCR_DMA2_3 0x00000004 +#define CCSR_GUTS_PMUXCR_DMA1_0 0x00000002 +#define CCSR_GUTS_PMUXCR_DMA1_3 0x00000001 + +/* + * Set the DMA external control bits in the GUTS + * + * The DMA external control bits in the PMUXCR are only meaningful for + * channels 0 and 3. Any other channels are ignored. 
+ * + * guts: Pointer to GUTS structure + * co: The DMA controller (0 or 1) + * ch: The channel on the DMA controller (0, 1, 2, or 3) + * value: the new value for the bit (0 or 1) + */ +static inline void guts_set_pmuxcr_dma(struct ccsr_guts __iomem *guts, + unsigned int co, unsigned int ch, unsigned int value) +{ + if ((ch == 0) || (ch == 3)) { + unsigned int shift = 2 * (co + 1) - (ch & 1) - 1; + + clrsetbits_be32(&guts->pmuxcr, 1 << shift, value << shift); + } +} + +#define CCSR_GUTS_CLKDVDR_PXCKEN 0x80000000 +#define CCSR_GUTS_CLKDVDR_SSICKEN 0x20000000 +#define CCSR_GUTS_CLKDVDR_PXCKINV 0x10000000 +#define CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT 25 +#define CCSR_GUTS_CLKDVDR_PXCKDLY_MASK 0x06000000 +#define CCSR_GUTS_CLKDVDR_PXCKDLY(x) \ + (((x) & 3) << CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT) +#define CCSR_GUTS_CLKDVDR_PXCLK_SHIFT 16 +#define CCSR_GUTS_CLKDVDR_PXCLK_MASK 0x001F0000 +#define CCSR_GUTS_CLKDVDR_PXCLK(x) (((x) & 31) << CCSR_GUTS_CLKDVDR_PXCLK_SHIFT) +#define CCSR_GUTS_CLKDVDR_SSICLK_MASK 0x000000FF +#define CCSR_GUTS_CLKDVDR_SSICLK(x) ((x) & CCSR_GUTS_CLKDVDR_SSICLK_MASK) + +#endif + +#endif diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c index 9621b9140df6..6f236f170cf5 100644 --- a/sound/soc/fsl/mpc8610_hpcd.c +++ b/sound/soc/fsl/mpc8610_hpcd.c @@ -12,11 +12,11 @@ #include #include +#include #include #include #include #include -#include #include "fsl_dma.h" #include "fsl_ssi.h" diff --git a/sound/soc/fsl/p1022_ds.c b/sound/soc/fsl/p1022_ds.c index 71c1a7dc3aeb..747aab0602bd 100644 --- a/sound/soc/fsl/p1022_ds.c +++ b/sound/soc/fsl/p1022_ds.c @@ -11,12 +11,12 @@ */ #include +#include #include #include #include #include #include -#include #include "fsl_dma.h" #include "fsl_ssi.h" diff --git a/sound/soc/fsl/p1022_rdk.c b/sound/soc/fsl/p1022_rdk.c index ee29048424be..1dd49e5f9675 100644 --- a/sound/soc/fsl/p1022_rdk.c +++ b/sound/soc/fsl/p1022_rdk.c @@ -18,12 +18,12 @@ */ #include +#include #include #include #include #include #include -#include #include "fsl_dma.h" #include "fsl_ssi.h" -- cgit v1.2.3 From 3ecb3e09b042e70799ff3a1ff464a5ecaa7547d9 Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Mon, 7 Sep 2015 14:45:25 +0300 Subject: usb: chipidea: Use extcon framework for VBUS and ID detect On recent Qualcomm platforms VBUS and ID lines are not routed to USB PHY LINK controller. Use extcon framework to receive connect and disconnect ID and VBUS notification. Signed-off-by: Ivan T. Ivanov Signed-off-by: Peter Chen --- .../devicetree/bindings/usb/ci-hdrc-usb2.txt | 6 + drivers/usb/chipidea/Kconfig | 1 + drivers/usb/chipidea/core.c | 125 +++++++++++++++++++++ drivers/usb/chipidea/otg.c | 39 ++++++- include/linux/usb/chipidea.h | 23 ++++ 5 files changed, 193 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt index a057b75ba4b5..db48bad84228 100644 --- a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt +++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt @@ -46,6 +46,11 @@ Optional properties: (4 bytes), This register represents the maximum length of a the burst in 32-bit words while moving data from the USB bus to system memory, changing this value takes effect only the SBUSCFG.AHBBRST is 0. +- extcon: phandles to external connector devices. First phandle should point to + external connector, which provide "USB" cable events, the second should point + to external connector device, which provide "USB-HOST" cable events. 
If one + of the external connector devices is not required, empty <0> phandle should + be specified. Example: @@ -62,4 +67,5 @@ Example: ahb-burst-config = <0x0>; tx-burst-size-dword = <0x10>; /* 64 bytes */ rx-burst-size-dword = <0x10>; + extcon = <0>, <&usb_id>; }; diff --git a/drivers/usb/chipidea/Kconfig b/drivers/usb/chipidea/Kconfig index 5ce3f1d6a6ed..5619b8ca3bf3 100644 --- a/drivers/usb/chipidea/Kconfig +++ b/drivers/usb/chipidea/Kconfig @@ -1,6 +1,7 @@ config USB_CHIPIDEA tristate "ChipIdea Highspeed Dual Role Controller" depends on ((USB_EHCI_HCD && USB_GADGET) || (USB_EHCI_HCD && !USB_GADGET) || (!USB_EHCI_HCD && USB_GADGET)) && HAS_DMA + select EXTCON help Say Y here if your system has a dual role high speed USB controller based on ChipIdea silicon IP. Currently, only the diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 3feebf7f31f0..573c2876b263 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -602,9 +603,45 @@ static irqreturn_t ci_irq(int irq, void *data) return ret; } +static int ci_vbus_notifier(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct ci_hdrc_cable *vbus = container_of(nb, struct ci_hdrc_cable, nb); + struct ci_hdrc *ci = vbus->ci; + + if (event) + vbus->state = true; + else + vbus->state = false; + + vbus->changed = true; + + ci_irq(ci->irq, ci); + return NOTIFY_DONE; +} + +static int ci_id_notifier(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct ci_hdrc_cable *id = container_of(nb, struct ci_hdrc_cable, nb); + struct ci_hdrc *ci = id->ci; + + if (event) + id->state = false; + else + id->state = true; + + id->changed = true; + + ci_irq(ci->irq, ci); + return NOTIFY_DONE; +} + static int ci_get_platdata(struct device *dev, struct ci_hdrc_platform_data *platdata) { + struct extcon_dev *ext_vbus, *ext_id; + struct ci_hdrc_cable *cable; int ret; if (!platdata->phy_mode) @@ -695,9 +732,91 @@ static int ci_get_platdata(struct device *dev, platdata->flags |= CI_HDRC_OVERRIDE_RX_BURST; } + ext_id = ERR_PTR(-ENODEV); + ext_vbus = ERR_PTR(-ENODEV); + if (of_property_read_bool(dev->of_node, "extcon")) { + /* Each one of them is not mandatory */ + ext_vbus = extcon_get_edev_by_phandle(dev, 0); + if (IS_ERR(ext_vbus) && PTR_ERR(ext_vbus) != -ENODEV) + return PTR_ERR(ext_vbus); + + ext_id = extcon_get_edev_by_phandle(dev, 1); + if (IS_ERR(ext_id) && PTR_ERR(ext_id) != -ENODEV) + return PTR_ERR(ext_id); + } + + cable = &platdata->vbus_extcon; + cable->nb.notifier_call = ci_vbus_notifier; + cable->edev = ext_vbus; + + if (!IS_ERR(ext_vbus)) { + ret = extcon_get_cable_state_(cable->edev, EXTCON_USB); + if (ret) + cable->state = true; + else + cable->state = false; + } + + cable = &platdata->id_extcon; + cable->nb.notifier_call = ci_id_notifier; + cable->edev = ext_id; + + if (!IS_ERR(ext_id)) { + ret = extcon_get_cable_state_(cable->edev, EXTCON_USB_HOST); + if (ret) + cable->state = false; + else + cable->state = true; + } return 0; } +static int ci_extcon_register(struct ci_hdrc *ci) +{ + struct ci_hdrc_cable *id, *vbus; + int ret; + + id = &ci->platdata->id_extcon; + id->ci = ci; + if (!IS_ERR(id->edev)) { + ret = extcon_register_notifier(id->edev, EXTCON_USB_HOST, + &id->nb); + if (ret < 0) { + dev_err(ci->dev, "register ID failed\n"); + return ret; + } + } + + vbus = &ci->platdata->vbus_extcon; + vbus->ci = ci; + if (!IS_ERR(vbus->edev)) { + ret = extcon_register_notifier(vbus->edev, 
EXTCON_USB, + &vbus->nb); + if (ret < 0) { + extcon_unregister_notifier(id->edev, EXTCON_USB_HOST, + &id->nb); + dev_err(ci->dev, "register VBUS failed\n"); + return ret; + } + } + + return 0; +} + +static void ci_extcon_unregister(struct ci_hdrc *ci) +{ + struct ci_hdrc_cable *cable; + + cable = &ci->platdata->id_extcon; + if (!IS_ERR(cable->edev)) + extcon_unregister_notifier(cable->edev, EXTCON_USB_HOST, + &cable->nb); + + cable = &ci->platdata->vbus_extcon; + if (!IS_ERR(cable->edev)) + extcon_unregister_notifier(cable->edev, EXTCON_USB, &cable->nb); +} + static DEFINE_IDA(ci_ida); struct platform_device *ci_hdrc_add_device(struct device *dev, @@ -921,6 +1040,10 @@ static int ci_hdrc_probe(struct platform_device *pdev) if (ret) goto stop; + ret = ci_extcon_register(ci); + if (ret) + goto stop; + if (ci->supports_runtime_pm) { pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); @@ -938,6 +1061,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) if (!ret) return 0; + ci_extcon_unregister(ci); stop: ci_role_destroy(ci); deinit_phy: @@ -957,6 +1081,7 @@ static int ci_hdrc_remove(struct platform_device *pdev) } dbg_remove_files(ci); + ci_extcon_unregister(ci); ci_role_destroy(ci); ci_hdrc_enter_lpm(ci, true); ci_usb_phy_exit(ci); diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index ad6c87a4653c..ab4bd0c2d4ef 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -30,7 +30,44 @@ */ u32 hw_read_otgsc(struct ci_hdrc *ci, u32 mask) { - return hw_read(ci, OP_OTGSC, mask); + struct ci_hdrc_cable *cable; + u32 val = hw_read(ci, OP_OTGSC, mask); + + /* + * If using extcon framework for VBUS and/or ID signal + * detection overwrite OTGSC register value + */ + cable = &ci->platdata->vbus_extcon; + if (!IS_ERR(cable->edev)) { + if (cable->changed) + val |= OTGSC_BSVIS; + else + val &= ~OTGSC_BSVIS; + + cable->changed = false; + + if (cable->state) + val |= OTGSC_BSV; + else + val &= ~OTGSC_BSV; + } + + cable = &ci->platdata->id_extcon; + if (!IS_ERR(cable->edev)) { + if (cable->changed) + val |= OTGSC_IDIS; + else + val &= ~OTGSC_IDIS; + + cable->changed = false; + + if (cable->state) + val |= OTGSC_ID; + else + val &= ~OTGSC_ID; + } + + return val; } /** diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index a41833cd184c..c5cddc6901d0 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -5,9 +5,28 @@ #ifndef __LINUX_USB_CHIPIDEA_H #define __LINUX_USB_CHIPIDEA_H +#include #include struct ci_hdrc; + +/** + * struct ci_hdrc_cable - structure for external connector cable state tracking + * @state: current state of the line + * @changed: set to true when extcon event happen + * @edev: device which generate events + * @ci: driver state of the chipidea device + * @nb: hold event notification callback + * @conn: used for notification registration + */ +struct ci_hdrc_cable { + bool state; + bool changed; + struct extcon_dev *edev; + struct ci_hdrc *ci; + struct notifier_block nb; +}; + struct ci_hdrc_platform_data { const char *name; /* offset of the capability registers */ @@ -48,6 +67,10 @@ struct ci_hdrc_platform_data { u32 ahb_burst_config; u32 tx_burst_size; u32 rx_burst_size; + + /* VBUS and ID signal state tracking, using extcon framework */ + struct ci_hdrc_cable vbus_extcon; + struct ci_hdrc_cable id_extcon; }; /* Default offset of capability registers */ -- cgit v1.2.3 From 1fbf46280eb6866c762de5ec8ba35f09097b0d53 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 8 Sep 2015 
22:18:14 -0300 Subject: usb: chipidea: Add support for 'phy-clkgate-delay-us' property Add support for the optional 'phy-clkgate-delay-us' property that is used to describe the delay time between putting PHY into low power mode and turning off the PHY clock. Signed-off-by: Li Jun Signed-off-by: Fabio Estevam Signed-off-by: Peter Chen --- drivers/usb/chipidea/core.c | 7 +++++++ include/linux/usb/chipidea.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 573c2876b263..f4fd76ab3aef 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -688,6 +688,10 @@ static int ci_get_platdata(struct device *dev, if (of_usb_get_maximum_speed(dev->of_node) == USB_SPEED_FULL) platdata->flags |= CI_HDRC_FORCE_FULLSPEED; + if (of_find_property(dev->of_node, "phy-clkgate-delay-us", NULL)) + of_property_read_u32(dev->of_node, "phy-clkgate-delay-us", + &platdata->phy_clkgate_delay_us); + platdata->itc_setting = 1; if (of_find_property(dev->of_node, "itc-setting", NULL)) { ret = of_property_read_u32(dev->of_node, "itc-setting", @@ -1121,6 +1125,9 @@ static void ci_controller_suspend(struct ci_hdrc *ci) { disable_irq(ci->irq); ci_hdrc_enter_lpm(ci, true); + if (ci->platdata->phy_clkgate_delay_us) + usleep_range(ci->platdata->phy_clkgate_delay_us, + ci->platdata->phy_clkgate_delay_us + 50); usb_phy_set_suspend(ci->usb_phy, 1); ci->in_lpm = true; enable_irq(ci->irq); diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index c5cddc6901d0..5dd75fa47dd8 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -71,6 +71,7 @@ struct ci_hdrc_platform_data { /* VBUS and ID signal state tracking, using extcon framework */ struct ci_hdrc_cable vbus_extcon; struct ci_hdrc_cable id_extcon; + u32 phy_clkgate_delay_us; }; /* Default offset of capability registers */ -- cgit v1.2.3 From e2aacd963a06fc558a809ecb62f5833e6c340b28 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 20 Oct 2015 10:08:59 -0400 Subject: net: mdio-gpio: move platform data header This header file only contains the platform data structure definition, so move it to the include/linux/platform_data/ directory. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 2 +- include/linux/mdio-gpio.h | 33 --------------------------------- include/linux/platform_data/mdio-gpio.h | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 34 deletions(-) delete mode 100644 include/linux/mdio-gpio.h create mode 100644 include/linux/platform_data/mdio-gpio.h (limited to 'include/linux') diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 3bc9f03349f3..95f51d7267b3 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h deleted file mode 100644 index 11f00cdabe3d..000000000000 --- a/include/linux/mdio-gpio.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * MDIO-GPIO bus platform data structures - * - * Copyright (C) 2008, Paulius Zaleckas - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
- */ - -#ifndef __LINUX_MDIO_GPIO_H -#define __LINUX_MDIO_GPIO_H - -#include - -struct mdio_gpio_platform_data { - /* GPIO numbers for bus pins */ - unsigned int mdc; - unsigned int mdio; - unsigned int mdo; - - bool mdc_active_low; - bool mdio_active_low; - bool mdo_active_low; - - u32 phy_mask; - u32 phy_ignore_ta_mask; - int irqs[PHY_MAX_ADDR]; - /* reset callback */ - int (*reset)(struct mii_bus *bus); -}; - -#endif /* __LINUX_MDIO_GPIO_H */ diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h new file mode 100644 index 000000000000..11f00cdabe3d --- /dev/null +++ b/include/linux/platform_data/mdio-gpio.h @@ -0,0 +1,33 @@ +/* + * MDIO-GPIO bus platform data structures + * + * Copyright (C) 2008, Paulius Zaleckas + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __LINUX_MDIO_GPIO_H +#define __LINUX_MDIO_GPIO_H + +#include + +struct mdio_gpio_platform_data { + /* GPIO numbers for bus pins */ + unsigned int mdc; + unsigned int mdio; + unsigned int mdo; + + bool mdc_active_low; + bool mdio_active_low; + bool mdo_active_low; + + u32 phy_mask; + u32 phy_ignore_ta_mask; + int irqs[PHY_MAX_ADDR]; + /* reset callback */ + int (*reset)(struct mii_bus *bus); +}; + +#endif /* __LINUX_MDIO_GPIO_H */ -- cgit v1.2.3 From 9a8928359736ab170303ee8a2cc15db54e3a4a8f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 15 Oct 2015 14:44:38 +0300 Subject: net/mlx4_core: Add support for filtering multicast loopback Update device capabilities regarding HW filtering multicast loopback support. Add MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB attribute to mlx4_update_qp to enable changing QP context to support filtering incoming multicast loopback traffic according the sender's counter index. Set the corresponding bits in QP context to force the loopback source checks if attribute is given and HW supports it. 
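To make the new UPDATE_QP attribute concrete, here is a hedged sketch of a caller enabling the multicast loopback source check; only mlx4_update_qp(), MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB, MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB and the -ENOTSUPP fallback come from this patch, while the surrounding context (dev, qpn) is assumed.

/* Illustrative caller, not part of the patch: request that HW drop
 * looped-back multicast whose sender shares this QP's counter index. */
struct mlx4_update_qp_params params = {
	.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB,
};
int err;

err = mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB,
		     &params);
if (err == -ENOTSUPP)
	/* HW lacks MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB; the
	 * caller must tolerate loopback traffic instead. */
	pr_debug("mlx4: loopback source check not supported\n");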
Signed-off-by: Maor Gottlieb Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +++++ drivers/net/ethernet/mellanox/mlx4/qp.c | 19 +++++++++++++- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 30 +++++++++++++++++----- include/linux/mlx4/device.h | 2 ++ include/linux/mlx4/qp.h | 24 +++++++++++++---- 5 files changed, 68 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e8ec1dec5789..b3be3a060311 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -155,6 +155,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [27] = "Port beacon support", [28] = "RX-ALL support", [29] = "802.1ad offload support", + [31] = "Modifying loopback source checks using UPDATE_QP support", + [32] = "Loopback source checks support", }; int i; @@ -964,6 +966,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; + if (field32 & (1 << 18)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB; + if (field32 & (1 << 19)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK; if (field32 & (1 << 26)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (field32 & (1 << 20)) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 20268634a9ab..b16249577aa2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -436,6 +436,23 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, cmd->qp_context.pri_path.grh_mylmc = params->smac_index; } + if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) { + if (!(dev->caps.flags2 + & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, + "Trying to set src check LB, but it isn't supported\n"); + err = -ENOTSUPP; + goto out; + } + pri_addr_path_mask |= + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB; + if (params->flags & + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) { + cmd->qp_context.pri_path.fl |= + MLX4_FL_ETH_SRC_CHECK_MC_LB; + } + } + if (attr & MLX4_UPDATE_QP_VSD) { qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD; if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE) @@ -458,7 +475,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0, MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - +out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 731423ca575d..502f3350088e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -770,9 +770,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev, } } + /* preserve IF_COUNTER flag */ + qpc->pri_path.vlan_control &= + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE && dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED | @@ -780,12 +783,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev, MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | 
MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } else if (0 != vp_oper->state.default_vlan) { - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; } else { /* priority tagged */ - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } @@ -3762,9 +3765,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); orig_sched_queue = qpc->pri_path.sched_queue; - err = update_vport_qp_param(dev, inbox, slave, qpn); - if (err) - return err; err = get_res(dev, slave, qpn, RES_QP, &qp); if (err) @@ -3774,6 +3774,10 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, goto out; } + err = update_vport_qp_param(dev, inbox, slave, qpn); + if (err) + goto out; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: /* if no error, save sched queue value passed in by VF. This is @@ -4208,7 +4212,9 @@ static int add_eth_header(struct mlx4_dev *dev, int slave, } -#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX) +#define MLX4_UPD_QP_PATH_MASK_SUPPORTED ( \ + 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX |\ + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB) int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4231,6 +4237,16 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED)) return -EPERM; + if ((pri_addr_path_mask & + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && + !(dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, + "Src check LB for slave %d isn't supported\n", + slave); + return -ENOTSUPP; + } + /* Just change the smac for the QP */ err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index baad4cb8e9b0..dac6872dbaea 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -214,6 +214,8 @@ enum { MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29, MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, + MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, }; enum { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index de45a51b3f04..fe052e234906 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -135,7 +135,10 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 vlan_control; + union { + u8 vlan_control; + u8 control; + }; u8 disable_pkey_check; u8 pkey_index; u8 counter_index; @@ -156,9 +159,16 @@ struct mlx4_qp_path { }; enum { /* fl */ - MLX4_FL_CV = 1 << 6, - MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2 + MLX4_FL_CV = 1 << 6, + MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, + MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, + MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, }; + +enum { /* control */ + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7, +}; + enum { /* vlan_control */ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */ @@ -254,6 +264,8 @@ enum { MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32, MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32, MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, + 
MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, }; enum { /* param3 */ @@ -436,11 +448,13 @@ enum mlx4_update_qp_attr { MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 + MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB = 1 << 4, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 5) - 1 }; enum mlx4_update_qp_params_flags { - MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 1, }; struct mlx4_update_qp_params { -- cgit v1.2.3 From dbf650b67bb4db1b95807d2aafe2d7cfafd458da Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Oct 2015 13:17:40 -0700 Subject: tcp: fastopen: limit max_qlen Allowing an application to set whatever limit for the list of recently RST fastopen sessions [1] is not wise, as it open ways to deplete kernel memory. Cap the user provided limit by somaxconn sysctl, like listen() backlog. [1] https://tools.ietf.org/html/rfc7413#section-5.1 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5dce9705fe84..c906f4534581 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -392,8 +392,9 @@ static inline bool tcp_passive_fastopen(const struct sock *sk) static inline void fastopen_queue_tune(struct sock *sk, int backlog) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn); - queue->fastopenq.max_qlen = backlog; + queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); } static inline void tcp_saved_syn_free(struct tcp_sock *tp) -- cgit v1.2.3 From ee1d267423a1f8041e2b1a33fc23e4393c67677e Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 20 Oct 2015 00:39:03 -0700 Subject: pstore: add pstore unregister pstore doesn't support unregistering yet. It was marked as TODO. This patch adds some code to fix it: 1) Add functions to unregister kmsg/console/ftrace/pmsg. 2) Add a function to free compression buffer. 3) Unmap the memory and free it. 4) Add a function to unregister pstore filesystem. Signed-off-by: Geliang Tang Acked-by: Kees Cook [Removed __exit annotation from ramoops_remove(). Reported by Arnd Bergmann] Signed-off-by: Tony Luck --- fs/pstore/Kconfig | 2 +- fs/pstore/Makefile | 6 +++--- fs/pstore/ftrace.c | 25 +++++++++++++++++++------ fs/pstore/inode.c | 9 +++++++++ fs/pstore/internal.h | 4 ++++ fs/pstore/platform.c | 35 +++++++++++++++++++++++++++++++++++ fs/pstore/pmsg.c | 7 +++++++ fs/pstore/ram.c | 19 ++++++++----------- include/linux/pstore.h | 14 +------------- kernel/printk/printk.c | 1 + 10 files changed, 88 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 916b8e23d968..360ae43f590c 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -1,5 +1,5 @@ config PSTORE - bool "Persistent store support" + tristate "Persistent store support" default n select ZLIB_DEFLATE select ZLIB_INFLATE diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index e647d8e81712..b8803cc07fce 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -2,12 +2,12 @@ # Makefile for the linux pstorefs routines. 
# -obj-y += pstore.o +obj-$(CONFIG_PSTORE) += pstore.o pstore-objs += inode.o platform.o -obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o +pstore-$(CONFIG_PSTORE_FTRACE) += ftrace.o -obj-$(CONFIG_PSTORE_PMSG) += pmsg.o +pstore-$(CONFIG_PSTORE_PMSG) += pmsg.o ramoops-objs += ram.o ram_core.o obj-$(CONFIG_PSTORE_RAM) += ramoops.o diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 76a4eeb92982..d4887705bb61 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -104,22 +104,23 @@ static const struct file_operations pstore_knob_fops = { .write = pstore_ftrace_knob_write, }; +static struct dentry *pstore_ftrace_dir; + void pstore_register_ftrace(void) { - struct dentry *dir; struct dentry *file; if (!psinfo->write_buf) return; - dir = debugfs_create_dir("pstore", NULL); - if (!dir) { + pstore_ftrace_dir = debugfs_create_dir("pstore", NULL); + if (!pstore_ftrace_dir) { pr_err("%s: unable to create pstore directory\n", __func__); return; } - file = debugfs_create_file("record_ftrace", 0600, dir, NULL, - &pstore_knob_fops); + file = debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, + NULL, &pstore_knob_fops); if (!file) { pr_err("%s: unable to create record_ftrace file\n", __func__); goto err_file; @@ -127,5 +128,17 @@ void pstore_register_ftrace(void) return; err_file: - debugfs_remove(dir); + debugfs_remove(pstore_ftrace_dir); +} + +void pstore_unregister_ftrace(void) +{ + mutex_lock(&pstore_ftrace_lock); + if (pstore_ftrace_enabled) { + unregister_ftrace_function(&pstore_ftrace_ops); + pstore_ftrace_enabled = 0; + } + mutex_unlock(&pstore_ftrace_lock); + + debugfs_remove_recursive(pstore_ftrace_dir); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 3adcc4669fac..3586491f26ba 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -178,6 +178,7 @@ static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence) } static const struct file_operations pstore_file_operations = { + .owner = THIS_MODULE, .open = pstore_file_open, .read = pstore_file_read, .llseek = pstore_file_llseek, @@ -456,6 +457,7 @@ static void pstore_kill_sb(struct super_block *sb) } static struct file_system_type pstore_fs_type = { + .owner = THIS_MODULE, .name = "pstore", .mount = pstore_mount, .kill_sb = pstore_kill_sb, @@ -479,5 +481,12 @@ out: } module_init(init_pstore_fs) +static void __exit exit_pstore_fs(void) +{ + unregister_filesystem(&pstore_fs_type); + sysfs_remove_mount_point(fs_kobj, "pstore"); +} +module_exit(exit_pstore_fs) + MODULE_AUTHOR("Tony Luck "); MODULE_LICENSE("GPL"); diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index c36ba2cd0b5d..96253c474ea5 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -41,14 +41,18 @@ pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec) #ifdef CONFIG_PSTORE_FTRACE extern void pstore_register_ftrace(void); +extern void pstore_unregister_ftrace(void); #else static inline void pstore_register_ftrace(void) {} +static inline void pstore_unregister_ftrace(void) {} #endif #ifdef CONFIG_PSTORE_PMSG extern void pstore_register_pmsg(void); +extern void pstore_unregister_pmsg(void); #else static inline void pstore_register_pmsg(void) {} +static inline void pstore_unregister_pmsg(void) {} #endif extern struct pstore_info *psinfo; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 1b1124942b4e..0aab920efff7 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -237,6 +237,14 @@ static void allocate_buf_for_compression(void) } +static void free_buf_for_compression(void) +{ + kfree(stream.workspace); 
+ stream.workspace = NULL; + kfree(big_oops_buf); + big_oops_buf = NULL; +} + /* * Called when compression fails, since the printk buffer * would be fetched for compression calling it again when @@ -358,6 +366,11 @@ static void pstore_register_kmsg(void) kmsg_dump_register(&pstore_dumper); } +static void pstore_unregister_kmsg(void) +{ + kmsg_dump_unregister(&pstore_dumper); +} + #ifdef CONFIG_PSTORE_CONSOLE static void pstore_console_write(struct console *con, const char *s, unsigned c) { @@ -395,8 +408,14 @@ static void pstore_register_console(void) { register_console(&pstore_console); } + +static void pstore_unregister_console(void) +{ + unregister_console(&pstore_console); +} #else static void pstore_register_console(void) {} +static void pstore_unregister_console(void) {} #endif static int pstore_write_compat(enum pstore_type_id type, @@ -467,12 +486,28 @@ int pstore_register(struct pstore_info *psi) */ backend = psi->name; + module_put(owner); + pr_info("Registered %s as persistent store backend\n", psi->name); return 0; } EXPORT_SYMBOL_GPL(pstore_register); +void pstore_unregister(struct pstore_info *psi) +{ + pstore_unregister_pmsg(); + pstore_unregister_ftrace(); + pstore_unregister_console(); + pstore_unregister_kmsg(); + + free_buf_for_compression(); + + psinfo = NULL; + backend = NULL; +} +EXPORT_SYMBOL_GPL(pstore_unregister); + /* * Read all the records from the persistent store. Create * files in our filesystem. Don't warn about -EEXIST errors diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c index 5a2f05a16c1e..7de20cd3797f 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -114,3 +114,10 @@ err_class: err: return; } + +void pstore_unregister_pmsg(void) +{ + device_destroy(pmsg_class, MKDEV(pmsg_major, 0)); + class_destroy(pmsg_class); + unregister_chrdev(pmsg_major, PMSG_NAME); +} diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 6c26c4daaec9..319c3a60cfa5 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -578,30 +578,27 @@ fail_out: return err; } -static int __exit ramoops_remove(struct platform_device *pdev) +static int ramoops_remove(struct platform_device *pdev) { -#if 0 - /* TODO(kees): We cannot unload ramoops since pstore doesn't support - * unregistering yet. - */ struct ramoops_context *cxt = &oops_cxt; - iounmap(cxt->virt_addr); - release_mem_region(cxt->phys_addr, cxt->size); + pstore_unregister(&cxt->pstore); cxt->max_dump_cnt = 0; - /* TODO(kees): When pstore supports unregistering, call it here. 
*/ kfree(cxt->pstore.buf); cxt->pstore.bufsize = 0; + persistent_ram_free(cxt->mprz); + persistent_ram_free(cxt->fprz); + persistent_ram_free(cxt->cprz); + ramoops_free_przs(cxt); + return 0; -#endif - return -EBUSY; } static struct platform_driver ramoops_driver = { .probe = ramoops_probe, - .remove = __exit_p(ramoops_remove), + .remove = ramoops_remove, .driver = { .name = "ramoops", }, diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 8e7a25b068b0..831479f8df8f 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -75,20 +75,8 @@ struct pstore_info { #define PSTORE_FLAGS_FRAGILE 1 -#ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); +extern void pstore_unregister(struct pstore_info *); extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason); -#else -static inline int -pstore_register(struct pstore_info *psi) -{ - return -ENODEV; -} -static inline bool -pstore_cannot_block_path(enum kmsg_dump_reason reason) -{ - return false; -} -#endif #endif /*_LINUX_PSTORE_H*/ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8f0324ef72ab..b16f35487b67 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -517,6 +517,7 @@ int check_syslog_permissions(int type, int source) ok: return security_syslog(type); } +EXPORT_SYMBOL_GPL(check_syslog_permissions); static void append_char(char **pp, char *e, char c) { -- cgit v1.2.3 From e55c34a66f87e78fb1fc6b623b78c5ad74b475af Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 22 Oct 2015 13:38:13 -0400 Subject: locks: introduce locks_lock_inode_wait() Users of the locks API commonly call either posix_lock_file_wait() or flock_lock_file_wait() depending upon the lock type. Add a new function locks_lock_inode_wait() which will check and call the correct function for the type of lock passed in. Signed-off-by: Benjamin Coddington Signed-off-by: Jeff Layton --- fs/locks.c | 24 ++++++++++++++++++++++++ include/linux/fs.h | 11 +++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index c74c9df419bc..c1745119fc5b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1881,6 +1881,30 @@ int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) } EXPORT_SYMBOL(flock_lock_inode_wait); +/** + * locks_lock_inode_wait - Apply a lock to an inode + * @inode: inode of the file to apply to + * @fl: The lock to be applied + * + * Apply a POSIX or FLOCK style lock request to an inode. + */ +int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_inode_wait(inode, fl); + break; + case FL_FLOCK: + res = flock_lock_inode_wait(inode, fl); + break; + default: + BUG(); + } + return res; +} +EXPORT_SYMBOL(locks_lock_inode_wait); + /** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. 
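A minimal usage sketch (hypothetical filesystem, not part of this patch): callers that previously chose between posix_lock_file_wait() and flock_lock_file_wait() can hand either lock type to the new helper, as long as fl_flags carries exactly one of FL_POSIX or FL_FLOCK.

/* Hypothetical caller: dispatch either lock type through the new
 * helper instead of switching on fl_flags by hand. */
static int examplefs_do_setlk(struct file *filp, struct file_lock *fl)
{
	/* locks_lock_inode_wait() BUG()s on any other fl_flags
	 * combination, so the type must already be set correctly. */
	return locks_lock_inode_wait(file_inode(filp), fl);
}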
diff --git a/include/linux/fs.h b/include/linux/fs.h index 72d8a844c692..b064d4c0b233 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1059,6 +1059,7 @@ extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); +extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1177,6 +1178,11 @@ static inline int flock_lock_inode_wait(struct inode *inode, return -ENOLCK; } +static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) +{ + return -ENOLCK; +} + static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; @@ -1225,6 +1231,11 @@ static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) return flock_lock_inode_wait(file_inode(filp), fl); } +static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return locks_lock_inode_wait(file_inode(filp), fl); +} + struct fasync_struct { spinlock_t fa_lock; int magic; -- cgit v1.2.3 From 616fb38fa7a9599293e05ae1fa9acfaf73922434 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 22 Oct 2015 13:38:15 -0400 Subject: locks: cleanup posix_lock_inode_wait and flock_lock_inode_wait All callers use locks_lock_inode_wait() instead. Signed-off-by: Benjamin Coddington Signed-off-by: Jeff Layton --- fs/locks.c | 9 +++------ include/linux/fs.h | 24 ------------------------ 2 files changed, 3 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index 4667f8226747..0d2b3267e2a3 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1171,10 +1171,9 @@ EXPORT_SYMBOL(posix_lock_file); * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Variant of posix_lock_file_wait that does not take a filp, and so can be - * used after the filp has already been torn down. + * Apply a POSIX style lock request to an inode. */ -int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) +static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); @@ -1191,7 +1190,6 @@ int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) } return error; } -EXPORT_SYMBOL(posix_lock_inode_wait); /** * locks_mandatory_locked - Check for an active lock @@ -1862,7 +1860,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) * * Apply a FLOCK style lock request to an inode. 
*/ -int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) +static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); @@ -1879,7 +1877,6 @@ int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) } return error; } -EXPORT_SYMBOL(flock_lock_inode_wait); /** * locks_lock_inode_wait - Apply a lock to an inode diff --git a/include/linux/fs.h b/include/linux/fs.h index b064d4c0b233..49749688156d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1053,12 +1053,10 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); -extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); @@ -1145,12 +1143,6 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } -static inline int posix_lock_inode_wait(struct inode *inode, - struct file_lock *fl) -{ - return -ENOLCK; -} - static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; @@ -1172,12 +1164,6 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } -static inline int flock_lock_inode_wait(struct inode *inode, - struct file_lock *request) -{ - return -ENOLCK; -} - static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { return -ENOLCK; @@ -1221,16 +1207,6 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return posix_lock_inode_wait(file_inode(filp), fl); -} - -static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return flock_lock_inode_wait(file_inode(filp), fl); -} - static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); -- cgit v1.2.3 From 3217f7c25bca66eed9b07f0b8bfd1937169b0736 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 27 Aug 2015 16:41:15 +0200 Subject: KVM: Add kvm_arch_vcpu_{un}blocking callbacks Some times it is useful for architecture implementations of KVM to know when the VCPU thread is about to block or when it comes back from blocking (arm/arm64 needs to know this to properly implement timers, for example). Therefore provide a generic architecture callback function in line with what we do elsewhere for KVM generic-arch interactions. 
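As an illustration, a hedged sketch of what an architecture might do with these callbacks; the timer helpers below are made up, and only the two hook names come from this patch.

/* Hypothetical arch implementation: keep guest timer expiry able to
 * wake the VCPU while its thread sleeps in kvm_vcpu_block(). */
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	/* e.g. migrate the guest timer onto a host software timer
	 * whose handler kicks the blocked VCPU (assumed helper). */
	example_timer_switch_to_sw(vcpu);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	/* move back to the hardware-backed timer before the VCPU
	 * runs again (assumed helper). */
	example_timer_switch_to_hw(vcpu);
}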
Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/mips/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/s390/include/asm/kvm_host.h | 2 ++ arch/x86/include/asm/kvm_host.h | 3 +++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 3 +++ 8 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c4072d9f32c7..84da97901f1f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -234,4 +234,7 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ed039688c221..e4f4d65f7d2b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -255,4 +255,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 5a1a882e0a75..6ded8d347af9 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 827a38d7a9db..c9f122d00920 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -718,5 +718,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8ced426091e1..72a614c68ed8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h 
index 2beee0382088..b28f0f142ecb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1233,4 +1233,7 @@ int x86_set_memory_region(struct kvm *kvm, bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..4a86f5f072c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -625,6 +625,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..7873d6daccb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2018,6 +2018,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) } while (single_task_running() && ktime_before(cur, stop)); } + kvm_arch_vcpu_blocking(vcpu); + for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); @@ -2031,6 +2033,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); cur = ktime_get(); + kvm_arch_vcpu_unblocking(vcpu); out: block_ns = ktime_to_ns(cur) - ktime_to_ns(start); -- cgit v1.2.3 From fc4099f17240767554ff3a73977acb78ef615404 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 22 Oct 2015 18:17:16 -0700 Subject: openvswitch: Fix egress tunnel info. While transitioning to netdev based vport we broke OVS feature which allows user to retrieve tunnel packet egress information for lwtunnel devices. Following patch fixes it by introducing ndo operation to get the tunnel egress info. Same ndo operation can be used for lwtunnel devices and compat ovs-tnl-vport devices. So after adding such device operation we can remove similar operation from ovs-vport. Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device"). Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 40 ++++++++++++++++++++++++----- drivers/net/vxlan.c | 41 +++++++++++++++++++++++++++++ include/linux/netdevice.h | 7 +++++ include/net/dst_metadata.h | 32 +++++++++++++++++++++++ net/core/dev.c | 27 ++++++++++++++++++++ net/ipv4/ip_gre.c | 46 ++++++++++++++++++++++++++------- net/openvswitch/actions.c | 9 +++---- net/openvswitch/datapath.c | 5 ++-- net/openvswitch/datapath.h | 1 - net/openvswitch/flow_netlink.c | 18 +++++-------- net/openvswitch/flow_netlink.h | 6 ++--- net/openvswitch/vport-geneve.c | 13 ---------- net/openvswitch/vport-gre.c | 8 ------ net/openvswitch/vport-vxlan.c | 19 -------------- net/openvswitch/vport.c | 58 ------------------------------------------ net/openvswitch/vport.h | 35 ------------------------- 16 files changed, 192 insertions(+), 173 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cde29f8a37bf..445071c163cb 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -594,14 +594,12 @@ static struct rtable *geneve_get_rt(struct sk_buff *skb, rt = ip_route_output_key(geneve->net, fl4); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); - dev->stats.tx_carrier_errors++; - return rt; + return ERR_PTR(-ENETUNREACH); } if (rt->dst.dev == dev) { /* is this necessary? */ netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); - dev->stats.collisions++; ip_rt_put(rt); - return ERR_PTR(-EINVAL); + return ERR_PTR(-ELOOP); } return rt; } @@ -627,12 +625,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel_info *info = NULL; struct rtable *rt = NULL; const struct iphdr *iip; /* interior IP header */ + int err = -EINVAL; struct flowi4 fl4; __u8 tos, ttl; __be16 sport; bool udp_csum; __be16 df; - int err; if (geneve->collect_md) { info = skb_tunnel_info(skb); @@ -647,7 +645,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) rt = geneve_get_rt(skb, dev, &fl4, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); - dev->stats.tx_carrier_errors++; + err = PTR_ERR(rt); goto tx_error; } @@ -699,10 +697,37 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) tx_error: dev_kfree_skb(skb); err: - dev->stats.tx_errors++; + if (err == -ELOOP) + dev->stats.collisions++; + else if (err == -ENETUNREACH) + dev->stats.tx_carrier_errors++; + else + dev->stats.tx_errors++; return NETDEV_TX_OK; } +static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info = skb_tunnel_info(skb); + struct geneve_dev *geneve = netdev_priv(dev); + struct rtable *rt; + struct flowi4 fl4; + + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + + rt = geneve_get_rt(skb, dev, &fl4, info); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + info->key.u.ipv4.src = fl4.saddr; + info->key.tp_src = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); + info->key.tp_dst = geneve->dst_port; + return 0; +} + static const struct net_device_ops geneve_netdev_ops = { .ndo_init = geneve_init, .ndo_uninit = geneve_uninit, @@ -713,6 +738,7 @@ static const struct net_device_ops geneve_netdev_ops = { .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, + .ndo_fill_metadata_dst = geneve_fill_metadata_dst, }; static void geneve_get_drvinfo(struct net_device *dev, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index afdc65fd5bc5..c1587ece28cf 100644 
--- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2337,6 +2337,46 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb, + struct ip_tunnel_info *info, + __be16 sport, __be16 dport) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct rtable *rt; + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_tos = RT_TOS(info->key.tos); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + fl4.daddr = info->key.u.ipv4.dst; + + rt = ip_route_output_key(vxlan->net, &fl4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + ip_rt_put(rt); + + info->key.u.ipv4.src = fl4.saddr; + info->key.tp_src = sport; + info->key.tp_dst = dport; + return 0; +} + +static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct ip_tunnel_info *info = skb_tunnel_info(skb); + __be16 sport, dport; + + sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, + vxlan->cfg.port_max, true); + dport = info->key.tp_dst ? : vxlan->cfg.dst_port; + + if (ip_tunnel_info_af(info) == AF_INET) + return egress_ipv4_tun_info(dev, skb, info, sport, dport); + return -EINVAL; +} + static const struct net_device_ops vxlan_netdev_ops = { .ndo_init = vxlan_init, .ndo_uninit = vxlan_uninit, @@ -2351,6 +2391,7 @@ static const struct net_device_ops vxlan_netdev_ops = { .ndo_fdb_add = vxlan_fdb_add, .ndo_fdb_del = vxlan_fdb_delete, .ndo_fdb_dump = vxlan_fdb_dump, + .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, }; /* Info for udev, that this is a virtual tunnel endpoint */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d15e3831440..210d11a75e4f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1054,6 +1054,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * This function is used to pass protocol port error state information * to the switch driver. The switch driver can react to the proto_down * by doing a phys down on the associated switch port. + * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + * This function is used to get egress tunnel information for given skb. + * This is useful for retrieving outer tunnel header parameters while + * sampling packet. 
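 *
 * A hedged usage sketch (hypothetical driver name; only
 * dev_fill_metadata_dst() and this hook come from the patch):
 * a tunnel driver wires the operation as
 *   .ndo_fill_metadata_dst = mydrv_fill_metadata_dst,
 * in its net_device_ops, and openvswitch then resolves the egress
 * parameters with dev_fill_metadata_dst(vport->dev, skb) before
 * building the upcall.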
* */ struct net_device_ops { @@ -1227,6 +1231,8 @@ struct net_device_ops { int (*ndo_get_iflink)(const struct net_device *dev); int (*ndo_change_proto_down)(struct net_device *dev, bool proto_down); + int (*ndo_fill_metadata_dst)(struct net_device *dev, + struct sk_buff *skb); }; /** @@ -2203,6 +2209,7 @@ void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); int dev_get_iflink(const struct net_device *dev); +int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index af9d5382f6cb..ce009710120c 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -60,6 +60,38 @@ static inline struct metadata_dst *tun_rx_dst(int md_size) return tun_dst; } +static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + int md_size = md_dst->u.tun_info.options_len; + struct metadata_dst *new_md; + + if (!md_dst) + return ERR_PTR(-EINVAL); + + new_md = metadata_dst_alloc(md_size, GFP_ATOMIC); + if (!new_md) + return ERR_PTR(-ENOMEM); + + memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, + sizeof(struct ip_tunnel_info) + md_size); + skb_dst_drop(skb); + dst_hold(&new_md->dst); + skb_dst_set(skb, &new_md->dst); + return new_md; +} + +static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) +{ + struct metadata_dst *dst; + + dst = tun_dst_unclone(skb); + if (IS_ERR(dst)) + return NULL; + + return &dst->u.tun_info; +} + static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, diff --git a/net/core/dev.c b/net/core/dev.c index 6bb6470f5b7b..c14748d051e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -681,6 +682,32 @@ int dev_get_iflink(const struct net_device *dev) } EXPORT_SYMBOL(dev_get_iflink); +/** + * dev_fill_metadata_dst - Retrieve tunnel egress information. + * @dev: targeted interface + * @skb: The packet. + * + * For better visibility of tunnel traffic OVS needs to retrieve + * egress tunnel information for a packet. Following API allows + * user to get this info. + */ +int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info; + + if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst) + return -EINVAL; + + info = skb_tunnel_info_unclone(skb); + if (!info) + return -ENOMEM; + if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX))) + return -EINVAL; + + return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb); +} +EXPORT_SYMBOL_GPL(dev_fill_metadata_dst); + /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index bd0679d90519..614521437e30 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } +static struct rtable *gre_get_rt(struct sk_buff *skb, + struct net_device *dev, + struct flowi4 *fl, + const struct ip_tunnel_key *key) +{ + struct net *net = dev_net(dev); + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->u.ipv4.dst; + fl->saddr = key->u.ipv4.src; + fl->flowi4_tos = RT_TOS(key->tos); + fl->flowi4_mark = skb->mark; + fl->flowi4_proto = IPPROTO_GRE; + + return ip_route_output_key(net, fl); +} + static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel_info *tun_info; - struct net *net = dev_net(dev); const struct ip_tunnel_key *key; struct flowi4 fl; struct rtable *rt; @@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; key = &tun_info->key; - memset(&fl, 0, sizeof(fl)); - fl.daddr = key->u.ipv4.dst; - fl.saddr = key->u.ipv4.src; - fl.flowi4_tos = RT_TOS(key->tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); + rt = gre_get_rt(skb, dev, &fl, key); if (IS_ERR(rt)) goto err_free_skb; @@ -566,6 +575,24 @@ err_free_skb: dev->stats.tx_dropped++; } +static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info = skb_tunnel_info(skb); + struct rtable *rt; + struct flowi4 fl4; + + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + + rt = gre_get_rt(skb, dev, &fl4, &info->key); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + info->key.u.ipv4.src = fl4.saddr; + return 0; +} + static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = { .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; static void ipgre_tap_setup(struct net_device *dev) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c6a39bf2c3b9..0bf0f406de52 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -768,7 +768,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, const struct nlattr *actions, int actions_len) { - struct ip_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; @@ -796,11 +795,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, if (vport) { int err; - upcall.egress_tun_info = &info; - err = ovs_vport_get_egress_tun_info(vport, skb, - &upcall); - if (err) - upcall.egress_tun_info = NULL; + err = dev_fill_metadata_dst(vport->dev, skb); + if (!err) + upcall.egress_tun_info = skb_tunnel_info(skb); } break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b816ff871528..c5d08ee37730 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -490,9 +490,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); - err = ovs_nla_put_egress_tunnel_key(user_skb, - upcall_info->egress_tun_info, - upcall_info->egress_tun_opts); + err = ovs_nla_put_tunnel_info(user_skb, + upcall_info->egress_tun_info); BUG_ON(err); nla_nest_end(user_skb, nla); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index f88038a99f44..67bdecd9fdc1 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -117,7 
+117,6 @@ struct ovs_skb_cb { */ struct dp_upcall_info { struct ip_tunnel_info *egress_tun_info; - const void *egress_tun_opts; const struct nlattr *userdata; const struct nlattr *actions; int actions_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index bd710bc37469..38536c137c54 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -717,7 +717,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts) { + if (swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_GENEVE_OPT && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) @@ -749,13 +749,12 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } -int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, - const struct ip_tunnel_info *egress_tun_info, - const void *egress_tun_opts) +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) { - return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key, - egress_tun_opts, - egress_tun_info->options_len); + return __ipv4_tun_to_nlattr(skb, &tun_info->key, + ip_tunnel_info_opts(tun_info), + tun_info->options_len); } static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, @@ -2383,10 +2382,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) if (!start) return -EMSGSIZE; - err = ipv4_tun_to_nlattr(skb, &tun_info->key, - tun_info->options_len ? - ip_tunnel_info_opts(tun_info) : NULL, - tun_info->options_len); + err = ovs_nla_put_tunnel_info(skb, tun_info); if (err) return err; nla_nest_end(skb, start); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 6ca3f0baf449..47dd142eca1c 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_get_match(struct net *, struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); -int ovs_nla_put_egress_tunnel_key(struct sk_buff *, - const struct ip_tunnel_info *, - const void *egress_tun_opts); + +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info); bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 2735e9c4a3b8..5f8aaaaa0785 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport, return 0; } -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dport = htons(geneve_port->port_no); - __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_UDP, sport, dport); -} - static struct vport *geneve_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -130,7 +118,6 @@ static struct vport_ops ovs_geneve_vport_ops = { .get_options = geneve_get_options, .send = ovs_netdev_send, .owner = THIS_MODULE, - .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init 
ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4d24481669c9..64225bf5eb40 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_GRE, 0, 0); -} - static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, .send = ovs_netdev_send, - .get_egress_tun_info = gre_get_egress_tun_info, .destroy = ovs_netdev_tunnel_destroy, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index c11413d5075f..e1c9c0888037 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -146,31 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct vxlan_dev *vxlan = netdev_priv(vport->dev); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dst_port = vxlan_dev_dst_port(vxlan); - __be16 src_port; - int port_min; - int port_max; - - inet_get_local_port_range(net, &port_min, &port_max); - src_port = udp_flow_src_port(net, skb, 0, 0, true); - - return ovs_tunnel_get_egress_info(upcall, net, - skb, IPPROTO_UDP, - src_port, dst_port); -} - static struct vport_ops ovs_vxlan_netdev_vport_ops = { .type = OVS_VPORT_TYPE_VXLAN, .create = vxlan_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = vxlan_get_options, .send = ovs_netdev_send, - .get_egress_tun_info = vxlan_get_egress_tun_info, }; static int __init ovs_vxlan_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 12a36ac21eda..320c765ce44a 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -479,61 +479,3 @@ void ovs_vport_deferred_free(struct vport *vport) call_rcu(&vport->rcu, free_vport_rcu); } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); - -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *skb, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst) -{ - struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; - const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); - const struct ip_tunnel_key *tun_key; - u32 skb_mark = skb->mark; - struct rtable *rt; - struct flowi4 fl; - - if (unlikely(!tun_info)) - return -EINVAL; - if (ip_tunnel_info_af(tun_info) != AF_INET) - return -EINVAL; - - tun_key = &tun_info->key; - - /* Route lookup to get srouce IP address. - * The process may need to be changed if the corresponding process - * in vports ops changed. 
- */ - rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); - if (IS_ERR(rt)) - return PTR_ERR(rt); - - ip_rt_put(rt); - - /* Generate egress_tun_info based on tun_info, - * saddr, tp_src and tp_dst - */ - ip_tunnel_key_init(&egress_tun_info->key, - fl.saddr, tun_key->u.ipv4.dst, - tun_key->tos, - tun_key->ttl, - tp_src, tp_dst, - tun_key->tun_id, - tun_key->tun_flags); - egress_tun_info->options_len = tun_info->options_len; - egress_tun_info->mode = tun_info->mode; - upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info); - return 0; -} -EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - /* get_egress_tun_info() is only implemented on tunnel ports. */ - if (unlikely(!vport->ops->get_egress_tun_info)) - return -EINVAL; - - return vport->ops->get_egress_tun_info(vport, skb, upcall); -} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index a413f3ae6a7b..d341ad6f3afe 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -27,7 +27,6 @@ #include #include #include -#include #include "datapath.h" @@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids); int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall); - /** * struct vport_portids - array of netlink portids of a vport. * must be protected by rcu. @@ -140,8 +129,6 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. - * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for - * a packet. */ struct vport_ops { enum ovs_vport_type type; @@ -154,9 +141,6 @@ struct vport_ops { int (*get_options)(const struct vport *, struct sk_buff *); void (*send)(struct vport *, struct sk_buff *); - int (*get_egress_tun_info)(struct vport *, struct sk_buff *, - struct dp_upcall_info *upcall); - struct module *owner; struct list_head list; }; @@ -215,25 +199,6 @@ static inline const char *ovs_vport_name(struct vport *vport) int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); -static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, - const struct ip_tunnel_key *key, - u32 mark, - struct flowi4 *fl, - u8 protocol) -{ - struct rtable *rt; - - memset(fl, 0, sizeof(*fl)); - fl->daddr = key->u.ipv4.dst; - fl->saddr = key->u.ipv4.src; - fl->flowi4_tos = RT_TOS(key->tos); - fl->flowi4_mark = mark; - fl->flowi4_proto = protocol; - - rt = ip_route_output_key(net, fl); - return rt; -} - static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb) { vport->ops->send(vport, skb); -- cgit v1.2.3 From c56d4450eb6886225a5a0bb231ad2cea9f03284a Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Sun, 18 Oct 2015 19:55:04 +0530 Subject: PCI: Turn off Request Attributes to avoid Chelsio T5 Completion erratum The Chelsio T5 has a PCIe compliance erratum that causes Malformed TLP or Unexpected Completion errors in some systems, which may cause device access timeouts. 
Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same values for the Attribute as were supplied in the header of the corresponding Request, except as explicitly allowed when IDO is used." Instead of copying the Attributes from the Request to the Completion, the T5 always generates Completions with zero Attributes. The receiver of a Completion whose Attributes don't match the Request may accept it (which itself seems non-compliant based on sec 2.3.2), or it may handle it as a Malformed TLP or an Unexpected Completion, which will probably lead to a device access timeout. Work around this by disabling "Relaxed Ordering" and "No Snoop" in the Root Port so it always generates Requests with zero Attributes. This does affect all other devices which are downstream of that Root Port, but these are performance optimizations that should not make a functional difference. Note that Configuration Space accesses are never supposed to have TLP Attributes, so we're safe waiting till after any Configuration Space accesses to do the Root Port "fixup". Based on original work by Casey Leedom [bhelgaas: changelog, comments, rename to pci_find_pcie_root_port(), rework to use pci_upstream_bridge() and check for Root Port device type, edit diagnostics to clarify intent and devices affected] Signed-off-by: Hariprasad Shenai Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 24 ++++++++++++++++++++++ drivers/pci/quirks.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 3 files changed, 82 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6a9a1116f1eb..09b4a35e21a8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -457,6 +457,30 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev, } EXPORT_SYMBOL(pci_find_parent_resource); +/** + * pci_find_pcie_root_port - return PCIe Root Port + * @dev: PCI device to query + * + * Traverse up the parent chain and return the PCIe Root Port PCI Device + * for a given PCI Device. + */ +struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev) +{ + struct pci_dev *bridge, *highest_pcie_bridge = NULL; + + bridge = pci_upstream_bridge(dev); + while (bridge && pci_is_pcie(bridge)) { + highest_pcie_bridge = bridge; + bridge = pci_upstream_bridge(bridge); + } + + if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT) + return NULL; + + return highest_pcie_bridge; +} +EXPORT_SYMBOL(pci_find_pcie_root_port); + /** * pci_wait_for_pending - wait for @mask bit(s) to clear in status word @pos * @dev: the PCI device to operate on diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6a30252cd79f..eb3c98e0fb2b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3691,6 +3691,63 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6868, PCI_CLASS_NOT_DEFINED, 8, DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6869, PCI_CLASS_NOT_DEFINED, 8, quirk_tw686x_class); +/* + * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same + * values for the Attribute as were supplied in the header of the + * corresponding Request, except as explicitly allowed when IDO is used." + * + * If a non-compliant device generates a completion with a different + * attribute than the request, the receiver may accept it (which itself + * seems non-compliant based on sec 2.3.2), or it may handle it as a + * Malformed TLP or an Unexpected Completion, which will probably lead to a + * device access timeout.
+ * + * If the non-compliant device generates completions with zero attributes + * (instead of copying the attributes from the request), we can work around + * this by disabling the "Relaxed Ordering" and "No Snoop" attributes in + * upstream devices so they always generate requests with zero attributes. + * + * This affects other devices under the same Root Port, but since these + * attributes are performance hints, there should be no functional problem. + * + * Note that Configuration Space accesses are never supposed to have TLP + * Attributes, so we're safe waiting till after any Configuration Space + * accesses to do the Root Port fixup. + */ +static void quirk_disable_root_port_attributes(struct pci_dev *pdev) +{ + struct pci_dev *root_port = pci_find_pcie_root_port(pdev); + + if (!root_port) { + dev_warn(&pdev->dev, "PCIe Completion erratum may cause device errors\n"); + return; + } + + dev_info(&root_port->dev, "Disabling No Snoop/Relaxed Ordering Attributes to avoid PCIe Completion erratum in %s\n", + dev_name(&pdev->dev)); + pcie_capability_clear_and_set_word(root_port, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN | + PCI_EXP_DEVCTL_NOSNOOP_EN, 0); +} + +/* + * The Chelsio T5 chip fails to copy TLP Attributes from a Request to the + * Completion it generates. + */ +static void quirk_chelsio_T5_disable_root_port_attributes(struct pci_dev *pdev) +{ + /* + * This mask/compare operation selects for Physical Function 4 on a + * T5. We only need to fix up the Root Port once for any of the + * PFs. PF[0..3] have PCI Device IDs of 0x50xx, but PF4 is uniquely + * 0x54xx so we use that one. + */ + if ((pdev->device & 0xff00) == 0x5400) + quirk_disable_root_port_attributes(pdev); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, + quirk_chelsio_T5_disable_root_port_attributes); + /* * AMD has indicated that the devices below do not support peer-to-peer in any system where they are found in the southbridge with an AMD diff --git a/include/linux/pci.h b/include/linux/pci.h index b54fbf1571b1..e828e7b4afec 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -820,6 +820,7 @@ void pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); +struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev); u8 pci_swizzle_interrupt_pin(const struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); -- cgit v1.2.3 From 67a2e213e7e937c41c52ab5bc46bf3f4de469f6e Mon Sep 17 00:00:00 2001 From: Rohit Vaswani Date: Thu, 22 Oct 2015 13:32:11 -0700 Subject: mm: cma: fix incorrect type conversion for size during dma allocation This was found during a userspace fuzzing test, when a large dma cma allocation was made by a driver (like ion) through userspace.
show_stack+0x10/0x1c dump_stack+0x74/0xc8 kasan_report_error+0x2b0/0x408 kasan_report+0x34/0x40 __asan_storeN+0x15c/0x168 memset+0x20/0x44 __dma_alloc_coherent+0x114/0x18c Signed-off-by: Rohit Vaswani Acked-by: Greg Kroah-Hartman Cc: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/dma-contiguous.c | 2 +- include/linux/cma.h | 2 +- include/linux/dma-contiguous.h | 4 ++-- mm/cma.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 950fff9ce453..a12ff9863d7e 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -187,7 +187,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, * global one. Requires architecture specific dev_get_cma_area() helper * function. */ -struct page *dma_alloc_from_contiguous(struct device *dev, int count, +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int align) { if (align > CONFIG_CMA_ALIGNMENT) diff --git a/include/linux/cma.h b/include/linux/cma.h index f7ef093ec49a..29f9e774ab76 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -26,6 +26,6 @@ extern int __init cma_declare_contiguous(phys_addr_t base, extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, struct cma **res_cma); -extern struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align); +extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); #endif diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 569bbd039896..fec734df1524 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -111,7 +111,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, return ret; } -struct page *dma_alloc_from_contiguous(struct device *dev, int count, +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order); bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); @@ -144,7 +144,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size, } static inline -struct page *dma_alloc_from_contiguous(struct device *dev, int count, +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order) { return NULL; diff --git a/mm/cma.c b/mm/cma.c index e7d1db533025..4eb56badf37e 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -361,7 +361,7 @@ err: * This function allocates part of contiguous memory on specific * contiguous memory area. 
*/ -struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align) +struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align) { unsigned long mask, offset, pfn, start = 0; unsigned long bitmap_maxno, bitmap_no, bitmap_count; @@ -371,7 +371,7 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align) if (!cma || !cma->count) return NULL; - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, + pr_debug("%s(cma %p, count %zu, align %d)\n", __func__, (void *)cma, count, align); if (!count) -- cgit v1.2.3 From 79907146fb5b1778035870db895fb2bf64061284 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Oct 2015 11:32:42 +0200 Subject: overflow-arith: begin to add support for overflow builtin functions The idea of the overflow-arith.h header is to collect overflow checking functions in one central place. If the gcc compiler supports the __builtin_overflow_* builtins, we use them because they might give better performance; otherwise the code falls back to normal overflow checking functions. The builtin_overflow functions are supported by gcc-5 and clang. Supporting clang is just a matter of providing a corresponding CC_HAVE_BUILTIN_OVERFLOW, because the specific overflow checking builtins don't differ between gcc and clang. I just provide the overflow_usub function here as I intend this to get merged into net; more functions will definitely follow as they are needed. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/compiler-gcc.h | 4 ++++ include/linux/overflow-arith.h | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/linux/overflow-arith.h (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index dfaa7b3e9ae9..82c159e0532a 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,6 +237,10 @@ #define KASAN_ABI_VERSION 3 #endif +#if GCC_VERSION >= 50000 +#define CC_HAVE_BUILTIN_OVERFLOW +#endif + #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h new file mode 100644 index 000000000000..e12ccf854a70 --- /dev/null +++ b/include/linux/overflow-arith.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#ifdef CC_HAVE_BUILTIN_OVERFLOW + +#define overflow_usub __builtin_usub_overflow + +#else + +static inline bool overflow_usub(unsigned int a, unsigned int b, + unsigned int *res) +{ + *res = a - b; + return *res > a ? true : false; +} + +#endif -- cgit v1.2.3 From dd461d6aa894761fe67c30ddf81eec0d08be216b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 28 Aug 2015 06:57:55 +0000 Subject: if_link: Add control trust VF Add netlink directives and an ndo entry to trust a VF user. This controls the special permissions of a VF user. The administrator will explicitly trust a VF user to use some features which impact security and/or performance. The administrator never turns it on unless the VF user is fully trusted.
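For illustration, here is a minimal sketch of how a PF driver might implement the new callback; the my_adapter state and all my_* names are hypothetical and not part of this patch:

#include <linux/netdevice.h>

/* Hypothetical PF driver state; only the trust bookkeeping is shown. */
struct my_adapter {
	int num_vfs;
	bool *vf_trusted;		/* one flag per VF */
};

static int my_ndo_set_vf_trust(struct net_device *dev, int vf, bool setting)
{
	struct my_adapter *adapter = netdev_priv(dev);

	if (vf < 0 || vf >= adapter->num_vfs)
		return -EINVAL;

	/* Remember the setting; a real driver would also reprogram the
	 * hardware here to grant or revoke the privileged features.
	 */
	adapter->vf_trusted[vf] = setting;
	return 0;
}

static const struct net_device_ops my_netdev_ops = {
	.ndo_set_vf_trust	= my_ndo_set_vf_trust,
	/* ...remaining callbacks elided... */
};

Drivers that do not implement ndo_set_vf_trust need no change: as the rtnetlink hunk below shows, do_setvfinfo() returns -EOPNOTSUPP for IFLA_VF_TRUST in that case.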
CC: Sy Jong Choi Signed-off-by: Hiroshi Shimamoto Acked-by: Greg Rose Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 3 +++ include/uapi/linux/if_link.h | 6 ++++++ net/core/rtnetlink.c | 24 +++++++++++++++++++++--- 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index ae5d0d22955d..f923d15b432c 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -24,5 +24,6 @@ struct ifla_vf_info { __u32 min_tx_rate; __u32 max_tx_rate; __u32 rss_query_en; + __u32 trusted; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69fdd427c8cb..773383859bd9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -881,6 +881,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); + * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); @@ -1109,6 +1110,8 @@ struct net_device_ops { int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); + int (*ndo_set_vf_trust)(struct net_device *dev, + int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *ivf); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e3b6217f34f1..a7aea8418abb 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -550,6 +550,7 @@ enum { * on/off switch */ IFLA_VF_STATS, /* network device statistics */ + IFLA_VF_TRUST, /* Trust VF */ __IFLA_VF_MAX, }; @@ -611,6 +612,11 @@ enum { #define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) +struct ifla_vf_trust { + __u32 vf; + __u32 setting; +}; + /* VF ports management section * * Nested layout of set/get msg is: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7c78b5aca944..504bd17b7456 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -838,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_BROADCAST */ nla_total_size(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ - nla_total_size(sizeof(__u64))); + nla_total_size(sizeof(__u64)) + + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else return 0; @@ -1161,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_link_state vf_linkstate; struct ifla_vf_rss_query_en vf_rss_query_en; struct ifla_vf_stats vf_stats; + struct ifla_vf_trust vf_trust; /* * Not all SR-IOV capable drivers support the @@ -1170,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; ivi.rss_query_en = -1; + ivi.trusted = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero @@ -1183,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = - vf_rss_query_en.vf = ivi.vf; + vf_rss_query_en.vf = + vf_trust.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; @@ -1194,6 +1198,7 @@ static 
int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; + vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -1211,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, &vf_linkstate) || nla_put(skb, IFLA_VF_RSS_QUERY_EN, sizeof(vf_rss_query_en), - &vf_rss_query_en)) + &vf_rss_query_en) || + nla_put(skb, IFLA_VF_TRUST, + sizeof(vf_trust), &vf_trust)) goto nla_put_failure; memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) @@ -1348,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, [IFLA_VF_STATS] = { .type = NLA_NESTED }, + [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, }; static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { @@ -1587,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } + if (tb[IFLA_VF_TRUST]) { + struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_trust) + err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); + if (err < 0) + return err; + } + return err; } -- cgit v1.2.3 From 21dd19fed3c3eb42a3877600f4a97a774323e562 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 22 Oct 2015 10:37:49 +0200 Subject: net: phy: Add nested variants of mdiobus read/write Since nested variants of mdiobus_read/write are used in multiple drivers, add nested variants in the mdiobus core. Suggested-by: Andrew Lunn Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 2 ++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 12f44c53cc8e..88cb4592b6fb 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -371,6 +371,33 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) } EXPORT_SYMBOL(mdiobus_scan); +/** + * mdiobus_read_nested - Nested version of the mdiobus_read function + * @bus: the mii_bus struct + * @addr: the phy address + * @regnum: register number to read + * + * In case of nested MDIO bus access avoid lockdep false positives by + * using mutex_lock_nested(). + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. 
+ */ +int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum) +{ + int retval; + + BUG_ON(in_interrupt()); + + mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + retval = bus->read(bus, addr, regnum); + mutex_unlock(&bus->mdio_lock); + + return retval; +} +EXPORT_SYMBOL(mdiobus_read_nested); + /** * mdiobus_read - Convenience function for reading a given MII mgmt register * @bus: the mii_bus struct @@ -395,6 +422,34 @@ int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) } EXPORT_SYMBOL(mdiobus_read); +/** + * mdiobus_write_nested - Nested version of the mdiobus_write function + * @bus: the mii_bus struct + * @addr: the phy address + * @regnum: register number to write + * @val: value to write to @regnum + * + * In case of nested MDIO bus access avoid lockdep false positives by + * using mutex_lock_nested(). + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val) +{ + int err; + + BUG_ON(in_interrupt()); + + mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + err = bus->write(bus, addr, regnum, val); + mutex_unlock(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL(mdiobus_write_nested); + /** * mdiobus_write - Convenience function for writing a given MII mgmt register * @bus: the mii_bus struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c477e6ece33..05fde31b6dc6 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -213,7 +213,9 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); +int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); #define PHY_INTERRUPT_DISABLED 0x0 -- cgit v1.2.3 From 62cedb9f135794ec26a93ae29e5f0231ab263c84 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 16:35:49 +0100 Subject: mm: memory hotplug with an existing resource Add add_memory_resource() to add memory using an existing "System RAM" resource. This is useful if the memory region is being located by finding a free resource slot with allocate_resource(). Xen guests will make use of this in their balloon driver to hotplug arbitrary amounts of memory in response to toolstack requests. 
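For illustration, a sketch of the intended calling pattern follows (the helper name is hypothetical and error handling is trimmed): locate a free slot with allocate_resource(), then hand the already-registered resource to add_memory_resource():

#include <linux/ioport.h>
#include <linux/memory_hotplug.h>
#include <linux/slab.h>

/* Hypothetical helper: hotplug 'size' bytes at any free physical range. */
static int hotplug_new_region(int nid, u64 size)
{
	struct resource *res;
	int ret;

	res = kzalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return -ENOMEM;

	res->name = "System RAM";
	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;

	/* Find a section-aligned free slot in the iomem tree. */
	ret = allocate_resource(&iomem_resource, res, size, 0, -1,
				PAGES_PER_SECTION << PAGE_SHIFT, NULL, NULL);
	if (ret < 0)
		goto err_free;

	/* The resource is already registered, so use the new entry point. */
	ret = add_memory_resource(nid, res);
	if (ret < 0)
		goto err_release;

	return 0;

err_release:
	release_resource(res);
err_free:
	kfree(res);
	return ret;
}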
Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper Reviewed-by: Tang Chen --- include/linux/memory_hotplug.h | 2 ++ mm/memory_hotplug.c | 29 +++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8f60e899b33c..2ea574ff9714 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -11,6 +11,7 @@ struct zone; struct pglist_data; struct mem_section; struct memory_block; +struct resource; #ifdef CONFIG_MEMORY_HOTPLUG @@ -266,6 +267,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); +extern int add_memory_resource(int nid, struct resource *resource); extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, bool for_device); extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index aa992e2df58a..0780d118d26e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1232,23 +1232,21 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default, } /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -int __ref add_memory(int nid, u64 start, u64 size) +int __ref add_memory_resource(int nid, struct resource *res) { + u64 start, size; pg_data_t *pgdat = NULL; bool new_pgdat; bool new_node; - struct resource *res; int ret; + start = res->start; + size = resource_size(res); + ret = check_hotplug_memory_range(start, size); if (ret) return ret; - res = register_memory_resource(start, size); - ret = -EEXIST; - if (!res) - return ret; - { /* Stupid hack to suppress address-never-null warning */ void *p = NODE_DATA(nid); new_pgdat = !p; @@ -1300,13 +1298,28 @@ error: /* rollback pgdat allocation and others */ if (new_pgdat) rollback_node_hotadd(nid, pgdat); - release_memory_resource(res); memblock_remove(start, size); out: mem_hotplug_done(); return ret; } +EXPORT_SYMBOL_GPL(add_memory_resource); + +int __ref add_memory(int nid, u64 start, u64 size) +{ + struct resource *res; + int ret; + + res = register_memory_resource(start, size); + if (!res) + return -EEXIST; + + ret = add_memory_resource(nid, res); + if (ret < 0) + release_memory_resource(res); + return ret; +} EXPORT_SYMBOL_GPL(add_memory); #ifdef CONFIG_MEMORY_HOTREMOVE -- cgit v1.2.3 From a1fdeaa71c95e5c6eba40245f84f762202dc69bb Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 22 Oct 2015 18:59:22 +0200 Subject: spi: fix kernel-doc warnings about missing return desc in spi.h When building docs with make htmldocs, warnings about not having a description for the return value are reported, i.e: warning: No description found for return value of 'spi_write' Fix these by following the kernel-doc conventions explained in Documentation/kernel-doc-nano-HOWTO.txt. 
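For reference, a complete kernel-doc block using the Return: convention adopted here looks like this; the wrapper below is illustrative only and simply mirrors what spi_w8r8() does internally:

#include <linux/spi/spi.h>

/**
 * my_w8r8 - SPI synchronous 8 bit write followed by 8 bit read
 * @spi: device with which data will be exchanged
 * @cmd: command to be written before data is read back
 *
 * Callable only from contexts that can sleep.
 *
 * Return: the (unsigned) eight bit number returned by the device, or else
 * a negative error code.
 */
static ssize_t my_w8r8(struct spi_device *spi, u8 cmd)
{
	ssize_t status;
	u8 result;

	status = spi_write_then_read(spi, &cmd, 1, &result, 1);

	/* Return negative errno or the byte the device sent back. */
	return status < 0 ? status : result;
}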
Signed-off-by: Javier Martinez Canillas Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e1f21778cb54..635bff60eda5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -847,8 +847,10 @@ extern int spi_bus_unlock(struct spi_master *master); * @len: data buffer size * Context: can sleep * - * This writes the buffer and returns zero or a negative error code. + * This function writes the buffer @buf. * Callable only from contexts that can sleep. + * + * Return: zero on success, else a negative error code. */ static inline int spi_write(struct spi_device *spi, const void *buf, size_t len) @@ -871,8 +873,10 @@ spi_write(struct spi_device *spi, const void *buf, size_t len) * @len: data buffer size * Context: can sleep * - * This reads the buffer and returns zero or a negative error code. + * This function reads the buffer @buf. * Callable only from contexts that can sleep. + * + * Return: zero on success, else a negative error code. */ static inline int spi_read(struct spi_device *spi, void *buf, size_t len) @@ -899,7 +903,7 @@ spi_read(struct spi_device *spi, void *buf, size_t len) * * For more specific semantics see spi_sync(). * - * It returns zero on success, else a negative error code. + * Return: zero on success, else a negative error code. */ static inline int spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, @@ -923,9 +927,10 @@ extern int spi_write_then_read(struct spi_device *spi, * @cmd: command to be written before data is read back * Context: can sleep * - * This returns the (unsigned) eight bit number returned by the - * device, or else a negative error code. Callable only from - * contexts that can sleep. + * Callable only from contexts that can sleep. + * + * Return: the (unsigned) eight bit number returned by the + * device, or else a negative error code. */ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) { @@ -944,12 +949,13 @@ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) * @cmd: command to be written before data is read back * Context: can sleep * - * This returns the (unsigned) sixteen bit number returned by the - * device, or else a negative error code. Callable only from - * contexts that can sleep. - * * The number is returned in wire-order, which is at least sometimes * big-endian. + * + * Callable only from contexts that can sleep. + * + * Return: the (unsigned) sixteen bit number returned by the + * device, or else a negative error code. */ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) { @@ -968,13 +974,13 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) * @cmd: command to be written before data is read back * Context: can sleep * - * This returns the (unsigned) sixteen bit number returned by the device in cpu - * endianness, or else a negative error code. Callable only from contexts that - * can sleep. - * * This function is similar to spi_w8r16, with the exception that it will * convert the read 16 bit data word from big-endian to native endianness. + * + * Callable only from contexts that can sleep. + * + * Return: the (unsigned) sixteen bit number returned by the device in cpu + * endianness, or else a negative error code.
*/ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) -- cgit v1.2.3 From 62a615e083604d291af0cb18f9b4549531ea4f94 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Oct 2015 12:16:41 +0300 Subject: mfd: core: redo ACPI matching of the child devices There is at least one board on the market, i.e. Intel Galileo Gen2, that uses _ADR to distinguish the devices under one actual device. Due to this we have to improve the quirk in the MFD core to handle that board. Acked-by: Rafael J. Wysocki Acked-by: Lee Jones Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- Documentation/acpi/enumeration.txt | 11 +++++--- drivers/mfd/mfd-core.c | 52 ++++++++++++++++++++++++++------------ include/linux/mfd/core.h | 10 ++++++-- 3 files changed, 52 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index b731b292e812..a91ec5af52df 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -347,13 +347,18 @@ For the first case, the MFD drivers do not need to do anything. The resulting child platform device will have its ACPI_COMPANION() set to point to the parent device. -If the ACPI namespace has a device that we can match using an ACPI id, -the id should be set like: +If the ACPI namespace has a device that we can match using an ACPI id or ACPI +adr, the cell should be set like: + + static struct mfd_cell_acpi_match my_subdevice_cell_acpi_match = { + .pnpid = "XYZ0001", + .adr = 0, + }; static struct mfd_cell my_subdevice_cell = { .name = "my_subdevice", /* set the resources relative to the parent */ - .acpi_pnpid = "XYZ0001", + .acpi_match = &my_subdevice_cell_acpi_match, }; The ACPI id "XYZ0001" is then used to lookup an ACPI device directly under diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index c17635d3e504..60b60dc63ddd 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -82,29 +82,49 @@ static int mfd_platform_add_cell(struct platform_device *pdev, static void mfd_acpi_add_device(const struct mfd_cell *cell, struct platform_device *pdev) { - struct acpi_device *parent_adev; + const struct mfd_cell_acpi_match *match = cell->acpi_match; + struct acpi_device *parent, *child; struct acpi_device *adev; - parent_adev = ACPI_COMPANION(pdev->dev.parent); - if (!parent_adev) + parent = ACPI_COMPANION(pdev->dev.parent); + if (!parent) return; /* - * MFD child device gets its ACPI handle either from the ACPI - * device directly under the parent that matches the acpi_pnpid or - * it will use the parent handle if is no acpi_pnpid is given. + * MFD child device gets its ACPI handle either from the ACPI device + * directly under the parent that matches either _HID or _CID, or + * _ADR, or it will use the parent handle if no ID is given. + * + * Note that use of _ADR is a grey area in the ACPI specification, + * though Intel Galileo Gen2 is using it to distinguish the child + * devices.
*/ - adev = parent_adev; if (cell->acpi_pnpid) { - struct acpi_device_id ids[2] = {}; - struct acpi_device *child_adev; - - strlcpy(ids[0].id, cell->acpi_pnpid, sizeof(ids[0].id)); - list_for_each_entry(child_adev, &parent_adev->children, node) - if (acpi_match_device_ids(child_adev, ids)) { - adev = child_adev; - break; + adev = parent; + if (match) { + if (match->pnpid) { + struct acpi_device_id ids[2] = {}; + + strlcpy(ids[0].id, match->pnpid, sizeof(ids[0].id)); + list_for_each_entry(child, &parent->children, node) { + if (acpi_match_device_ids(child, ids)) { + adev = child; + break; + } + } + } else { + unsigned long long adr; + acpi_status status; + + list_for_each_entry(child, &parent->children, node) { + status = acpi_evaluate_integer(child->handle, + "_ADR", NULL, + &adr); + if (ACPI_SUCCESS(status) && match->adr == adr) { + adev = child; + break; + } } + } } ACPI_COMPANION_SET(&pdev->dev, adev); diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index a76bc100bf97..27dac3ff18b9 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -18,6 +18,12 @@ struct irq_domain; +/* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ +struct mfd_cell_acpi_match { + const char *pnpid; + const unsigned long long adr; +}; + /* * This struct describes the MFD part ("cell"). * After registration the copy of this structure will become the platform data @@ -44,8 +50,8 @@ struct mfd_cell { */ const char *of_compatible; - /* Matches ACPI PNP id, either _HID or _CID */ - const char *acpi_pnpid; + /* Matches ACPI */ + const struct mfd_cell_acpi_match *acpi_match; /* * These resources can be specified relative to the parent device. -- cgit v1.2.3 From 0d0f4aab4e4d290138a4ae7f2ef8469e48c9a669 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 7 Oct 2015 14:39:55 +0300 Subject: lockd: get rid of reference-counted NSM RPC clients Currently we have a reference-counted per-net NSM RPC client which is created on the first monitor request and destroyed after the last unmonitor request. It's needed because the RPC client needs to know 'utsname()->nodename', but utsname() might be NULL when nsm_unmonitor() is called. So instead of holding the rpc client we could just save the nodename in struct nlm_host and pass it to rpc_create(). Thus there is no need to keep the rpc client until the last unmonitor request. We could create separate RPC clients for each monitor/unmonitor request. Signed-off-by: Andrey Ryabinin Signed-off-by: J.
Bruce Fields --- fs/lockd/host.c | 1 + fs/lockd/mon.c | 89 ++++++++------------------------------------- fs/lockd/netns.h | 3 -- fs/lockd/svc.c | 1 - include/linux/lockd/lockd.h | 1 + 5 files changed, 17 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index b5f3c3ab0d5f..d716c9993a26 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -161,6 +161,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; + strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: return host; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 6c05cd17e520..19166d4a8d31 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -42,7 +42,7 @@ struct nsm_args { u32 proc; char *mon_name; - char *nodename; + const char *nodename; }; struct nsm_res { @@ -86,69 +86,18 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename) return rpc_create(&args); } -static struct rpc_clnt *nsm_client_set(struct lockd_net *ln, - struct rpc_clnt *clnt) -{ - spin_lock(&ln->nsm_clnt_lock); - if (ln->nsm_users == 0) { - if (clnt == NULL) - goto out; - ln->nsm_clnt = clnt; - } - clnt = ln->nsm_clnt; - ln->nsm_users++; -out: - spin_unlock(&ln->nsm_clnt_lock); - return clnt; -} - -static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename) -{ - struct rpc_clnt *clnt, *new; - struct lockd_net *ln = net_generic(net, lockd_net_id); - - clnt = nsm_client_set(ln, NULL); - if (clnt != NULL) - goto out; - - clnt = new = nsm_create(net, nodename); - if (IS_ERR(clnt)) - goto out; - - clnt = nsm_client_set(ln, new); - if (clnt != new) - rpc_shutdown_client(new); -out: - return clnt; -} - -static void nsm_client_put(struct net *net) -{ - struct lockd_net *ln = net_generic(net, lockd_net_id); - struct rpc_clnt *clnt = NULL; - - spin_lock(&ln->nsm_clnt_lock); - ln->nsm_users--; - if (ln->nsm_users == 0) { - clnt = ln->nsm_clnt; - ln->nsm_clnt = NULL; - } - spin_unlock(&ln->nsm_clnt_lock); - if (clnt != NULL) - rpc_shutdown_client(clnt); -} - static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, - struct rpc_clnt *clnt) + const struct nlm_host *host) { int status; + struct rpc_clnt *clnt; struct nsm_args args = { .priv = &nsm->sm_priv, .prog = NLM_PROGRAM, .vers = 3, .proc = NLMPROC_NSM_NOTIFY, .mon_name = nsm->sm_mon_name, - .nodename = clnt->cl_nodename, + .nodename = host->nodename, }; struct rpc_message msg = { .rpc_argp = &args, @@ -157,6 +106,13 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, memset(res, 0, sizeof(*res)); + clnt = nsm_create(host->net, host->nodename); + if (IS_ERR(clnt)) { + dprintk("lockd: failed to create NSM upcall transport, " + "status=%ld, net=%p\n", PTR_ERR(clnt), host->net); + return PTR_ERR(clnt); + } + msg.rpc_proc = &clnt->cl_procinfo[proc]; status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); if (status == -ECONNREFUSED) { @@ -170,6 +126,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, status); else status = 0; + + rpc_shutdown_client(clnt); return status; } @@ -189,32 +147,19 @@ int nsm_monitor(const struct nlm_host *host) struct nsm_handle *nsm = host->h_nsmhandle; struct nsm_res res; int status; - struct rpc_clnt *clnt; - const char *nodename = NULL; dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); if (nsm->sm_monitored) return 0; - if (host->h_rpcclnt) - nodename = host->h_rpcclnt->cl_nodename; - /* * 
Choose whether to record the caller_name or IP address of * this peer in the local rpc.statd's database. */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - clnt = nsm_client_get(host->net, nodename); - if (IS_ERR(clnt)) { - status = PTR_ERR(clnt); - dprintk("lockd: failed to create NSM upcall transport, " - "status=%d, net=%p\n", status, host->net); - return status; - } - - status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt); + status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host); if (unlikely(res.status != 0)) status = -EIO; if (unlikely(status < 0)) { @@ -246,11 +191,9 @@ void nsm_unmonitor(const struct nlm_host *host) if (atomic_read(&nsm->sm_count) == 1 && nsm->sm_monitored && !nsm->sm_sticky) { - struct lockd_net *ln = net_generic(host->net, lockd_net_id); - dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); - status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt); + status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host); if (res.status != 0) status = -EIO; if (status < 0) @@ -258,8 +201,6 @@ void nsm_unmonitor(const struct nlm_host *host) nsm->sm_name); else nsm->sm_monitored = 0; - - nsm_client_put(host->net); } } diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 89fe011b1335..5426189406c1 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -12,9 +12,6 @@ struct lockd_net { struct delayed_work grace_period_end; struct lock_manager lockd_manager; - spinlock_t nsm_clnt_lock; - unsigned int nsm_users; - struct rpc_clnt *nsm_clnt; struct list_head nsm_handles; }; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 0dff13f41808..5f31ebd96c06 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -592,7 +592,6 @@ static int lockd_init_net(struct net *net) INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); INIT_LIST_HEAD(&ln->lockd_manager.list); ln->lockd_manager.block_opens = false; - spin_lock_init(&ln->nsm_clnt_lock); INIT_LIST_HEAD(&ln->nsm_handles); return 0; } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index fd3b65bf51b5..c15373894a42 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -68,6 +68,7 @@ struct nlm_host { struct nsm_handle *h_nsmhandle; /* NSM status handle */ char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ + char nodename[UNX_MAXNODENAME + 1]; }; /* -- cgit v1.2.3 From 778620364ef525e83597a6edee4d0a69db67fd3d Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Fri, 16 Oct 2015 08:59:08 +1100 Subject: sunrpc/cache: make cache flushing more reliable. The caches used to store sunrpc authentication information can be flushed by writing a timestamp to a file in /proc. This timestamp has a one-second resolution, and any entry in the cache that was last_refreshed *before* that time is treated as expired. This is problematic as it is not possible to reliably flush the cache without interrupting NFS service. If the current time is written to the "flush" file, any entry that was added since the current second started will still be treated as valid. If one second beyond the current time is written to the file, then no entries can be valid until the second ticks over. This will mean that no NFS request will be handled for up to 1 second. To resolve this issue we make two changes: 1/ treat an entry as expired if the timestamp when it was last_refreshed is before *or the same as* the expiry time. This means that current code which writes out the current time will now flush the cache reliably.
2/ when a new entry is added to the cache - set the last_refresh timestamp to 1 second *beyond* the current flush time, when that is not in the past. This ensures that newly added entries will always be valid. Now that we have a very reliable way to flush the cache, and also since we are using "since-boot" timestamps which are monotonic, change cache_purge() to set the smallest future flush_time which will work, and leave it there: don't revert to '1'. Also disable the setting of the 'flush_time' far into the future. That has never been useful and is now awkward as it would cause last_refresh times to be strange. Finally: if a request is made to set the 'flush_time' to the current second, assume the intent is to flush the cache and advance it, if necessary, to 1 second beyond the current 'flush_time' so that all active entries will be deemed to be expired. As part of this we need to add a 'cache_detail' arg to cache_init() and cache_fresh_locked() so they can find the current ->flush_time. Signed-off-by: NeilBrown Reported-by: Olaf Kirch Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 16 ++++++++----- net/sunrpc/cache.c | 53 +++++++++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 03d3b4c92d9f..ed03c9f7f908 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -48,8 +48,10 @@ struct cache_head { struct hlist_node cache_list; time_t expiry_time; /* After time time, don't use the data */ - time_t last_refresh; /* If CACHE_PENDING, this is when upcall - * was sent, else this is when update was received + time_t last_refresh; /* If CACHE_PENDING, this is when upcall was + * sent, else this is when update was + * received, though it is always set to + * be *after* ->flush_time. */ struct kref ref; unsigned long flags; @@ -105,8 +107,12 @@ struct cache_detail { /* fields below this comment are for internal use * and should not be touched by cache owners */ - time_t flush_time; /* flush all cache items with last_refresh - * earlier than this */ + time_t flush_time; /* flush all cache items with + * last_refresh at or earlier + * than this. last_refresh + * is never set at or earlier + * than this.
+ */ struct list_head others; time_t nextcheck; int entries; @@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); + (detail->flush_time >= h->last_refresh); } extern int cache_check(struct cache_detail *detail, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4a2340a54401..5e4f815c2b34 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -41,13 +41,16 @@ static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); -static void cache_init(struct cache_head *h) +static void cache_init(struct cache_head *h, struct cache_detail *detail) { time_t now = seconds_since_boot(); INIT_HLIST_NODE(&h->cache_list); h->flags = 0; kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; + if (now <= detail->flush_time) + /* ensure it isn't already expired */ + now = detail->flush_time + 1; h->last_refresh = now; } @@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, * we might get lose if we need to * cache_put it soon. */ - cache_init(new); + cache_init(new, detail); detail->init(new, key); write_lock(&detail->hash_lock); @@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); -static void cache_fresh_locked(struct cache_head *head, time_t expiry) +static void cache_fresh_locked(struct cache_head *head, time_t expiry, + struct cache_detail *detail) { + time_t now = seconds_since_boot(); + if (now <= detail->flush_time) + /* ensure it isn't immediately treated as expired */ + now = detail->flush_time + 1; head->expiry_time = expiry; - head->last_refresh = seconds_since_boot(); + head->last_refresh = now; smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } @@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); - cache_fresh_locked(old, new->expiry_time); + cache_fresh_locked(old, new->expiry_time, detail); write_unlock(&detail->hash_lock); cache_fresh_unlocked(old, detail); return old; @@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_put(old, detail); return NULL; } - cache_init(tmp); + cache_init(tmp, detail); detail->init(tmp, old); write_lock(&detail->hash_lock); @@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); detail->entries++; cache_get(tmp); - cache_fresh_locked(tmp, new->expiry_time); - cache_fresh_locked(old, 0); + cache_fresh_locked(tmp, new->expiry_time, detail); + cache_fresh_locked(old, 0, detail); write_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail); cache_fresh_unlocked(old, detail); @@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h rv = cache_is_valid(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY, + detail); rv = -ENOENT; } write_unlock(&detail->hash_lock); @@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { - 
detail->flush_time = LONG_MAX; + time_t now = seconds_since_boot(); + if (detail->flush_time >= now) + now = detail->flush_time + 1; + /* 'now' is the maximum value any 'last_refresh' can have */ + detail->flush_time = now; detail->nextcheck = seconds_since_boot(); cache_flush(); - detail->flush_time = 1; } EXPORT_SYMBOL_GPL(cache_purge); @@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, { char tbuf[20]; char *bp, *ep; + time_t then, now; if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; @@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf, return -EINVAL; bp = tbuf; - cd->flush_time = get_expiry(&bp); - cd->nextcheck = seconds_since_boot(); + then = get_expiry(&bp); + now = seconds_since_boot(); + cd->nextcheck = now; + /* Can only set flush_time to 1 second beyond "now", or + * possibly 1 second beyond flushtime. This is because + * flush_time never goes backwards so it mustn't get too far + * ahead of time. + */ + if (then >= now) { + /* Want to flush everything, so behave like cache_purge() */ + if (cd->flush_time >= now) + now = cd->flush_time + 1; + then = now; + } + + cd->flush_time = then; cache_flush(); *ppos += count; -- cgit v1.2.3 From acba7855dda0d6e7d87dec2f89b4d9eebb36bbe2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 21 Oct 2015 16:26:44 -0700 Subject: clk: Remove clk_{register,unregister}_multiplier() These APIs aren't used, so remove them. This can be reverted if we get a user at some point. Reviewed-by: Maxime Ripard Suggested-by: Michael Turquette Signed-off-by: Stephen Boyd --- drivers/clk/clk-multiplier.c | 51 -------------------------------------------- include/linux/clk-provider.h | 7 ------ 2 files changed, 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-multiplier.c b/drivers/clk/clk-multiplier.c index 43ec269fcbff..fe7806506bf3 100644 --- a/drivers/clk/clk-multiplier.c +++ b/drivers/clk/clk-multiplier.c @@ -128,54 +128,3 @@ const struct clk_ops clk_multiplier_ops = { .set_rate = clk_multiplier_set_rate, }; EXPORT_SYMBOL_GPL(clk_multiplier_ops); - -struct clk *clk_register_multiplier(struct device *dev, const char *name, - const char *parent_name, - unsigned long flags, - void __iomem *reg, u8 shift, u8 width, - u8 clk_mult_flags, spinlock_t *lock) -{ - struct clk_init_data init; - struct clk_multiplier *mult; - struct clk *clk; - - mult = kmalloc(sizeof(*mult), GFP_KERNEL); - if (!mult) - return ERR_PTR(-ENOMEM); - - init.name = name; - init.ops = &clk_multiplier_ops; - init.flags = flags | CLK_IS_BASIC; - init.parent_names = &parent_name; - init.num_parents = 1; - - mult->reg = reg; - mult->shift = shift; - mult->width = width; - mult->flags = clk_mult_flags; - mult->lock = lock; - mult->hw.init = &init; - - clk = clk_register(dev, &mult->hw); - if (IS_ERR(clk)) - kfree(mult); - - return clk; -} -EXPORT_SYMBOL_GPL(clk_register_multiplier); - -void clk_unregister_multiplier(struct clk *clk) -{ - struct clk_multiplier *mult; - struct clk_hw *hw; - - hw = __clk_get_hw(clk); - if (!hw) - return; - - mult = to_clk_multiplier(hw); - - clk_unregister(clk); - kfree(mult); -} -EXPORT_SYMBOL_GPL(clk_unregister_multiplier); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index e9a4d1ea556e..837cd7c7c8a7 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -554,13 +554,6 @@ struct clk_multiplier { extern const struct clk_ops clk_multiplier_ops; -struct clk *clk_register_multiplier(struct device *dev, const 
char *name, - const char *parent_name, - unsigned long flags, - void __iomem *reg, u8 shift, u8 width, - u8 clk_mult_flags, spinlock_t *lock); -void clk_unregister_multiplier(struct clk *clk); - /*** * struct clk_composite - aggregate clock of mux, divider and gate clocks * -- cgit v1.2.3 From c59f419bdd1f056e1ceacbd29f7be7bcff746a5d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 25 Oct 2015 10:00:32 +0100 Subject: net/xps: Fix calculation of initial number of xps queues The existing code breaks on architectures where the L1 cache size (L1_CACHE_BYTES) is smaller than or equal to the size of struct xps_map. The new code ensures that we get at least one initial xps queue, or even more as long as it fits into the next multiple of L1_CACHE_SIZE. Signed-off-by: Helge Deller Acked-by: Alexander Duyck --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d15e3831440..2212c8225deb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -718,8 +718,8 @@ struct xps_map { u16 queues[0]; }; #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) -#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ - / sizeof(u16)) +#define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \ - sizeof(struct xps_map)) / sizeof(u16)) /* * This structure holds all XPS maps for device. Maps are indexed by CPU. -- cgit v1.2.3 From c0e5c4450494d74c8deb4f47ddcbb74c94937e20 Mon Sep 17 00:00:00 2001 From: Dustin Byford Date: Fri, 23 Oct 2015 12:27:06 -0700 Subject: acpi: add acpi_preset_companion() stub Add a stub for acpi_preset_companion(). Fixes build failures when acpi_preset_companion() is used and CONFIG_ACPI is not set. Acked-by: Mika Westerberg Signed-off-by: Dustin Byford Acked-by: Rafael J. Wysocki Signed-off-by: Wolfram Sang --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 43856d19cf4d..43b55e751dea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -477,6 +477,11 @@ static inline bool has_acpi_companion(struct device *dev) return false; } +static inline void acpi_preset_companion(struct device *dev, + struct acpi_device *parent, u64 addr) +{ +} + static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; -- cgit v1.2.3 From d787dcdb9c8f412b1dd0727f90d3f793a61a2551 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 23 Oct 2015 20:41:31 +0200 Subject: bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus Reduced Serial Bus (RSB) is an Allwinner proprietary interface used to communicate with PMICs and other peripheral ICs. RSB is a two-wire push-pull serial bus that supports 1 master device and up to 15 active slave devices.
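To illustrate the slave side of the bus, a minimal driver sketch follows. It assumes the sunxi_rsb_device/sunxi_rsb_driver API and the module_sunxi_rsb_driver() helper declared by the new include/linux/sunxi-rsb.h (not shown in full here); the device and its compatible string are hypothetical:

#include <linux/module.h>
#include <linux/of.h>
#include <linux/sunxi-rsb.h>

static int my_codec_probe(struct sunxi_rsb_device *rdev)
{
	/* rdev->irq was resolved by the bus probe code in this patch */
	dev_info(&rdev->dev, "hypothetical RSB slave bound, irq %d\n",
		 rdev->irq);
	return 0;
}

static int my_codec_remove(struct sunxi_rsb_device *rdev)
{
	return 0;
}

static const struct of_device_id my_codec_of_match[] = {
	{ .compatible = "x-powers,hypothetical-codec" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, my_codec_of_match);

static struct sunxi_rsb_driver my_codec_driver = {
	.driver = {
		.name		= "my-codec",
		.of_match_table	= my_codec_of_match,
	},
	.probe	= my_codec_probe,
	.remove	= my_codec_remove,
};
module_sunxi_rsb_driver(my_codec_driver);

MODULE_LICENSE("GPL");

Binding works through the device tree: sunxi_rsb_device_match() below delegates to of_driver_match_device(), so a slave driver only has to supply an of_match_table.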
Signed-off-by: Chen-Yu Tsai Reviewed-by: Mark Brown Acked-by: Arnd Bergmann Signed-off-by: Maxime Ripard Signed-off-by: Olof Johansson --- drivers/bus/Kconfig | 11 + drivers/bus/Makefile | 1 + drivers/bus/sunxi-rsb.c | 783 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sunxi-rsb.h | 105 +++++++ 4 files changed, 900 insertions(+) create mode 100644 drivers/bus/sunxi-rsb.c create mode 100644 include/linux/sunxi-rsb.h (limited to 'include/linux') diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 1a82f3a17681..78382de19ed9 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -121,6 +121,17 @@ config SIMPLE_PM_BUS Controller (BSC, sometimes called "LBSC within Bus Bridge", or "External Bus Interface") as found on several Renesas ARM SoCs. +config SUNXI_RSB + tristate "Allwinner sunXi Reduced Serial Bus Driver" + default MACH_SUN8I || MACH_SUN9I + depends on ARCH_SUNXI + select REGMAP + help + Say y here to enable support for Allwinner's Reduced Serial Bus + (RSB) support. This controller is responsible for communicating + with various RSB based devices, such as AXP223, AXP8XX PMICs, + and AC100/AC200 ICs. + config VEXPRESS_CONFIG bool "Versatile Express configuration bus" default y if ARCH_VEXPRESS diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile index 790e7b933fb2..fcb9f9794a1f 100644 --- a/drivers/bus/Makefile +++ b/drivers/bus/Makefile @@ -15,5 +15,6 @@ obj-$(CONFIG_MVEBU_MBUS) += mvebu-mbus.o obj-$(CONFIG_OMAP_INTERCONNECT) += omap_l3_smx.o omap_l3_noc.o obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o +obj-$(CONFIG_SUNXI_RSB) += sunxi-rsb.o obj-$(CONFIG_SIMPLE_PM_BUS) += simple-pm-bus.o obj-$(CONFIG_VEXPRESS_CONFIG) += vexpress-config.o diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c new file mode 100644 index 000000000000..846bc29c157d --- /dev/null +++ b/drivers/bus/sunxi-rsb.c @@ -0,0 +1,783 @@ +/* + * RSB (Reduced Serial Bus) driver. + * + * Author: Chen-Yu Tsai + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + * The RSB controller looks like an SMBus controller which only supports + * byte and word data transfers. But, it differs from standard SMBus + * protocol on several aspects: + * - it uses addresses set at runtime to address slaves. Runtime addresses + * are sent to slaves using their 12bit hardware addresses. Up to 15 + * runtime addresses are available. + * - it adds a parity bit every 8bits of data and address for read and + * write accesses; this replaces the ack bit + * - only one read access is required to read a byte (instead of a write + * followed by a read access in standard SMBus protocol) + * - there's no Ack bit after each read access + * + * This means this bus cannot be used to interface with standard SMBus + * devices. Devices known to support this interface include the AXP223, + * AXP809, and AXP806 PMICs, and the AC100 audio codec, all from X-Powers. + * + * A description of the operation and wire protocol can be found in the + * RSB section of Allwinner's A80 user manual, which can be found at + * + * https://github.com/allwinner-zh/documents/tree/master/A80 + * + * This document is officially released by Allwinner. + * + * This driver is based on i2c-sun6i-p2wi.c, the P2WI bus driver. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* RSB registers */ +#define RSB_CTRL 0x0 /* Global control */ +#define RSB_CCR 0x4 /* Clock control */ +#define RSB_INTE 0x8 /* Interrupt controls */ +#define RSB_INTS 0xc /* Interrupt status */ +#define RSB_ADDR 0x10 /* Address to send with read/write command */ +#define RSB_DATA 0x1c /* Data to read/write */ +#define RSB_LCR 0x24 /* Line control */ +#define RSB_DMCR 0x28 /* Device mode (init) control */ +#define RSB_CMD 0x2c /* RSB Command */ +#define RSB_DAR 0x30 /* Device address / runtime address */ + +/* CTRL fields */ +#define RSB_CTRL_START_TRANS BIT(7) +#define RSB_CTRL_ABORT_TRANS BIT(6) +#define RSB_CTRL_GLOBAL_INT_ENB BIT(1) +#define RSB_CTRL_SOFT_RST BIT(0) + +/* CLK CTRL fields */ +#define RSB_CCR_SDA_OUT_DELAY(v) (((v) & 0x7) << 8) +#define RSB_CCR_MAX_CLK_DIV 0xff +#define RSB_CCR_CLK_DIV(v) ((v) & RSB_CCR_MAX_CLK_DIV) + +/* STATUS fields */ +#define RSB_INTS_TRANS_ERR_ACK BIT(16) +#define RSB_INTS_TRANS_ERR_DATA_BIT(v) (((v) >> 8) & 0xf) +#define RSB_INTS_TRANS_ERR_DATA GENMASK(11, 8) +#define RSB_INTS_LOAD_BSY BIT(2) +#define RSB_INTS_TRANS_ERR BIT(1) +#define RSB_INTS_TRANS_OVER BIT(0) + +/* LINE CTRL fields*/ +#define RSB_LCR_SCL_STATE BIT(5) +#define RSB_LCR_SDA_STATE BIT(4) +#define RSB_LCR_SCL_CTL BIT(3) +#define RSB_LCR_SCL_CTL_EN BIT(2) +#define RSB_LCR_SDA_CTL BIT(1) +#define RSB_LCR_SDA_CTL_EN BIT(0) + +/* DEVICE MODE CTRL field values */ +#define RSB_DMCR_DEVICE_START BIT(31) +#define RSB_DMCR_MODE_DATA (0x7c << 16) +#define RSB_DMCR_MODE_REG (0x3e << 8) +#define RSB_DMCR_DEV_ADDR 0x00 + +/* CMD values */ +#define RSB_CMD_RD8 0x8b +#define RSB_CMD_RD16 0x9c +#define RSB_CMD_RD32 0xa6 +#define RSB_CMD_WR8 0x4e +#define RSB_CMD_WR16 0x59 +#define RSB_CMD_WR32 0x63 +#define RSB_CMD_STRA 0xe8 + +/* DAR fields */ +#define RSB_DAR_RTA(v) (((v) & 0xff) << 16) +#define RSB_DAR_DA(v) ((v) & 0xffff) + +#define RSB_MAX_FREQ 20000000 + +#define RSB_CTRL_NAME "sunxi-rsb" + +struct sunxi_rsb_addr_map { + u16 hwaddr; + u8 rtaddr; +}; + +struct sunxi_rsb { + struct device *dev; + void __iomem *regs; + struct clk *clk; + struct reset_control *rstc; + struct completion complete; + struct mutex lock; + unsigned int status; +}; + +/* bus / slave device related functions */ +static struct bus_type sunxi_rsb_bus; + +static int sunxi_rsb_device_match(struct device *dev, struct device_driver *drv) +{ + return of_driver_match_device(dev, drv); +} + +static int sunxi_rsb_device_probe(struct device *dev) +{ + const struct sunxi_rsb_driver *drv = to_sunxi_rsb_driver(dev->driver); + struct sunxi_rsb_device *rdev = to_sunxi_rsb_device(dev); + int ret; + + if (!drv->probe) + return -ENODEV; + + if (!rdev->irq) { + int irq = -ENOENT; + + if (dev->of_node) + irq = of_irq_get(dev->of_node, 0); + + if (irq == -EPROBE_DEFER) + return irq; + if (irq < 0) + irq = 0; + + rdev->irq = irq; + } + + ret = of_clk_set_defaults(dev->of_node, false); + if (ret < 0) + return ret; + + return drv->probe(rdev); +} + +static int sunxi_rsb_device_remove(struct device *dev) +{ + const struct sunxi_rsb_driver *drv = to_sunxi_rsb_driver(dev->driver); + + return drv->remove(to_sunxi_rsb_device(dev)); +} + +static struct bus_type sunxi_rsb_bus = { + .name = RSB_CTRL_NAME, + .match = sunxi_rsb_device_match, + .probe = sunxi_rsb_device_probe, + .remove = sunxi_rsb_device_remove, +}; + +static void sunxi_rsb_dev_release(struct device *dev) +{ + 
struct sunxi_rsb_device *rdev = to_sunxi_rsb_device(dev); + + kfree(rdev); +} + +/** + * sunxi_rsb_device_create() - allocate and add an RSB device + * @rsb: RSB controller + * @node: RSB slave device node + * @hwaddr: RSB slave hardware address + * @rtaddr: RSB slave runtime address + */ +static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb, + struct device_node *node, u16 hwaddr, u8 rtaddr) +{ + int err; + struct sunxi_rsb_device *rdev; + + rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); + if (!rdev) + return ERR_PTR(-ENOMEM); + + rdev->rsb = rsb; + rdev->hwaddr = hwaddr; + rdev->rtaddr = rtaddr; + rdev->dev.bus = &sunxi_rsb_bus; + rdev->dev.parent = rsb->dev; + rdev->dev.of_node = node; + rdev->dev.release = sunxi_rsb_dev_release; + + dev_set_name(&rdev->dev, "%s-%x", RSB_CTRL_NAME, hwaddr); + + err = device_register(&rdev->dev); + if (err < 0) { + dev_err(&rdev->dev, "Can't add %s, status %d\n", + dev_name(&rdev->dev), err); + goto err_device_add; + } + + dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev)); + + return rdev; + +err_device_add: + put_device(&rdev->dev); + + return ERR_PTR(err); +} + +/** + * sunxi_rsb_device_unregister(): unregister an RSB device + * @rdev: rsb_device to be removed + */ +static void sunxi_rsb_device_unregister(struct sunxi_rsb_device *rdev) +{ + device_unregister(&rdev->dev); +} + +static int sunxi_rsb_remove_devices(struct device *dev, void *data) +{ + struct sunxi_rsb_device *rdev = to_sunxi_rsb_device(dev); + + if (dev->bus == &sunxi_rsb_bus) + sunxi_rsb_device_unregister(rdev); + + return 0; +} + +/** + * sunxi_rsb_driver_register() - Register device driver with RSB core + * @rdrv: device driver to be associated with slave-device. + * + * This API will register the client driver with the RSB framework. + * It is typically called from the driver's module-init function.
+ */ +int sunxi_rsb_driver_register(struct sunxi_rsb_driver *rdrv) +{ + rdrv->driver.bus = &sunxi_rsb_bus; + return driver_register(&rdrv->driver); +} +EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register); + +/* common code that starts a transfer */ +static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) +{ + if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) { + dev_dbg(rsb->dev, "RSB transfer still in progress\n"); + return -EBUSY; + } + + reinit_completion(&rsb->complete); + + writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER, + rsb->regs + RSB_INTE); + writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB, + rsb->regs + RSB_CTRL); + + if (!wait_for_completion_io_timeout(&rsb->complete, + msecs_to_jiffies(100))) { + dev_dbg(rsb->dev, "RSB timeout\n"); + + /* abort the transfer */ + writel(RSB_CTRL_ABORT_TRANS, rsb->regs + RSB_CTRL); + + /* clear any interrupt flags */ + writel(readl(rsb->regs + RSB_INTS), rsb->regs + RSB_INTS); + + return -ETIMEDOUT; + } + + if (rsb->status & RSB_INTS_LOAD_BSY) { + dev_dbg(rsb->dev, "RSB busy\n"); + return -EBUSY; + } + + if (rsb->status & RSB_INTS_TRANS_ERR) { + if (rsb->status & RSB_INTS_TRANS_ERR_ACK) { + dev_dbg(rsb->dev, "RSB slave nack\n"); + return -EINVAL; + } + + if (rsb->status & RSB_INTS_TRANS_ERR_DATA) { + dev_dbg(rsb->dev, "RSB transfer data error\n"); + return -EIO; + } + } + + return 0; +} + +static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr, + u32 *buf, size_t len) +{ + u32 cmd; + int ret; + + if (!buf) + return -EINVAL; + + switch (len) { + case 1: + cmd = RSB_CMD_RD8; + break; + case 2: + cmd = RSB_CMD_RD16; + break; + case 4: + cmd = RSB_CMD_RD32; + break; + default: + dev_err(rsb->dev, "Invalid access width: %d\n", len); + return -EINVAL; + } + + mutex_lock(&rsb->lock); + + writel(addr, rsb->regs + RSB_ADDR); + writel(RSB_DAR_RTA(rtaddr), rsb->regs + RSB_DAR); + writel(cmd, rsb->regs + RSB_CMD); + + ret = _sunxi_rsb_run_xfer(rsb); + if (ret) + goto out; + + *buf = readl(rsb->regs + RSB_DATA); + +out: + mutex_unlock(&rsb->lock); + + return ret; +} + +static int sunxi_rsb_write(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr, + const u32 *buf, size_t len) +{ + u32 cmd; + int ret; + + if (!buf) + return -EINVAL; + + switch (len) { + case 1: + cmd = RSB_CMD_WR8; + break; + case 2: + cmd = RSB_CMD_WR16; + break; + case 4: + cmd = RSB_CMD_WR32; + break; + default: + dev_err(rsb->dev, "Invalid access width: %d\n", len); + return -EINVAL; + } + + mutex_lock(&rsb->lock); + + writel(addr, rsb->regs + RSB_ADDR); + writel(RSB_DAR_RTA(rtaddr), rsb->regs + RSB_DAR); + writel(*buf, rsb->regs + RSB_DATA); + writel(cmd, rsb->regs + RSB_CMD); + ret = _sunxi_rsb_run_xfer(rsb); + + mutex_unlock(&rsb->lock); + + return ret; +} + +/* RSB regmap functions */ +struct sunxi_rsb_ctx { + struct sunxi_rsb_device *rdev; + int size; +}; + +static int regmap_sunxi_rsb_reg_read(void *context, unsigned int reg, + unsigned int *val) +{ + struct sunxi_rsb_ctx *ctx = context; + struct sunxi_rsb_device *rdev = ctx->rdev; + + if (reg > 0xff) + return -EINVAL; + + return sunxi_rsb_read(rdev->rsb, rdev->rtaddr, reg, val, ctx->size); +} + +static int regmap_sunxi_rsb_reg_write(void *context, unsigned int reg, + unsigned int val) +{ + struct sunxi_rsb_ctx *ctx = context; + struct sunxi_rsb_device *rdev = ctx->rdev; + + return sunxi_rsb_write(rdev->rsb, rdev->rtaddr, reg, &val, ctx->size); +} + +static void regmap_sunxi_rsb_free_ctx(void *context) +{ + struct sunxi_rsb_ctx *ctx = context; + + kfree(ctx); +} + +static struct regmap_bus 
regmap_sunxi_rsb = { + .reg_write = regmap_sunxi_rsb_reg_write, + .reg_read = regmap_sunxi_rsb_reg_read, + .free_context = regmap_sunxi_rsb_free_ctx, + .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, +}; + +static struct sunxi_rsb_ctx *regmap_sunxi_rsb_init_ctx(struct sunxi_rsb_device *rdev, + const struct regmap_config *config) +{ + struct sunxi_rsb_ctx *ctx; + + switch (config->val_bits) { + case 8: + case 16: + case 32: + break; + default: + return ERR_PTR(-EINVAL); + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->rdev = rdev; + ctx->size = config->val_bits / 8; + + return ctx; +} + +struct regmap *__devm_regmap_init_sunxi_rsb(struct sunxi_rsb_device *rdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) +{ + struct sunxi_rsb_ctx *ctx = regmap_sunxi_rsb_init_ctx(rdev, config); + + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + return __devm_regmap_init(&rdev->dev, ®map_sunxi_rsb, ctx, config, + lock_key, lock_name); +} +EXPORT_SYMBOL_GPL(__devm_regmap_init_sunxi_rsb); + +/* RSB controller driver functions */ +static irqreturn_t sunxi_rsb_irq(int irq, void *dev_id) +{ + struct sunxi_rsb *rsb = dev_id; + u32 status; + + status = readl(rsb->regs + RSB_INTS); + rsb->status = status; + + /* Clear interrupts */ + status &= (RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | + RSB_INTS_TRANS_OVER); + writel(status, rsb->regs + RSB_INTS); + + complete(&rsb->complete); + + return IRQ_HANDLED; +} + +static int sunxi_rsb_init_device_mode(struct sunxi_rsb *rsb) +{ + int ret = 0; + u32 reg; + + /* send init sequence */ + writel(RSB_DMCR_DEVICE_START | RSB_DMCR_MODE_DATA | + RSB_DMCR_MODE_REG | RSB_DMCR_DEV_ADDR, rsb->regs + RSB_DMCR); + + readl_poll_timeout(rsb->regs + RSB_DMCR, reg, + !(reg & RSB_DMCR_DEVICE_START), 100, 250000); + if (reg & RSB_DMCR_DEVICE_START) + ret = -ETIMEDOUT; + + /* clear interrupt status bits */ + writel(readl(rsb->regs + RSB_INTS), rsb->regs + RSB_INTS); + + return ret; +} + +/* + * There are 15 valid runtime addresses, though Allwinner typically + * skips the first, for unknown reasons, and uses the following three. + * + * 0x17, 0x2d, 0x3a, 0x4e, 0x59, 0x63, 0x74, 0x8b, + * 0x9c, 0xa6, 0xb1, 0xc5, 0xd2, 0xe8, 0xff + * + * No designs with 2 RSB slave devices sharing identical hardware + * addresses on the same bus have been seen in the wild. All designs + * use 0x2d for the primary PMIC, 0x3a for the secondary PMIC if + * there is one, and 0x45 for peripheral ICs. + * + * The hardware does not seem to support re-setting runtime addresses. + * Attempts to do so result in the slave devices returning a NACK. + * Hence we just hardcode the mapping here, like Allwinner does. + */ + +static const struct sunxi_rsb_addr_map sunxi_rsb_addr_maps[] = { + { 0x3e3, 0x2d }, /* Primary PMIC: AXP223, AXP809, AXP81X, ... */ + { 0x745, 0x3a }, /* Secondary PMIC: AXP806, ... */ + { 0xe89, 0x45 }, /* Peripheral IC: AC100, ... 
*/ +}; + +static u8 sunxi_rsb_get_rtaddr(u16 hwaddr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sunxi_rsb_addr_maps); i++) + if (hwaddr == sunxi_rsb_addr_maps[i].hwaddr) + return sunxi_rsb_addr_maps[i].rtaddr; + + return 0; /* 0 is an invalid runtime address */ +} + +static int of_rsb_register_devices(struct sunxi_rsb *rsb) +{ + struct device *dev = rsb->dev; + struct device_node *child, *np = dev->of_node; + u32 hwaddr; + u8 rtaddr; + int ret; + + if (!np) + return -EINVAL; + + /* Runtime addresses for all slaves should be set first */ + for_each_available_child_of_node(np, child) { + dev_dbg(dev, "setting child %s runtime address\n", + child->full_name); + + ret = of_property_read_u32(child, "reg", &hwaddr); + if (ret) { + dev_err(dev, "%s: invalid 'reg' property: %d\n", + child->full_name, ret); + continue; + } + + rtaddr = sunxi_rsb_get_rtaddr(hwaddr); + if (!rtaddr) { + dev_err(dev, "%s: unknown hardware device address\n", + child->full_name); + continue; + } + + /* + * Since no devices have been registered yet, we are the + * only ones using the bus, we can skip locking the bus. + */ + + /* setup command parameters */ + writel(RSB_CMD_STRA, rsb->regs + RSB_CMD); + writel(RSB_DAR_RTA(rtaddr) | RSB_DAR_DA(hwaddr), + rsb->regs + RSB_DAR); + + /* send command */ + ret = _sunxi_rsb_run_xfer(rsb); + if (ret) + dev_warn(dev, "%s: set runtime address failed: %d\n", + child->full_name, ret); + } + + /* Then we start adding devices and probing them */ + for_each_available_child_of_node(np, child) { + struct sunxi_rsb_device *rdev; + + dev_dbg(dev, "adding child %s\n", child->full_name); + + ret = of_property_read_u32(child, "reg", &hwaddr); + if (ret) + continue; + + rtaddr = sunxi_rsb_get_rtaddr(hwaddr); + if (!rtaddr) + continue; + + rdev = sunxi_rsb_device_create(rsb, child, hwaddr, rtaddr); + if (IS_ERR(rdev)) + dev_err(dev, "failed to add child device %s: %ld\n", + child->full_name, PTR_ERR(rdev)); + } + + return 0; +} + +static const struct of_device_id sunxi_rsb_of_match_table[] = { + { .compatible = "allwinner,sun8i-a23-rsb" }, + {} +}; +MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table); + +static int sunxi_rsb_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct resource *r; + struct sunxi_rsb *rsb; + unsigned long p_clk_freq; + u32 clk_delay, clk_freq = 3000000; + int clk_div, irq, ret; + u32 reg; + + of_property_read_u32(np, "clock-frequency", &clk_freq); + if (clk_freq > RSB_MAX_FREQ) { + dev_err(dev, + "clock-frequency (%u Hz) is too high (max = 20MHz)\n", + clk_freq); + return -EINVAL; + } + + rsb = devm_kzalloc(dev, sizeof(*rsb), GFP_KERNEL); + if (!rsb) + return -ENOMEM; + + rsb->dev = dev; + platform_set_drvdata(pdev, rsb); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rsb->regs = devm_ioremap_resource(dev, r); + if (IS_ERR(rsb->regs)) + return PTR_ERR(rsb->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to retrieve irq: %d\n", irq); + return irq; + } + + rsb->clk = devm_clk_get(dev, NULL); + if (IS_ERR(rsb->clk)) { + ret = PTR_ERR(rsb->clk); + dev_err(dev, "failed to retrieve clk: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(rsb->clk); + if (ret) { + dev_err(dev, "failed to enable clk: %d\n", ret); + return ret; + } + + p_clk_freq = clk_get_rate(rsb->clk); + + rsb->rstc = devm_reset_control_get(dev, NULL); + if (IS_ERR(rsb->rstc)) { + ret = PTR_ERR(rsb->rstc); + dev_err(dev, "failed to retrieve reset controller: %d\n", ret); + goto 
err_clk_disable; + } + + ret = reset_control_deassert(rsb->rstc); + if (ret) { + dev_err(dev, "failed to deassert reset line: %d\n", ret); + goto err_clk_disable; + } + + init_completion(&rsb->complete); + mutex_init(&rsb->lock); + + /* reset the controller */ + writel(RSB_CTRL_SOFT_RST, rsb->regs + RSB_CTRL); + readl_poll_timeout(rsb->regs + RSB_CTRL, reg, + !(reg & RSB_CTRL_SOFT_RST), 1000, 100000); + + /* + * Clock frequency and delay calculation code is from + * Allwinner U-boot sources. + * + * From A83 user manual: + * bus clock frequency = parent clock frequency / (2 * (divider + 1)) + */ + clk_div = p_clk_freq / clk_freq / 2; + if (!clk_div) + clk_div = 1; + else if (clk_div > RSB_CCR_MAX_CLK_DIV + 1) + clk_div = RSB_CCR_MAX_CLK_DIV + 1; + + clk_delay = clk_div >> 1; + if (!clk_delay) + clk_delay = 1; + + dev_info(dev, "RSB running at %lu Hz\n", p_clk_freq / clk_div / 2); + writel(RSB_CCR_SDA_OUT_DELAY(clk_delay) | RSB_CCR_CLK_DIV(clk_div - 1), + rsb->regs + RSB_CCR); + + ret = devm_request_irq(dev, irq, sunxi_rsb_irq, 0, RSB_CTRL_NAME, rsb); + if (ret) { + dev_err(dev, "can't register interrupt handler irq %d: %d\n", + irq, ret); + goto err_reset_assert; + } + + /* initialize all devices on the bus into RSB mode */ + ret = sunxi_rsb_init_device_mode(rsb); + if (ret) + dev_warn(dev, "Initialize device mode failed: %d\n", ret); + + of_rsb_register_devices(rsb); + + return 0; + +err_reset_assert: + reset_control_assert(rsb->rstc); + +err_clk_disable: + clk_disable_unprepare(rsb->clk); + + return ret; +} + +static int sunxi_rsb_remove(struct platform_device *pdev) +{ + struct sunxi_rsb *rsb = platform_get_drvdata(pdev); + + device_for_each_child(rsb->dev, NULL, sunxi_rsb_remove_devices); + reset_control_assert(rsb->rstc); + clk_disable_unprepare(rsb->clk); + + return 0; +} + +static struct platform_driver sunxi_rsb_driver = { + .probe = sunxi_rsb_probe, + .remove = sunxi_rsb_remove, + .driver = { + .name = RSB_CTRL_NAME, + .of_match_table = sunxi_rsb_of_match_table, + }, +}; + +static int __init sunxi_rsb_init(void) +{ + int ret; + + ret = bus_register(&sunxi_rsb_bus); + if (ret) { + pr_err("failed to register sunxi sunxi_rsb bus: %d\n", ret); + return ret; + } + + return platform_driver_register(&sunxi_rsb_driver); +} +module_init(sunxi_rsb_init); + +static void __exit sunxi_rsb_exit(void) +{ + platform_driver_unregister(&sunxi_rsb_driver); + bus_unregister(&sunxi_rsb_bus); +} +module_exit(sunxi_rsb_exit); + +MODULE_AUTHOR("Chen-Yu Tsai "); +MODULE_DESCRIPTION("Allwinner sunXi Reduced Serial Bus controller driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/sunxi-rsb.h b/include/linux/sunxi-rsb.h new file mode 100644 index 000000000000..7e75bb0346d0 --- /dev/null +++ b/include/linux/sunxi-rsb.h @@ -0,0 +1,105 @@ +/* + * Allwinner Reduced Serial Bus Driver + * + * Copyright (c) 2015 Chen-Yu Tsai + * + * Author: Chen-Yu Tsai + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#ifndef _SUNXI_RSB_H +#define _SUNXI_RSB_H + +#include +#include +#include + +struct sunxi_rsb; + +/** + * struct sunxi_rsb_device - Basic representation of an RSB device + * @dev: Driver model representation of the device. + * @ctrl: RSB controller managing the bus hosting this device. 
+ * @rtaddr: This device's runtime address + * @hwaddr: This device's hardware address + */ +struct sunxi_rsb_device { + struct device dev; + struct sunxi_rsb *rsb; + int irq; + u8 rtaddr; + u16 hwaddr; +}; + +static inline struct sunxi_rsb_device *to_sunxi_rsb_device(struct device *d) +{ + return container_of(d, struct sunxi_rsb_device, dev); +} + +static inline void *sunxi_rsb_device_get_drvdata(const struct sunxi_rsb_device *rdev) +{ + return dev_get_drvdata(&rdev->dev); +} + +static inline void sunxi_rsb_device_set_drvdata(struct sunxi_rsb_device *rdev, + void *data) +{ + dev_set_drvdata(&rdev->dev, data); +} + +/** + * struct sunxi_rsb_driver - RSB slave device driver + * @driver: RSB device drivers should initialize name and owner field of + * this structure. + * @probe: binds this driver to an RSB device. + * @remove: unbinds this driver from the RSB device. + */ +struct sunxi_rsb_driver { + struct device_driver driver; + int (*probe)(struct sunxi_rsb_device *rdev); + int (*remove)(struct sunxi_rsb_device *rdev); +}; + +static inline struct sunxi_rsb_driver *to_sunxi_rsb_driver(struct device_driver *d) +{ + return container_of(d, struct sunxi_rsb_driver, driver); +} + +int sunxi_rsb_driver_register(struct sunxi_rsb_driver *rdrv); + +/** + * sunxi_rsb_driver_unregister() - unregister an RSB client driver + * @rdrv: the driver to unregister + */ +static inline void sunxi_rsb_driver_unregister(struct sunxi_rsb_driver *rdrv) +{ + if (rdrv) + driver_unregister(&rdrv->driver); +} + +#define module_sunxi_rsb_driver(__sunxi_rsb_driver) \ + module_driver(__sunxi_rsb_driver, sunxi_rsb_driver_register, \ + sunxi_rsb_driver_unregister) + +struct regmap *__devm_regmap_init_sunxi_rsb(struct sunxi_rsb_device *rdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); + +/** + * devm_regmap_init_sunxi_rsb(): Initialise managed register map + * + * @rdev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sunxi_rsb(rdev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sunxi_rsb, #config, \ + rdev, config) + +#endif /* _SUNXI_RSB_H */ -- cgit v1.2.3 From 7904b5c4988e18b50056b5e71a3ffca752a8a451 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Sep 2015 17:13:19 -0400 Subject: tracepoint: Give priority to probes of tracepoints In order to guarantee that a probe will be called before other probes that are attached to a tracepoint, there needs to be a mechanism to provide priority of one probe over the others. Add a prio field to struct tracepoint_func, which lets the probes be sorted by the priority set in the structure. If no priority is specified, then a priority of 10 is given (this is a macro, and perhaps may be changed in the future). Now probes may be added to affect other probes that are attached to a tracepoint with a guaranteed order. One use case would be to allow tracing of tracepoints to be filtered by pid. A special (higher priority) probe may be added to the sched_switch tracepoint and set the necessary flags of the other tracepoints to notify them if they should be traced or not. In case a tracepoint is enabled at the sched_switch tracepoint too, the order of the two is not random.
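As a sketch of how the new interface is used (the probe body and function names here are illustrative only, not part of this patch), a probe that must run ahead of all default-priority probes could be registered like this; the probe prototype matches the sched_switch probes used later in this series:

	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/tracepoint.h>
	#include <trace/events/sched.h>

	static void example_switch_probe(void *data,
					 struct task_struct *prev,
					 struct task_struct *next)
	{
		/* Runs before probes registered at TRACEPOINT_DEFAULT_PRIO (10) */
	}

	static int __init example_init(void)
	{
		/* INT_MAX sorts this probe ahead of all lower-priority probes */
		return register_trace_prio_sched_switch(example_switch_probe,
							NULL, INT_MAX);
	}
	module_init(example_init);

	static void __exit example_exit(void)
	{
		/* Unregistration is unchanged; priority only affects ordering */
		unregister_trace_sched_switch(example_switch_probe, NULL);
	}
	module_exit(example_exit);
	MODULE_LICENSE("GPL");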
Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 13 ++++++++++ kernel/tracepoint.c | 61 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 63 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index afada369c5b7..6b79537a42b1 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -26,6 +26,7 @@ struct notifier_block; struct tracepoint_func { void *func; void *data; + int prio; }; struct tracepoint { @@ -42,9 +43,14 @@ struct trace_enum_map { unsigned long enum_value; }; +#define TRACEPOINT_DEFAULT_PRIO 10 + extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int +tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, + int prio); +extern int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); extern void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), @@ -207,6 +213,13 @@ extern void syscall_unregfunc(void); (void *)probe, data); \ } \ static inline int \ + register_trace_prio_##name(void (*probe)(data_proto), void *data,\ + int prio) \ + { \ + return tracepoint_probe_register_prio(&__tracepoint_##name, \ + (void *)probe, data, prio); \ + } \ + static inline int \ unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_unregister(&__tracepoint_##name,\ diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 3490407dc7b7..ecd536de603a 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -91,11 +91,13 @@ static void debug_print_probes(struct tracepoint_func *funcs) printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func); } -static struct tracepoint_func *func_add(struct tracepoint_func **funcs, - struct tracepoint_func *tp_func) +static struct tracepoint_func * +func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, + int prio) { - int nr_probes = 0; struct tracepoint_func *old, *new; + int nr_probes = 0; + int pos = -1; if (WARN_ON(!tp_func->func)) return ERR_PTR(-EINVAL); @@ -104,18 +106,33 @@ static struct tracepoint_func *func_add(struct tracepoint_func **funcs, old = *funcs; if (old) { /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) + for (nr_probes = 0; old[nr_probes].func; nr_probes++) { + /* Insert before probes of lower priority */ + if (pos < 0 && old[nr_probes].prio < prio) + pos = nr_probes; if (old[nr_probes].func == tp_func->func && old[nr_probes].data == tp_func->data) return ERR_PTR(-EEXIST); + } } /* + 2 : one for new probe, one for NULL func */ new = allocate_probes(nr_probes + 2); if (new == NULL) return ERR_PTR(-ENOMEM); - if (old) - memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); - new[nr_probes] = *tp_func; + if (old) { + if (pos < 0) { + pos = nr_probes; + memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); + } else { + /* Copy higher priority probes ahead of the new probe */ + memcpy(new, old, pos * sizeof(struct tracepoint_func)); + /* Copy the rest after it. */ + memcpy(new + pos + 1, old + pos, + (nr_probes - pos) * sizeof(struct tracepoint_func)); + } + } else + pos = 0; + new[pos] = *tp_func; new[nr_probes + 1].func = NULL; *funcs = new; debug_print_probes(*funcs); @@ -174,7 +191,7 @@ static void *func_remove(struct tracepoint_func **funcs, * Add the probe function to a tracepoint. 
*/ static int tracepoint_add_func(struct tracepoint *tp, - struct tracepoint_func *func) + struct tracepoint_func *func, int prio) { struct tracepoint_func *old, *tp_funcs; @@ -183,7 +200,7 @@ static int tracepoint_add_func(struct tracepoint *tp, tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); - old = func_add(&tp_funcs, func); + old = func_add(&tp_funcs, func, prio); if (IS_ERR(old)) { WARN_ON_ONCE(1); return PTR_ERR(old); @@ -240,6 +257,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, * @tp: tracepoint * @probe: probe handler * @data: tracepoint data + * @prio: priority of this function over other registered functions * * Returns 0 if ok, error value on error. * Note: if @tp is within a module, the caller is responsible for @@ -247,7 +265,8 @@ static int tracepoint_remove_func(struct tracepoint *tp, * performed either with a tracepoint module going notifier, or from * within module exit functions. */ -int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) +int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, + void *data, int prio) { struct tracepoint_func tp_func; int ret; @@ -255,10 +274,30 @@ int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) mutex_lock(&tracepoints_mutex); tp_func.func = probe; tp_func.data = data; - ret = tracepoint_add_func(tp, &tp_func); + tp_func.prio = prio; + ret = tracepoint_add_func(tp, &tp_func, prio); mutex_unlock(&tracepoints_mutex); return ret; } +EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio); + +/** + * tracepoint_probe_register - Connect a probe to a tracepoint + * @tp: tracepoint + * @probe: probe handler + * @data: tracepoint data + * @prio: priority of this function over other registered functions + * + * Returns 0 if ok, error value on error. + * Note: if @tp is within a module, the caller is responsible for + * unregistering the probe before the module is gone. This can be + * performed either with a tracepoint module going notifier, or from + * within module exit functions. + */ +int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) +{ + return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO); +} EXPORT_SYMBOL_GPL(tracepoint_probe_register); /** -- cgit v1.2.3 From 3fdaf80f4a836911c0eda1cee92f8aa625f90197 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 25 Sep 2015 12:58:44 -0400 Subject: tracing: Implement event pid filtering Add the necessary hooks to use the pids loaded in set_event_pid to filter all the events enabled in the tracing instance that match the pids listed. Two probes are added to both sched_switch and sched_wakeup tracepoints to be called before other probes are called and after the other probes are called. The first is used to set the necessary flags to let the probes know to test if they should be traced or not. The sched_switch pre probe will set the "ignore_pid" flag if neither the previous or next task has a matching pid. The sched_switch probe will set the "ignore_pid" flag if the next task does not match the matching pid. The pre probe allows for probes tracing sched_switch to be traced if necessary. The sched_wakeup pre probe will set the "ignore_pid" flag if neither the current task nor the wakee task has a matching pid. The sched_wakeup post probe will set the "ignore_pid" flag if the current task does not have a matching pid. Cc: "Paul E. 
McKenney" Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 7 ++ kernel/trace/trace.h | 2 + kernel/trace/trace_events.c | 148 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 154 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f85693bbcdc3..429fdfc3baf5 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -328,6 +328,7 @@ enum { EVENT_FILE_FL_SOFT_DISABLED_BIT, EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, + EVENT_FILE_FL_PID_FILTER_BIT, }; /* @@ -341,6 +342,7 @@ enum { * tracepoint may be enabled) * TRIGGER_MODE - When set, invoke the triggers associated with the event * TRIGGER_COND - When set, one or more triggers has an associated filter + * PID_FILTER - When set, the event is filtered based on pid */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), @@ -351,6 +353,7 @@ enum { EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), + EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), }; struct trace_event_file { @@ -429,6 +432,8 @@ extern enum event_trigger_type event_triggers_call(struct trace_event_file *file extern void event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt); +bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); + /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test @@ -448,6 +453,8 @@ trace_trigger_soft_disabled(struct trace_event_file *file) event_triggers_call(file, NULL); if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; + if (eflags & EVENT_FILE_FL_PID_FILTER) + return trace_event_ignore_this_pid(file); } return false; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 250481043bb5..89ffdaf3e371 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -156,6 +156,8 @@ struct trace_array_cpu { pid_t pid; kuid_t uid; char comm[TASK_COMM_LEN]; + + bool ignore_pid; }; struct tracer; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2ad7014707ee..ab07058e27c1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -22,6 +22,8 @@ #include #include +#include + #include #include "trace_output.h" @@ -212,12 +214,32 @@ int trace_event_raw_init(struct trace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); +bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) +{ + struct trace_array *tr = trace_file->tr; + struct trace_array_cpu *data; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + if (!pid_list) + return false; + + data = this_cpu_ptr(tr->trace_buffer.data); + + return data->ignore_pid; +} +EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); + void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len) { struct trace_event_call *event_call = trace_file->event_call; + if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) && + trace_event_ignore_this_pid(trace_file)) + return NULL; + local_save_flags(fbuffer->flags); fbuffer->pc = preempt_count(); fbuffer->trace_file = trace_file; @@ -459,15 +481,114 @@ static int cmp_pid(const void *key, const void *elt) return 1; } +static bool +check_ignore_pid(struct trace_pid_list 
*filtered_pids, struct task_struct *task) +{ + pid_t search_pid; + pid_t *pid; + + /* + * Return false, because if filtered_pids does not exist, + * all pids are good to trace. + */ + if (!filtered_pids) + return false; + + search_pid = task->pid; + + pid = bsearch(&search_pid, filtered_pids->pids, + filtered_pids->nr_pids, sizeof(pid_t), + cmp_pid); + if (!pid) + return true; + + return false; +} + +static void +event_filter_pid_sched_switch_probe_pre(void *data, + struct task_struct *prev, struct task_struct *next) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, prev) && + check_ignore_pid(pid_list, next)); +} + +static void +event_filter_pid_sched_switch_probe_post(void *data, + struct task_struct *prev, struct task_struct *next) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, next)); +} + +static void +event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + /* Nothing to do if we are already tracing */ + if (!this_cpu_read(tr->trace_buffer.data->ignore_pid)) + return; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, task)); +} + +static void +event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + /* Nothing to do if we are not tracing */ + if (this_cpu_read(tr->trace_buffer.data->ignore_pid)) + return; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + /* Set tracing if current is enabled */ + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, current)); +} + static void __ftrace_clear_event_pids(struct trace_array *tr) { struct trace_pid_list *pid_list; + struct trace_event_file *file; + int cpu; pid_list = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); if (!pid_list) return; + unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr); + unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr); + + unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); + + list_for_each_entry(file, &tr->events, list) { + clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); + } + + for_each_possible_cpu(cpu) + per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false; + rcu_assign_pointer(tr->filtered_pids, NULL); /* Wait till all users are no longer using pid filtering */ @@ -1429,13 +1550,14 @@ static int max_pids(struct trace_pid_list *pid_list) } static ssize_t -ftrace_event_pid_write(struct file *file, const char __user *ubuf, +ftrace_event_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct seq_file *m = file->private_data; + struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; struct trace_pid_list *filtered_pids = NULL; struct trace_pid_list *pid_list = NULL; + struct trace_event_file *file; struct trace_parser parser; unsigned long val; loff_t this_pos; @@ -1564,15 +1686,35 @@ ftrace_event_pid_write(struct file *file, const 
char __user *ubuf, rcu_assign_pointer(tr->filtered_pids, pid_list); - mutex_unlock(&event_mutex); + list_for_each_entry(file, &tr->events, list) { + set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); + } if (filtered_pids) { synchronize_sched(); free_pages((unsigned long)filtered_pids->pids, filtered_pids->order); kfree(filtered_pids); + } else { + /* + * Register a probe that is called before all other probes + * to set ignore_pid if next or prev do not match. + * Register a probe this is called after all other probes + * to only keep ignore_pid set if next pid matches. + */ + register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post, + tr, 0); + + register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, + tr, 0); } + mutex_unlock(&event_mutex); + ret = read; *ppos += read; -- cgit v1.2.3 From 49e4b84333f338d4f183f28f1f3c1131b9fb2b5a Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sun, 25 Oct 2015 01:02:19 +0800 Subject: ACPI: Use correct IRQ when uninstalling ACPI interrupt handler Currently when the system is trying to uninstall the ACPI interrupt handler, it uses acpi_gbl_FADT.sci_interrupt as the IRQ number. However, the IRQ number that the ACPI interrupt handled is installed for comes from acpi_gsi_to_irq() and that is the number that should be used for the handler removal. Fix this problem by using the mapped IRQ returned from acpi_gsi_to_irq() as appropriate. Cc: All applicable Acked-by: Lv Zheng Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osl.c | 9 ++++++--- include/linux/acpi.h | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 739a4a6b3b9b..2a25919f8eab 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -81,6 +81,7 @@ static struct workqueue_struct *kacpid_wq; static struct workqueue_struct *kacpi_notify_wq; static struct workqueue_struct *kacpi_hotplug_wq; static bool acpi_os_initialized; +unsigned int acpi_sci_irq = INVALID_ACPI_IRQ; /* * This list of permanent mappings is for memory that may be accessed from @@ -856,17 +857,19 @@ acpi_os_install_interrupt_handler(u32 gsi, acpi_osd_handler handler, acpi_irq_handler = NULL; return AE_NOT_ACQUIRED; } + acpi_sci_irq = irq; return AE_OK; } -acpi_status acpi_os_remove_interrupt_handler(u32 irq, acpi_osd_handler handler) +acpi_status acpi_os_remove_interrupt_handler(u32 gsi, acpi_osd_handler handler) { - if (irq != acpi_gbl_FADT.sci_interrupt) + if (gsi != acpi_gbl_FADT.sci_interrupt || !acpi_sci_irq_valid()) return AE_BAD_PARAMETER; - free_irq(irq, acpi_irq); + free_irq(acpi_sci_irq, acpi_irq); acpi_irq_handler = NULL; + acpi_sci_irq = INVALID_ACPI_IRQ; return AE_OK; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 43856d19cf4d..1ae6ba06f9ac 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -193,6 +193,12 @@ int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; +extern unsigned int acpi_sci_irq; +#define INVALID_ACPI_IRQ ((unsigned)-1) +static inline bool acpi_sci_irq_valid(void) +{ + return acpi_sci_irq != INVALID_ACPI_IRQ; +} extern int sbf_port; extern unsigned long acpi_realmode_flags; -- cgit v1.2.3 From 8890624a4e8c2c7046d63bfd15d7331af9f55f10 Mon 
Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 18 Sep 2014 00:12:50 +0200 Subject: arcnet: com20020-pci: add led trigger support The EAE PLX-PCI card has special LEDs on the main I/O PCI resource BAR. This patch adds support to trigger the conflict and data LEDs in response to packet activity. Signed-off-by: Michael Grzeschik --- drivers/net/arcnet/arcdevice.h | 19 ++++++++++ drivers/net/arcnet/arcnet.c | 72 ++++++++++++++++++++++++++++++++++++++ drivers/net/arcnet/com20020-pci.c | 73 +++++++++++++++++++++++++++++++++++++++ drivers/net/arcnet/com20020.h | 10 ++++++ include/linux/leds.h | 7 ++++ 5 files changed, 181 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index d7fdea11e694..2edc0c0ab7c7 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -237,6 +237,8 @@ struct Outgoing { numsegs; /* number of segments */ }; +#define ARCNET_LED_NAME_SZ (IFNAMSIZ + 6) + struct arcnet_local { uint8_t config, /* current value of CONFIG register */ timeout, /* Extended timeout for COM20020 */ @@ -260,6 +262,11 @@ struct arcnet_local { /* On preemtive and SMB a lock is needed */ spinlock_t lock; + struct led_trigger *tx_led_trig; + char tx_led_trig_name[ARCNET_LED_NAME_SZ]; + struct led_trigger *recon_led_trig; + char recon_led_trig_name[ARCNET_LED_NAME_SZ]; + /* * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of * which can be used for either sending or receiving. The new dynamic @@ -309,6 +316,8 @@ struct arcnet_local { int (*reset)(struct net_device *dev, int really_reset); void (*open)(struct net_device *dev); void (*close)(struct net_device *dev); + void (*datatrigger) (struct net_device * dev, int enable); + void (*recontrigger) (struct net_device * dev, int enable); void (*copy_to_card)(struct net_device *dev, int bufnum, int offset, void *buf, int count); @@ -319,6 +328,16 @@ struct arcnet_local { void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ }; +enum arcnet_led_event { + ARCNET_LED_EVENT_RECON, + ARCNET_LED_EVENT_OPEN, + ARCNET_LED_EVENT_STOP, + ARCNET_LED_EVENT_TX, +}; + +void arcnet_led_event(struct net_device *netdev, enum arcnet_led_event event); +void devm_arcnet_led_init(struct net_device *netdev, int index, int subid); + #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 542e2b46b9eb..4242522ae86b 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -52,6 +52,8 @@ #include #include +#include + #include "arcdevice.h" #include "com9026.h" @@ -189,6 +191,71 @@ static void arcnet_dump_packet(struct net_device *dev, int bufnum, #endif +/* Trigger an LED event in response to an ARCNET device event */ +void arcnet_led_event(struct net_device *dev, enum arcnet_led_event event) +{ + struct arcnet_local *lp = netdev_priv(dev); + unsigned long led_delay = 350; + unsigned long tx_delay = 50; + + switch (event) { + case ARCNET_LED_EVENT_RECON: + led_trigger_blink_oneshot(lp->recon_led_trig, + &led_delay, &led_delay, 0); + break; + case ARCNET_LED_EVENT_OPEN: + led_trigger_event(lp->tx_led_trig, LED_OFF); + led_trigger_event(lp->recon_led_trig, LED_OFF); + break; + case ARCNET_LED_EVENT_STOP: + led_trigger_event(lp->tx_led_trig, LED_OFF); + led_trigger_event(lp->recon_led_trig, LED_OFF); + break; + case ARCNET_LED_EVENT_TX: + led_trigger_blink_oneshot(lp->tx_led_trig, + &tx_delay, &tx_delay, 0); + break; + } 
+} +EXPORT_SYMBOL_GPL(arcnet_led_event); + +static void arcnet_led_release(struct device *gendev, void *res) +{ + struct arcnet_local *lp = netdev_priv(to_net_dev(gendev)); + + led_trigger_unregister_simple(lp->tx_led_trig); + led_trigger_unregister_simple(lp->recon_led_trig); +} + +/* Register ARCNET LED triggers for a arcnet device + * + * This is normally called from a driver's probe function + */ +void devm_arcnet_led_init(struct net_device *netdev, int index, int subid) +{ + struct arcnet_local *lp = netdev_priv(netdev); + void *res; + + res = devres_alloc(arcnet_led_release, 0, GFP_KERNEL); + if (!res) { + netdev_err(netdev, "cannot register LED triggers\n"); + return; + } + + snprintf(lp->tx_led_trig_name, sizeof(lp->tx_led_trig_name), + "arc%d-%d-tx", index, subid); + snprintf(lp->recon_led_trig_name, sizeof(lp->recon_led_trig_name), + "arc%d-%d-recon", index, subid); + + led_trigger_register_simple(lp->tx_led_trig_name, + &lp->tx_led_trig); + led_trigger_register_simple(lp->recon_led_trig_name, + &lp->recon_led_trig); + + devres_add(&netdev->dev, res); +} +EXPORT_SYMBOL_GPL(devm_arcnet_led_init); + /* Unregister a protocol driver from the arc_proto_map. Protocol drivers * are responsible for registering themselves, but the unregister routine * is pretty generic so we'll do it here. @@ -425,6 +492,7 @@ int arcnet_open(struct net_device *dev) netif_start_queue(dev); + arcnet_led_event(dev, ARCNET_LED_EVENT_OPEN); return 0; out_module_put: @@ -438,6 +506,7 @@ int arcnet_close(struct net_device *dev) { struct arcnet_local *lp = netdev_priv(dev); + arcnet_led_event(dev, ARCNET_LED_EVENT_STOP); netif_stop_queue(dev); /* flush TX and disable RX */ @@ -585,6 +654,8 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, arc_printk(D_DEBUG, dev, "%s: %d: %s, status: %x\n", __FILE__, __LINE__, __func__, lp->hw.status(dev)); + arcnet_led_event(dev, ARCNET_LED_EVENT_TX); + spin_unlock_irqrestore(&lp->lock, flags); return retval; /* no need to try again */ } @@ -837,6 +908,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) arc_printk(D_RECON, dev, "Network reconfiguration detected (status=%Xh)\n", status); + arcnet_led_event(dev, ARCNET_LED_EVENT_RECON); /* MYRECON bit is at bit 7 of diagstatus */ if (diagstatus & 0x80) arc_printk(D_RECON, dev, "Put out that recon myself\n"); diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 637a6110cec6..239de38fbd6a 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "arcdevice.h" #include "com20020.h" @@ -62,6 +63,36 @@ module_param(clockp, int, 0); module_param(clockm, int, 0); MODULE_LICENSE("GPL"); +static void led_tx_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct com20020_dev *card; + struct com20020_priv *priv; + struct com20020_pci_card_info *ci; + + card = container_of(led_cdev, struct com20020_dev, tx_led); + + priv = card->pci_priv; + ci = priv->ci; + + outb(!!value, priv->misc + ci->leds[card->index].green); +} + +static void led_recon_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct com20020_dev *card; + struct com20020_priv *priv; + struct com20020_pci_card_info *ci; + + card = container_of(led_cdev, struct com20020_dev, recon_led); + + priv = card->pci_priv; + ci = priv->ci; + + outb(!!value, priv->misc + ci->leds[card->index].red); +} + static void com20020pci_remove(struct pci_dev *pdev); static int com20020pci_probe(struct pci_dev *pdev, @@ 
-170,14 +201,41 @@ static int com20020pci_probe(struct pci_dev *pdev, card->index = i; card->pci_priv = priv; + card->tx_led.brightness_set = led_tx_set; + card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-tx", + dev->dev_id, i); + card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:green:tx:%d-%d", + dev->dev_id, i); + + card->tx_led.dev = &dev->dev; + card->recon_led.brightness_set = led_recon_set; + card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-recon", + dev->dev_id, i); + card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:red:recon:%d-%d", + dev->dev_id, i); + card->recon_led.dev = &dev->dev; card->dev = dev; + ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); + if (ret) + goto out_port; + + ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); + if (ret) + goto out_port; + dev_set_drvdata(&dev->dev, card); ret = com20020_found(dev, IRQF_SHARED); if (ret) goto out_port; + devm_arcnet_led_init(dev, dev->dev_id, i); + list_add(&card->list, &priv->list_dev); } @@ -261,6 +319,12 @@ static struct com20020_pci_card_info card_info_eae_arc1 = { .offset = 0x10, .size = 0x04, }, + .leds = { + { + .green = 0x0, + .red = 0x1, + }, + }, .rotary = 0x0, .flags = ARC_CAN_10MBIT, }; @@ -284,6 +348,15 @@ static struct com20020_pci_card_info card_info_eae_ma1 = { .offset = 0x10, .size = 0x04, }, + .leds = { + { + .green = 0x0, + .red = 0x1, + }, { + .green = 0x2, + .red = 0x3, + }, + }, .rotary = 0x0, .flags = ARC_CAN_10MBIT, }; diff --git a/drivers/net/arcnet/com20020.h b/drivers/net/arcnet/com20020.h index f2ed2eff3ae3..0bcc5d0a6903 100644 --- a/drivers/net/arcnet/com20020.h +++ b/drivers/net/arcnet/com20020.h @@ -26,6 +26,7 @@ */ #ifndef __COM20020_H #define __COM20020_H +#include int com20020_check(struct net_device *dev); int com20020_found(struct net_device *dev, int shared); @@ -36,6 +37,11 @@ extern const struct net_device_ops com20020_netdev_ops; #define PLX_PCI_MAX_CARDS 2 +struct ledoffsets { + int green; + int red; +}; + struct com20020_pci_channel_map { u32 bar; u32 offset; @@ -49,6 +55,7 @@ struct com20020_pci_card_info { struct com20020_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CARDS]; struct com20020_pci_channel_map misc_map; + struct ledoffsets leds[PLX_PCI_MAX_CARDS]; int rotary; unsigned int flags; @@ -64,6 +71,9 @@ struct com20020_dev { struct list_head list; struct net_device *dev; + struct led_classdev tx_led; + struct led_classdev recon_led; + struct com20020_priv *pci_priv; int index; }; diff --git a/include/linux/leds.h b/include/linux/leds.h index b122eeafb5dc..fa359c79c825 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -283,6 +283,13 @@ static inline void led_trigger_register_simple(const char *name, static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} static inline void led_trigger_event(struct led_trigger *trigger, enum led_brightness event) {} +static inline void led_trigger_blink(struct led_trigger *trigger, + unsigned long *delay_on, + unsigned long *delay_off) {} +static inline void led_trigger_blink_oneshot(struct led_trigger *trigger, + unsigned long *delay_on, + unsigned long *delay_off, + int invert) {} static inline void led_trigger_set_default(struct led_classdev *led_cdev) {} static inline void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger) {} -- cgit v1.2.3 From 98a3be44ffa67b812de7aa7aed9f2331edcfb1a5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Oct 2015 
12:16:41 +0300 Subject: mfd: core: redo ACPI matching of the children devices There is at least one board on the market, namely Intel Galileo Gen2, that uses _ADR to distinguish the devices under one actual device. Due to this we have to improve the quirk in the MFD core to handle that board. Acked-by: Rafael J. Wysocki Acked-by: Lee Jones Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- Documentation/acpi/enumeration.txt | 11 +++++--- drivers/mfd/mfd-core.c | 52 ++++++++++++++++++++++++++------------ include/linux/mfd/core.h | 10 ++++++-- 3 files changed, 52 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index b731b292e812..a91ec5af52df 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -347,13 +347,18 @@ For the first case, the MFD drivers do not need to do anything. The resulting child platform device will have its ACPI_COMPANION() set to point to the parent device. -If the ACPI namespace has a device that we can match using an ACPI id, -the id should be set like: +If the ACPI namespace has a device that we can match using an ACPI id or ACPI +adr, the cell should be set like: + + static struct mfd_cell_acpi_match my_subdevice_cell_acpi_match = { + .pnpid = "XYZ0001", + .adr = 0, + }; static struct mfd_cell my_subdevice_cell = { .name = "my_subdevice", /* set the resources relative to the parent */ - .acpi_pnpid = "XYZ0001", + .acpi_match = &my_subdevice_cell_acpi_match, }; The ACPI id "XYZ0001" is then used to lookup an ACPI device directly under diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index c17635d3e504..60b60dc63ddd 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -82,29 +82,49 @@ static int mfd_platform_add_cell(struct platform_device *pdev, static void mfd_acpi_add_device(const struct mfd_cell *cell, struct platform_device *pdev) { - struct acpi_device *parent_adev; + const struct mfd_cell_acpi_match *match = cell->acpi_match; + struct acpi_device *parent, *child; struct acpi_device *adev; - parent_adev = ACPI_COMPANION(pdev->dev.parent); - if (!parent_adev) + parent = ACPI_COMPANION(pdev->dev.parent); + if (!parent) return; /* - * MFD child device gets its ACPI handle either from the ACPI - * device directly under the parent that matches the acpi_pnpid or - * it will use the parent handle if is no acpi_pnpid is given. + * MFD child device gets its ACPI handle either from the ACPI device + * directly under the parent that matches either _HID or _CID, or + * _ADR, or it will use the parent handle if no ID is given. + * + * Note that use of _ADR is a grey area in the ACPI specification, + * though Intel Galileo Gen2 is using it to distinguish the child + * devices.
*/ - adev = parent_adev; - if (cell->acpi_pnpid) { - struct acpi_device_id ids[2] = {}; - struct acpi_device *child_adev; - - strlcpy(ids[0].id, cell->acpi_pnpid, sizeof(ids[0].id)); - list_for_each_entry(child_adev, &parent_adev->children, node) - if (acpi_match_device_ids(child_adev, ids)) { - adev = child_adev; - break; + adev = parent; + if (match) { + if (match->pnpid) { + struct acpi_device_id ids[2] = {}; + + strlcpy(ids[0].id, match->pnpid, sizeof(ids[0].id)); + list_for_each_entry(child, &parent->children, node) { + if (acpi_match_device_ids(child, ids)) { + adev = child; + break; + } + } + } else { + unsigned long long adr; + acpi_status status; + + list_for_each_entry(child, &parent->children, node) { + status = acpi_evaluate_integer(child->handle, + "_ADR", NULL, + &adr); + if (ACPI_SUCCESS(status) && match->adr == adr) { + adev = child; + break; + } } + } } ACPI_COMPANION_SET(&pdev->dev, adev); diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index a76bc100bf97..27dac3ff18b9 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -18,6 +18,12 @@ struct irq_domain; +/* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ +struct mfd_cell_acpi_match { + const char *pnpid; + const unsigned long long adr; +}; + /* * This struct describes the MFD part ("cell"). * After registration the copy of this structure will become the platform data @@ -44,8 +50,8 @@ struct mfd_cell { */ const char *of_compatible; - /* Matches ACPI PNP id, either _HID or _CID */ - const char *acpi_pnpid; + /* Matches ACPI */ + const struct mfd_cell_acpi_match *acpi_match; /* * These resources can be specified relative to the parent device. -- cgit v1.2.3 From 62a2e633474107a9ae93dfedf16341ec6414a907 Mon Sep 17 00:00:00 2001 From: Vaibhav Hiremath Date: Tue, 25 Aug 2015 14:34:28 +0530 Subject: mfd: 88pm80x: Add 88pm860 chip type support Add chip identification support for 88PM860 device to the pm80x_chip_mapping table. Signed-off-by: Vaibhav Hiremath Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- drivers/mfd/88pm80x.c | 2 ++ include/linux/mfd/88pm80x.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/88pm80x.c b/drivers/mfd/88pm80x.c index 5e72f65ef94c..63445ea6b0bf 100644 --- a/drivers/mfd/88pm80x.c +++ b/drivers/mfd/88pm80x.c @@ -33,6 +33,8 @@ static struct pm80x_chip_mapping chip_mapping[] = { {0x3, CHIP_PM800}, /* 88PM805 chip id number */ {0x0, CHIP_PM805}, + /* 88PM860 chip id number */ + {0x4, CHIP_PM860}, }; /* diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h index 8fcad63fab55..d409ceb2231e 100644 --- a/include/linux/mfd/88pm80x.h +++ b/include/linux/mfd/88pm80x.h @@ -21,6 +21,7 @@ enum { CHIP_INVALID = 0, CHIP_PM800, CHIP_PM805, + CHIP_PM860, CHIP_MAX, }; -- cgit v1.2.3 From d785334a0d5deff30a487c74324b842d2179553d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 14 Sep 2015 21:12:45 +0900 Subject: mfd: s2mps11: Add manual shutdown method for Odroid XU3 On Odroid XU3 board (with S2MPS11 PMIC) the PWRHOLD bit in CTRL1 register must be manually set to 0 before initiating power off sequence. One of usual power down methods for Exynos based devices looks like: 1. PWRHOLD pin of PMIC is connected to PSHOLD of Exynos SoC. 2. Exynos holds up this pin during system operation. 3. ACOKB pin of PMIC is pulled up to VBATT and optionally to pin in other device. 4. When PWRHOLD/PSHOLD goes low, the PMIC will turn off the power if ACOKB goes high. 
On the Odroid XU3 family the difference is in (3): the ACOKB pin is grounded. This means that the PWRHOLD field of the PMIC must be manually set to low; the subsequent signal from the Application Processor (the usual change on the PWRHOLD/PSHOLD pin) will then actually cut off the power. The patch adds the respective binding, allowing Odroid XU3 devices to be powered off. Signed-off-by: Krzysztof Kozlowski Reported-by: Anand Moon Tested-by: Anand Moon Reviewed-by: Javier Martinez Canillas Signed-off-by: Lee Jones --- drivers/mfd/sec-core.c | 30 ++++++++++++++++++++++++++++++ include/linux/mfd/samsung/core.h | 2 ++ include/linux/mfd/samsung/s2mps11.h | 1 + 3 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c index d206a3e8fe87..2d1137a7a0ee 100644 --- a/drivers/mfd/sec-core.c +++ b/drivers/mfd/sec-core.c @@ -278,6 +278,8 @@ static struct sec_platform_data *sec_pmic_i2c_parse_dt_pdata( * not parsed here. */ + pd->manual_poweroff = of_property_read_bool(dev->of_node, + "samsung,s2mps11-acokb-ground"); return pd; } #else @@ -440,6 +442,33 @@ static int sec_pmic_remove(struct i2c_client *i2c) return 0; } +static void sec_pmic_shutdown(struct i2c_client *i2c) +{ + struct sec_pmic_dev *sec_pmic = i2c_get_clientdata(i2c); + unsigned int reg, mask; + + if (!sec_pmic->pdata->manual_poweroff) + return; + + switch (sec_pmic->device_type) { + case S2MPS11X: + reg = S2MPS11_REG_CTRL1; + mask = S2MPS11_CTRL1_PWRHOLD_MASK; + break; + default: + /* + * Currently only one board with S2MPS11 needs this, so just + * ignore the rest. + */ + dev_warn(sec_pmic->dev, + "Unsupported device %lu for manual power off\n", + sec_pmic->device_type); + return; + } + + regmap_update_bits(sec_pmic->regmap_pmic, reg, mask, 0); +} + #ifdef CONFIG_PM_SLEEP static int sec_pmic_suspend(struct device *dev) { @@ -491,6 +520,7 @@ static struct i2c_driver sec_pmic_driver = { }, .probe = sec_pmic_probe, .remove = sec_pmic_remove, + .shutdown = sec_pmic_shutdown, .id_table = sec_pmic_id, }; diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 75115384f3fc..aa78957e092f 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -132,6 +132,8 @@ struct sec_platform_data { int buck2_init; int buck3_init; int buck4_init; + /* Whether or not to manually set PWRHOLD to low during shutdown. */ + bool manual_poweroff; }; /** diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index 7981a9d77d3f..b288965e8101 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -179,6 +179,7 @@ enum s2mps11_regulators { #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1) #define S2MPS11_RAMP_DELAY 25000 /* uV/us */ +#define S2MPS11_CTRL1_PWRHOLD_MASK BIT(4) #define S2MPS11_BUCK2_RAMP_SHIFT 6 #define S2MPS11_BUCK34_RAMP_SHIFT 4 -- cgit v1.2.3 From 9111fa5c4032589e9b7ccc01e330810ba05726bf Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 9 Sep 2015 09:49:22 +0100 Subject: mfd: arizona: Add register bits to support the ANC block Some Arizona devices have a hardware ANC block present. This patch adds the registers necessary to configure this hardware block.
Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm5110-tables.c | 186 ++++++++++++++++++++++++++++++++++ include/linux/mfd/arizona/registers.h | 70 +++++++++++++ 2 files changed, 256 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 46cad3280114..2bb2d0467a92 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -1633,6 +1633,185 @@ static const struct reg_default wm5110_reg_default[] = { { 0x00000EF8, 0x0000 }, /* R3832 - ISRC 3 CTRL 3 */ { 0x00000F00, 0x0000 }, /* R3840 - Clock Control */ { 0x00000F01, 0x0000 }, /* R3841 - ANC_SRC */ + { 0x00000F08, 0x001c }, /* R3848 - ANC Coefficient */ + { 0x00000F09, 0x0000 }, /* R3849 - ANC Coefficient */ + { 0x00000F0A, 0x0000 }, /* R3850 - ANC Coefficient */ + { 0x00000F0B, 0x0000 }, /* R3851 - ANC Coefficient */ + { 0x00000F0C, 0x0000 }, /* R3852 - ANC Coefficient */ + { 0x00000F0D, 0x0000 }, /* R3853 - ANC Coefficient */ + { 0x00000F0E, 0x0000 }, /* R3854 - ANC Coefficient */ + { 0x00000F0F, 0x0000 }, /* R3855 - ANC Coefficient */ + { 0x00000F10, 0x0000 }, /* R3856 - ANC Coefficient */ + { 0x00000F11, 0x0000 }, /* R3857 - ANC Coefficient */ + { 0x00000F12, 0x0000 }, /* R3858 - ANC Coefficient */ + { 0x00000F15, 0x0000 }, /* R3861 - FCL Filter Control */ + { 0x00000F17, 0x0004 }, /* R3863 - FCL ADC Reformatter Control */ + { 0x00000F18, 0x0004 }, /* R3864 - ANC Coefficient */ + { 0x00000F19, 0x0002 }, /* R3865 - ANC Coefficient */ + { 0x00000F1A, 0x0000 }, /* R3866 - ANC Coefficient */ + { 0x00000F1B, 0x0010 }, /* R3867 - ANC Coefficient */ + { 0x00000F1C, 0x0000 }, /* R3868 - ANC Coefficient */ + { 0x00000F1D, 0x0000 }, /* R3869 - ANC Coefficient */ + { 0x00000F1E, 0x0000 }, /* R3870 - ANC Coefficient */ + { 0x00000F1F, 0x0000 }, /* R3871 - ANC Coefficient */ + { 0x00000F20, 0x0000 }, /* R3872 - ANC Coefficient */ + { 0x00000F21, 0x0000 }, /* R3873 - ANC Coefficient */ + { 0x00000F22, 0x0000 }, /* R3874 - ANC Coefficient */ + { 0x00000F23, 0x0000 }, /* R3875 - ANC Coefficient */ + { 0x00000F24, 0x0000 }, /* R3876 - ANC Coefficient */ + { 0x00000F25, 0x0000 }, /* R3877 - ANC Coefficient */ + { 0x00000F26, 0x0000 }, /* R3878 - ANC Coefficient */ + { 0x00000F27, 0x0000 }, /* R3879 - ANC Coefficient */ + { 0x00000F28, 0x0000 }, /* R3880 - ANC Coefficient */ + { 0x00000F29, 0x0000 }, /* R3881 - ANC Coefficient */ + { 0x00000F2A, 0x0000 }, /* R3882 - ANC Coefficient */ + { 0x00000F2B, 0x0000 }, /* R3883 - ANC Coefficient */ + { 0x00000F2C, 0x0000 }, /* R3884 - ANC Coefficient */ + { 0x00000F2D, 0x0000 }, /* R3885 - ANC Coefficient */ + { 0x00000F2E, 0x0000 }, /* R3886 - ANC Coefficient */ + { 0x00000F2F, 0x0000 }, /* R3887 - ANC Coefficient */ + { 0x00000F30, 0x0000 }, /* R3888 - ANC Coefficient */ + { 0x00000F31, 0x0000 }, /* R3889 - ANC Coefficient */ + { 0x00000F32, 0x0000 }, /* R3890 - ANC Coefficient */ + { 0x00000F33, 0x0000 }, /* R3891 - ANC Coefficient */ + { 0x00000F34, 0x0000 }, /* R3892 - ANC Coefficient */ + { 0x00000F35, 0x0000 }, /* R3893 - ANC Coefficient */ + { 0x00000F36, 0x0000 }, /* R3894 - ANC Coefficient */ + { 0x00000F37, 0x0000 }, /* R3895 - ANC Coefficient */ + { 0x00000F38, 0x0000 }, /* R3896 - ANC Coefficient */ + { 0x00000F39, 0x0000 }, /* R3897 - ANC Coefficient */ + { 0x00000F3A, 0x0000 }, /* R3898 - ANC Coefficient */ + { 0x00000F3B, 0x0000 }, /* R3899 - ANC Coefficient */ + { 0x00000F3C, 0x0000 }, /* R3900 - ANC Coefficient */ + { 0x00000F3D, 0x0000 }, /* R3901 - ANC Coefficient */ + { 0x00000F3E, 0x0000 
}, /* R3902 - ANC Coefficient */ + { 0x00000F3F, 0x0000 }, /* R3903 - ANC Coefficient */ + { 0x00000F40, 0x0000 }, /* R3904 - ANC Coefficient */ + { 0x00000F41, 0x0000 }, /* R3905 - ANC Coefficient */ + { 0x00000F42, 0x0000 }, /* R3906 - ANC Coefficient */ + { 0x00000F43, 0x0000 }, /* R3907 - ANC Coefficient */ + { 0x00000F44, 0x0000 }, /* R3908 - ANC Coefficient */ + { 0x00000F45, 0x0000 }, /* R3909 - ANC Coefficient */ + { 0x00000F46, 0x0000 }, /* R3910 - ANC Coefficient */ + { 0x00000F47, 0x0000 }, /* R3911 - ANC Coefficient */ + { 0x00000F48, 0x0000 }, /* R3912 - ANC Coefficient */ + { 0x00000F49, 0x0000 }, /* R3913 - ANC Coefficient */ + { 0x00000F4A, 0x0000 }, /* R3914 - ANC Coefficient */ + { 0x00000F4B, 0x0000 }, /* R3915 - ANC Coefficient */ + { 0x00000F4C, 0x0000 }, /* R3916 - ANC Coefficient */ + { 0x00000F4D, 0x0000 }, /* R3917 - ANC Coefficient */ + { 0x00000F4E, 0x0000 }, /* R3918 - ANC Coefficient */ + { 0x00000F4F, 0x0000 }, /* R3919 - ANC Coefficient */ + { 0x00000F50, 0x0000 }, /* R3920 - ANC Coefficient */ + { 0x00000F51, 0x0000 }, /* R3921 - ANC Coefficient */ + { 0x00000F52, 0x0000 }, /* R3922 - ANC Coefficient */ + { 0x00000F53, 0x0000 }, /* R3923 - ANC Coefficient */ + { 0x00000F54, 0x0000 }, /* R3924 - ANC Coefficient */ + { 0x00000F55, 0x0000 }, /* R3925 - ANC Coefficient */ + { 0x00000F56, 0x0000 }, /* R3926 - ANC Coefficient */ + { 0x00000F57, 0x0000 }, /* R3927 - ANC Coefficient */ + { 0x00000F58, 0x0000 }, /* R3928 - ANC Coefficient */ + { 0x00000F59, 0x0000 }, /* R3929 - ANC Coefficient */ + { 0x00000F5A, 0x0000 }, /* R3930 - ANC Coefficient */ + { 0x00000F5B, 0x0000 }, /* R3931 - ANC Coefficient */ + { 0x00000F5C, 0x0000 }, /* R3932 - ANC Coefficient */ + { 0x00000F5D, 0x0000 }, /* R3933 - ANC Coefficient */ + { 0x00000F5E, 0x0000 }, /* R3934 - ANC Coefficient */ + { 0x00000F5F, 0x0000 }, /* R3935 - ANC Coefficient */ + { 0x00000F60, 0x0000 }, /* R3936 - ANC Coefficient */ + { 0x00000F61, 0x0000 }, /* R3937 - ANC Coefficient */ + { 0x00000F62, 0x0000 }, /* R3938 - ANC Coefficient */ + { 0x00000F63, 0x0000 }, /* R3939 - ANC Coefficient */ + { 0x00000F64, 0x0000 }, /* R3940 - ANC Coefficient */ + { 0x00000F65, 0x0000 }, /* R3941 - ANC Coefficient */ + { 0x00000F66, 0x0000 }, /* R3942 - ANC Coefficient */ + { 0x00000F67, 0x0000 }, /* R3943 - ANC Coefficient */ + { 0x00000F68, 0x0000 }, /* R3944 - ANC Coefficient */ + { 0x00000F69, 0x0000 }, /* R3945 - ANC Coefficient */ + { 0x00000F70, 0x0000 }, /* R3952 - FCR Filter Control */ + { 0x00000F72, 0x0004 }, /* R3954 - FCR ADC Reformatter Control */ + { 0x00000F73, 0x0004 }, /* R3955 - ANC Coefficient */ + { 0x00000F74, 0x0002 }, /* R3956 - ANC Coefficient */ + { 0x00000F75, 0x0000 }, /* R3957 - ANC Coefficient */ + { 0x00000F76, 0x0010 }, /* R3958 - ANC Coefficient */ + { 0x00000F77, 0x0000 }, /* R3959 - ANC Coefficient */ + { 0x00000F78, 0x0000 }, /* R3960 - ANC Coefficient */ + { 0x00000F79, 0x0000 }, /* R3961 - ANC Coefficient */ + { 0x00000F7A, 0x0000 }, /* R3962 - ANC Coefficient */ + { 0x00000F7B, 0x0000 }, /* R3963 - ANC Coefficient */ + { 0x00000F7C, 0x0000 }, /* R3964 - ANC Coefficient */ + { 0x00000F7D, 0x0000 }, /* R3965 - ANC Coefficient */ + { 0x00000F7E, 0x0000 }, /* R3966 - ANC Coefficient */ + { 0x00000F7F, 0x0000 }, /* R3967 - ANC Coefficient */ + { 0x00000F80, 0x0000 }, /* R3968 - ANC Coefficient */ + { 0x00000F81, 0x0000 }, /* R3969 - ANC Coefficient */ + { 0x00000F82, 0x0000 }, /* R3970 - ANC Coefficient */ + { 0x00000F83, 0x0000 }, /* R3971 - ANC Coefficient */ + { 0x00000F84, 0x0000 }, /* R3972 
- ANC Coefficient */ + { 0x00000F85, 0x0000 }, /* R3973 - ANC Coefficient */ + { 0x00000F86, 0x0000 }, /* R3974 - ANC Coefficient */ + { 0x00000F87, 0x0000 }, /* R3975 - ANC Coefficient */ + { 0x00000F88, 0x0000 }, /* R3976 - ANC Coefficient */ + { 0x00000F89, 0x0000 }, /* R3977 - ANC Coefficient */ + { 0x00000F8A, 0x0000 }, /* R3978 - ANC Coefficient */ + { 0x00000F8B, 0x0000 }, /* R3979 - ANC Coefficient */ + { 0x00000F8C, 0x0000 }, /* R3980 - ANC Coefficient */ + { 0x00000F8D, 0x0000 }, /* R3981 - ANC Coefficient */ + { 0x00000F8E, 0x0000 }, /* R3982 - ANC Coefficient */ + { 0x00000F8F, 0x0000 }, /* R3983 - ANC Coefficient */ + { 0x00000F90, 0x0000 }, /* R3984 - ANC Coefficient */ + { 0x00000F91, 0x0000 }, /* R3985 - ANC Coefficient */ + { 0x00000F92, 0x0000 }, /* R3986 - ANC Coefficient */ + { 0x00000F93, 0x0000 }, /* R3987 - ANC Coefficient */ + { 0x00000F94, 0x0000 }, /* R3988 - ANC Coefficient */ + { 0x00000F95, 0x0000 }, /* R3989 - ANC Coefficient */ + { 0x00000F96, 0x0000 }, /* R3990 - ANC Coefficient */ + { 0x00000F97, 0x0000 }, /* R3991 - ANC Coefficient */ + { 0x00000F98, 0x0000 }, /* R3992 - ANC Coefficient */ + { 0x00000F99, 0x0000 }, /* R3993 - ANC Coefficient */ + { 0x00000F9A, 0x0000 }, /* R3994 - ANC Coefficient */ + { 0x00000F9B, 0x0000 }, /* R3995 - ANC Coefficient */ + { 0x00000F9C, 0x0000 }, /* R3996 - ANC Coefficient */ + { 0x00000F9D, 0x0000 }, /* R3997 - ANC Coefficient */ + { 0x00000F9E, 0x0000 }, /* R3998 - ANC Coefficient */ + { 0x00000F9F, 0x0000 }, /* R3999 - ANC Coefficient */ + { 0x00000FA0, 0x0000 }, /* R4000 - ANC Coefficient */ + { 0x00000FA1, 0x0000 }, /* R4001 - ANC Coefficient */ + { 0x00000FA2, 0x0000 }, /* R4002 - ANC Coefficient */ + { 0x00000FA3, 0x0000 }, /* R4003 - ANC Coefficient */ + { 0x00000FA4, 0x0000 }, /* R4004 - ANC Coefficient */ + { 0x00000FA5, 0x0000 }, /* R4005 - ANC Coefficient */ + { 0x00000FA6, 0x0000 }, /* R4006 - ANC Coefficient */ + { 0x00000FA7, 0x0000 }, /* R4007 - ANC Coefficient */ + { 0x00000FA8, 0x0000 }, /* R4008 - ANC Coefficient */ + { 0x00000FA9, 0x0000 }, /* R4009 - ANC Coefficient */ + { 0x00000FAA, 0x0000 }, /* R4010 - ANC Coefficient */ + { 0x00000FAB, 0x0000 }, /* R4011 - ANC Coefficient */ + { 0x00000FAC, 0x0000 }, /* R4012 - ANC Coefficient */ + { 0x00000FAD, 0x0000 }, /* R4013 - ANC Coefficient */ + { 0x00000FAE, 0x0000 }, /* R4014 - ANC Coefficient */ + { 0x00000FAF, 0x0000 }, /* R4015 - ANC Coefficient */ + { 0x00000FB0, 0x0000 }, /* R4016 - ANC Coefficient */ + { 0x00000FB1, 0x0000 }, /* R4017 - ANC Coefficient */ + { 0x00000FB2, 0x0000 }, /* R4018 - ANC Coefficient */ + { 0x00000FB3, 0x0000 }, /* R4019 - ANC Coefficient */ + { 0x00000FB4, 0x0000 }, /* R4020 - ANC Coefficient */ + { 0x00000FB5, 0x0000 }, /* R4021 - ANC Coefficient */ + { 0x00000FB6, 0x0000 }, /* R4022 - ANC Coefficient */ + { 0x00000FB7, 0x0000 }, /* R4023 - ANC Coefficient */ + { 0x00000FB8, 0x0000 }, /* R4024 - ANC Coefficient */ + { 0x00000FB9, 0x0000 }, /* R4025 - ANC Coefficient */ + { 0x00000FBA, 0x0000 }, /* R4026 - ANC Coefficient */ + { 0x00000FBB, 0x0000 }, /* R4027 - ANC Coefficient */ + { 0x00000FBC, 0x0000 }, /* R4028 - ANC Coefficient */ + { 0x00000FBD, 0x0000 }, /* R4029 - ANC Coefficient */ + { 0x00000FBE, 0x0000 }, /* R4030 - ANC Coefficient */ + { 0x00000FBF, 0x0000 }, /* R4031 - ANC Coefficient */ + { 0x00000FC0, 0x0000 }, /* R4032 - ANC Coefficient */ + { 0x00000FC1, 0x0000 }, /* R4033 - ANC Coefficient */ + { 0x00000FC2, 0x0000 }, /* R4034 - ANC Coefficient */ + { 0x00000FC3, 0x0000 }, /* R4035 - ANC Coefficient */ + { 
0x00000FC4, 0x0000 }, /* R4036 - ANC Coefficient */ { 0x00001100, 0x0010 }, /* R4352 - DSP1 Control 1 */ { 0x00001200, 0x0010 }, /* R4608 - DSP2 Control 1 */ { 0x00001300, 0x0010 }, /* R4864 - DSP3 Control 1 */ @@ -2710,6 +2889,13 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_CLOCK_CONTROL: case ARIZONA_ANC_SRC: case ARIZONA_DSP_STATUS: + case ARIZONA_ANC_COEFF_START ... ARIZONA_ANC_COEFF_END: + case ARIZONA_FCL_FILTER_CONTROL: + case ARIZONA_FCL_ADC_REFORMATTER_CONTROL: + case ARIZONA_FCL_COEFF_START ... ARIZONA_FCL_COEFF_END: + case ARIZONA_FCR_FILTER_CONTROL: + case ARIZONA_FCR_ADC_REFORMATTER_CONTROL: + case ARIZONA_FCR_COEFF_START ... ARIZONA_FCR_COEFF_END: case ARIZONA_DSP1_CONTROL_1: case ARIZONA_DSP1_CLOCKING_1: case ARIZONA_DSP1_STATUS_1: diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index c7c11c900196..cd7e78eae006 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -1065,6 +1065,16 @@ #define ARIZONA_CLOCK_CONTROL 0xF00 #define ARIZONA_ANC_SRC 0xF01 #define ARIZONA_DSP_STATUS 0xF02 +#define ARIZONA_ANC_COEFF_START 0xF08 +#define ARIZONA_ANC_COEFF_END 0xF12 +#define ARIZONA_FCL_FILTER_CONTROL 0xF15 +#define ARIZONA_FCL_ADC_REFORMATTER_CONTROL 0xF17 +#define ARIZONA_FCL_COEFF_START 0xF18 +#define ARIZONA_FCL_COEFF_END 0xF69 +#define ARIZONA_FCR_FILTER_CONTROL 0xF70 +#define ARIZONA_FCR_ADC_REFORMATTER_CONTROL 0xF72 +#define ARIZONA_FCR_COEFF_START 0xF73 +#define ARIZONA_FCR_COEFF_END 0xFC4 #define ARIZONA_DSP1_CONTROL_1 0x1100 #define ARIZONA_DSP1_CLOCKING_1 0x1101 #define ARIZONA_DSP1_STATUS_1 0x1104 @@ -8050,6 +8060,66 @@ #define ARIZONA_ISRC3_NOTCH_ENA_SHIFT 0 /* ISRC3_NOTCH_ENA */ #define ARIZONA_ISRC3_NOTCH_ENA_WIDTH 1 /* ISRC3_NOTCH_ENA */ +/* + * R3840 (0xF00) - Clock Control + */ +#define ARIZONA_EXT_NG_SEL_CLR 0x0080 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_CLR_MASK 0x0080 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_CLR_SHIFT 7 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_CLR_WIDTH 1 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_SET 0x0040 /* EXT_NG_SEL_SET */ +#define ARIZONA_EXT_NG_SEL_SET_MASK 0x0040 /* EXT_NG_SEL_SET */ +#define ARIZONA_EXT_NG_SEL_SET_SHIFT 6 /* EXT_NG_SEL_SET */ +#define ARIZONA_EXT_NG_SEL_SET_WIDTH 1 /* EXT_NG_SEL_SET */ +#define ARIZONA_CLK_R_ENA_CLR 0x0020 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_CLR_MASK 0x0020 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_CLR_SHIFT 5 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_CLR_WIDTH 1 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_SET 0x0010 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_R_ENA_SET_MASK 0x0010 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_R_ENA_SET_SHIFT 4 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_R_ENA_SET_WIDTH 1 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_CLR 0x0008 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_CLR_MASK 0x0008 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_CLR_SHIFT 3 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_CLR_WIDTH 1 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_SET 0x0004 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_SET_MASK 0x0004 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_SET_SHIFT 2 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_SET_WIDTH 1 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_L_ENA_CLR 0x0002 /* CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_CLR_MASK 0x0002 /* CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_CLR_SHIFT 1 /* CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_CLR_WIDTH 1 /* 
CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_SET 0x0001 /* CLK_L_ENA_SET */ +#define ARIZONA_CLK_L_ENA_SET_MASK 0x0001 /* CLK_L_ENA_SET */ +#define ARIZONA_CLK_L_ENA_SET_SHIFT 0 /* CLK_L_ENA_SET */ +#define ARIZONA_CLK_L_ENA_SET_WIDTH 1 /* CLK_L_ENA_SET */ + +/* + * R3841 (0xF01) - ANC SRC + */ +#define ARIZONA_IN_RXANCR_SEL_MASK 0x0070 /* IN_RXANCR_SEL - [4:6] */ +#define ARIZONA_IN_RXANCR_SEL_SHIFT 4 /* IN_RXANCR_SEL - [4:6] */ +#define ARIZONA_IN_RXANCR_SEL_WIDTH 3 /* IN_RXANCR_SEL - [4:6] */ +#define ARIZONA_IN_RXANCL_SEL_MASK 0x0007 /* IN_RXANCL_SEL - [0:2] */ +#define ARIZONA_IN_RXANCL_SEL_SHIFT 0 /* IN_RXANCL_SEL - [0:2] */ +#define ARIZONA_IN_RXANCL_SEL_WIDTH 3 /* IN_RXANCL_SEL - [0:2] */ + +/* + * R3863 (0xF17) - FCL ADC Reformatter Control + */ +#define ARIZONA_FCL_MIC_MODE_SEL 0x000C /* FCL_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCL_MIC_MODE_SEL_SHIFT 2 /* FCL_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCL_MIC_MODE_SEL_WIDTH 2 /* FCL_MIC_MODE_SEL - [2:3] */ + +/* + * R3954 (0xF72) - FCR ADC Reformatter Control + */ +#define ARIZONA_FCR_MIC_MODE_SEL 0x000C /* FCR_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCR_MIC_MODE_SEL_SHIFT 2 /* FCR_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCR_MIC_MODE_SEL_WIDTH 2 /* FCR_MIC_MODE_SEL - [2:3] */ + /* * R4352 (0x1100) - DSP1 Control 1 */ -- cgit v1.2.3 From f90d2e4035d456cb20c0b784725d556eb4de4d8a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 15 Sep 2015 15:19:45 +0200 Subject: mmc: core: Convert __mmc_switch() into an internal core function As there are no users of the __mmc_switch() API, except for the mmc core itself, let's convert it from an exported function into an internal. Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_ops.c | 1 - drivers/mmc/core/mmc_ops.h | 3 +++ include/linux/mmc/core.h | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 0e9ae1c276c8..57f3a21783e9 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -579,7 +579,6 @@ out: return err; } -EXPORT_SYMBOL_GPL(__mmc_switch); int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, unsigned int timeout_ms) diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index f498f9ae21f0..f1b8e81aaa28 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -28,6 +28,9 @@ int mmc_bus_test(struct mmc_card *card, u8 bus_width); int mmc_send_hpi_cmd(struct mmc_card *card, u32 *status); int mmc_can_ext_csd(struct mmc_card *card); int mmc_switch_status_error(struct mmc_host *host, u32 status); +int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, + unsigned int timeout_ms, bool use_busy_signal, bool send_status, + bool ignore_crc); #endif diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 258daf914c6d..79a31d3c3788 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -152,8 +152,6 @@ extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *); extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); -extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool, - bool, bool); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); extern int mmc_send_tuning(struct mmc_host *host); extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); -- cgit v1.2.3 From 9eadcc0581a8ccaf4c2378aa1c193fb164304f1d Mon Sep 17 
00:00:00 2001 From: Ulf Hansson Date: Fri, 2 Oct 2015 10:56:11 +0200 Subject: mmc: core: Remove MMC_CLKGATE MMC_CLKGATE was once invented to save power by gating the bus clock at request inactivity. At that time it served its purpose. The modern way to deal with power saving for these scenarios is by using runtime PM. Nowadays, several host drivers have deployed runtime PM, but for those that haven't and which still care about power saving at request inactivity, it's certainly time to deploy runtime PM, as it has been around for several years now. To simplify the mmc core code and thus decrease maintenance efforts, this patch removes all code related to MMC_CLKGATE. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij --- Documentation/mmc/mmc-dev-attrs.txt | 10 -- drivers/mmc/core/Kconfig | 10 -- drivers/mmc/core/core.c | 139 +++----------------- drivers/mmc/core/core.h | 3 - drivers/mmc/core/debugfs.c | 5 - drivers/mmc/core/host.c | 245 ------------------------------------ drivers/mmc/core/mmc.c | 2 - drivers/mmc/core/quirks.c | 18 --- drivers/mmc/core/sd.c | 2 - drivers/mmc/core/sdio.c | 7 +- drivers/mmc/core/sdio_irq.c | 14 +-- include/linux/mmc/card.h | 1 - include/linux/mmc/host.h | 32 ----- 13 files changed, 19 insertions(+), 469 deletions(-) (limited to 'include/linux') diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index 189bab09255a..caa555706f89 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -72,13 +72,3 @@ Note on raw_rpmb_size_mult: "raw_rpmb_size_mult" is a multiple of 128kB block. RPMB size in bytes is calculated by using the following equation: RPMB partition size = 128kB x raw_rpmb_size_mult - -SD/MMC/SDIO Clock Gating Attribute ================================== - -Read and write access is provided to following attribute. -This attribute appears only if CONFIG_MMC_CLKGATE is enabled. - - clkgate_delay Tune the clock gating delay with desired value in milliseconds. - -echo > /sys/class/mmc_host/mmcX/clkgate_delay diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig index 9ebee72d9c3f..4c33d7690f2f 100644 --- a/drivers/mmc/core/Kconfig +++ b/drivers/mmc/core/Kconfig @@ -1,13 +1,3 @@ # # MMC core configuration # - -config MMC_CLKGATE - bool "MMC host clock gating" - help - This will attempt to aggressively gate the clock to the MMC card. - This is done to save power due to gating off the logic and bus - noise when the MMC card is not in use. Your host driver has to - support handling this in order for it to be of any use. - - If unsure, say N.
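For host drivers that still want gating at request inactivity, the replacement named in the commit message is runtime PM, commonly with autosuspend. The following is a minimal sketch of that pattern for a hypothetical foo_mmc host driver with a gateable card-interface clock; it is illustrative only and not part of this patch.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/pm_runtime.h>

/* Hypothetical host-private data; only the gated clock matters here. */
struct foo_mmc_host {
	struct device *dev;
	struct clk *ciu_clk;		/* gateable card interface clock */
};

static int foo_mmc_runtime_suspend(struct device *dev)
{
	struct foo_mmc_host *host = dev_get_drvdata(dev);

	/* Gate the bus clock once requests have been idle long enough. */
	clk_disable_unprepare(host->ciu_clk);
	return 0;
}

static int foo_mmc_runtime_resume(struct device *dev)
{
	struct foo_mmc_host *host = dev_get_drvdata(dev);

	/* Ungate the bus clock again before the next request is issued. */
	return clk_prepare_enable(host->ciu_clk);
}

static const struct dev_pm_ops foo_mmc_pm_ops = {
	SET_RUNTIME_PM_OPS(foo_mmc_runtime_suspend,
			   foo_mmc_runtime_resume, NULL)
};

/* Called from probe: allow autosuspend after 50 ms (arbitrary) of idle. */
static void foo_mmc_init_pm(struct foo_mmc_host *host)
{
	pm_runtime_set_autosuspend_delay(host->dev, 50);
	pm_runtime_use_autosuspend(host->dev);
	pm_runtime_enable(host->dev);
}

/* Brackets each request, replacing mmc_host_clk_hold()/_release(). */
static void foo_mmc_do_request(struct foo_mmc_host *host)
{
	pm_runtime_get_sync(host->dev);
	/* ... program the controller and complete the request ... */
	pm_runtime_mark_last_busy(host->dev);
	pm_runtime_put_autosuspend(host->dev);
}

The autosuspend delay plays the role the old clkgate_delay attribute used to play, but the gating policy now lives in the PM core rather than in the mmc core.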
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 21cda23b247c..1a36b021b44e 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -187,8 +187,6 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) if (mrq->done) mrq->done(mrq); - - mmc_host_clk_release(host); } } @@ -292,7 +290,6 @@ static int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) mrq->stop->mrq = mrq; } } - mmc_host_clk_hold(host); led_trigger_event(host->led, LED_FULL); __mmc_start_request(host, mrq); @@ -542,11 +539,8 @@ static void mmc_wait_for_req_done(struct mmc_host *host, static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq, bool is_first_req) { - if (host->ops->pre_req) { - mmc_host_clk_hold(host); + if (host->ops->pre_req) host->ops->pre_req(host, mrq, is_first_req); - mmc_host_clk_release(host); - } } /** @@ -561,11 +555,8 @@ static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq, static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq, int err) { - if (host->ops->post_req) { - mmc_host_clk_hold(host); + if (host->ops->post_req) host->ops->post_req(host, mrq, err); - mmc_host_clk_release(host); - } } /** @@ -850,9 +841,9 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card) unsigned int timeout_us, limit_us; timeout_us = data->timeout_ns / 1000; - if (mmc_host_clk_rate(card->host)) + if (card->host->ios.clock) timeout_us += data->timeout_clks * 1000 / - (mmc_host_clk_rate(card->host) / 1000); + (card->host->ios.clock / 1000); if (data->flags & MMC_DATA_WRITE) /* @@ -1050,8 +1041,6 @@ static inline void mmc_set_ios(struct mmc_host *host) ios->power_mode, ios->chip_select, ios->vdd, ios->bus_width, ios->timing); - if (ios->clock > 0) - mmc_set_ungated(host); host->ops->set_ios(host, ios); } @@ -1060,17 +1049,15 @@ static inline void mmc_set_ios(struct mmc_host *host) */ void mmc_set_chip_select(struct mmc_host *host, int mode) { - mmc_host_clk_hold(host); host->ios.chip_select = mode; mmc_set_ios(host); - mmc_host_clk_release(host); } /* * Sets the host clock to the highest possible frequency that * is below "hz". */ -static void __mmc_set_clock(struct mmc_host *host, unsigned int hz) +void mmc_set_clock(struct mmc_host *host, unsigned int hz) { WARN_ON(hz && hz < host->f_min); @@ -1081,68 +1068,6 @@ static void __mmc_set_clock(struct mmc_host *host, unsigned int hz) mmc_set_ios(host); } -void mmc_set_clock(struct mmc_host *host, unsigned int hz) -{ - mmc_host_clk_hold(host); - __mmc_set_clock(host, hz); - mmc_host_clk_release(host); -} - -#ifdef CONFIG_MMC_CLKGATE -/* - * This gates the clock by setting it to 0 Hz. - */ -void mmc_gate_clock(struct mmc_host *host) -{ - unsigned long flags; - - spin_lock_irqsave(&host->clk_lock, flags); - host->clk_old = host->ios.clock; - host->ios.clock = 0; - host->clk_gated = true; - spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_set_ios(host); -} - -/* - * This restores the clock from gating by using the cached - * clock value. - */ -void mmc_ungate_clock(struct mmc_host *host) -{ - /* - * We should previously have gated the clock, so the clock shall - * be 0 here! The clock may however be 0 during initialization, - * when some request operations are performed before setting - * the frequency. When ungate is requested in that situation - * we just ignore the call. 
- */ - if (host->clk_old) { - BUG_ON(host->ios.clock); - /* This call will also set host->clk_gated to false */ - __mmc_set_clock(host, host->clk_old); - } -} - -void mmc_set_ungated(struct mmc_host *host) -{ - unsigned long flags; - - /* - * We've been given a new frequency while the clock is gated, - * so make sure we regard this as ungating it. - */ - spin_lock_irqsave(&host->clk_lock, flags); - host->clk_gated = false; - spin_unlock_irqrestore(&host->clk_lock, flags); -} - -#else -void mmc_set_ungated(struct mmc_host *host) -{ -} -#endif - int mmc_execute_tuning(struct mmc_card *card) { struct mmc_host *host = card->host; @@ -1157,9 +1082,7 @@ int mmc_execute_tuning(struct mmc_card *card) else opcode = MMC_SEND_TUNING_BLOCK; - mmc_host_clk_hold(host); err = host->ops->execute_tuning(host, opcode); - mmc_host_clk_release(host); if (err) pr_err("%s: tuning execution failed\n", mmc_hostname(host)); @@ -1174,10 +1097,8 @@ int mmc_execute_tuning(struct mmc_card *card) */ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode) { - mmc_host_clk_hold(host); host->ios.bus_mode = mode; mmc_set_ios(host); - mmc_host_clk_release(host); } /* @@ -1185,10 +1106,8 @@ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode) */ void mmc_set_bus_width(struct mmc_host *host, unsigned int width) { - mmc_host_clk_hold(host); host->ios.bus_width = width; mmc_set_ios(host); - mmc_host_clk_release(host); } /* @@ -1532,11 +1451,8 @@ int __mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage) int old_signal_voltage = host->ios.signal_voltage; host->ios.signal_voltage = signal_voltage; - if (host->ops->start_signal_voltage_switch) { - mmc_host_clk_hold(host); + if (host->ops->start_signal_voltage_switch) err = host->ops->start_signal_voltage_switch(host, &host->ios); - mmc_host_clk_release(host); - } if (err) host->ios.signal_voltage = old_signal_voltage; @@ -1570,20 +1486,17 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, u32 ocr) pr_warn("%s: cannot verify signal voltage switch\n", mmc_hostname(host)); - mmc_host_clk_hold(host); - cmd.opcode = SD_SWITCH_VOLTAGE; cmd.arg = 0; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; err = mmc_wait_for_cmd(host, &cmd, 0); if (err) - goto err_command; + return err; + + if (!mmc_host_is_spi(host) && (cmd.resp[0] & R1_ERROR)) + return -EIO; - if (!mmc_host_is_spi(host) && (cmd.resp[0] & R1_ERROR)) { - err = -EIO; - goto err_command; - } /* * The card should drive cmd and dat[0:3] low immediately * after the response of cmd11, but wait 1 ms to be sure @@ -1632,9 +1545,6 @@ power_cycle: mmc_power_cycle(host, ocr); } -err_command: - mmc_host_clk_release(host); - return err; } @@ -1643,10 +1553,8 @@ err_command: */ void mmc_set_timing(struct mmc_host *host, unsigned int timing) { - mmc_host_clk_hold(host); host->ios.timing = timing; mmc_set_ios(host); - mmc_host_clk_release(host); } /* @@ -1654,10 +1562,8 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing) */ void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type) { - mmc_host_clk_hold(host); host->ios.drv_type = drv_type; mmc_set_ios(host); - mmc_host_clk_release(host); } int mmc_select_drive_strength(struct mmc_card *card, unsigned int max_dtr, @@ -1665,7 +1571,6 @@ int mmc_select_drive_strength(struct mmc_card *card, unsigned int max_dtr, { struct mmc_host *host = card->host; int host_drv_type = SD_DRIVER_TYPE_B; - int drive_strength; *drv_type = 0; @@ -1688,14 +1593,10 @@ int mmc_select_drive_strength(struct mmc_card *card, unsigned int max_dtr, * 
information and let the hardware specific code * return what is possible given the options */ - mmc_host_clk_hold(host); - drive_strength = host->ops->select_drive_strength(card, max_dtr, - host_drv_type, - card_drv_type, - drv_type); - mmc_host_clk_release(host); - - return drive_strength; + return host->ops->select_drive_strength(card, max_dtr, + host_drv_type, + card_drv_type, + drv_type); } /* @@ -1714,8 +1615,6 @@ void mmc_power_up(struct mmc_host *host, u32 ocr) if (host->ios.power_mode == MMC_POWER_ON) return; - mmc_host_clk_hold(host); - mmc_pwrseq_pre_power_on(host); host->ios.vdd = fls(ocr) - 1; @@ -1749,8 +1648,6 @@ void mmc_power_up(struct mmc_host *host, u32 ocr) * time required to reach a stable voltage. */ mmc_delay(10); - - mmc_host_clk_release(host); } void mmc_power_off(struct mmc_host *host) @@ -1758,8 +1655,6 @@ void mmc_power_off(struct mmc_host *host) if (host->ios.power_mode == MMC_POWER_OFF) return; - mmc_host_clk_hold(host); - mmc_pwrseq_power_off(host); host->ios.clock = 0; @@ -1775,8 +1670,6 @@ void mmc_power_off(struct mmc_host *host) * can be successfully turned on again. */ mmc_delay(1); - - mmc_host_clk_release(host); } void mmc_power_cycle(struct mmc_host *host, u32 ocr) @@ -1992,7 +1885,7 @@ static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, */ timeout_clks <<= 1; timeout_us += (timeout_clks * 1000) / - (mmc_host_clk_rate(card->host) / 1000); + (card->host->ios.clock / 1000); erase_timeout = timeout_us / 1000; @@ -2440,9 +2333,7 @@ static void mmc_hw_reset_for_init(struct mmc_host *host) { if (!(host->caps & MMC_CAP_HW_RESET) || !host->ops->hw_reset) return; - mmc_host_clk_hold(host); host->ops->hw_reset(host); - mmc_host_clk_release(host); } int mmc_hw_reset(struct mmc_host *host) diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 1a22a82209b2..09241e56d628 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -40,9 +40,6 @@ void mmc_init_erase(struct mmc_card *card); void mmc_set_chip_select(struct mmc_host *host, int mode); void mmc_set_clock(struct mmc_host *host, unsigned int hz); -void mmc_gate_clock(struct mmc_host *host); -void mmc_ungate_clock(struct mmc_host *host); -void mmc_set_ungated(struct mmc_host *host); void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode); void mmc_set_bus_width(struct mmc_host *host, unsigned int width); u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index f4db93e03e88..154aced0b91b 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -255,11 +255,6 @@ void mmc_add_host_debugfs(struct mmc_host *host) &mmc_clock_fops)) goto err_node; -#ifdef CONFIG_MMC_CLKGATE - if (!debugfs_create_u32("clk_delay", (S_IRUSR | S_IWUSR), - root, &host->clk_delay)) - goto err_node; -#endif #ifdef CONFIG_FAIL_MMC_REQUEST if (fail_request) setup_fault_attr(&fail_default_attr, fail_request); diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 5466f25f0281..970e6906930b 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -61,246 +61,6 @@ void mmc_unregister_host_class(void) class_unregister(&mmc_host_class); } -#ifdef CONFIG_MMC_CLKGATE -static ssize_t clkgate_delay_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct mmc_host *host = cls_dev_to_mmc_host(dev); - return snprintf(buf, PAGE_SIZE, "%lu\n", host->clkgate_delay); -} - -static ssize_t clkgate_delay_store(struct device *dev, - struct device_attribute *attr, const char 
*buf, size_t count) -{ - struct mmc_host *host = cls_dev_to_mmc_host(dev); - unsigned long flags, value; - - if (kstrtoul(buf, 0, &value)) - return -EINVAL; - - spin_lock_irqsave(&host->clk_lock, flags); - host->clkgate_delay = value; - spin_unlock_irqrestore(&host->clk_lock, flags); - return count; -} - -/* - * Enabling clock gating will make the core call out to the host - * once up and once down when it performs a request or card operation - * intermingled in any fashion. The driver will see this through - * set_ios() operations with ios.clock field set to 0 to gate (disable) - * the block clock, and to the old frequency to enable it again. - */ -static void mmc_host_clk_gate_delayed(struct mmc_host *host) -{ - unsigned long tick_ns; - unsigned long freq = host->ios.clock; - unsigned long flags; - - if (!freq) { - pr_debug("%s: frequency set to 0 in disable function, " - "this means the clock is already disabled.\n", - mmc_hostname(host)); - return; - } - /* - * New requests may have appeared while we were scheduling, - * then there is no reason to delay the check before - * clk_disable(). - */ - spin_lock_irqsave(&host->clk_lock, flags); - - /* - * Delay n bus cycles (at least 8 from MMC spec) before attempting - * to disable the MCI block clock. The reference count may have - * gone up again after this delay due to rescheduling! - */ - if (!host->clk_requests) { - spin_unlock_irqrestore(&host->clk_lock, flags); - tick_ns = DIV_ROUND_UP(1000000000, freq); - ndelay(host->clk_delay * tick_ns); - } else { - /* New users appeared while waiting for this work */ - spin_unlock_irqrestore(&host->clk_lock, flags); - return; - } - mutex_lock(&host->clk_gate_mutex); - spin_lock_irqsave(&host->clk_lock, flags); - if (!host->clk_requests) { - spin_unlock_irqrestore(&host->clk_lock, flags); - /* This will set host->ios.clock to 0 */ - mmc_gate_clock(host); - spin_lock_irqsave(&host->clk_lock, flags); - pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); - } - spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); -} - -/* - * Internal work. Work to disable the clock at some later point. - */ -static void mmc_host_clk_gate_work(struct work_struct *work) -{ - struct mmc_host *host = container_of(work, struct mmc_host, - clk_gate_work.work); - - mmc_host_clk_gate_delayed(host); -} - -/** - * mmc_host_clk_hold - ungate hardware MCI clocks - * @host: host to ungate. - * - * Makes sure the host ios.clock is restored to a non-zero value - * past this call. Increase clock reference count and ungate clock - * if we're the first user. - */ -void mmc_host_clk_hold(struct mmc_host *host) -{ - unsigned long flags; - - /* cancel any clock gating work scheduled by mmc_host_clk_release() */ - cancel_delayed_work_sync(&host->clk_gate_work); - mutex_lock(&host->clk_gate_mutex); - spin_lock_irqsave(&host->clk_lock, flags); - if (host->clk_gated) { - spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_ungate_clock(host); - spin_lock_irqsave(&host->clk_lock, flags); - pr_debug("%s: ungated MCI clock\n", mmc_hostname(host)); - } - host->clk_requests++; - spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); -} - -/** - * mmc_host_may_gate_card - check if this card may be gated - * @card: card to check. - */ -static bool mmc_host_may_gate_card(struct mmc_card *card) -{ - /* If there is no card we may gate it */ - if (!card) - return true; - /* - * Don't gate SDIO cards! 
These need to be clocked at all times - * since they may be independent systems generating interrupts - * and other events. The clock requests counter from the core will - * go down to zero since the core does not need it, but we will not - * gate the clock, because there is somebody out there that may still - * be using it. - */ - return !(card->quirks & MMC_QUIRK_BROKEN_CLK_GATING); -} - -/** - * mmc_host_clk_release - gate off hardware MCI clocks - * @host: host to gate. - * - * Calls the host driver with ios.clock set to zero as often as possible - * in order to gate off hardware MCI clocks. Decrease clock reference - * count and schedule disabling of clock. - */ -void mmc_host_clk_release(struct mmc_host *host) -{ - unsigned long flags; - - spin_lock_irqsave(&host->clk_lock, flags); - host->clk_requests--; - if (mmc_host_may_gate_card(host->card) && - !host->clk_requests) - schedule_delayed_work(&host->clk_gate_work, - msecs_to_jiffies(host->clkgate_delay)); - spin_unlock_irqrestore(&host->clk_lock, flags); -} - -/** - * mmc_host_clk_rate - get current clock frequency setting - * @host: host to get the clock frequency for. - * - * Returns current clock frequency regardless of gating. - */ -unsigned int mmc_host_clk_rate(struct mmc_host *host) -{ - unsigned long freq; - unsigned long flags; - - spin_lock_irqsave(&host->clk_lock, flags); - if (host->clk_gated) - freq = host->clk_old; - else - freq = host->ios.clock; - spin_unlock_irqrestore(&host->clk_lock, flags); - return freq; -} - -/** - * mmc_host_clk_init - set up clock gating code - * @host: host with potential clock to control - */ -static inline void mmc_host_clk_init(struct mmc_host *host) -{ - host->clk_requests = 0; - /* Hold MCI clock for 8 cycles by default */ - host->clk_delay = 8; - /* - * Default clock gating delay is 0ms to avoid wasting power. - * This value can be tuned by writing into sysfs entry. - */ - host->clkgate_delay = 0; - host->clk_gated = false; - INIT_DELAYED_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); - spin_lock_init(&host->clk_lock); - mutex_init(&host->clk_gate_mutex); -} - -/** - * mmc_host_clk_exit - shut down clock gating code - * @host: host with potential clock to control - */ -static inline void mmc_host_clk_exit(struct mmc_host *host) -{ - /* - * Wait for any outstanding gate and then make sure we're - * ungated before exiting. 
- */ - if (cancel_delayed_work_sync(&host->clk_gate_work)) - mmc_host_clk_gate_delayed(host); - if (host->clk_gated) - mmc_host_clk_hold(host); - /* There should be only one user now */ - WARN_ON(host->clk_requests > 1); -} - -static inline void mmc_host_clk_sysfs_init(struct mmc_host *host) -{ - host->clkgate_delay_attr.show = clkgate_delay_show; - host->clkgate_delay_attr.store = clkgate_delay_store; - sysfs_attr_init(&host->clkgate_delay_attr.attr); - host->clkgate_delay_attr.attr.name = "clkgate_delay"; - host->clkgate_delay_attr.attr.mode = S_IRUGO | S_IWUSR; - if (device_create_file(&host->class_dev, &host->clkgate_delay_attr)) - pr_err("%s: Failed to create clkgate_delay sysfs entry\n", - mmc_hostname(host)); -} -#else - -static inline void mmc_host_clk_init(struct mmc_host *host) -{ -} - -static inline void mmc_host_clk_exit(struct mmc_host *host) -{ -} - -static inline void mmc_host_clk_sysfs_init(struct mmc_host *host) -{ -} - -#endif - void mmc_retune_enable(struct mmc_host *host) { host->can_retune = 1; @@ -583,8 +343,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) return NULL; } - mmc_host_clk_init(host); - spin_lock_init(&host->lock); init_waitqueue_head(&host->wq); INIT_DELAYED_WORK(&host->detect, mmc_rescan); @@ -633,7 +391,6 @@ int mmc_add_host(struct mmc_host *host) #ifdef CONFIG_DEBUG_FS mmc_add_host_debugfs(host); #endif - mmc_host_clk_sysfs_init(host); mmc_start_host(host); register_pm_notifier(&host->pm_notify); @@ -663,8 +420,6 @@ void mmc_remove_host(struct mmc_host *host) device_del(&host->class_dev); led_trigger_unregister_simple(host->led); - - mmc_host_clk_exit(host); } EXPORT_SYMBOL(mmc_remove_host); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f6cd995dbe92..479b84a00bd7 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1931,14 +1931,12 @@ static int mmc_reset(struct mmc_host *host) if (!mmc_can_reset(card)) return -EOPNOTSUPP; - mmc_host_clk_hold(host); mmc_set_clock(host, host->f_init); host->ops->hw_reset(host); /* Set initial state and call mmc_set_ios */ mmc_set_initial_state(host); - mmc_host_clk_release(host); return mmc_init_card(host, card->ocr, card); } diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c index dd1d1e0fe322..fad660b95809 100644 --- a/drivers/mmc/core/quirks.c +++ b/drivers/mmc/core/quirks.c @@ -35,25 +35,7 @@ #define SDIO_DEVICE_ID_MARVELL_8797_F0 0x9128 #endif -/* - * This hook just adds a quirk for all sdio devices - */ -static void add_quirk_for_sdio_devices(struct mmc_card *card, int data) -{ - if (mmc_card_sdio(card)) - card->quirks |= data; -} - static const struct mmc_fixup mmc_fixup_methods[] = { - /* by default sdio devices are considered CLK_GATING broken */ - /* good cards will be whitelisted as they are tested */ - SDIO_FIXUP(SDIO_ANY_ID, SDIO_ANY_ID, - add_quirk_for_sdio_devices, - MMC_QUIRK_BROKEN_CLK_GATING), - - SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, - remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING), - SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, add_quirk, MMC_QUIRK_NONSTD_FUNC_IF), diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index e124db0fc178..b1b9200a4715 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -800,9 +800,7 @@ static int mmc_sd_get_ro(struct mmc_host *host) if (!host->ops->get_ro) return -1; - mmc_host_clk_hold(host); ro = host->ops->get_ro(host); - mmc_host_clk_release(host); return ro; } diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 
95bc1014b7a2..16d838e6d623 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -956,13 +956,10 @@ static int mmc_sdio_resume(struct mmc_host *host) } if (!err && host->sdio_irqs) { - if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) { + if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) wake_up_process(host->sdio_irq_thread); - } else if (host->caps & MMC_CAP_SDIO_IRQ) { - mmc_host_clk_hold(host); + else if (host->caps & MMC_CAP_SDIO_IRQ) host->ops->enable_sdio_irq(host, 1); - mmc_host_clk_release(host); - } } mmc_release_host(host); diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index 09cc67d028f0..91bbbfb29f3f 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -168,21 +168,15 @@ static int sdio_irq_thread(void *_host) } set_current_state(TASK_INTERRUPTIBLE); - if (host->caps & MMC_CAP_SDIO_IRQ) { - mmc_host_clk_hold(host); + if (host->caps & MMC_CAP_SDIO_IRQ) host->ops->enable_sdio_irq(host, 1); - mmc_host_clk_release(host); - } if (!kthread_should_stop()) schedule_timeout(period); set_current_state(TASK_RUNNING); } while (!kthread_should_stop()); - if (host->caps & MMC_CAP_SDIO_IRQ) { - mmc_host_clk_hold(host); + if (host->caps & MMC_CAP_SDIO_IRQ) host->ops->enable_sdio_irq(host, 0); - mmc_host_clk_release(host); - } pr_debug("%s: IRQ thread exiting with code %d\n", mmc_hostname(host), ret); @@ -208,9 +202,7 @@ static int sdio_card_irq_get(struct mmc_card *card) return err; } } else if (host->caps & MMC_CAP_SDIO_IRQ) { - mmc_host_clk_hold(host); host->ops->enable_sdio_irq(host, 1); - mmc_host_clk_release(host); } } @@ -229,9 +221,7 @@ static int sdio_card_irq_put(struct mmc_card *card) atomic_set(&host->sdio_irq_thread_abort, 1); kthread_stop(host->sdio_irq_thread); } else if (host->caps & MMC_CAP_SDIO_IRQ) { - mmc_host_clk_hold(host); host->ops->enable_sdio_irq(host, 0); - mmc_host_clk_release(host); } } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index fdd0779ccdfa..eb0151bac50c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -269,7 +269,6 @@ struct mmc_card { /* for byte mode */ #define MMC_QUIRK_NONSTD_SDIO (1<<2) /* non-standard SDIO card attached */ /* (missing CIA registers) */ -#define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ #define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ #define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 83b81fd865f3..cfb3c99a6b4b 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -292,18 +292,6 @@ struct mmc_host { mmc_pm_flag_t pm_caps; /* supported pm features */ -#ifdef CONFIG_MMC_CLKGATE - int clk_requests; /* internal reference counter */ - unsigned int clk_delay; /* number of MCI clk hold cycles */ - bool clk_gated; /* clock gated */ - struct delayed_work clk_gate_work; /* delayed clock gate */ - unsigned int clk_old; /* old clock value cache */ - spinlock_t clk_lock; /* lock for clk fields */ - struct mutex clk_gate_mutex; /* mutex for clock gating */ - struct device_attribute clkgate_delay_attr; - unsigned long clkgate_delay; -#endif - /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ unsigned short max_segs; /* see blk_queue_max_segments */ @@ -479,26 +467,6 @@ static inline int mmc_host_packed_wr(struct mmc_host *host) return 
host->caps2 & MMC_CAP2_PACKED_WR; } -#ifdef CONFIG_MMC_CLKGATE -void mmc_host_clk_hold(struct mmc_host *host); -void mmc_host_clk_release(struct mmc_host *host); -unsigned int mmc_host_clk_rate(struct mmc_host *host); - -#else -static inline void mmc_host_clk_hold(struct mmc_host *host) -{ -} - -static inline void mmc_host_clk_release(struct mmc_host *host) -{ -} - -static inline unsigned int mmc_host_clk_rate(struct mmc_host *host) -{ - return host->ios.clock; -} -#endif - static inline int mmc_card_hs(struct mmc_card *card) { return card->host->ios.timing == MMC_TIMING_SD_HS || -- cgit v1.2.3 From 2086f801cb2a796279e817e68255654c4cfd3be3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 12 Oct 2015 14:48:25 +0200 Subject: mmc: core: Add mmc_regulator_set_vqmmc() This adds logic to the MMC core to set VQMMC. This is expected to be called by MMC drivers like dw_mmc as part of (or instead of) their start_signal_voltage_switch() callback. A few notes: * When setting the signal voltage to 3.3V we do our best to make VQMMC and VMMC match. It's been reported that this makes some old cards happy since they were tested back in the day before UHS when VQMMC and VMMC were provided by the same regulator. A nice side effect of this is that we don't end up on the hairy edge of VQMMC (2.7V), which some EEs claim is a little too close to the minimum for comfort. This is done in two steps. At first we try to find a VQMMC within a 0.3V tolerance of VMMC and if this is not supported by the supplying regulator we try to find a suitable voltage within the whole 2.7V-3.6V area of the spec. * The two step approach is currently necessary, as the used regulator_set_voltage_triplet(min, target, max) uses a simple implementation that just tries two basic steps: regulator_set_voltage(target, max); regulator_set_voltage(min, target); So with only one step with 2.7-3.6V borders, if a suitable voltage is a bit below VMMC, we would directly get the lowest 2.7V which some boards (like Rockchips) don't like at all. * When setting the signal voltage to 1.8V or 1.2V we aim for that specific voltage instead of picking the lowest one in the range. * We very purposely don't print errors in mmc_regulator_set_vqmmc(). There are cases where the MMC core will try several different voltages and we don't want to pollute the logs. Signed-off-by: Douglas Anderson Signed-off-by: Heiko Stuebner Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 7 +++++ 2 files changed, 85 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 7aa3b305b65d..5ae89e48fd85 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1394,6 +1394,84 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc, } EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr); +static int mmc_regulator_set_voltage_if_supported(struct regulator *regulator, + int min_uV, int target_uV, + int max_uV) +{ + /* + * Check if supported first to avoid errors since we may try several + * signal levels during power up and don't want to show errors. + */ + if (!regulator_is_supported_voltage(regulator, min_uV, max_uV)) + return -EINVAL; + + return regulator_set_voltage_triplet(regulator, min_uV, target_uV, + max_uV); +} + +/** + * mmc_regulator_set_vqmmc - Set VQMMC as per the ios + * + * For 3.3V signaling, we try to match VQMMC to VMMC as closely as possible. 
+ * That will match the behavior of old boards where VQMMC and VMMC were supplied + * by the same supply. The Bus Operating conditions for 3.3V signaling in the + * SD card spec also define VQMMC in terms of VMMC. + * If this is not possible we'll try the full 2.7-3.6V of the spec. + * + * For 1.2V and 1.8V signaling we'll try to get as close as possible to the + * requested voltage. This is definitely a good idea for UHS where there's a + * separate regulator on the card that's trying to make 1.8V and it's best if + * we match. + * + * This function is expected to be used by a controller's + * start_signal_voltage_switch() function. + */ +int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct device *dev = mmc_dev(mmc); + int ret, volt, min_uV, max_uV; + + /* If no vqmmc supply then we can't change the voltage */ + if (IS_ERR(mmc->supply.vqmmc)) + return -EINVAL; + + switch (ios->signal_voltage) { + case MMC_SIGNAL_VOLTAGE_120: + return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc, + 1100000, 1200000, 1300000); + case MMC_SIGNAL_VOLTAGE_180: + return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc, + 1700000, 1800000, 1950000); + case MMC_SIGNAL_VOLTAGE_330: + ret = mmc_ocrbitnum_to_vdd(mmc->ios.vdd, &volt, &max_uV); + if (ret < 0) + return ret; + + dev_dbg(dev, "%s: found vmmc voltage range of %d-%duV\n", + __func__, volt, max_uV); + + min_uV = max(volt - 300000, 2700000); + max_uV = min(max_uV + 200000, 3600000); + + /* + * Due to a limitation in the current implementation of + * regulator_set_voltage_triplet(), which takes the lowest + * possible voltage if below the target, search for a suitable + * voltage in two steps and try to stay close to vmmc + * with a 0.3V tolerance at first. + */ + if (!mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc, + min_uV, volt, max_uV)) + return 0; + + return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc, + 2700000, volt, 3600000); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(mmc_regulator_set_vqmmc); + #endif /* CONFIG_REGULATOR */ int mmc_regulator_get_supply(struct mmc_host *mmc) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index cfb3c99a6b4b..8673ffe3d86e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -411,6 +411,7 @@ int mmc_regulator_get_ocrmask(struct regulator *supply); int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, unsigned short vdd_bit); +int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios); #else static inline int mmc_regulator_get_ocrmask(struct regulator *supply) { @@ -423,6 +424,12 @@ static inline int mmc_regulator_set_ocr(struct mmc_host *mmc, { return 0; } + +static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + return -EINVAL; +} #endif int mmc_regulator_get_supply(struct mmc_host *mmc); -- cgit v1.2.3 From 3fc7eaef44dbcbcd602b6bcd0ac6efba7a30b108 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 16 Sep 2015 14:41:23 +0800 Subject: mmc: dw_mmc: Add external dma interface support The DesignWare MMC Controller can support two types of DMA mode: external DMA and internal DMA. We have an RK312x platform that integrates dw_mmc and an ARM PL330 DMA controller. This patch adds edmac ops to support such platforms. I've tested it on an RK31xx platform in edmac mode and an RK3288 platform in idmac mode.
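As an aside on the mmc_regulator_set_vqmmc() helper introduced just above: it is meant to be called from a host controller's start_signal_voltage_switch() callback. A minimal sketch for a hypothetical foo_mmc driver (illustrative only, not part of either patch) could look like:

#include <linux/mmc/host.h>

static int foo_mmc_start_signal_voltage_switch(struct mmc_host *mmc,
					       struct mmc_ios *ios)
{
	int ret;

	/* Let the mmc core pick a suitable VQMMC for the requested level. */
	ret = mmc_regulator_set_vqmmc(mmc, ios);
	if (ret)
		return ret;

	/* Controller-specific switch sequencing would follow here. */
	return 0;
}

static const struct mmc_host_ops foo_mmc_ops = {
	/* ...other callbacks elided... */
	.start_signal_voltage_switch = foo_mmc_start_signal_voltage_switch,
};

Note that the helper returns -EINVAL when no vqmmc supply is present, so a driver that can also switch voltages by other means may choose to ignore that particular error.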
Signed-off-by: Shawn Lin Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 11 +- drivers/mmc/host/dw_mmc-pltfm.c | 2 + drivers/mmc/host/dw_mmc.c | 268 ++++++++++++++++++++++++++++++++-------- drivers/mmc/host/dw_mmc.h | 6 + include/linux/mmc/dw_mmc.h | 23 +++- 5 files changed, 246 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 56f5b1487008..ef54084315cc 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -615,15 +615,7 @@ config MMC_DW help This selects support for the Synopsys DesignWare Mobile Storage IP block, this provides host support for SD and MMC interfaces, in both - PIO and external DMA modes. - -config MMC_DW_IDMAC - bool "Internal DMAC interface" - depends on MMC_DW - help - This selects support for the internal DMAC block within the Synopsys - Designware Mobile Storage IP block. This disables the external DMA - interface. + PIO, internal DMA mode and external DMA mode. config MMC_DW_PLTFM tristate "Synopsys Designware MCI Support as platform device" @@ -652,7 +644,6 @@ config MMC_DW_K3 tristate "K3 specific extensions for Synopsys DW Memory Card Interface" depends on MMC_DW select MMC_DW_PLTFM - select MMC_DW_IDMAC help This selects support for Hisilicon K3 SoC specific extensions to the Synopsys DesignWare Memory Card Interface driver. Select this option diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c index ec6dbcdec693..7e1d13b68b06 100644 --- a/drivers/mmc/host/dw_mmc-pltfm.c +++ b/drivers/mmc/host/dw_mmc-pltfm.c @@ -59,6 +59,8 @@ int dw_mci_pltfm_register(struct platform_device *pdev, host->pdata = pdev->dev.platform_data; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + /* Get registers' physical base address */ + host->phy_regs = (void *)(regs->start); host->regs = devm_ioremap_resource(&pdev->dev, regs); if (IS_ERR(host->regs)) return PTR_ERR(host->regs); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index b1b7e7fa072c..7fe0315142e6 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -56,7 +56,6 @@ #define DW_MCI_FREQ_MAX 200000000 /* unit: HZ */ #define DW_MCI_FREQ_MIN 400000 /* unit: HZ */ -#ifdef CONFIG_MMC_DW_IDMAC #define IDMAC_INT_CLR (SDMMC_IDMAC_INT_AI | SDMMC_IDMAC_INT_NI | \ SDMMC_IDMAC_INT_CES | SDMMC_IDMAC_INT_DU | \ SDMMC_IDMAC_INT_FBE | SDMMC_IDMAC_INT_RI | \ @@ -102,7 +101,6 @@ struct idmac_desc { /* Each descriptor can transfer up to 4KB of data in chained mode */ #define DW_MCI_DESC_DATA_LENGTH 0x1000 -#endif /* CONFIG_MMC_DW_IDMAC */ static bool dw_mci_reset(struct dw_mci *host); static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset); @@ -407,7 +405,6 @@ static int dw_mci_get_dma_dir(struct mmc_data *data) return DMA_FROM_DEVICE; } -#ifdef CONFIG_MMC_DW_IDMAC static void dw_mci_dma_cleanup(struct dw_mci *host) { struct mmc_data *data = host->data; @@ -445,12 +442,21 @@ static void dw_mci_idmac_stop_dma(struct dw_mci *host) mci_writel(host, BMOD, temp); } -static void dw_mci_idmac_complete_dma(struct dw_mci *host) +static void dw_mci_dmac_complete_dma(void *arg) { + struct dw_mci *host = arg; struct mmc_data *data = host->data; dev_vdbg(host->dev, "DMA complete\n"); + if ((host->use_dma == TRANS_MODE_EDMAC) && + data && (data->flags & MMC_DATA_READ)) + /* Invalidate cache after read */ + dma_sync_sg_for_cpu(mmc_dev(host->cur_slot->mmc), + data->sg, + data->sg_len, + DMA_FROM_DEVICE); + host->dma_ops->cleanup(host); /* @@ -564,7 
+570,7 @@ static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data, wmb(); /* drain writebuffer */ } -static void dw_mci_idmac_start_dma(struct dw_mci *host, unsigned int sg_len) +static int dw_mci_idmac_start_dma(struct dw_mci *host, unsigned int sg_len) { u32 temp; @@ -589,6 +595,8 @@ static void dw_mci_idmac_start_dma(struct dw_mci *host, unsigned int sg_len) /* Start it running */ mci_writel(host, PLDMND, 1); + + return 0; } static int dw_mci_idmac_init(struct dw_mci *host) @@ -669,10 +677,112 @@ static const struct dw_mci_dma_ops dw_mci_idmac_ops = { .init = dw_mci_idmac_init, .start = dw_mci_idmac_start_dma, .stop = dw_mci_idmac_stop_dma, - .complete = dw_mci_idmac_complete_dma, + .complete = dw_mci_dmac_complete_dma, + .cleanup = dw_mci_dma_cleanup, +}; + +static void dw_mci_edmac_stop_dma(struct dw_mci *host) +{ + dmaengine_terminate_all(host->dms->ch); +} + +static int dw_mci_edmac_start_dma(struct dw_mci *host, + unsigned int sg_len) +{ + struct dma_slave_config cfg; + struct dma_async_tx_descriptor *desc = NULL; + struct scatterlist *sgl = host->data->sg; + const u32 mszs[] = {1, 4, 8, 16, 32, 64, 128, 256}; + u32 sg_elems = host->data->sg_len; + u32 fifoth_val; + u32 fifo_offset = host->fifo_reg - host->regs; + int ret = 0; + + /* Set external dma config: burst size, burst width */ + cfg.dst_addr = (dma_addr_t)(host->phy_regs + fifo_offset); + cfg.src_addr = cfg.dst_addr; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + + /* Match burst msize with external dma config */ + fifoth_val = mci_readl(host, FIFOTH); + cfg.dst_maxburst = mszs[(fifoth_val >> 28) & 0x7]; + cfg.src_maxburst = cfg.dst_maxburst; + + if (host->data->flags & MMC_DATA_WRITE) + cfg.direction = DMA_MEM_TO_DEV; + else + cfg.direction = DMA_DEV_TO_MEM; + + ret = dmaengine_slave_config(host->dms->ch, &cfg); + if (ret) { + dev_err(host->dev, "Failed to config edmac.\n"); + return -EBUSY; + } + + desc = dmaengine_prep_slave_sg(host->dms->ch, sgl, + sg_len, cfg.direction, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc) { + dev_err(host->dev, "Can't prepare slave sg.\n"); + return -EBUSY; + } + + /* Set dw_mci_dmac_complete_dma as callback */ + desc->callback = dw_mci_dmac_complete_dma; + desc->callback_param = (void *)host; + dmaengine_submit(desc); + + /* Flush cache before write */ + if (host->data->flags & MMC_DATA_WRITE) + dma_sync_sg_for_device(mmc_dev(host->cur_slot->mmc), sgl, + sg_elems, DMA_TO_DEVICE); + + dma_async_issue_pending(host->dms->ch); + + return 0; +} + +static int dw_mci_edmac_init(struct dw_mci *host) +{ + /* Request external dma channel */ + host->dms = kzalloc(sizeof(struct dw_mci_dma_slave), GFP_KERNEL); + if (!host->dms) + return -ENOMEM; + + host->dms->ch = dma_request_slave_channel(host->dev, "rx-tx"); + if (!host->dms->ch) { + dev_err(host->dev, "Failed to get external DMA channel\n"); + kfree(host->dms); + host->dms = NULL; + return -ENXIO; + } + + return 0; +} + +static void dw_mci_edmac_exit(struct dw_mci *host) +{ + if (host->dms) { + if (host->dms->ch) { + dma_release_channel(host->dms->ch); + host->dms->ch = NULL; + } + kfree(host->dms); + host->dms = NULL; + } +} + +static const struct dw_mci_dma_ops dw_mci_edmac_ops = { + .init = dw_mci_edmac_init, + .exit = dw_mci_edmac_exit, + .start = dw_mci_edmac_start_dma, + .stop = dw_mci_edmac_stop_dma, + .complete = dw_mci_dmac_complete_dma, + .cleanup = dw_mci_dma_cleanup, }; -#endif /* CONFIG_MMC_DW_IDMAC */ static int 
dw_mci_pre_dma_transfer(struct dw_mci *host, struct mmc_data *data, @@ -752,7 +862,6 @@ static void dw_mci_post_req(struct mmc_host *mmc, static void dw_mci_adjust_fifoth(struct dw_mci *host, struct mmc_data *data) { -#ifdef CONFIG_MMC_DW_IDMAC unsigned int blksz = data->blksz; const u32 mszs[] = {1, 4, 8, 16, 32, 64, 128, 256}; u32 fifo_width = 1 << host->data_shift; @@ -760,6 +869,10 @@ static void dw_mci_adjust_fifoth(struct dw_mci *host, struct mmc_data *data) u32 msize = 0, rx_wmark = 1, tx_wmark, tx_wmark_invers; int idx = ARRAY_SIZE(mszs) - 1; + /* PIO should skip this scenario */ + if (!host->use_dma) + return; + tx_wmark = (host->fifo_depth) / 2; tx_wmark_invers = host->fifo_depth - tx_wmark; @@ -788,7 +901,6 @@ static void dw_mci_adjust_fifoth(struct dw_mci *host, struct mmc_data *data) done: fifoth_val = SDMMC_SET_FIFOTH(msize, rx_wmark, tx_wmark); mci_writel(host, FIFOTH, fifoth_val); -#endif } static void dw_mci_ctrl_rd_thld(struct dw_mci *host, struct mmc_data *data) @@ -850,10 +962,12 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) host->using_dma = 1; - dev_vdbg(host->dev, - "sd sg_cpu: %#lx sg_dma: %#lx sg_len: %d\n", - (unsigned long)host->sg_cpu, (unsigned long)host->sg_dma, - sg_len); + if (host->use_dma == TRANS_MODE_IDMAC) + dev_vdbg(host->dev, + "sd sg_cpu: %#lx sg_dma: %#lx sg_len: %d\n", + (unsigned long)host->sg_cpu, + (unsigned long)host->sg_dma, + sg_len); /* * Decide the MSIZE and RX/TX Watermark. @@ -875,7 +989,11 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) mci_writel(host, INTMASK, temp); spin_unlock_irqrestore(&host->irq_lock, irqflags); - host->dma_ops->start(host, sg_len); + if (host->dma_ops->start(host, sg_len)) { + /* We can't do DMA */ + dev_err(host->dev, "%s: failed to start DMA.\n", __func__); + return -ENODEV; + } return 0; } @@ -2338,15 +2456,17 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) } -#ifdef CONFIG_MMC_DW_IDMAC - /* Handle DMA interrupts */ + if (host->use_dma != TRANS_MODE_IDMAC) + return IRQ_HANDLED; + + /* Handle IDMA interrupts */ if (host->dma_64bit_address == 1) { pending = mci_readl(host, IDSTS64); if (pending & (SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI)) { mci_writel(host, IDSTS64, SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI); mci_writel(host, IDSTS64, SDMMC_IDMAC_INT_NI); - host->dma_ops->complete(host); + host->dma_ops->complete((void *)host); } } else { pending = mci_readl(host, IDSTS); @@ -2354,10 +2474,9 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) mci_writel(host, IDSTS, SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI); mci_writel(host, IDSTS, SDMMC_IDMAC_INT_NI); - host->dma_ops->complete(host); + host->dma_ops->complete((void *)host); } } -#endif return IRQ_HANDLED; } @@ -2466,13 +2585,21 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) goto err_host_allocated; /* Useful defaults if platform data is unset. 
*/ - if (host->use_dma) { + if (host->use_dma == TRANS_MODE_IDMAC) { mmc->max_segs = host->ring_size; mmc->max_blk_size = 65536; mmc->max_seg_size = 0x1000; mmc->max_req_size = mmc->max_seg_size * host->ring_size; mmc->max_blk_count = mmc->max_req_size / 512; + } else if (host->use_dma == TRANS_MODE_EDMAC) { + mmc->max_segs = 64; + mmc->max_blk_size = 65536; + mmc->max_blk_count = 65535; + mmc->max_req_size = + mmc->max_blk_size * mmc->max_blk_count; + mmc->max_seg_size = mmc->max_req_size; } else { + /* TRANS_MODE_PIO */ mmc->max_segs = 64; mmc->max_blk_size = 65536; /* BLKSIZ is 16 bits */ mmc->max_blk_count = 512; @@ -2512,38 +2639,74 @@ static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id) static void dw_mci_init_dma(struct dw_mci *host) { int addr_config; - /* Check ADDR_CONFIG bit in HCON to find IDMAC address bus width */ - addr_config = (mci_readl(host, HCON) >> 27) & 0x01; - - if (addr_config == 1) { - /* host supports IDMAC in 64-bit address mode */ - host->dma_64bit_address = 1; - dev_info(host->dev, "IDMAC supports 64-bit address mode.\n"); - if (!dma_set_mask(host->dev, DMA_BIT_MASK(64))) - dma_set_coherent_mask(host->dev, DMA_BIT_MASK(64)); - } else { - /* host supports IDMAC in 32-bit address mode */ - host->dma_64bit_address = 0; - dev_info(host->dev, "IDMAC supports 32-bit address mode.\n"); - } + struct device *dev = host->dev; + struct device_node *np = dev->of_node; - /* Alloc memory for sg translation */ - host->sg_cpu = dmam_alloc_coherent(host->dev, PAGE_SIZE, - &host->sg_dma, GFP_KERNEL); - if (!host->sg_cpu) { - dev_err(host->dev, "%s: could not alloc DMA memory\n", - __func__); + /* + * Check transfer mode from HCON[17:16] + * Clarify the ambiguous description in the dw_mmc databook: + * 2b'00: No DMA Interface -> Actually means using Internal DMA block + * 2b'01: DesignWare DMA Interface -> Synopsys DW-DMA block + * 2b'10: Generic DMA Interface -> non-Synopsys generic DMA block + * 2b'11: Non DW DMA Interface -> pio only + * Compared to DesignWare DMA Interface, Generic DMA Interface has a + * simpler request/acknowledge handshake mechanism and both of them + * are regarded as external dma masters for dw_mmc. 
+ */ + host->use_dma = SDMMC_GET_TRANS_MODE(mci_readl(host, HCON)); + if (host->use_dma == DMA_INTERFACE_IDMA) { + host->use_dma = TRANS_MODE_IDMAC; + } else if (host->use_dma == DMA_INTERFACE_DWDMA || + host->use_dma == DMA_INTERFACE_GDMA) { + host->use_dma = TRANS_MODE_EDMAC; + } else { goto no_dma; } /* Determine which DMA interface to use */ -#ifdef CONFIG_MMC_DW_IDMAC - host->dma_ops = &dw_mci_idmac_ops; - dev_info(host->dev, "Using internal DMA controller.\n"); -#endif + if (host->use_dma == TRANS_MODE_IDMAC) { + /* + * Check ADDR_CONFIG bit in HCON to find + * IDMAC address bus width + */ + addr_config = (mci_readl(host, HCON) >> 27) & 0x01; + + if (addr_config == 1) { + /* host supports IDMAC in 64-bit address mode */ + host->dma_64bit_address = 1; + dev_info(host->dev, + "IDMAC supports 64-bit address mode.\n"); + if (!dma_set_mask(host->dev, DMA_BIT_MASK(64))) + dma_set_coherent_mask(host->dev, + DMA_BIT_MASK(64)); + } else { + /* host supports IDMAC in 32-bit address mode */ + host->dma_64bit_address = 0; + dev_info(host->dev, + "IDMAC supports 32-bit address mode.\n"); + } - if (!host->dma_ops) - goto no_dma; + /* Alloc memory for sg translation */ + host->sg_cpu = dmam_alloc_coherent(host->dev, PAGE_SIZE, + &host->sg_dma, GFP_KERNEL); + if (!host->sg_cpu) { + dev_err(host->dev, + "%s: could not alloc DMA memory\n", + __func__); + goto no_dma; + } + + host->dma_ops = &dw_mci_idmac_ops; + dev_info(host->dev, "Using internal DMA controller.\n"); + } else { + /* TRANS_MODE_EDMAC: check dma bindings again */ + if ((of_property_count_strings(np, "dma-names") < 0) || + (!of_find_property(np, "dmas", NULL))) { + goto no_dma; + } + host->dma_ops = &dw_mci_edmac_ops; + dev_info(host->dev, "Using external DMA controller.\n"); + } if (host->dma_ops->init && host->dma_ops->start && host->dma_ops->stop && host->dma_ops->cleanup) { @@ -2557,12 +2720,11 @@ static void dw_mci_init_dma(struct dw_mci *host) goto no_dma; } - host->use_dma = 1; return; no_dma: dev_info(host->dev, "Using PIO mode.\n"); - host->use_dma = 0; + host->use_dma = TRANS_MODE_PIO; } static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset) @@ -2645,10 +2807,9 @@ static bool dw_mci_reset(struct dw_mci *host) } } -#if IS_ENABLED(CONFIG_MMC_DW_IDMAC) - /* It is also recommended that we reset and reprogram idmac */ - dw_mci_idmac_reset(host); -#endif + if (host->use_dma == TRANS_MODE_IDMAC) + /* It is also recommended that we reset and reprogram idmac */ + dw_mci_idmac_reset(host); ret = true; @@ -3062,6 +3223,9 @@ EXPORT_SYMBOL(dw_mci_remove); */ int dw_mci_suspend(struct dw_mci *host) { + if (host->use_dma && host->dma_ops->exit) + host->dma_ops->exit(host); + return 0; } EXPORT_SYMBOL(dw_mci_suspend); diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 8ce4674730a6..811d4673a1c5 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -148,6 +148,12 @@ #define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \ ((r) & 0xFFF) << 16 | \ ((t) & 0xFFF)) +/* HCON register defines */ +#define DMA_INTERFACE_IDMA (0x0) +#define DMA_INTERFACE_DWDMA (0x1) +#define DMA_INTERFACE_GDMA (0x2) +#define DMA_INTERFACE_NODMA (0x3) +#define SDMMC_GET_TRANS_MODE(x) (((x)>>16) & 0x3) /* Internal DMAC interrupt defines */ #define SDMMC_IDMAC_INT_AI BIT(9) #define SDMMC_IDMAC_INT_NI BIT(8) diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 134c57422740..f67b2ec18e6d 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -16,6 +16,7 @@ #include #include +#include 
#define MAX_MCI_SLOTS 2 @@ -40,6 +41,17 @@ enum { struct mmc_data; +enum { + TRANS_MODE_PIO = 0, + TRANS_MODE_IDMAC, + TRANS_MODE_EDMAC +}; + +struct dw_mci_dma_slave { + struct dma_chan *ch; + enum dma_transfer_direction direction; +}; + /** * struct dw_mci - MMC controller state shared between all slots * @lock: Spinlock protecting the queue and associated data. @@ -154,7 +166,14 @@ struct dw_mci { dma_addr_t sg_dma; void *sg_cpu; const struct dw_mci_dma_ops *dma_ops; + /* For idmac */ unsigned int ring_size; + + /* For edmac */ + struct dw_mci_dma_slave *dms; + /* Registers' physical base address */ + void *phy_regs; + u32 cmd_status; u32 data_status; u32 stop_cmdr; @@ -208,8 +227,8 @@ struct dw_mci { struct dw_mci_dma_ops { /* DMA Ops */ int (*init)(struct dw_mci *host); - void (*start)(struct dw_mci *host, unsigned int sg_len); - void (*complete)(struct dw_mci *host); + int (*start)(struct dw_mci *host, unsigned int sg_len); + void (*complete)(void *host); void (*stop)(struct dw_mci *host); void (*cleanup)(struct dw_mci *host); void (*exit)(struct dw_mci *host); -- cgit v1.2.3 From 679c51cffc3b316bd89ecc91ef92603dd6d4fc68 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 26 Oct 2015 11:55:34 -0700 Subject: clk: Add stubs for of_clk_*() APIs when CONFIG_OF=n Compiling the versatile clock driver with COMPILE_TEST=y and CONFIG_OF=n leads to the following error: drivers/clk/versatile/clk-sp810.c: In function 'clk_sp810_of_setup': drivers/clk/versatile/clk-sp810.c:103:6: error: implicit declaration of function 'of_clk_parent_fill' [-Werror=implicit-function-declaration] Silence it by providing stub APIs for of_clk_parent_fill(). Throw in a stub for of_clk_get_parent_count() too because we're in the area. Reported-by: kbuild test robot Cc: Javier Martinez Canillas Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 837cd7c7c8a7..ff7284fb37b2 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -726,6 +726,15 @@ static inline struct clk *of_clk_src_onecell_get( { return ERR_PTR(-ENOENT); } +static inline int of_clk_get_parent_count(struct device_node *np) +{ + return 0; +} +static inline int of_clk_parent_fill(struct device_node *np, + const char **parents, unsigned int size) +{ + return 0; +} static inline const char *of_clk_get_parent_name(struct device_node *np, int index) { -- cgit v1.2.3 From be68bf883170b3e4123fc4ff3745e38fb45a573e Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Sat, 24 Oct 2015 18:55:22 +0200 Subject: clk: Add clk_hw_is_enabled() for use by clk providers Add clk_hw_is_enabled() to the provider APIs so clk providers can use a struct clk_hw instead of a struct clk to check if a clk is enabled or not. 
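As a rough illustration of the intended use (not part of this patch; the gate structure and register layout below are hypothetical), a provider callback can now query the enable state through the hw pointer alone:

	#include <linux/bitops.h>
	#include <linux/clk-provider.h>
	#include <linux/io.h>

	/* Hypothetical gate clock, for illustration only. */
	struct my_gate {
		struct clk_hw hw;
		void __iomem *reg;
	};

	static int my_gate_prepare(struct clk_hw *hw)
	{
		struct my_gate *gate = container_of(hw, struct my_gate, hw);

		/* No struct clk needed: query the hw pointer directly. */
		if (clk_hw_is_enabled(hw))
			return 0;

		writel(BIT(0), gate->reg);
		return 0;
	}

Previously such code had to reach for hw->clk and call __clk_is_enabled() on it.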
Signed-off-by: Joachim Eastwood Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 5 +++++ include/linux/clk-provider.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 2eae76f21d6f..f13c3f4228d4 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -427,6 +427,11 @@ bool clk_hw_is_prepared(const struct clk_hw *hw) return clk_core_is_prepared(hw->core); } +bool clk_hw_is_enabled(const struct clk_hw *hw) +{ + return clk_core_is_enabled(hw->core); +} + bool __clk_is_enabled(struct clk *clk) { if (!clk) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index ff7284fb37b2..c56988ac63f7 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -654,6 +654,7 @@ unsigned long clk_hw_get_rate(const struct clk_hw *hw); unsigned long __clk_get_flags(struct clk *clk); unsigned long clk_hw_get_flags(const struct clk_hw *hw); bool clk_hw_is_prepared(const struct clk_hw *hw); +bool clk_hw_is_enabled(const struct clk_hw *hw); bool __clk_is_enabled(struct clk *clk); struct clk *__clk_lookup(const char *name); int __clk_mux_determine_rate(struct clk_hw *hw, -- cgit v1.2.3 From da4689c0263ee5f4eee64e166a6bee6a68b9242e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2015 08:36:08 +0900 Subject: iommu/vt-d: Expose struct svm_dev_ops without CONFIG_INTEL_IOMMU_SVM The point in providing an inline version of intel_svm_bind_mm() which just returns -ENOSYS is that people are supposed to be able to *use* it and just see that it fails. So we need to let them have a definition of struct svm_dev_ops (and the flags) too. Signed-off-by: David Woodhouse --- include/linux/intel-svm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 0a48ccff24ae..3c25794042f9 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -16,8 +16,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -#ifdef CONFIG_INTEL_IOMMU_SVM - struct device; struct svm_dev_ops { @@ -55,6 +53,8 @@ struct svm_dev_ops { */ #define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#ifdef CONFIG_INTEL_IOMMU_SVM + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted access -- cgit v1.2.3 From 1be5336bc7ba050ee07d352643bf4c01c513553c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 16 Oct 2015 10:18:10 +0300 Subject: dmaengine: edma: New device tree binding With the old binding and driver architecture we had many issues: No way to assign eDMA channels to event queues, thus not able to tune the system by moving specific DMA channels to low/high priority servicing. We moved the cyclic channels to high priority within the code, but that was just a workaround to this issue. Memcpy was fundamentally broken: even if the driver scanned the DT/devices in the booted system for direct DMA users (which is not effective when the events are going through a crossbar) and created a map of 'used' channels, this information was not really usable. Since, via the dmaengine API, the eDMA driver will be called with _some_ channel number, we would try to request this channel when any channel is requested for memcpy. By luck we got a channel which is not used by any device most of the time so things worked, but if a device would have been using the given channel, but not requested it, the memcpy channel would have been waiting for a HW event. 
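For context, a memcpy user obtains its channel purely by capability through the dmaengine core, so any of the driver's channels may be handed out; a minimal sketch of such a request (illustrative only, not taken from this patch):

	#include <linux/dmaengine.h>

	static struct dma_chan *request_memcpy_chan(void)
	{
		dma_cap_mask_t mask;

		dma_cap_zero(mask);
		dma_cap_set(DMA_MEMCPY, mask);

		/* No filter function: the core may return any channel
		 * that advertises the MEMCPY capability. */
		return dma_request_channel(mask, NULL, NULL);
	}

This is why the old driver could end up putting a HW-event-driven channel on memcpy duty.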
The old code had the am33xx/am43xx DMA event router handling embedded. This should have been done in a separate driver since it is not part of the actual eDMA IP. There was no way to 'lock' PaRAM slots to be used by the DSP, for example, when booting with DT. In DT boot the edma node used more than one hwmod, which is not a good practice, and the kernel prints a warning because of this. With the new bindings and the changes in the driver: - No regression with the legacy binding and non-DT boot - DMA channels can be assigned to any TC (to set priority) - PaRAM slots can be reserved for other cores to use - Dynamic power management for CC and TCs; if only TC0 is used, all other TCs can be powered down, for example Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti-edma.txt | 117 +++++- drivers/dma/edma.c | 486 +++++++++++++++------- include/linux/platform_data/edma.h | 3 + 3 files changed, 459 insertions(+), 147 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt index 5ba525a10035..d3d0a4fb1c73 100644 --- a/Documentation/devicetree/bindings/dma/ti-edma.txt +++ b/Documentation/devicetree/bindings/dma/ti-edma.txt @@ -1,4 +1,119 @@ -TI EDMA +Texas Instruments eDMA +
+The eDMA3 consists of two components: Channel controller (CC) and Transfer +Controller(s) (TC). The CC is the main entry for DMA users since it is +responsible for the DMA channel handling, while the TCs are responsible for +executing the actual DMA transfer. + +------------------------------------------------------------------------------ +eDMA3 Channel Controller + +Required properties: +- compatible: "ti,edma3-tpcc" for the channel controller(s) +- #dma-cells: Should be set to <2>. The first number is the DMA request + number and the second is the TC the channel is serviced on. +- reg: Memory map of eDMA CC +- reg-names: "edma3_cc" +- interrupts: Interrupt lines for CCINT, MPERR and CCERRINT. +- interrupt-names: "edma3_ccint", "emda3_mperr" and "edma3_ccerrint" +- ti,tptcs: List of TPTCs associated with the eDMA in the following form: + <&tptc_phandle TC_priority_number>. The highest priority is 0. + +Optional properties: +- ti,hwmods: Name of the hwmods associated with the eDMA CC +- ti,edma-memcpy-channels: List of channels allocated to be used for memcpy, i.e. + these channels will be SW triggered channels. The list must + contain 16-bit numbers, see example. +- ti,edma-reserved-slot-ranges: PaRAM slot ranges which should not be used by + the driver; they are allocated to be used by, for example, the + DSP. See example. + +------------------------------------------------------------------------------ +eDMA3 Transfer Controller + +Required properties: +- compatible: "ti,edma3-tptc" for the transfer controller(s) +- reg: Memory map of eDMA TC +- interrupts: Interrupt number for TCerrint. 
+ +Optional properties: +- ti,hwmods: Name of the hwmods associated with the given eDMA TC +- interrupt-names: "edma3_tcerrint" + +------------------------------------------------------------------------------ +Example: + +edma: edma@49000000 { + compatible = "ti,edma3-tpcc"; + ti,hwmods = "tpcc"; + reg = <0x49000000 0x10000>; + reg-names = "edma3_cc"; + interrupts = <12 13 14>; + interrupt-names = "edma3_ccint", "emda3_mperr", "edma3_ccerrint"; + dma-requests = <64>; + #dma-cells = <2>; + + ti,tptcs = <&edma_tptc0 7>, <&edma_tptc1 7>, <&edma_tptc2 0>; + + /* Channels 20 and 21 are allocated for memcpy */ + ti,edma-memcpy-channels = /bits/ 16 <20 21>; + /* The following PaRAM slots are reserved: 35-45 and 100-110 */ + ti,edma-reserved-slot-ranges = /bits/ 16 <35 10>, + /bits/ 16 <100 10>; +}; + +edma_tptc0: tptc@49800000 { + compatible = "ti,edma3-tptc"; + ti,hwmods = "tptc0"; + reg = <0x49800000 0x100000>; + interrupts = <112>; + interrupt-names = "edma3_tcerrint"; +}; + +edma_tptc1: tptc@49900000 { + compatible = "ti,edma3-tptc"; + ti,hwmods = "tptc1"; + reg = <0x49900000 0x100000>; + interrupts = <113>; + interrupt-names = "edma3_tcerrint"; +}; + +edma_tptc2: tptc@49a00000 { + compatible = "ti,edma3-tptc"; + ti,hwmods = "tptc2"; + reg = <0x49a00000 0x100000>; + interrupts = <114>; + interrupt-names = "edma3_tcerrint"; +}; + +sham: sham@53100000 { + compatible = "ti,omap4-sham"; + ti,hwmods = "sham"; + reg = <0x53100000 0x200>; + interrupts = <109>; + /* DMA channel 36 executed on eDMA TC0 - low priority queue */ + dmas = <&edma 36 0>; + dma-names = "rx"; +}; + +mcasp0: mcasp@48038000 { + compatible = "ti,am33xx-mcasp-audio"; + ti,hwmods = "mcasp0"; + reg = <0x48038000 0x2000>, + <0x46000000 0x400000>; + reg-names = "mpu", "dat"; + interrupts = <80>, <81>; + interrupt-names = "tx", "rx"; + status = "disabled"; + /* DMA channels 8 and 9 executed on eDMA TC2 - high priority queue */ + dmas = <&edma 8 2>, + <&edma 9 2>; + dma-names = "tx", "rx"; +}; + +------------------------------------------------------------------------------ +DEPRECATED binding, new DTS files must use the ti,edma3-tpcc/ti,edma3-tptc +binding. Required properties: - compatible : "ti,edma3" diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index d4d71e60da1b..31722d436a42 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -201,13 +201,20 @@ struct edma_desc { struct edma_cc; +struct edma_tc { + struct device_node *node; + u16 id; +}; + struct edma_chan { struct virt_dma_chan vchan; struct list_head node; struct edma_desc *edesc; struct edma_cc *ecc; + struct edma_tc *tc; int ch_num; bool alloced; + bool hw_triggered; int slot[EDMA_MAX_SLOTS]; int missed; struct dma_slave_config cfg; @@ -218,6 +225,7 @@ struct edma_cc { struct edma_soc_info *info; void __iomem *base; int id; + bool legacy_mode; /* eDMA3 resource information */ unsigned num_channels; @@ -228,20 +236,16 @@ struct edma_cc { bool chmap_exist; enum dma_event_q default_queue; - bool unused_chan_list_done; - /* The slot_inuse bit for each PaRAM slot is clear unless the - * channel is in use ... by ARM or DSP, for QDMA, or whatever. + /* + * The slot_inuse bit for each PaRAM slot is clear unless the slot is + * in use by Linux or if it is allocated to be used by DSP. */ unsigned long *slot_inuse; - /* The channel_unused bit for each channel is clear unless - * it is not being used on this platform. It uses a bit - * of SOC-specific initialization code. 
- */ - unsigned long *channel_unused; - struct dma_device dma_slave; + struct dma_device *dma_memcpy; struct edma_chan *slave_chans; + struct edma_tc *tc_list; int dummy_slot; }; @@ -251,8 +255,17 @@ static const struct edmacc_param dummy_paramset = { .ccnt = 1, }; +#define EDMA_BINDING_LEGACY 0 +#define EDMA_BINDING_TPCC 1 static const struct of_device_id edma_of_ids[] = { - { .compatible = "ti,edma3", }, + { + .compatible = "ti,edma3", + .data = (void *)EDMA_BINDING_LEGACY, + }, + { + .compatible = "ti,edma3-tpcc", + .data = (void *)EDMA_BINDING_TPCC, + }, {} }; @@ -412,60 +425,6 @@ static void edma_set_chmap(struct edma_chan *echan, int slot) } } -static int prepare_unused_channel_list(struct device *dev, void *data) -{ - struct platform_device *pdev = to_platform_device(dev); - struct edma_cc *ecc = data; - int dma_req_min = EDMA_CTLR_CHAN(ecc->id, 0); - int dma_req_max = dma_req_min + ecc->num_channels; - int i, count; - struct of_phandle_args dma_spec; - - if (dev->of_node) { - struct platform_device *dma_pdev; - - count = of_property_count_strings(dev->of_node, "dma-names"); - if (count < 0) - return 0; - for (i = 0; i < count; i++) { - if (of_parse_phandle_with_args(dev->of_node, "dmas", - "#dma-cells", i, - &dma_spec)) - continue; - - if (!of_match_node(edma_of_ids, dma_spec.np)) { - of_node_put(dma_spec.np); - continue; - } - - dma_pdev = of_find_device_by_node(dma_spec.np); - if (&dma_pdev->dev != ecc->dev) - continue; - - clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]), - ecc->channel_unused); - of_node_put(dma_spec.np); - } - return 0; - } - - /* For non-OF case */ - for (i = 0; i < pdev->num_resources; i++) { - struct resource *res = &pdev->resource[i]; - int dma_req; - - if (!(res->flags & IORESOURCE_DMA)) - continue; - - dma_req = (int)res->start; - if (dma_req >= dma_req_min && dma_req < dma_req_max) - clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start), - ecc->channel_unused); - } - - return 0; -} - static void edma_setup_interrupt(struct edma_chan *echan, bool enable) { struct edma_cc *ecc = echan->ecc; @@ -617,7 +576,7 @@ static void edma_start(struct edma_chan *echan) int j = (channel >> 5); unsigned int mask = BIT(channel & 0x1f); - if (test_bit(channel, ecc->channel_unused)) { + if (!echan->hw_triggered) { /* EDMA channels without event association */ dev_dbg(ecc->dev, "ESR%d %08x\n", j, edma_shadow0_read_array(ecc, SH_ESR, j)); @@ -734,20 +693,6 @@ static int edma_alloc_channel(struct edma_chan *echan, struct edma_cc *ecc = echan->ecc; int channel = EDMA_CHAN_SLOT(echan->ch_num); - if (!ecc->unused_chan_list_done) { - /* - * Scan all the platform devices to find out the EDMA channels - * used and clear them in the unused list, making the rest - * available for ARM usage. 
- */ - int ret = bus_for_each_dev(&platform_bus_type, NULL, ecc, - prepare_unused_channel_list); - if (ret < 0) - return ret; - - ecc->unused_chan_list_done = true; - } - /* ensure access through shadow region 0 */ edma_or_array2(ecc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); @@ -899,7 +844,7 @@ static int edma_terminate_all(struct dma_chan *chan) if (echan->edesc) { edma_stop(echan); /* Move the cyclic channel back to default queue */ - if (echan->edesc->cyclic) + if (!echan->tc && echan->edesc->cyclic) edma_assign_channel_eventq(echan, EVENTQ_DEFAULT); /* * free the running request descriptor @@ -1403,7 +1348,8 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( } /* Place the cyclic channel to highest priority queue */ - edma_assign_channel_eventq(echan, EVENTQ_0); + if (!echan->tc) + edma_assign_channel_eventq(echan, EVENTQ_0); return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } @@ -1609,18 +1555,54 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) return IRQ_HANDLED; } +static void edma_tc_set_pm_state(struct edma_tc *tc, bool enable) +{ + struct platform_device *tc_pdev; + int ret; + + if (!tc) + return; + + tc_pdev = of_find_device_by_node(tc->node); + if (!tc_pdev) { + pr_err("%s: TPTC device is not found\n", __func__); + return; + } + if (!pm_runtime_enabled(&tc_pdev->dev)) + pm_runtime_enable(&tc_pdev->dev); + + if (enable) + ret = pm_runtime_get_sync(&tc_pdev->dev); + else + ret = pm_runtime_put_sync(&tc_pdev->dev); + + if (ret < 0) + pr_err("%s: pm_runtime_%s_sync() failed for %s\n", __func__, + enable ? "get" : "put", dev_name(&tc_pdev->dev)); +} + /* Alloc channel resources */ static int edma_alloc_chan_resources(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - struct device *dev = chan->device->dev; + struct edma_cc *ecc = echan->ecc; + struct device *dev = ecc->dev; + enum dma_event_q eventq_no = EVENTQ_DEFAULT; int ret; - ret = edma_alloc_channel(echan, EVENTQ_DEFAULT); + if (echan->tc) { + eventq_no = echan->tc->id; + } else if (ecc->tc_list) { + /* memcpy channel */ + echan->tc = &ecc->tc_list[ecc->info->default_queue]; + eventq_no = echan->tc->id; + } + + ret = edma_alloc_channel(echan, eventq_no); if (ret) return ret; - echan->slot[0] = edma_alloc_slot(echan->ecc, echan->ch_num); + echan->slot[0] = edma_alloc_slot(ecc, echan->ch_num); if (echan->slot[0] < 0) { dev_err(dev, "Entry slot allocation failed for channel %u\n", EDMA_CHAN_SLOT(echan->ch_num)); @@ -1631,8 +1613,11 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) edma_set_chmap(echan, echan->slot[0]); echan->alloced = true; - dev_dbg(dev, "allocated channel %d for %u:%u\n", echan->ch_num, - EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num)); + dev_dbg(dev, "Got eDMA channel %d for virt channel %d (%s trigger)\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id, + echan->hw_triggered ? 
"HW" : "SW"); + + edma_tc_set_pm_state(echan->tc, true); return 0; @@ -1645,6 +1630,7 @@ err_slot: static void edma_free_chan_resources(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); + struct device *dev = echan->ecc->dev; int i; /* Terminate transfers */ @@ -1669,7 +1655,12 @@ static void edma_free_chan_resources(struct dma_chan *chan) echan->alloced = false; } - dev_dbg(chan->device->dev, "freeing channel for %u\n", echan->ch_num); + edma_tc_set_pm_state(echan->tc, false); + echan->tc = NULL; + echan->hw_triggered = false; + + dev_dbg(dev, "Free eDMA channel %d for virt channel %d\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id); } /* Send pending descriptor to hardware */ @@ -1756,41 +1747,90 @@ static enum dma_status edma_tx_status(struct dma_chan *chan, return ret; } +static bool edma_is_memcpy_channel(int ch_num, u16 *memcpy_channels) +{ + s16 *memcpy_ch = memcpy_channels; + + if (!memcpy_channels) + return false; + while (*memcpy_ch != -1) { + if (*memcpy_ch == ch_num) + return true; + memcpy_ch++; + } + return false; +} + #define EDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) -static void edma_dma_init(struct edma_cc *ecc) +static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) { - struct dma_device *ddev = &ecc->dma_slave; + struct dma_device *s_ddev = &ecc->dma_slave; + struct dma_device *m_ddev = NULL; + s16 *memcpy_channels = ecc->info->memcpy_channels; int i, j; - dma_cap_zero(ddev->cap_mask); - dma_cap_set(DMA_SLAVE, ddev->cap_mask); - dma_cap_set(DMA_CYCLIC, ddev->cap_mask); - dma_cap_set(DMA_MEMCPY, ddev->cap_mask); + dma_cap_zero(s_ddev->cap_mask); + dma_cap_set(DMA_SLAVE, s_ddev->cap_mask); + dma_cap_set(DMA_CYCLIC, s_ddev->cap_mask); + if (ecc->legacy_mode && !memcpy_channels) { + dev_warn(ecc->dev, + "Legacy memcpy is enabled, things might not work\n"); - ddev->device_prep_slave_sg = edma_prep_slave_sg; - ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic; - ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; - ddev->device_alloc_chan_resources = edma_alloc_chan_resources; - ddev->device_free_chan_resources = edma_free_chan_resources; - ddev->device_issue_pending = edma_issue_pending; - ddev->device_tx_status = edma_tx_status; - ddev->device_config = edma_slave_config; - ddev->device_pause = edma_dma_pause; - ddev->device_resume = edma_dma_resume; - ddev->device_terminate_all = edma_terminate_all; - - ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; - ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; - ddev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - - ddev->dev = ecc->dev; + dma_cap_set(DMA_MEMCPY, s_ddev->cap_mask); + s_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + s_ddev->directions = BIT(DMA_MEM_TO_MEM); + } - INIT_LIST_HEAD(&ddev->channels); + s_ddev->device_prep_slave_sg = edma_prep_slave_sg; + s_ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic; + s_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + s_ddev->device_free_chan_resources = edma_free_chan_resources; + s_ddev->device_issue_pending = edma_issue_pending; + s_ddev->device_tx_status = edma_tx_status; + s_ddev->device_config = edma_slave_config; + s_ddev->device_pause = edma_dma_pause; + s_ddev->device_resume = edma_dma_resume; + s_ddev->device_terminate_all = edma_terminate_all; + + s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + s_ddev->dst_addr_widths = 
EDMA_DMA_BUSWIDTHS; + s_ddev->directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV)); + s_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + s_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&s_ddev->channels); + + if (memcpy_channels) { + m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL); + ecc->dma_memcpy = m_ddev; + + dma_cap_zero(m_ddev->cap_mask); + dma_cap_set(DMA_MEMCPY, m_ddev->cap_mask); + + m_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + m_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + m_ddev->device_free_chan_resources = edma_free_chan_resources; + m_ddev->device_issue_pending = edma_issue_pending; + m_ddev->device_tx_status = edma_tx_status; + m_ddev->device_config = edma_slave_config; + m_ddev->device_pause = edma_dma_pause; + m_ddev->device_resume = edma_dma_resume; + m_ddev->device_terminate_all = edma_terminate_all; + + m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->directions = BIT(DMA_MEM_TO_MEM); + m_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + m_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&m_ddev->channels); + } else if (!ecc->legacy_mode) { + dev_info(ecc->dev, "memcpy is disabled\n"); + } for (i = 0; i < ecc->num_channels; i++) { struct edma_chan *echan = &ecc->slave_chans[i]; @@ -1798,7 +1838,10 @@ static void edma_dma_init(struct edma_cc *ecc) echan->ecc = ecc; echan->vchan.desc_free = edma_desc_free; - vchan_init(&echan->vchan, ddev); + if (m_ddev && edma_is_memcpy_channel(i, memcpy_channels)) + vchan_init(&echan->vchan, m_ddev); + else + vchan_init(&echan->vchan, s_ddev); INIT_LIST_HEAD(&echan->node); for (j = 0; j < EDMA_MAX_SLOTS; j++) @@ -1921,7 +1964,8 @@ static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata, return 0; } -static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev) +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) { struct edma_soc_info *info; struct property *prop; @@ -1932,20 +1976,121 @@ static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev) if (!info) return ERR_PTR(-ENOMEM); - prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", &sz); + if (legacy_mode) { + prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", + &sz); + if (prop) { + ret = edma_xbar_event_map(dev, info, sz); + if (ret) + return ERR_PTR(ret); + } + return info; + } + + /* Get the list of channels allocated to be used for memcpy */ + prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz); + if (prop) { + const char pname[] = "ti,edma-memcpy-channels"; + size_t nelm = sz / sizeof(s16); + s16 *memcpy_ch; + + memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s16), + GFP_KERNEL); + if (!memcpy_ch) + return ERR_PTR(-ENOMEM); + + ret = of_property_read_u16_array(dev->of_node, pname, + (u16 *)memcpy_ch, nelm); + if (ret) + return ERR_PTR(ret); + + memcpy_ch[nelm] = -1; + info->memcpy_channels = memcpy_ch; + } + + prop = of_find_property(dev->of_node, "ti,edma-reserved-slot-ranges", + &sz); if (prop) { - ret = edma_xbar_event_map(dev, info, sz); + const char pname[] = "ti,edma-reserved-slot-ranges"; + s16 (*rsv_slots)[2]; + size_t nelm = sz / sizeof(*rsv_slots); + struct edma_rsv_info *rsv_info; + + if (!nelm) + return info; + + rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL); + if (!rsv_info) + return ERR_PTR(-ENOMEM); + + rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots), + GFP_KERNEL); + if (!rsv_slots) + return 
ERR_PTR(-ENOMEM); + + ret = of_property_read_u16_array(dev->of_node, pname, + (u16 *)rsv_slots, nelm * 2); if (ret) return ERR_PTR(ret); + + rsv_slots[nelm][0] = -1; + rsv_slots[nelm][1] = -1; + info->rsv = rsv_info; + info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots; } return info; } + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct edma_cc *ecc = ofdma->of_dma_data; + struct dma_chan *chan = NULL; + struct edma_chan *echan; + int i; + + if (!ecc || dma_spec->args_count < 1) + return NULL; + + for (i = 0; i < ecc->num_channels; i++) { + echan = &ecc->slave_chans[i]; + if (echan->ch_num == dma_spec->args[0]) { + chan = &echan->vchan.chan; + break; + } + } + + if (!chan) + return NULL; + + if (echan->ecc->legacy_mode && dma_spec->args_count == 1) + goto out; + + if (!echan->ecc->legacy_mode && dma_spec->args_count == 2 && + dma_spec->args[1] < echan->ecc->num_tc) { + echan->tc = &echan->ecc->tc_list[dma_spec->args[1]]; + goto out; + } + + return NULL; +out: + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + return dma_get_slave_channel(chan); +} #else -static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev) +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) { return ERR_PTR(-EINVAL); } + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + return NULL; +} #endif static int edma_probe(struct platform_device *pdev) @@ -1953,7 +2098,6 @@ static int edma_probe(struct platform_device *pdev) struct edma_soc_info *info = pdev->dev.platform_data; s8 (*queue_priority_mapping)[2]; int i, off, ln; - const s16 (*rsv_chans)[2]; const s16 (*rsv_slots)[2]; const s16 (*xbar_chans)[2]; int irq; @@ -1962,10 +2106,17 @@ static int edma_probe(struct platform_device *pdev) struct device_node *node = pdev->dev.of_node; struct device *dev = &pdev->dev; struct edma_cc *ecc; + bool legacy_mode = true; int ret; if (node) { - info = edma_setup_info_from_dt(dev); + const struct of_device_id *match; + + match = of_match_node(edma_of_ids, node); + if (match && (u32)match->data == EDMA_BINDING_TPCC) + legacy_mode = false; + + info = edma_setup_info_from_dt(dev, legacy_mode); if (IS_ERR(info)) { dev_err(dev, "failed to get DT data\n"); return PTR_ERR(info); @@ -1994,6 +2145,7 @@ static int edma_probe(struct platform_device *pdev) ecc->dev = dev; ecc->id = pdev->id; + ecc->legacy_mode = legacy_mode; /* When booting with DT the pdev->id is -1 */ if (ecc->id < 0) ecc->id = 0; @@ -2024,12 +2176,6 @@ static int edma_probe(struct platform_device *pdev) if (!ecc->slave_chans) return -ENOMEM; - ecc->channel_unused = devm_kcalloc(dev, - BITS_TO_LONGS(ecc->num_channels), - sizeof(unsigned long), GFP_KERNEL); - if (!ecc->channel_unused) - return -ENOMEM; - ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots), sizeof(unsigned long), GFP_KERNEL); if (!ecc->slot_inuse) @@ -2040,20 +2186,7 @@ static int edma_probe(struct platform_device *pdev) for (i = 0; i < ecc->num_slots; i++) edma_write_slot(ecc, i, &dummy_paramset); - /* Mark all channels as unused */ - memset(ecc->channel_unused, 0xff, sizeof(ecc->channel_unused)); - if (info->rsv) { - /* Clear the reserved channels in unused list */ - rsv_chans = info->rsv->rsv_chans; - if (rsv_chans) { - for (i = 0; rsv_chans[i][0] != -1; i++) { - off = rsv_chans[i][0]; - ln = rsv_chans[i][1]; - clear_bits(off, ln, ecc->channel_unused); - } - } - /* Set the reserved slots in 
inuse list */ rsv_slots = info->rsv->rsv_slots; if (rsv_slots) { @@ -2070,7 +2203,6 @@ static int edma_probe(struct platform_device *pdev) if (xbar_chans) { for (i = 0; xbar_chans[i][1] != -1; i++) { off = xbar_chans[i][1]; - clear_bits(off, 1, ecc->channel_unused); } } @@ -2112,6 +2244,31 @@ static int edma_probe(struct platform_device *pdev) queue_priority_mapping = info->queue_priority_mapping; + if (!ecc->legacy_mode) { + int lowest_priority = 0; + struct of_phandle_args tc_args; + + ecc->tc_list = devm_kcalloc(dev, ecc->num_tc, + sizeof(*ecc->tc_list), GFP_KERNEL); + if (!ecc->tc_list) + return -ENOMEM; + + for (i = 0;; i++) { + ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs", + 1, i, &tc_args); + if (ret || i == ecc->num_tc) + break; + + ecc->tc_list[i].node = tc_args.np; + ecc->tc_list[i].id = i; + queue_priority_mapping[i][1] = tc_args.args[0]; + if (queue_priority_mapping[i][1] > lowest_priority) { + lowest_priority = queue_priority_mapping[i][1]; + info->default_queue = i; + } + } + } + /* Event queue priority mapping */ for (i = 0; queue_priority_mapping[i][0] != -1; i++) edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], @@ -2125,7 +2282,7 @@ static int edma_probe(struct platform_device *pdev) ecc->info = info; /* Init the dma device and channels */ - edma_dma_init(ecc); + edma_dma_init(ecc, legacy_mode); for (i = 0; i < ecc->num_channels; i++) { /* Assign all channels to the default queue */ @@ -2136,12 +2293,23 @@ static int edma_probe(struct platform_device *pdev) } ret = dma_async_device_register(&ecc->dma_slave); - if (ret) + if (ret) { + dev_err(dev, "slave ddev registration failed (%d)\n", ret); goto err_reg1; + } + + if (ecc->dma_memcpy) { + ret = dma_async_device_register(ecc->dma_memcpy); + if (ret) { + dev_err(dev, "memcpy ddev registration failed (%d)\n", + ret); + dma_async_device_unregister(&ecc->dma_slave); + goto err_reg1; + } + } if (node) - of_dma_controller_register(node, of_dma_xlate_by_chan_id, - &ecc->dma_slave); + of_dma_controller_register(node, of_edma_xlate, ecc); dev_info(dev, "TI EDMA DMA engine driver\n"); @@ -2160,12 +2328,30 @@ static int edma_remove(struct platform_device *pdev) if (dev->of_node) of_dma_controller_free(dev->of_node); dma_async_device_unregister(&ecc->dma_slave); + if (ecc->dma_memcpy) + dma_async_device_unregister(ecc->dma_memcpy); edma_free_slot(ecc, ecc->dummy_slot); return 0; } #ifdef CONFIG_PM_SLEEP +static int edma_pm_suspend(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + struct edma_chan *echan = ecc->slave_chans; + int i; + + for (i = 0; i < ecc->num_channels; i++) { + if (echan[i].alloced) { + edma_setup_interrupt(&echan[i], false); + edma_tc_set_pm_state(echan[i].tc, false); + } + } + + return 0; +} + static int edma_pm_resume(struct device *dev) { struct edma_cc *ecc = dev_get_drvdata(dev); @@ -2190,6 +2376,8 @@ static int edma_pm_resume(struct device *dev) /* Set up channel -> slot mapping for the entry slot */ edma_set_chmap(&echan[i], echan[i].slot[0]); + + edma_tc_set_pm_state(echan[i].tc, true); } } @@ -2198,7 +2386,7 @@ static int edma_pm_resume(struct device *dev) #endif static const struct dev_pm_ops edma_pm_ops = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume) + SET_LATE_SYSTEM_SLEEP_PM_OPS(edma_pm_suspend, edma_pm_resume) }; static struct platform_driver edma_driver = { @@ -2213,12 +2401,18 @@ static struct platform_driver edma_driver = { bool edma_filter_fn(struct dma_chan *chan, void *param) { + bool match = false; + if (chan->device->dev->driver == 
&edma_driver.driver) { struct edma_chan *echan = to_edma_chan(chan); unsigned ch_req = *(unsigned *)param; - return ch_req == echan->ch_num; + if (ch_req == echan->ch_num) { + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + match = true; + } } - return false; + return match; } EXPORT_SYMBOL(edma_filter_fn); diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index 6b9d500956e4..e2878baeb90e 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -71,6 +71,9 @@ struct edma_soc_info { /* Resource reservation for other cores */ struct edma_rsv_info *rsv; + /* List of channels allocated for memcpy, terminated with -1 */ + s16 *memcpy_channels; + s8 (*queue_priority_mapping)[2]; const s16 (*xbar_chans)[2]; }; -- cgit v1.2.3 From 3648dc6d27f648b8e3ce9b48874627a833d53c3a Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:39 +0100 Subject: NFC: st-nci: Add ese-present/uicc-present dts properties In order to align with st21nfca, dts configuration properties ese_present and uicc_present are made available in the st-nci driver. So far, in early development firmware, because nci_nfcee_mode_set(DISABLE) was not supported, we had to try to enable it during the secure element discovery phase. After several trials on commercial and qualified firmware it appears that nci_nfcee_mode_set(ENABLE) and nci_nfcee_mode_set(DISABLE) are properly supported. Such a feature also helps us save some time (~5ms) when only one secure element is connected. Acked-by: Rob Herring Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/st-nci-i2c.txt | 7 ++ .../devicetree/bindings/net/nfc/st-nci-spi.txt | 9 +- drivers/nfc/st-nci/core.c | 4 +- drivers/nfc/st-nci/i2c.c | 12 ++- drivers/nfc/st-nci/ndlc.c | 6 +- drivers/nfc/st-nci/ndlc.h | 5 +- drivers/nfc/st-nci/se.c | 98 ++++++++++++++-------- drivers/nfc/st-nci/spi.c | 12 ++- drivers/nfc/st-nci/st-nci.h | 13 ++- include/linux/platform_data/st-nci.h | 2 + 10 files changed, 122 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/nfc/st-nci-i2c.txt b/Documentation/devicetree/bindings/net/nfc/st-nci-i2c.txt index d707588ed734..263732e8879f 100644 --- a/Documentation/devicetree/bindings/net/nfc/st-nci-i2c.txt +++ b/Documentation/devicetree/bindings/net/nfc/st-nci-i2c.txt @@ -11,6 +11,10 @@ Required properties: Optional SoC Specific Properties: - pinctrl-names: Contains only one value - "default". - pinctrl-0: Specifies the pin control groups used for this controller. +- ese-present: Specifies that an ese is physically connected to the nfc +controller. +- uicc-present: Specifies that the uicc swp signal can be physically +connected to the nfc controller. Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): @@ -29,5 +33,8 @@ Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; + + ese-present; + uicc-present; }; }; diff --git a/Documentation/devicetree/bindings/net/nfc/st-nci-spi.txt b/Documentation/devicetree/bindings/net/nfc/st-nci-spi.txt index 525681b6dc39..711ca85a363d 100644 --- a/Documentation/devicetree/bindings/net/nfc/st-nci-spi.txt +++ b/Documentation/devicetree/bindings/net/nfc/st-nci-spi.txt @@ -2,7 +2,7 @@ Required properties: - compatible: Should be "st,st21nfcb-spi" -- spi-max-frequency: Maximum SPI frequency (<= 10000000). 
+- spi-max-frequency: Maximum SPI frequency (<= 4000000). - interrupt-parent: phandle for the interrupt gpio controller - interrupts: GPIO interrupt to which the chip is connected - reset-gpios: Output GPIO pin used to reset the ST21NFCB @@ -10,6 +10,10 @@ Required properties: Optional SoC Specific Properties: - pinctrl-names: Contains only one value - "default". - pinctrl-0: Specifies the pin control groups used for this controller. +- ese-present: Specifies that an ese is physically connected to the nfc +controller. +- uicc-present: Specifies that the uicc swp signal can be physically +connected to the nfc controller. Example (for ARM-based BeagleBoard xM with ST21NFCB on SPI4): @@ -27,5 +31,8 @@ Example (for ARM-based BeagleBoard xM with ST21NFCB on SPI4): interrupts = <2 IRQ_TYPE_EDGE_RISING>; reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; + + ese-present; + uicc-present; }; }; diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c index 73d36dd8345c..c693128ee6fb 100644 --- a/drivers/nfc/st-nci/core.c +++ b/drivers/nfc/st-nci/core.c @@ -123,7 +123,7 @@ static struct nci_ops st_nci_ops = { }; int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, - int phy_tailroom) + int phy_tailroom, struct st_nci_se_status *se_status) { struct st_nci_info *info; int r; @@ -164,7 +164,7 @@ int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, goto err_reg_dev; } - return st_nci_se_init(ndlc->ndev); + return st_nci_se_init(ndlc->ndev, se_status); err_reg_dev: nci_free_device(ndlc->ndev); diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c index 02e585f2be74..172cbc34cc9f 100644 --- a/drivers/nfc/st-nci/i2c.c +++ b/drivers/nfc/st-nci/i2c.c @@ -52,6 +52,8 @@ struct st_nci_i2c_phy { unsigned int gpio_reset; unsigned int irq_polarity; + + struct st_nci_se_status se_status; }; #define I2C_DUMP_SKB(info, skb) \ @@ -245,6 +247,11 @@ static int st_nci_i2c_of_request_resources(struct i2c_client *client) phy->irq_polarity = irq_get_trigger_type(client->irq); + phy->se_status.is_ese_present = + of_property_read_bool(pp, "ese-present"); + phy->se_status.is_uicc_present = + of_property_read_bool(pp, "uicc-present"); + return 0; } #else @@ -277,6 +284,9 @@ static int st_nci_i2c_request_resources(struct i2c_client *client) return r; } + phy->se_status.is_ese_present = pdata->is_ese_present; + phy->se_status.is_uicc_present = pdata->is_uicc_present; + return 0; } @@ -326,7 +336,7 @@ static int st_nci_i2c_probe(struct i2c_client *client, r = ndlc_probe(phy, &i2c_phy_ops, &client->dev, ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM, - &phy->ndlc); + &phy->ndlc, &phy->se_status); if (r < 0) { nfc_err(&client->dev, "Unable to register ndlc layer\n"); return r; diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index fb50007ac32a..0884b11001ef 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -20,6 +20,7 @@ #include #include "st-nci.h" +#include "ndlc.h" #define NDLC_TIMER_T1 100 #define NDLC_TIMER_T1_WAIT 400 @@ -265,7 +266,8 @@ static void ndlc_t2_timeout(unsigned long data) } int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id) + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id, + struct st_nci_se_status *se_status) { struct llt_ndlc *ndlc; @@ -295,7 +297,7 @@ int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, INIT_WORK(&ndlc->sm_work, llt_ndlc_sm_work); - return st_nci_probe(ndlc, phy_headroom, phy_tailroom); + return 
st_nci_probe(ndlc, phy_headroom, phy_tailroom, se_status); } EXPORT_SYMBOL(ndlc_probe); diff --git a/drivers/nfc/st-nci/ndlc.h b/drivers/nfc/st-nci/ndlc.h index 6361005ef003..bdf78ffd5bb7 100644 --- a/drivers/nfc/st-nci/ndlc.h +++ b/drivers/nfc/st-nci/ndlc.h @@ -22,6 +22,8 @@ #include #include +struct st_nci_se_status; + /* Low Level Transport description */ struct llt_ndlc { struct nci_dev *ndev; @@ -55,6 +57,7 @@ void ndlc_close(struct llt_ndlc *ndlc); int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id); + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id, + struct st_nci_se_status *se_status); void ndlc_remove(struct llt_ndlc *ndlc); #endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 281288484794..147e2d904c63 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -419,12 +419,8 @@ void st_nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, } EXPORT_SYMBOL_GPL(st_nci_hci_cmd_received); -/* - * Remarks: On some early st_nci firmware, nci_nfcee_mode_set(0) - * is rejected - */ static int st_nci_control_se(struct nci_dev *ndev, u8 se_idx, - u8 state) + u8 state) { struct st_nci_info *info = nci_get_drvdata(ndev); int r; @@ -449,7 +445,7 @@ static int st_nci_control_se(struct nci_dev *ndev, u8 se_idx, * retrieve a relevant host list. */ reinit_completion(&info->se_info.req_completion); - r = nci_nfcee_mode_set(ndev, se_idx, NCI_NFCEE_ENABLE); + r = nci_nfcee_mode_set(ndev, se_idx, state); if (r != NCI_STATUS_OK) return r; @@ -465,7 +461,9 @@ static int st_nci_control_se(struct nci_dev *ndev, u8 se_idx, * There is no possible synchronization to prevent this. * Adding a small delay is the only way to solve the issue. */ - usleep_range(3000, 5000); + if (info->se_info.se_status->is_ese_present && + info->se_info.se_status->is_uicc_present) + usleep_range(3000, 5000); r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_HOST_LIST, &sk_host_list); @@ -488,11 +486,20 @@ int st_nci_disable_se(struct nci_dev *ndev, u32 se_idx) pr_debug("st_nci_disable_se\n"); - if (se_idx == NFC_SE_EMBEDDED) { - r = nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, - ST_NCI_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); - if (r < 0) - return r; + /* + * According to upper layer, se_idx == NFC_SE_UICC when + * info->se_info.se_status->is_uicc_enable is true should never happen + * Same for eSE. + */ + r = st_nci_control_se(ndev, se_idx, ST_NCI_SE_MODE_OFF); + if (r < 0) { + /* Do best effort to release SWP */ + if (se_idx == NFC_SE_EMBEDDED) { + r = nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_SE_END_OF_APDU_TRANSFER, + NULL, 0); + } + return r; } return 0; @@ -505,11 +512,25 @@ int st_nci_enable_se(struct nci_dev *ndev, u32 se_idx) pr_debug("st_nci_enable_se\n"); - if (se_idx == ST_NCI_HCI_HOST_ID_ESE) { + /* + * According to upper layer, se_idx == NFC_SE_UICC when + * info->se_info.se_status->is_uicc_enable is true should never happen. + * Same for eSE. + */ + r = st_nci_control_se(ndev, se_idx, ST_NCI_SE_MODE_ON); + if (r == ST_NCI_HCI_HOST_ID_ESE) { + st_nci_se_get_atr(ndev); r = nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, ST_NCI_EVT_SE_SOFT_RESET, NULL, 0); - if (r < 0) - return r; + } + + if (r < 0) { + /* + * The activation procedure failed, the secure element + * is not connected. 
Remove from the list. + */ + nfc_remove_se(ndev->nfc_dev, se_idx); + return r; } return 0; @@ -592,8 +613,8 @@ exit: int st_nci_discover_se(struct nci_dev *ndev) { - u8 param[2]; - int r; + u8 white_list[2]; + int r, wl_size = 0; int se_count = 0; struct st_nci_info *info = nci_get_drvdata(ndev); @@ -606,29 +627,34 @@ int st_nci_discover_se(struct nci_dev *ndev) if (test_bit(ST_NCI_FACTORY_MODE, &info->flags)) return 0; - param[0] = ST_NCI_UICC_HOST_ID; - param[1] = ST_NCI_HCI_HOST_ID_ESE; - r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, - NCI_HCI_ADMIN_PARAM_WHITELIST, - param, sizeof(param)); - if (r != NCI_HCI_ANY_OK) - return r; + if (info->se_info.se_status->is_ese_present && + info->se_info.se_status->is_uicc_present) { + white_list[wl_size++] = ST_NCI_UICC_HOST_ID; + white_list[wl_size++] = ST_NCI_ESE_HOST_ID; + } else if (!info->se_info.se_status->is_ese_present && + info->se_info.se_status->is_uicc_present) { + white_list[wl_size++] = ST_NCI_UICC_HOST_ID; + } else if (info->se_info.se_status->is_ese_present && + !info->se_info.se_status->is_uicc_present) { + white_list[wl_size++] = ST_NCI_ESE_HOST_ID; + } + + if (wl_size) { + r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_WHITELIST, + white_list, wl_size); + if (r != NCI_HCI_ANY_OK) + return r; + } - r = st_nci_control_se(ndev, ST_NCI_UICC_HOST_ID, - ST_NCI_SE_MODE_ON); - if (r == ST_NCI_UICC_HOST_ID) { + if (info->se_info.se_status->is_uicc_present) { nfc_add_se(ndev->nfc_dev, ST_NCI_UICC_HOST_ID, NFC_SE_UICC); se_count++; } - /* Try to enable eSE in order to check availability */ - r = st_nci_control_se(ndev, ST_NCI_HCI_HOST_ID_ESE, - ST_NCI_SE_MODE_ON); - if (r == ST_NCI_HCI_HOST_ID_ESE) { - nfc_add_se(ndev->nfc_dev, ST_NCI_HCI_HOST_ID_ESE, - NFC_SE_EMBEDDED); + if (info->se_info.se_status->is_ese_present) { + nfc_add_se(ndev->nfc_dev, ST_NCI_ESE_HOST_ID, NFC_SE_EMBEDDED); se_count++; - st_nci_se_get_atr(ndev); } return !se_count; @@ -701,7 +727,7 @@ static void st_nci_se_activation_timeout(unsigned long data) complete(&info->se_info.req_completion); } -int st_nci_se_init(struct nci_dev *ndev) +int st_nci_se_init(struct nci_dev *ndev, struct st_nci_se_status *se_status) { struct st_nci_info *info = nci_get_drvdata(ndev); @@ -723,6 +749,8 @@ int st_nci_se_init(struct nci_dev *ndev) info->se_info.wt_timeout = ST_NCI_BWI_TO_TIMEOUT(ST_NCI_ATR_DEFAULT_BWI); + info->se_info.se_status = se_status; + return 0; } EXPORT_SYMBOL(st_nci_se_init); diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index b43f448b8d78..889720336474 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -53,6 +53,8 @@ struct st_nci_spi_phy { unsigned int gpio_reset; unsigned int irq_polarity; + + struct st_nci_se_status se_status; }; #define SPI_DUMP_SKB(info, skb) \ @@ -260,6 +262,11 @@ static int st_nci_spi_of_request_resources(struct spi_device *dev) phy->irq_polarity = irq_get_trigger_type(dev->irq); + phy->se_status.is_ese_present = + of_property_read_bool(pp, "ese-present"); + phy->se_status.is_uicc_present = + of_property_read_bool(pp, "uicc-present"); + return 0; } #else @@ -292,6 +299,9 @@ static int st_nci_spi_request_resources(struct spi_device *dev) return r; } + phy->se_status.is_ese_present = pdata->is_ese_present; + phy->se_status.is_uicc_present = pdata->is_uicc_present; + return 0; } @@ -342,7 +352,7 @@ static int st_nci_spi_probe(struct spi_device *dev) r = ndlc_probe(phy, &spi_phy_ops, &dev->dev, ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM, - &phy->ndlc); + &phy->ndlc, 
&phy->se_status); if (r < 0) { nfc_err(&dev->dev, "Unable to register ndlc layer\n"); return r; diff --git a/drivers/nfc/st-nci/st-nci.h b/drivers/nfc/st-nci/st-nci.h index 9c9bb19cc9ff..8b9f77b0249c 100644 --- a/drivers/nfc/st-nci/st-nci.h +++ b/drivers/nfc/st-nci/st-nci.h @@ -48,7 +48,13 @@ struct nci_mode_set_rsp { u8 status; } __packed; +struct st_nci_se_status { + bool is_ese_present; + bool is_uicc_present; +}; + struct st_nci_se_info { + struct st_nci_se_status *se_status; u8 atr[ST_NCI_ESE_MAX_LENGTH]; struct completion req_completion; @@ -126,15 +132,16 @@ struct st_nci_vendor_info { struct st_nci_info { struct llt_ndlc *ndlc; unsigned long flags; + struct st_nci_se_info se_info; struct st_nci_vendor_info vendor_info; }; void st_nci_remove(struct nci_dev *ndev); int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, - int phy_tailroom); + int phy_tailroom, struct st_nci_se_status *se_status); -int st_nci_se_init(struct nci_dev *ndev); +int st_nci_se_init(struct nci_dev *ndev, struct st_nci_se_status *se_status); void st_nci_se_deinit(struct nci_dev *ndev); int st_nci_discover_se(struct nci_dev *ndev); @@ -150,7 +157,7 @@ void st_nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb); void st_nci_hci_loopback_event_received(struct nci_dev *ndev, u8 event, - struct sk_buff *skb); + struct sk_buff *skb); int st_nci_vendor_cmds_init(struct nci_dev *ndev); #endif /* __LOCAL_ST_NCI_H_ */ diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h index d9d400a297bd..f6494b347c06 100644 --- a/include/linux/platform_data/st-nci.h +++ b/include/linux/platform_data/st-nci.h @@ -24,6 +24,8 @@ struct st_nci_nfc_platform_data { unsigned int gpio_reset; unsigned int irq_polarity; + bool is_ese_present; + bool is_uicc_present; }; #endif /* _ST_NCI_H_ */ -- cgit v1.2.3 From b5b3e23e4cace008e1a30e8614a484d14dfd07a1 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Mon, 26 Oct 2015 10:27:41 +0100 Subject: NFC: nfcmrvl: add i2c driver This driver adds support for I2C-based Marvell NFC controllers. Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/nfcmrvl.txt | 34 ++- drivers/nfc/nfcmrvl/Kconfig | 12 + drivers/nfc/nfcmrvl/Makefile | 3 + drivers/nfc/nfcmrvl/i2c.c | 290 +++++++++++++++++++++ drivers/nfc/nfcmrvl/main.c | 9 +- drivers/nfc/nfcmrvl/nfcmrvl.h | 1 + include/linux/platform_data/nfcmrvl.h | 8 + include/net/nfc/nci.h | 1 + 8 files changed, 353 insertions(+), 5 deletions(-) create mode 100644 drivers/nfc/nfcmrvl/i2c.c (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt b/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt index 7c4a0cc370cf..0fa20cc2c33c 100644 --- a/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt +++ b/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt @@ -1,7 +1,9 @@ * Marvell International Ltd. NCI NFC Controller Required properties: -- compatible: Should be "mrvl,nfc-uart". +- compatible: Should be: + - "mrvl,nfc-uart" for UART devices + - "mrvl,nfc-i2c" for I2C devices Optional SoC specific properties: - pinctrl-names: Contains only one value - "default". @@ -13,6 +15,12 @@ Optional UART-based chip specific properties: - flow-control: Specifies that the chip is using RTS/CTS. - break-control: Specifies that the chip needs specific break management. +Optional I2C-based chip specific properties: +- i2c-int-falling: Specifies that the chip read event shall be trigged on + falling edge. 
+- i2c-int-rising: Specifies that the chip read event shall be trigged on + rising edge. + Example (for ARM-based BeagleBoard Black with 88W8887 on UART5): &uart5 { @@ -27,3 +35,27 @@ Example (for ARM-based BeagleBoard Black with 88W8887 on UART5): flow-control; } }; + + +Example (for ARM-based BeagleBoard Black with 88W8887 on I2C1): + +&i2c1 { + status = "okay"; + clock-frequency = <400000>; + + nfcmrvli2c0: i2c@1 { + compatible = "mrvl,nfc-i2c"; + + reg = <0x8>; + + /* I2C INT configuration */ + interrupt-parent = <&gpio3>; + interrupts = <21 0>; + + /* I2C INT trigger configuration */ + i2c-int-rising; + + /* Reset IO */ + reset-n-io = <&gpio3 19 0>; + }; +}; diff --git a/drivers/nfc/nfcmrvl/Kconfig b/drivers/nfc/nfcmrvl/Kconfig index 19ac492bc25f..e18a979bb6d3 100644 --- a/drivers/nfc/nfcmrvl/Kconfig +++ b/drivers/nfc/nfcmrvl/Kconfig @@ -30,3 +30,15 @@ config NFC_MRVL_UART Say Y here to compile support for Marvell NFC-over-UART driver into the kernel or say M to compile it as module. + +config NFC_MRVL_I2C + tristate "Marvell NFC-over-I2C driver" + depends on NFC_MRVL && I2C + help + Marvell NFC-over-I2C driver. + + This driver provides support for Marvell NFC-over-I2C devices. + + Say Y here to compile support for Marvell NFC-over-I2C driver + into the kernel or say M to compile it as module. + diff --git a/drivers/nfc/nfcmrvl/Makefile b/drivers/nfc/nfcmrvl/Makefile index 4554ee8e3680..895866a3ebc6 100644 --- a/drivers/nfc/nfcmrvl/Makefile +++ b/drivers/nfc/nfcmrvl/Makefile @@ -10,3 +10,6 @@ obj-$(CONFIG_NFC_MRVL_USB) += nfcmrvl_usb.o nfcmrvl_uart-y += uart.o obj-$(CONFIG_NFC_MRVL_UART) += nfcmrvl_uart.o + +nfcmrvl_i2c-y += i2c.o +obj-$(CONFIG_NFC_MRVL_I2C) += nfcmrvl_i2c.o diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c new file mode 100644 index 000000000000..7a44025bdaad --- /dev/null +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -0,0 +1,290 @@ +/** + * Marvell NFC-over-I2C driver: I2C interface related functions + * + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. 
+ **/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nfcmrvl.h" + +struct nfcmrvl_i2c_drv_data { + unsigned long flags; + struct device *dev; + struct i2c_client *i2c; + struct nfcmrvl_private *priv; +}; + +static int nfcmrvl_i2c_read(struct nfcmrvl_i2c_drv_data *drv_data, + struct sk_buff **skb) +{ + int ret; + struct nci_ctrl_hdr nci_hdr; + + /* Read NCI header to know the payload size */ + ret = i2c_master_recv(drv_data->i2c, (u8 *)&nci_hdr, NCI_CTRL_HDR_SIZE); + if (ret != NCI_CTRL_HDR_SIZE) { + nfc_err(&drv_data->i2c->dev, "cannot read NCI header\n"); + return -EBADMSG; + } + + if (nci_hdr.plen > NCI_MAX_PAYLOAD_SIZE) { + nfc_err(&drv_data->i2c->dev, "invalid packet payload size\n"); + return -EBADMSG; + } + + *skb = nci_skb_alloc(drv_data->priv->ndev, + nci_hdr.plen + NCI_CTRL_HDR_SIZE, GFP_KERNEL); + if (!*skb) + return -ENOMEM; + + /* Copy NCI header into the SKB */ + memcpy(skb_put(*skb, NCI_CTRL_HDR_SIZE), &nci_hdr, NCI_CTRL_HDR_SIZE); + + if (nci_hdr.plen) { + /* Read the NCI payload */ + ret = i2c_master_recv(drv_data->i2c, + skb_put(*skb, nci_hdr.plen), + nci_hdr.plen); + + if (ret != nci_hdr.plen) { + nfc_err(&drv_data->i2c->dev, + "Invalid frame payload length: %u (expected %u)\n", + ret, nci_hdr.plen); + kfree_skb(*skb); + return -EBADMSG; + } + } + + return 0; +} + +static irqreturn_t nfcmrvl_i2c_int_irq_thread_fn(int irq, void *drv_data_ptr) +{ + struct nfcmrvl_i2c_drv_data *drv_data = drv_data_ptr; + struct sk_buff *skb = NULL; + int ret; + + if (!drv_data->priv) + return IRQ_HANDLED; + + if (test_bit(NFCMRVL_PHY_ERROR, &drv_data->priv->flags)) + return IRQ_HANDLED; + + ret = nfcmrvl_i2c_read(drv_data, &skb); + + switch (ret) { + case -EREMOTEIO: + set_bit(NFCMRVL_PHY_ERROR, &drv_data->priv->flags); + break; + case -ENOMEM: + case -EBADMSG: + nfc_err(&drv_data->i2c->dev, "read failed %d\n", ret); + break; + default: + if (nfcmrvl_nci_recv_frame(drv_data->priv, skb) < 0) + nfc_err(&drv_data->i2c->dev, "corrupted RX packet\n"); + break; + } + return IRQ_HANDLED; +} + +static int nfcmrvl_i2c_nci_open(struct nfcmrvl_private *priv) +{ + struct nfcmrvl_i2c_drv_data *drv_data = priv->drv_data; + + if (!drv_data) + return -ENODEV; + + return 0; +} + +static int nfcmrvl_i2c_nci_close(struct nfcmrvl_private *priv) +{ + return 0; +} + +static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, + struct sk_buff *skb) +{ + struct nfcmrvl_i2c_drv_data *drv_data = priv->drv_data; + int ret; + + if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) + return -EREMOTEIO; + + ret = i2c_master_send(drv_data->i2c, skb->data, skb->len); + + /* Retry if chip was in standby */ + if (ret == -EREMOTEIO) { + nfc_info(drv_data->dev, "chip may sleep, retry\n"); + usleep_range(6000, 10000); + ret = i2c_master_send(drv_data->i2c, skb->data, skb->len); + } + + if (ret >= 0) { + if (ret != skb->len) { + nfc_err(drv_data->dev, + "Invalid length sent: %u (expected %u)\n", + ret, skb->len); + ret = -EREMOTEIO; + } else + ret = 0; + kfree_skb(skb); + } + + return ret; +} + +static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv, + const void *param) +{ +} + +static struct nfcmrvl_if_ops i2c_ops = { + .nci_open = nfcmrvl_i2c_nci_open, + .nci_close = nfcmrvl_i2c_nci_close, + .nci_send = nfcmrvl_i2c_nci_send, + .nci_update_config = nfcmrvl_i2c_nci_update_config, +}; + +static int nfcmrvl_i2c_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + int ret; + + ret = nfcmrvl_parse_dt(node, pdata); + if 
(ret < 0) { + pr_err("Failed to get generic entries\n"); + return ret; + } + + if (of_find_property(node, "i2c-int-falling", NULL)) + pdata->irq_polarity = IRQF_TRIGGER_FALLING; + else + pdata->irq_polarity = IRQF_TRIGGER_RISING; + + ret = irq_of_parse_and_map(node, 0); + if (ret < 0) { + pr_err("Unable to get irq, error: %d\n", ret); + return ret; + } + pdata->irq = ret; + + return 0; +} + +static int nfcmrvl_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct nfcmrvl_i2c_drv_data *drv_data; + struct nfcmrvl_platform_data *pdata; + struct nfcmrvl_platform_data config; + int ret; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); + return -ENODEV; + } + + drv_data = devm_kzalloc(&client->dev, sizeof(*drv_data), GFP_KERNEL); + if (!drv_data) + return -ENOMEM; + + drv_data->i2c = client; + drv_data->dev = &client->dev; + drv_data->priv = NULL; + + i2c_set_clientdata(client, drv_data); + + pdata = client->dev.platform_data; + + if (!pdata && client->dev.of_node) + if (nfcmrvl_i2c_parse_dt(client->dev.of_node, &config) == 0) + pdata = &config; + + if (!pdata) + return -EINVAL; + + /* Request the read IRQ */ + ret = devm_request_threaded_irq(&drv_data->i2c->dev, pdata->irq, + NULL, nfcmrvl_i2c_int_irq_thread_fn, + pdata->irq_polarity | IRQF_ONESHOT, + "nfcmrvl_i2c_int", drv_data); + if (ret < 0) { + nfc_err(&drv_data->i2c->dev, + "Unable to register IRQ handler\n"); + return ret; + } + + drv_data->priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_I2C, + drv_data, &i2c_ops, + &drv_data->i2c->dev, pdata); + + if (IS_ERR(drv_data->priv)) + return PTR_ERR(drv_data->priv); + + drv_data->priv->support_fw_dnld = true; + + return 0; +} + +static int nfcmrvl_i2c_remove(struct i2c_client *client) +{ + struct nfcmrvl_i2c_drv_data *drv_data = i2c_get_clientdata(client); + + nfcmrvl_nci_unregister_dev(drv_data->priv); + + return 0; +} + + +static const struct of_device_id of_nfcmrvl_i2c_match[] = { + { .compatible = "mrvl,nfc-i2c", }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_nfcmrvl_i2c_match); + +static struct i2c_device_id nfcmrvl_i2c_id_table[] = { + { "nfcmrvl_i2c", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, nfcmrvl_i2c_id_table); + +static struct i2c_driver nfcmrvl_i2c_driver = { + .probe = nfcmrvl_i2c_probe, + .id_table = nfcmrvl_i2c_id_table, + .remove = nfcmrvl_i2c_remove, + .driver = { + .name = "nfcmrvl_i2c", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(of_nfcmrvl_i2c_match), + }, +}; + +module_i2c_driver(nfcmrvl_i2c_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell NFC-over-I2C driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index a24a7ca9f33d..0c27de60a6bd 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -33,6 +33,9 @@ static int nfcmrvl_nci_open(struct nci_dev *ndev) if (test_and_set_bit(NFCMRVL_NCI_RUNNING, &priv->flags)) return 0; + /* Reset possible fault of previous session */ + clear_bit(NFCMRVL_PHY_ERROR, &priv->flags); + err = priv->if_ops->nci_open(priv); if (err) @@ -226,10 +229,8 @@ EXPORT_SYMBOL_GPL(nfcmrvl_nci_recv_frame); void nfcmrvl_chip_reset(struct nfcmrvl_private *priv) { - /* - * This function does not take care if someone is using the device. - * To be improved. 
- */ + /* Reset possible fault of previous session */ + clear_bit(NFCMRVL_PHY_ERROR, &priv->flags); if (priv->config.reset_n_io) { nfc_info(priv->dev, "reset the chip\n"); diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index f82678be5aa9..de68ff45e49a 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -25,6 +25,7 @@ /* Define private flags: */ #define NFCMRVL_NCI_RUNNING 1 +#define NFCMRVL_PHY_ERROR 2 #define NFCMRVL_EXT_COEX_ID 0xE0 #define NFCMRVL_NOT_ALLOWED_ID 0xE1 diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h index ac91707dabcb..a6f9d633f5be 100644 --- a/include/linux/platform_data/nfcmrvl.h +++ b/include/linux/platform_data/nfcmrvl.h @@ -35,6 +35,14 @@ struct nfcmrvl_platform_data { unsigned int flow_control; /* Tell if firmware supports break control for power management */ unsigned int break_control; + + + /* + * I2C specific + */ + + unsigned int irq; + unsigned int irq_polarity; }; #endif /* _NFCMRVL_PTF_H_ */ diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index b495825f8f49..707e3ab816c2 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -35,6 +35,7 @@ #define NCI_MAX_NUM_RF_CONFIGS 10 #define NCI_MAX_NUM_CONN 10 #define NCI_MAX_PARAM_LEN 251 +#define NCI_MAX_PAYLOAD_SIZE 255 #define NCI_MAX_PACKET_SIZE 258 /* NCI Status Codes */ -- cgit v1.2.3 From 62544ce8e01c1879d420ba309f7f319d24c0f4e6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 22 Oct 2015 17:10:14 -0700 Subject: bpf: fix bpf_perf_event_read() helper Fix safety checks for bpf_perf_event_read(): - only non-inherited events can be added to perf_event_array map (do this check statically at map insertion time) - dynamically check that event is local and !pmu->count Otherwise buggy bpf program can cause kernel splat. Also fix error path after perf_event_attrs() and remove redundant 'extern'. Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") Signed-off-by: Alexei Starovoitov Tested-by: Wang Nan Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 - kernel/bpf/arraymap.c | 25 ++++++++++++++++--------- kernel/trace/bpf_trace.c | 7 ++++++- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e3a51b74e275..75718fa28260 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,7 +194,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; -extern const struct bpf_func_proto bpf_perf_event_read_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e3cfe46b074f..3f4c99e06c6b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -292,16 +292,23 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) attr = perf_event_attrs(event); if (IS_ERR(attr)) - return (void *)attr; + goto err; - if (attr->type != PERF_TYPE_RAW && - !(attr->type == PERF_TYPE_SOFTWARE && - attr->config == PERF_COUNT_SW_BPF_OUTPUT) && - attr->type != PERF_TYPE_HARDWARE) { - perf_event_release_kernel(event); - return ERR_PTR(-EINVAL); - } - return event; + if (attr->inherit) + goto err; + + if (attr->type == PERF_TYPE_RAW) + return event; + + if (attr->type == PERF_TYPE_HARDWARE) + return event; + + if (attr->type == PERF_TYPE_SOFTWARE && + attr->config == PERF_COUNT_SW_BPF_OUTPUT) + return event; +err: + perf_event_release_kernel(event); + return ERR_PTR(-EINVAL); } static void perf_event_fd_array_put_ptr(void *ptr) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 47febbe7998e..003df3887287 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -199,6 +199,11 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) if (!event) return -ENOENT; + /* make sure event is local and doesn't have pmu::count */ + if (event->oncpu != smp_processor_id() || + event->pmu->count) + return -EINVAL; + /* * we don't know if the function is run successfully by the * return value. It can be judged in other places, such as @@ -207,7 +212,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) return perf_event_read_local(event); } -const struct bpf_func_proto bpf_perf_event_read_proto = { +static const struct bpf_func_proto bpf_perf_event_read_proto = { .func = bpf_perf_event_read, .gpl_only = false, .ret_type = RET_INTEGER, -- cgit v1.2.3 From 174fd8d369613c4e06660f3704caaba48dac8554 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 22 Oct 2015 09:27:12 +0900 Subject: blkcg: fix incorrect read/write sync/async stat accounting While unifying how blkcg stats are collected, 77ea733884eb ("blkcg: move io_service_bytes and io_serviced stats into blkcg_gq") incorrectly used bio->flags instead of bio->rw to tell the IO type. This made IOs to be accounted as the wrong type. Fix it. 
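For context, blkg_rwstat_add() interprets its second argument as REQ_* request flags, whereas bio->bi_flags carries unrelated BIO_* state bits, so the old code picked the read/write and sync/async buckets essentially at random. A minimal sketch of the bucket selection (a simplification based on the blkg_rwstat helpers of this era; locking and the exact struct layout are elided, and rwstat_pick_bucket() is a hypothetical name):

#include <linux/blk_types.h>	/* REQ_WRITE, REQ_SYNC */
#include <linux/blk-cgroup.h>	/* BLKG_RWSTAT_* bucket indices */

static inline void rwstat_pick_bucket(u64 *cnt, int rw, u64 val)
{
	/* direction: REQ_WRITE selects the write bucket, else read */
	cnt[(rw & REQ_WRITE) ? BLKG_RWSTAT_WRITE : BLKG_RWSTAT_READ] += val;
	/* synchronicity: REQ_SYNC selects the sync bucket, else async */
	cnt[(rw & REQ_SYNC) ? BLKG_RWSTAT_SYNC : BLKG_RWSTAT_ASYNC] += val;
}

Passing bio->bi_rw feeds genuine REQ_* flags into these tests, which is what the stat buckets expect.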
Signed-off-by: Tejun Heo Fixes: 77ea733884eb ("blkcg: move io_service_bytes and io_serviced stats into blkcg_gq") Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 0a5cc7a1109b..c02e669945e9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -713,9 +713,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags, + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_rw, bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1); + blkg_rwstat_add(&blkg->stat_ios, bio->bi_rw, 1); } rcu_read_unlock(); -- cgit v1.2.3 From 9979dbe5158899b556eb772b7335e29417ac0ddd Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Tue, 27 Oct 2015 14:24:28 +0800 Subject: mmc: mmc: extend the mmc_send_tuning() The mmc_execute_tuning() has already prepared the opcode, so there is no need to prepare it again in mmc_send_tuning(). Moreover, mmc_send_tuning() has a bug: it derives the opcode from the bus width, so for an eMMC running HS200 in 4-bit mode it overwrites the opcode from CMD21 to CMD19 and the command fails. In addition, add a "cmd_error" argument so callers can learn whether a command error occurred during the tuning transfer. Signed-off-by: Chaotian Jing [Ulf: Rebased patch] Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_ops.c | 8 ++++---- drivers/mmc/host/dw_mmc-exynos.c | 4 ++-- drivers/mmc/host/dw_mmc-rockchip.c | 4 ++-- drivers/mmc/host/dw_mmc.c | 2 +- drivers/mmc/host/dw_mmc.h | 2 +- drivers/mmc/host/sdhci-esdhc-imx.c | 6 +++--- drivers/mmc/host/sdhci-msm.c | 2 +- drivers/mmc/host/sdhci-sirf.c | 2 +- include/linux/mmc/core.h | 2 +- 9 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 57f3a21783e9..1f444269ebbe 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -588,7 +588,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, } EXPORT_SYMBOL_GPL(mmc_switch); -int mmc_send_tuning(struct mmc_host *host) +int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error) { struct mmc_request mrq = {NULL}; struct mmc_command cmd = {0}; @@ -598,16 +598,13 @@ int mmc_send_tuning(struct mmc_host *host) const u8 *tuning_block_pattern; int size, err = 0; u8 *data_buf; - u32 opcode; if (ios->bus_width == MMC_BUS_WIDTH_8) { tuning_block_pattern = tuning_blk_pattern_8bit; size = sizeof(tuning_blk_pattern_8bit); - opcode = MMC_SEND_TUNING_BLOCK_HS200; } else if (ios->bus_width == MMC_BUS_WIDTH_4) { tuning_block_pattern = tuning_blk_pattern_4bit; size = sizeof(tuning_blk_pattern_4bit); - opcode = MMC_SEND_TUNING_BLOCK; } else return -EINVAL; @@ -638,6 +635,9 @@ int mmc_send_tuning(struct mmc_host *host) mmc_wait_for_req(host, &mrq); + if (cmd_error) + *cmd_error = cmd.error; + if (cmd.error) { err = cmd.error; goto out; diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 1e75309898b7..3a7e835a0033 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -446,7 +446,7 @@ out: return loc; } -static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot) +static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot, u32 opcode) { struct dw_mci *host = slot->host; 
struct dw_mci_exynos_priv_data *priv = host->priv; @@ -461,7 +461,7 @@ static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot) mci_writel(host, TMOUT, ~0); smpl = dw_mci_exynos_move_next_clksmpl(host); - if (!mmc_send_tuning(mmc)) + if (!mmc_send_tuning(mmc, opcode, NULL)) candiates |= (1 << smpl); } while (start_smpl != smpl); diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index 4b3650f7d43f..9becebeeccd1 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -83,7 +83,7 @@ static void dw_mci_rk3288_set_ios(struct dw_mci *host, struct mmc_ios *ios) #define NUM_PHASES 360 #define TUNING_ITERATION_TO_PHASE(i) (DIV_ROUND_UP((i) * 360, NUM_PHASES)) -static int dw_mci_rk3288_execute_tuning(struct dw_mci_slot *slot) +static int dw_mci_rk3288_execute_tuning(struct dw_mci_slot *slot, u32 opcode) { struct dw_mci *host = slot->host; struct dw_mci_rockchip_priv_data *priv = host->priv; @@ -114,7 +114,7 @@ static int dw_mci_rk3288_execute_tuning(struct dw_mci_slot *slot) for (i = 0; i < NUM_PHASES; ) { clk_set_phase(priv->sample_clk, TUNING_ITERATION_TO_PHASE(i)); - v = !mmc_send_tuning(mmc); + v = !mmc_send_tuning(mmc, opcode, NULL); if (i == 0) first_v = v; diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 6e600e875328..63eefea9645e 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1540,7 +1540,7 @@ static int dw_mci_execute_tuning(struct mmc_host *mmc, u32 opcode) int err = -EINVAL; if (drv_data && drv_data->execute_tuning) - err = drv_data->execute_tuning(slot); + err = drv_data->execute_tuning(slot, opcode); return err; } diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index f2a88d4bcbac..11cc848a40d4 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -290,7 +290,7 @@ struct dw_mci_drv_data { void (*prepare_command)(struct dw_mci *host, u32 *cmdr); void (*set_ios)(struct dw_mci *host, struct mmc_ios *ios); int (*parse_dt)(struct dw_mci *host); - int (*execute_tuning)(struct dw_mci_slot *slot); + int (*execute_tuning)(struct dw_mci_slot *slot, u32 opcode); int (*prepare_hs400_tuning)(struct dw_mci *host, struct mmc_ios *ios); int (*switch_voltage)(struct mmc_host *mmc, diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 886d230f41d0..1f1582f6cccb 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -759,7 +759,7 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode) min = ESDHC_TUNE_CTRL_MIN; while (min < ESDHC_TUNE_CTRL_MAX) { esdhc_prepare_tuning(host, min); - if (!mmc_send_tuning(host->mmc)) + if (!mmc_send_tuning(host->mmc, opcode, NULL)) break; min += ESDHC_TUNE_CTRL_STEP; } @@ -768,7 +768,7 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode) max = min + ESDHC_TUNE_CTRL_STEP; while (max < ESDHC_TUNE_CTRL_MAX) { esdhc_prepare_tuning(host, max); - if (mmc_send_tuning(host->mmc)) { + if (mmc_send_tuning(host->mmc, opcode, NULL)) { max -= ESDHC_TUNE_CTRL_STEP; break; } @@ -778,7 +778,7 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode) /* use average delay to get the best timing */ avg = (min + max) / 2; esdhc_prepare_tuning(host, avg); - ret = mmc_send_tuning(host->mmc); + ret = mmc_send_tuning(host->mmc, opcode, NULL); esdhc_post_tuning(host); dev_dbg(mmc_dev(host->mmc), "tunning %s at 0x%x ret %d\n", diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 
4bcee033feda..4695bee203ea 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -373,7 +373,7 @@ retry: if (rc) return rc; - rc = mmc_send_tuning(mmc); + rc = mmc_send_tuning(mmc, opcode, NULL); if (!rc) { /* Tuning is successful at this tuning point */ tuned_phases[tuned_phase_cnt++] = phase; diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c index f5488c409fd1..34866f668dd7 100644 --- a/drivers/mmc/host/sdhci-sirf.c +++ b/drivers/mmc/host/sdhci-sirf.c @@ -98,7 +98,7 @@ retry: clock_setting | phase, SDHCI_CLK_DELAY_SETTING); - if (!mmc_send_tuning(mmc)) { + if (!mmc_send_tuning(mmc, opcode, NULL)) { /* Tuning is successful at this tuning point */ tuned_phase_cnt++; dev_dbg(mmc_dev(mmc), "%s: Found good phase = %d\n", diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 79a31d3c3788..37967b6da03c 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -153,7 +153,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); -extern int mmc_send_tuning(struct mmc_host *host); +extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); #define MMC_ERASE_ARG 0x00000000 -- cgit v1.2.3 From ef0eebc05130b0d22b0ea65c0cd014ee16fc89c7 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 20 Oct 2015 21:15:06 -0700 Subject: drivers/pinctrl: Add the concept of an "init" state For pinctrl the "default" state is applied to pins before the driver's probe function is called. This is normally a sensible thing to do, but in some cases can cause problems. That's because the pins will change state before the driver is given a chance to program how those pins should behave. As an example you might have a regulator that is controlled by a PWM (output high = high voltage, output low = low voltage). The firmware might leave this pin as driven high. If we allow the driver core to reconfigure this pin as a PWM pin before the PWM's probe function runs then you might end up running at too low of a voltage while we probe. Let's introduce a new "init" state. If this is defined we'll set pinctrl to this state before probe and then "default" after probe (unless the driver explicitly changed states already). An alternative idea was to use the pre-existing "sleep" or "idle" states and add a boolean property that we should start in that mode. This was not done because the "init" state is needed for correctness and those other states are only present (and only transitioned in to and out of) when (optional) power management is enabled. Changes in v3: - Moved declarations to pinctrl/devinfo.h - Fixed author/SoB Changes in v2: - Added comment to pinctrl_init_done() as per Linus W. 
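To make the new flow concrete, here is a hypothetical consumer sketch (not part of this patch; the foo_* names are invented) of a driver that leaves its pins in the firmware-provided "init" state until it has programmed safe output levels, then switches to "default" itself using the standard pinctrl consumer API:

#include <linux/err.h>
#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>

static void foo_program_safe_levels(struct platform_device *pdev)
{
	/* hypothetical hardware setup, e.g. program a safe PWM duty cycle */
}

static int foo_probe(struct platform_device *pdev)
{
	struct pinctrl *p;
	struct pinctrl_state *def;

	/* The pins still sit in the DT-provided "init" state here. */
	foo_program_safe_levels(pdev);

	p = devm_pinctrl_get(&pdev->dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	def = pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT);
	if (IS_ERR(def))
		return PTR_ERR(def);

	/*
	 * Select "default" explicitly; a driver that does nothing gets
	 * the same transition from the core once probe() returns.
	 */
	return pinctrl_select_state(p, def);
}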
Signed-off-by: Douglas Anderson Acked-by: Greg Kroah-Hartman Tested-by: Caesar Wang Signed-off-by: Linus Walleij --- drivers/base/dd.c | 2 ++ drivers/base/pinctrl.c | 15 +++++++++++++-- drivers/pinctrl/core.c | 32 ++++++++++++++++++++++++++++++++ include/linux/pinctrl/devinfo.h | 10 ++++++++++ include/linux/pinctrl/pinctrl-state.h | 8 ++++++++ 5 files changed, 65 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index be0eb4639128..a641cf3ccad6 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -322,6 +322,8 @@ static int really_probe(struct device *dev, struct device_driver *drv) goto probe_failed; } + pinctrl_init_done(dev); + if (dev->pm_domain && dev->pm_domain->sync) dev->pm_domain->sync(dev); diff --git a/drivers/base/pinctrl.c b/drivers/base/pinctrl.c index 5fb74b43848e..076297592754 100644 --- a/drivers/base/pinctrl.c +++ b/drivers/base/pinctrl.c @@ -42,9 +42,20 @@ int pinctrl_bind_pins(struct device *dev) goto cleanup_get; } - ret = pinctrl_select_state(dev->pins->p, dev->pins->default_state); + dev->pins->init_state = pinctrl_lookup_state(dev->pins->p, + PINCTRL_STATE_INIT); + if (IS_ERR(dev->pins->init_state)) { + /* Not supplying this state is perfectly legal */ + dev_dbg(dev, "no init pinctrl state\n"); + + ret = pinctrl_select_state(dev->pins->p, + dev->pins->default_state); + } else { + ret = pinctrl_select_state(dev->pins->p, dev->pins->init_state); + } + if (ret) { - dev_dbg(dev, "failed to activate default pinctrl state\n"); + dev_dbg(dev, "failed to activate initial pinctrl state\n"); goto cleanup_get; } diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 9638a00c67c2..2686a4450dfc 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1240,6 +1240,38 @@ int pinctrl_force_default(struct pinctrl_dev *pctldev) } EXPORT_SYMBOL_GPL(pinctrl_force_default); +/** + * pinctrl_init_done() - tell pinctrl probe is done + * + * We'll use this time to switch the pins from "init" to "default" unless the + * driver selected some other state. 
+ * + * @dev: device to that's done probing + */ +int pinctrl_init_done(struct device *dev) +{ + struct dev_pin_info *pins = dev->pins; + int ret; + + if (!pins) + return 0; + + if (IS_ERR(pins->init_state)) + return 0; /* No such state */ + + if (pins->p->state != pins->init_state) + return 0; /* Not at init anyway */ + + if (IS_ERR(pins->default_state)) + return 0; /* No default state */ + + ret = pinctrl_select_state(pins->p, pins->default_state); + if (ret) + dev_err(dev, "failed to activate default pinctrl state\n"); + + return ret; +} + #ifdef CONFIG_PM /** diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h index 281cb91ddcf5..05082e407c4a 100644 --- a/include/linux/pinctrl/devinfo.h +++ b/include/linux/pinctrl/devinfo.h @@ -24,10 +24,14 @@ * struct dev_pin_info - pin state container for devices * @p: pinctrl handle for the containing device * @default_state: the default state for the handle, if found + * @init_state: the state at probe time, if found + * @sleep_state: the state at suspend time, if found + * @idle_state: the state at idle (runtime suspend) time, if found */ struct dev_pin_info { struct pinctrl *p; struct pinctrl_state *default_state; + struct pinctrl_state *init_state; #ifdef CONFIG_PM struct pinctrl_state *sleep_state; struct pinctrl_state *idle_state; @@ -35,6 +39,7 @@ struct dev_pin_info { }; extern int pinctrl_bind_pins(struct device *dev); +extern int pinctrl_init_done(struct device *dev); #else @@ -45,5 +50,10 @@ static inline int pinctrl_bind_pins(struct device *dev) return 0; } +static inline int pinctrl_init_done(struct device *dev) +{ + return 0; +} + #endif /* CONFIG_PINCTRL */ #endif /* PINCTRL_DEVINFO_H */ diff --git a/include/linux/pinctrl/pinctrl-state.h b/include/linux/pinctrl/pinctrl-state.h index b5919f8e6d1a..23073519339f 100644 --- a/include/linux/pinctrl/pinctrl-state.h +++ b/include/linux/pinctrl/pinctrl-state.h @@ -9,6 +9,13 @@ * hogs to configure muxing and pins at boot, and also as a state * to go into when returning from sleep and idle in * .pm_runtime_resume() or ordinary .resume() for example. + * @PINCTRL_STATE_INIT: normally the pinctrl will be set to "default" + * before the driver's probe() function is called. There are some + * drivers where that is not appropriate becausing doing so would + * glitch the pins. In those cases you can add an "init" pinctrl + * which is the state of the pins before drive probe. After probe + * if the pins are still in "init" state they'll be moved to + * "default". * @PINCTRL_STATE_IDLE: the state the pinctrl handle shall be put into * when the pins are idle. This is a state where the system is relaxed * but not fully sleeping - some power may be on but clocks gated for @@ -20,5 +27,6 @@ * ordinary .suspend() function. */ #define PINCTRL_STATE_DEFAULT "default" +#define PINCTRL_STATE_INIT "init" #define PINCTRL_STATE_IDLE "idle" #define PINCTRL_STATE_SLEEP "sleep" -- cgit v1.2.3 From ca5d24854210dd02548a080d4271560e926c4fcb Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Fri, 23 Oct 2015 08:59:10 -0500 Subject: spi: Add THIS_MODULE to spi_driver in SPI core Add spi_register_driver helper macro that adds THIS_MODULE to spi_driver for the registering driver. We rename and modify the existing spi_register_driver to enable this. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- drivers/spi/spi.c | 7 ++++--- include/linux/spi/spi.h | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 3abb3903f2ad..51e33dbe4e05 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -305,12 +305,13 @@ static void spi_drv_shutdown(struct device *dev) } /** - * spi_register_driver - register a SPI driver + * __spi_register_driver - register a SPI driver * @sdrv: the driver to register * Context: can sleep */ -int spi_register_driver(struct spi_driver *sdrv) +int __spi_register_driver(struct module *owner, struct spi_driver *sdrv) { + sdrv->driver.owner = owner; sdrv->driver.bus = &spi_bus_type; if (sdrv->probe) sdrv->driver.probe = spi_drv_probe; @@ -320,7 +321,7 @@ int spi_register_driver(struct spi_driver *sdrv) sdrv->driver.shutdown = spi_drv_shutdown; return driver_register(&sdrv->driver); } -EXPORT_SYMBOL_GPL(spi_register_driver); +EXPORT_SYMBOL_GPL(__spi_register_driver); /*-------------------------------------------------------------------------*/ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 269e8afd3e2a..e2da17b5900e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -250,7 +250,7 @@ static inline struct spi_driver *to_spi_driver(struct device_driver *drv) return drv ? container_of(drv, struct spi_driver, driver) : NULL; } -extern int spi_register_driver(struct spi_driver *sdrv); +extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv); /** * spi_unregister_driver - reverse effect of spi_register_driver @@ -263,6 +263,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) driver_unregister(&sdrv->driver); } +/* use a define to avoid include chaining to get THIS_MODULE */ +#define spi_register_driver(driver) \ + __spi_register_driver(THIS_MODULE, driver) + /** * module_spi_driver() - Helper macro for registering a SPI driver * @__spi_driver: spi_driver struct -- cgit v1.2.3 From 5523662edd4fe937267053c2018b75be2ac17860 Mon Sep 17 00:00:00 2001 From: Stephen Chandler Paul Date: Sat, 24 Oct 2015 13:10:29 -0700 Subject: Input: add userio module Debugging input devices, specifically laptop touchpads, can be tricky without having the physical device handy. Here we try to remedy that with userio. This module allows an application to connect to a character device provided by the kernel, and emulate any serio device. In combination with userspace programs that can record PS/2 devices and replay them through the /dev/userio device, this allows developers to debug driver issues on the PS/2 level with devices simply by requesting a recording from the user experiencing the issue without having to have the physical hardware in front of them. 
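To illustrate the documented command flow end to end, a rough userspace sketch (not part of this patch; error handling is trimmed and the replayed byte is an arbitrary example) that sets a PS/2 port type, registers the port, and feeds it one interrupt byte:

#include <fcntl.h>
#include <unistd.h>
#include <linux/types.h>
#include <linux/serio.h>
#include <linux/userio.h>

static int userio_send(int fd, __u8 type, __u8 data)
{
	struct userio_cmd cmd = { .type = type, .data = data };

	/* one command per write(); trailing bytes would be ignored */
	return write(fd, &cmd, sizeof(cmd)) == sizeof(cmd) ? 0 : -1;
}

int main(void)
{
	unsigned char byte;
	int fd = open("/dev/userio", O_RDWR);

	if (fd < 0)
		return 1;

	/* the port type must be set before the port is registered */
	if (userio_send(fd, USERIO_CMD_SET_PORT_TYPE, SERIO_8042) ||
	    userio_send(fd, USERIO_CMD_REGISTER, 0) ||
	    userio_send(fd, USERIO_CMD_SEND_INTERRUPT, 0xaa))
		return 1;

	/* bytes written by the in-kernel serio consumer come back as-is */
	read(fd, &byte, 1);

	close(fd);
	return 0;
}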
Signed-off-by: Stephen Chandler Paul Reviewed-by: David Herrmann Signed-off-by: Dmitry Torokhov --- Documentation/input/userio.txt | 70 ++++++++++ MAINTAINERS | 6 + drivers/input/serio/Kconfig | 14 ++ drivers/input/serio/Makefile | 1 + drivers/input/serio/userio.c | 285 +++++++++++++++++++++++++++++++++++++++++ include/linux/miscdevice.h | 1 + include/uapi/linux/userio.h | 44 +++++++ 7 files changed, 421 insertions(+) create mode 100644 Documentation/input/userio.txt create mode 100644 drivers/input/serio/userio.c create mode 100644 include/uapi/linux/userio.h (limited to 'include/linux') diff --git a/Documentation/input/userio.txt b/Documentation/input/userio.txt new file mode 100644 index 000000000000..0880c0f447a6 --- /dev/null +++ b/Documentation/input/userio.txt @@ -0,0 +1,70 @@ + The userio Protocol + (c) 2015 Stephen Chandler Paul + Sponsored by Red Hat +-------------------------------------------------------------------------------- + +1. Introduction +~~~~~~~~~~~~~~~ + This module is intended to try to make the lives of input driver developers +easier by allowing them to test various serio devices (mainly the various +touchpads found on laptops) without having to have the physical device in front +of them. userio accomplishes this by allowing any privileged userspace program +to directly interact with the kernel's serio driver and control a virtual serio +port from there. + +2. Usage overview +~~~~~~~~~~~~~~~~~ + In order to interact with the userio kernel module, one simply opens the +/dev/userio character device in their applications. Commands are sent to the +kernel module by writing to the device, and any data received from the serio +driver is read as-is from the /dev/userio device. All of the structures and +macros you need to interact with the device are defined in and +. + +3. Command Structure +~~~~~~~~~~~~~~~~~~~~ + The struct used for sending commands to /dev/userio is as follows: + + struct userio_cmd { + __u8 type; + __u8 data; + }; + + "type" describes the type of command that is being sent. This can be any one +of the USERIO_CMD macros defined in . "data" is the argument +that goes along with the command. In the event that the command doesn't have an +argument, this field can be left untouched and will be ignored by the kernel. +Each command should be sent by writing the struct directly to the character +device. In the event that the command you send is invalid, an error will be +returned by the character device and a more descriptive error will be printed +to the kernel log. Only one command can be sent at a time, any additional data +written to the character device after the initial command will be ignored. + To close the virtual serio port, just close /dev/userio. + +4. Commands +~~~~~~~~~~~ + +4.1 USERIO_CMD_REGISTER +~~~~~~~~~~~~~~~~~~~~~~~ + Registers the port with the serio driver and begins transmitting data back and +forth. Registration can only be performed once a port type is set with +USERIO_CMD_SET_PORT_TYPE. Has no argument. + +4.2 USERIO_CMD_SET_PORT_TYPE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Sets the type of port we're emulating, where "data" is the port type being +set. Can be any of the macros from . For example: SERIO_8042 +would set the port type to be a normal PS/2 port. + +4.3 USERIO_CMD_SEND_INTERRUPT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Sends an interrupt through the virtual serio port to the serio driver, where +"data" is the interrupt data being sent. + +5. 
Userspace tools +~~~~~~~~~~~~~~~~~~ + The userio userspace tools are able to record PS/2 devices using some of the +debugging information from i8042, and play back the devices on /dev/userio. The +latest version of these tools can be found at: + + https://github.com/Lyude/ps2emu diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..ba59bd7b3557 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11100,6 +11100,12 @@ S: Maintained F: drivers/media/v4l2-core/videobuf2-* F: include/media/videobuf2-* +VIRTUAL SERIO DEVICE DRIVER +M: Stephen Chandler Paul +S: Maintained +F: drivers/input/serio/userio.c +F: include/uapi/linux/userio.h + VIRTIO CONSOLE DRIVER M: Amit Shah L: virtualization@lists.linux-foundation.org diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 200841b77edb..c3d05b4d3118 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -292,4 +292,18 @@ config SERIO_SUN4I_PS2 To compile this driver as a module, choose M here: the module will be called sun4i-ps2. +config USERIO + tristate "User space serio port driver support" + help + Say Y here if you want to support user level drivers for serio + subsystem accessible under char device 10:240 - /dev/userio. Using + this facility userspace programs can implement serio ports that + will be used by the standard in-kernel serio consumer drivers, + such as psmouse and atkbd. + + To compile this driver as a module, choose M here: the module will be + called userio. + + If you are unsure, say N. + endif diff --git a/drivers/input/serio/Makefile b/drivers/input/serio/Makefile index c600089b7a34..2374ef9b33d7 100644 --- a/drivers/input/serio/Makefile +++ b/drivers/input/serio/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_SERIO_APBPS2) += apbps2.o obj-$(CONFIG_SERIO_OLPC_APSP) += olpc_apsp.o obj-$(CONFIG_HYPERV_KEYBOARD) += hyperv-keyboard.o obj-$(CONFIG_SERIO_SUN4I_PS2) += sun4i-ps2.o +obj-$(CONFIG_USERIO) += userio.o diff --git a/drivers/input/serio/userio.c b/drivers/input/serio/userio.c new file mode 100644 index 000000000000..df1fd41860ac --- /dev/null +++ b/drivers/input/serio/userio.c @@ -0,0 +1,285 @@ +/* + * userio kernel serio device emulation module + * Copyright (C) 2015 Red Hat + * Copyright (C) 2015 Stephen Chandler Paul + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + * General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define USERIO_NAME "userio" +#define USERIO_BUFSIZE 16 + +static struct miscdevice userio_misc; + +struct userio_device { + struct serio *serio; + struct mutex mutex; + + bool running; + + u8 head; + u8 tail; + + spinlock_t buf_lock; + unsigned char buf[USERIO_BUFSIZE]; + + wait_queue_head_t waitq; +}; + +/** + * userio_device_write - Write data from serio to a userio device in userspace + * @id: The serio port for the userio device + * @val: The data to write to the device + */ +static int userio_device_write(struct serio *id, unsigned char val) +{ + struct userio_device *userio = id->port_data; + unsigned long flags; + + spin_lock_irqsave(&userio->buf_lock, flags); + + userio->buf[userio->head] = val; + userio->head = (userio->head + 1) % USERIO_BUFSIZE; + + if (userio->head == userio->tail) + dev_warn(userio_misc.this_device, + "Buffer overflowed, userio client isn't keeping up"); + + spin_unlock_irqrestore(&userio->buf_lock, flags); + + wake_up_interruptible(&userio->waitq); + + return 0; +} + +static int userio_char_open(struct inode *inode, struct file *file) +{ + struct userio_device *userio; + + userio = kzalloc(sizeof(struct userio_device), GFP_KERNEL); + if (!userio) + return -ENOMEM; + + mutex_init(&userio->mutex); + spin_lock_init(&userio->buf_lock); + init_waitqueue_head(&userio->waitq); + + userio->serio = kzalloc(sizeof(struct serio), GFP_KERNEL); + if (!userio->serio) { + kfree(userio); + return -ENOMEM; + } + + userio->serio->write = userio_device_write; + userio->serio->port_data = userio; + + file->private_data = userio; + + return 0; +} + +static int userio_char_release(struct inode *inode, struct file *file) +{ + struct userio_device *userio = file->private_data; + + if (userio->running) { + /* + * Don't free the serio port here, serio_unregister_port() + * does it for us. + */ + serio_unregister_port(userio->serio); + } else { + kfree(userio->serio); + } + + kfree(userio); + + return 0; +} + +static ssize_t userio_char_read(struct file *file, char __user *user_buffer, + size_t count, loff_t *ppos) +{ + struct userio_device *userio = file->private_data; + int error; + size_t nonwrap_len, copylen; + unsigned char buf[USERIO_BUFSIZE]; + unsigned long flags; + + /* + * By the time we get here, the data that was waiting might have + * been taken by another thread. Grab the buffer lock and check if + * there's still any data waiting, otherwise repeat this process + * until we have data (unless the file descriptor is non-blocking + * of course). + */ + for (;;) { + spin_lock_irqsave(&userio->buf_lock, flags); + + nonwrap_len = CIRC_CNT_TO_END(userio->head, + userio->tail, + USERIO_BUFSIZE); + copylen = min(nonwrap_len, count); + if (copylen) { + memcpy(buf, &userio->buf[userio->tail], copylen); + userio->tail = (userio->tail + copylen) % + USERIO_BUFSIZE; + } + + spin_unlock_irqrestore(&userio->buf_lock, flags); + + if (nonwrap_len) + break; + + /* buffer was/is empty */ + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + /* + * count == 0 is special - no IO is done but we check + * for error conditions (see above). 
+ */ + if (count == 0) + return 0; + + error = wait_event_interruptible(userio->waitq, + userio->head != userio->tail); + if (error) + return error; + } + + if (copylen) + if (copy_to_user(user_buffer, buf, copylen)) + return -EFAULT; + + return copylen; +} + +static ssize_t userio_char_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct userio_device *userio = file->private_data; + struct userio_cmd cmd; + int error; + + if (count != sizeof(cmd)) { + dev_warn(userio_misc.this_device, "Invalid payload size\n"); + return -EINVAL; + } + + if (copy_from_user(&cmd, buffer, sizeof(cmd))) + return -EFAULT; + + error = mutex_lock_interruptible(&userio->mutex); + if (error) + return error; + + switch (cmd.type) { + case USERIO_CMD_REGISTER: + if (!userio->serio->id.type) { + dev_warn(userio_misc.this_device, + "No port type given on /dev/userio\n"); + + error = -EINVAL; + goto out; + } + + if (userio->running) { + dev_warn(userio_misc.this_device, + "Begin command sent, but we're already running\n"); + error = -EBUSY; + goto out; + } + + userio->running = true; + serio_register_port(userio->serio); + break; + + case USERIO_CMD_SET_PORT_TYPE: + if (userio->running) { + dev_warn(userio_misc.this_device, + "Can't change port type on an already running userio instance\n"); + error = -EBUSY; + goto out; + } + + userio->serio->id.type = cmd.data; + break; + + case USERIO_CMD_SEND_INTERRUPT: + if (!userio->running) { + dev_warn(userio_misc.this_device, + "The device must be registered before sending interrupts\n"); + error = -ENODEV; + goto out; + } + + serio_interrupt(userio->serio, cmd.data, 0); + break; + + default: + error = -EOPNOTSUPP; + goto out; + } + +out: + mutex_unlock(&userio->mutex); + return error ?: count; +} + +static unsigned int userio_char_poll(struct file *file, poll_table *wait) +{ + struct userio_device *userio = file->private_data; + + poll_wait(file, &userio->waitq, wait); + + if (userio->head != userio->tail) + return POLLIN | POLLRDNORM; + + return 0; +} + +static const struct file_operations userio_fops = { + .owner = THIS_MODULE, + .open = userio_char_open, + .release = userio_char_release, + .read = userio_char_read, + .write = userio_char_write, + .poll = userio_char_poll, + .llseek = no_llseek, +}; + +static struct miscdevice userio_misc = { + .fops = &userio_fops, + .minor = USERIO_MINOR, + .name = USERIO_NAME, +}; +module_driver(userio_misc, misc_register, misc_deregister); + +MODULE_ALIAS_MISCDEV(USERIO_MINOR); +MODULE_ALIAS("devname:" USERIO_NAME); + +MODULE_AUTHOR("Stephen Chandler Paul "); +MODULE_DESCRIPTION("Virtual Serio Device Support"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 81f6e427ba6b..543037465973 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -49,6 +49,7 @@ #define LOOP_CTRL_MINOR 237 #define VHOST_NET_MINOR 238 #define UHID_MINOR 239 +#define USERIO_MINOR 240 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/uapi/linux/userio.h b/include/uapi/linux/userio.h new file mode 100644 index 000000000000..37d147f0a13a --- /dev/null +++ b/include/uapi/linux/userio.h @@ -0,0 +1,44 @@ +/* + * userio: virtual serio device support + * Copyright (C) 2015 Red Hat + * Copyright (C) 2015 Lyude (Stephen Chandler Paul) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the 
License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * This is the public header used for user-space communication with the userio + * driver. __attribute__((__packed__)) is used for all structs to keep ABI + * compatibility between all architectures. + */ + +#ifndef _USERIO_H +#define _USERIO_H + +#include + +enum userio_cmd_type { + USERIO_CMD_REGISTER = 0, + USERIO_CMD_SET_PORT_TYPE = 1, + USERIO_CMD_SEND_INTERRUPT = 2 +}; + +/* + * userio Commands + * All commands sent to /dev/userio are encoded using this structure. The type + * field should contain a USERIO_CMD* value that indicates what kind of command + * is being sent to userio. The data field should contain the accompanying + * argument for the command, if there is one. + */ +struct userio_cmd { + __u8 type; + __u8 data; +} __attribute__((__packed__)); + +#endif /* !_USERIO_H */ -- cgit v1.2.3 From fe56b9e6a8d957d6a20729d626027f800c17a2da Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Oct 2015 11:02:25 +0200 Subject: qed: Add module with basic common support The Qlogic Everest Driver is the backend module for the QL4xxx ethernet products by Qlogic. This module serves two main purposes: 1. It is responsible for containing all the common code that will be shared between the various drivers used with this line of products. Flows such as chip initialization and de-initialization fall under this category. 2. It abstracts the protocol-specific HW & FW components, allowing the protocol drivers to have clean APIs whose slowpath configuration is detached from the actual HSI. This adds a very basic module without any protocol-specific bits. I.e., this adds a basic implementation that almost entirely falls under the first category. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- MAINTAINERS | 10 + drivers/net/ethernet/qlogic/Kconfig | 6 + drivers/net/ethernet/qlogic/Makefile | 1 + drivers/net/ethernet/qlogic/qed/Makefile | 4 + drivers/net/ethernet/qlogic/qed/qed.h | 448 ++ drivers/net/ethernet/qlogic/qed/qed_cxt.c | 847 ++++ drivers/net/ethernet/qlogic/qed/qed_cxt.h | 139 + drivers/net/ethernet/qlogic/qed/qed_dev.c | 1277 +++++ drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 222 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 4966 ++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_hw.c | 776 +++ drivers/net/ethernet/qlogic/qed/qed_hw.h | 263 ++ .../net/ethernet/qlogic/qed/qed_init_fw_funcs.c | 798 ++++ drivers/net/ethernet/qlogic/qed/qed_init_ops.c | 531 +++ drivers/net/ethernet/qlogic/qed/qed_init_ops.h | 110 + drivers/net/ethernet/qlogic/qed/qed_int.c | 802 ++++ drivers/net/ethernet/qlogic/qed/qed_int.h | 391 ++ drivers/net/ethernet/qlogic/qed/qed_main.c | 948 ++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 549 +++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 232 + drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 366 ++ drivers/net/ethernet/qlogic/qed/qed_sp.h | 333 ++ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 170 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 831 ++++ include/linux/qed/common_hsi.h | 607 +++ include/linux/qed/qed_chain.h | 539 +++ include/linux/qed/qed_if.h | 498 ++ 27 files changed, 16664 insertions(+) create mode 100644 drivers/net/ethernet/qlogic/qed/Makefile create mode 100644 drivers/net/ethernet/qlogic/qed/qed.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_cxt.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_cxt.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_dev.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_dev_api.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_hsi.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_hw.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_hw.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_init_ops.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_init_ops.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_int.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_int.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_main.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_mcp.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_mcp.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_sp.h create mode 100644 drivers/net/ethernet/qlogic/qed/qed_sp_commands.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_spq.c create mode 100644 include/linux/qed/common_hsi.h create mode 100644 include/linux/qed/qed_chain.h create mode 100644 include/linux/qed/qed_if.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index fb8603e2a3f3..219c789aff27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8540,6 +8540,16 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qlge/ +QLOGIC QL4xxx ETHERNET DRIVER +M: Yuval Mintz +M: Ariel Elior +M: everest-linux-l2@qlogic.com +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/qlogic/qed/ +F: include/linux/qed/ +F: drivers/net/ethernet/qlogic/qede/ + QNX4 FILESYSTEM M: Anders Larsen W: http://www.alarsen.net/linux/qnx4fs/ diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index f1f0108c275d..58c3fb388f46 100644 --- 
a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -91,4 +91,10 @@ config NETXEN_NIC ---help--- This enables the support for NetXen's Gigabit Ethernet card. +config QED + tristate "QLogic QED 25/40/100Gb core driver" + depends on PCI + ---help--- + This enables the support for ... + endif # NET_VENDOR_QLOGIC diff --git a/drivers/net/ethernet/qlogic/Makefile b/drivers/net/ethernet/qlogic/Makefile index b2a283d9ae60..7600138268ee 100644 --- a/drivers/net/ethernet/qlogic/Makefile +++ b/drivers/net/ethernet/qlogic/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_QLA3XXX) += qla3xxx.o obj-$(CONFIG_QLCNIC) += qlcnic/ obj-$(CONFIG_QLGE) += qlge/ obj-$(CONFIG_NETXEN_NIC) += netxen/ +obj-$(CONFIG_QED) += qed/ diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile new file mode 100644 index 000000000000..6969b5c66929 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_QED) := qed.o + +qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ + qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h new file mode 100644 index 000000000000..a63ef3120d78 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -0,0 +1,448 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_H +#define _QED_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed_hsi.h" + +#define DRV_MODULE_VERSION "8.4.0.0" + +#define MAX_HWFNS_PER_DEVICE (4) +#define NAME_SIZE 16 +#define VER_SIZE 16 + +/* cau states */ +enum qed_coalescing_mode { + QED_COAL_MODE_DISABLE, + QED_COAL_MODE_ENABLE +}; + +struct qed_eth_cb_ops; +struct qed_dev_info; + +/* helpers */ +static inline u32 qed_db_addr(u32 cid, u32 DEMS) +{ + u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) | + FIELD_VALUE(DB_LEGACY_ADDR_ICID, cid); + + return db_addr; +} + +#define ALIGNED_TYPE_SIZE(type_name, p_hwfn) \ + ((sizeof(type_name) + (u32)(1 << (p_hwfn->cdev->cache_shift)) - 1) & \ + ~((1 << (p_hwfn->cdev->cache_shift)) - 1)) + +#define for_each_hwfn(cdev, i) for (i = 0; i < cdev->num_hwfns; i++) + +#define D_TRINE(val, cond1, cond2, true1, true2, def) \ + (val == (cond1) ? true1 : \ + (val == (cond2) ? true2 : def)) + +/* forward */ +struct qed_ptt_pool; +struct qed_spq; +struct qed_sb_info; +struct qed_sb_attn_info; +struct qed_cxt_mngr; +struct qed_sb_sp_info; +struct qed_mcp_info; + +struct qed_rt_data { + u32 init_val; + bool b_valid; +}; + +/* The PCI personality is not quite synonymous to protocol ID: + * 1. All personalities need CORE connections + * 2. 
The Ethernet personality may also support the RoCE protocol + */ +enum qed_pci_personality { + QED_PCI_ETH, + QED_PCI_DEFAULT /* default in shmem */ +}; + +/* All VFs are symmetric, all counters are PF + all VFs */ +struct qed_qm_iids { + u32 cids; + u32 vf_cids; + u32 tids; +}; + +enum QED_RESOURCES { + QED_SB, + QED_VPORT, + QED_PQ, + QED_RL, + QED_ILT, + QED_MAX_RESC, +}; + +struct qed_hw_info { + /* PCI personality */ + enum qed_pci_personality personality; + + /* Resource Allocation scheme results */ + u32 resc_start[QED_MAX_RESC]; + u32 resc_num[QED_MAX_RESC]; + +#define RESC_START(_p_hwfn, resc) ((_p_hwfn)->hw_info.resc_start[resc]) +#define RESC_NUM(_p_hwfn, resc) ((_p_hwfn)->hw_info.resc_num[resc]) +#define FEAT_NUM(_p_hwfn, resc) ((_p_hwfn)->hw_info.feat_num[resc]) + + u8 num_tc; + u8 offload_tc; + u8 non_offload_tc; + + u32 concrete_fid; + u16 opaque_fid; + u16 ovlan; + u32 part_num[4]; + + u32 vendor_id; + u32 device_id; + + unsigned char hw_mac_addr[ETH_ALEN]; + + struct qed_igu_info *p_igu_info; + + u32 port_mode; + u32 hw_mode; +};
+ +struct qed_hw_cid_data { + u32 cid; + bool b_cid_allocated; + + /* Additional identifiers */ + u16 opaque_fid; + u8 vport_id; +}; + +/* maximum size of read/write commands (HW limit) */ +#define DMAE_MAX_RW_SIZE 0x2000 + +struct qed_dmae_info { + /* Mutex for synchronizing access to functions */ + struct mutex mutex; + + u8 channel; + + dma_addr_t completion_word_phys_addr; + + /* The memory location where the DMAE writes the completion + * value when an operation is finished on this context. + */ + u32 *p_completion_word; + + dma_addr_t intermediate_buffer_phys_addr; + + /* An intermediate buffer for DMAE operations that use virtual + * addresses - data is DMA'd to/from this buffer and then + * memcpy'd to/from the virtual address + */ + u32 *p_intermediate_buffer; + + dma_addr_t dmae_cmd_phys_addr; + struct dmae_cmd *p_dmae_cmd; +};
+ +struct qed_qm_info { + struct init_qm_pq_params *qm_pq_params; + struct init_qm_vport_params *qm_vport_params; + struct init_qm_port_params *qm_port_params; + u16 start_pq; + u8 start_vport; + u8 pure_lb_pq; + u8 offload_pq; + u8 pure_ack_pq; + u8 vf_queues_offset; + u16 num_pqs; + u16 num_vf_pqs; + u8 num_vports; + u8 max_phys_tcs_per_port; + bool pf_rl_en; + bool pf_wfq_en; + bool vport_rl_en; + bool vport_wfq_en; + u8 pf_wfq; + u32 pf_rl; +}; + +struct qed_fw_data { + const u8 *modes_tree_buf; + union init_op *init_ops; + const u32 *arr_data; + u32 init_ops_size; +}; + +struct qed_simd_fp_handler { + void *token; + void (*func)(void *); +};
+ +struct qed_hwfn { + struct qed_dev *cdev; + u8 my_id; /* ID inside the PF */ +#define IS_LEAD_HWFN(edev) (!((edev)->my_id)) + u8 rel_pf_id; /* Relative to engine */ + u8 abs_pf_id; +#define QED_PATH_ID(_p_hwfn) ((_p_hwfn)->abs_pf_id & 1) + u8 port_id; + bool b_active; + + u32 dp_module; + u8 dp_level; + char name[NAME_SIZE]; + + bool first_on_engine; + bool hw_init_done; + + /* BAR access */ + void __iomem *regview; + void __iomem *doorbells; + u64 db_phys_addr; + unsigned long db_size; + + /* PTT pool */ + struct qed_ptt_pool *p_ptt_pool; + + /* HW info */ + struct qed_hw_info hw_info; + + /* rt_array (for init-tool) */ + struct qed_rt_data *rt_data; + + /* SPQ */ + struct qed_spq *p_spq; + + /* EQ */ + struct qed_eq *p_eq; + + /* Consolidation queue */ + struct qed_consq *p_consq; + + /* Slow-Path definitions */ + struct tasklet_struct *sp_dpc; + bool b_sp_dpc_enabled; + + struct qed_ptt *p_main_ptt; + struct qed_ptt *p_dpc_ptt; + + struct qed_sb_sp_info *p_sp_sb; + struct 
qed_sb_attn_info *p_sb_attn; + + /* Protocol related */ + struct qed_pf_params pf_params; + + /* Array of sb_info of all status blocks */ + struct qed_sb_info *sbs_info[MAX_SB_PER_PF_MIMD]; + u16 num_sbs; + + struct qed_cxt_mngr *p_cxt_mngr; + + /* Flag indicating whether interrupts are enabled or not*/ + bool b_int_enabled; + + struct qed_mcp_info *mcp_info; + + struct qed_dmae_info dmae_info; + + /* QM init */ + struct qed_qm_info qm_info; + + /* Buffer for unzipping firmware data */ + void *unzip_buf; + + struct qed_simd_fp_handler simd_proto_handler[64]; + + struct z_stream_s *stream; +}; + +struct pci_params { + int pm_cap; + + unsigned long mem_start; + unsigned long mem_end; + unsigned int irq; + u8 pf_num; +}; + +struct qed_int_param { + u32 int_mode; + u8 num_vectors; + u8 min_msix_cnt; /* for minimal functionality */ +}; + +struct qed_int_params { + struct qed_int_param in; + struct qed_int_param out; + struct msix_entry *msix_table; + bool fp_initialized; + u8 fp_msix_base; + u8 fp_msix_cnt; +}; + +struct qed_dev { + u32 dp_module; + u8 dp_level; + char name[NAME_SIZE]; + + u8 type; +#define QED_DEV_TYPE_BB_A0 (0 << 0) +#define QED_DEV_TYPE_MASK (0x3) +#define QED_DEV_TYPE_SHIFT (0) + + u16 chip_num; +#define CHIP_NUM_MASK 0xffff +#define CHIP_NUM_SHIFT 16 + + u16 chip_rev; +#define CHIP_REV_MASK 0xf +#define CHIP_REV_SHIFT 12 + + u16 chip_metal; +#define CHIP_METAL_MASK 0xff +#define CHIP_METAL_SHIFT 4 + + u16 chip_bond_id; +#define CHIP_BOND_ID_MASK 0xf +#define CHIP_BOND_ID_SHIFT 0 + + u8 num_engines; + u8 num_ports_in_engines; + u8 num_funcs_in_port; + + u8 path_id; + enum mf_mode mf_mode; +#define IS_MF(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode != SF) +#define IS_MF_SI(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == MF_NPAR) +#define IS_MF_SD(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == MF_OVLAN) + + int pcie_width; + int pcie_speed; + u8 ver_str[VER_SIZE]; + + /* Add MF related configuration */ + u8 mcp_rev; + u8 boot_mode; + + u8 wol; + + u32 int_mode; + enum qed_coalescing_mode int_coalescing_mode; + u8 rx_coalesce_usecs; + u8 tx_coalesce_usecs; + + /* Start Bar offset of first hwfn */ + void __iomem *regview; + void __iomem *doorbells; + u64 db_phys_addr; + unsigned long db_size; + + /* PCI */ + u8 cache_shift; + + /* Init */ + const struct iro *iro_arr; +#define IRO (p_hwfn->cdev->iro_arr) + + /* HW functions */ + u8 num_hwfns; + struct qed_hwfn hwfns[MAX_HWFNS_PER_DEVICE]; + + u32 drv_type; + + struct qed_eth_stats *reset_stats; + struct qed_fw_data *fw_data; + + u32 mcp_nvm_resp; + + /* Linux specific here */ + struct qede_dev *edev; + struct pci_dev *pdev; + int msg_enable; + + struct pci_params pci_params; + + struct qed_int_params int_params; + + u8 protocol; +#define IS_QED_ETH_IF(cdev) ((cdev)->protocol == QED_PROTOCOL_ETH) + + const struct firmware *firmware; +}; + +#define QED_GET_TYPE(dev) (((dev)->type & QED_DEV_TYPE_MASK) >> \ + QED_DEV_TYPE_SHIFT) +#define QED_IS_BB_A0(dev) (QED_GET_TYPE(dev) == QED_DEV_TYPE_BB_A0) +#define QED_IS_BB(dev) (QED_IS_BB_A0(dev)) + +#define NUM_OF_SBS(dev) MAX_SB_PER_PATH_BB +#define NUM_OF_ENG_PFS(dev) MAX_NUM_PFS_BB + +/** + * @brief qed_concrete_to_sw_fid - get the sw function id from + * the concrete value. 
+ * + * @param concrete_fid + * + * @return u8 + */ +static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, + u32 concrete_fid) +{ + u8 pfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_PFID); + + return pfid; +} + +#define PURE_LB_TC 8 + +#define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) + +/* Other Linux specific common definitions */ +#define DP_NAME(cdev) ((cdev)->name) + +#define REG_ADDR(cdev, offset) (void __iomem *)((u8 __iomem *)\ + (cdev->regview) + \ + (offset)) + +#define REG_RD(cdev, offset) readl(REG_ADDR(cdev, offset)) +#define REG_WR(cdev, offset, val) writel((u32)val, REG_ADDR(cdev, offset)) +#define REG_WR16(cdev, offset, val) writew((u16)val, REG_ADDR(cdev, offset)) + +#define DOORBELL(cdev, db_addr, val) \ + writel((u32)val, (void __iomem *)((u8 __iomem *)\ + (cdev->doorbells) + (db_addr))) + +/* Prototypes */ +int qed_fill_dev_info(struct qed_dev *cdev, + struct qed_dev_info *dev_info); +u32 qed_unzip_data(struct qed_hwfn *p_hwfn, + u32 input_len, u8 *input_buf, + u32 max_size, u8 *unzip_buf); + +#define QED_ETH_INTERFACE_VERSION 300 + +#endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c new file mode 100644 index 000000000000..7ccdb46c6764 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -0,0 +1,847 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_reg_addr.h" + +/* Max number of connection types in HW (DQ/CDU etc.) */ +#define MAX_CONN_TYPES PROTOCOLID_COMMON +#define NUM_TASK_TYPES 2 +#define NUM_TASK_PF_SEGMENTS 4 + +/* QM constants */ +#define QM_PQ_ELEMENT_SIZE 4 /* in bytes */ + +/* Doorbell-Queue constants */ +#define DQ_RANGE_SHIFT 4 +#define DQ_RANGE_ALIGN BIT(DQ_RANGE_SHIFT) + +/* ILT constants */ +#define ILT_DEFAULT_HW_P_SIZE 3 +#define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12)) +#define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET + +/* ILT entry structure */ +#define ILT_ENTRY_PHY_ADDR_MASK 0x000FFFFFFFFFFFULL +#define ILT_ENTRY_PHY_ADDR_SHIFT 0 +#define ILT_ENTRY_VALID_MASK 0x1ULL +#define ILT_ENTRY_VALID_SHIFT 52 +#define ILT_ENTRY_IN_REGS 2 +#define ILT_REG_SIZE_IN_BYTES 4 + +/* connection context union */ +union conn_context { + struct core_conn_context core_ctx; + struct eth_conn_context eth_ctx; +}; + +#define CONN_CXT_SIZE(p_hwfn) \ + ALIGNED_TYPE_SIZE(union conn_context, p_hwfn) + +/* PF per protocol configuration object */ +struct qed_conn_type_cfg { + u32 cid_count; + u32 cid_start; +}; + +/* ILT Client configuration, per connection type (protocol) resources. 
*/ +#define ILT_CLI_PF_BLOCKS (1 + NUM_TASK_PF_SEGMENTS * 2) +#define CDUC_BLK (0) + +enum ilt_clients { + ILT_CLI_CDUC, + ILT_CLI_QM, + ILT_CLI_MAX +}; + +struct ilt_cfg_pair { + u32 reg; + u32 val; +}; + +struct qed_ilt_cli_blk { + u32 total_size; /* 0 means not active */ + u32 real_size_in_page; + u32 start_line; +}; + +struct qed_ilt_client_cfg { + bool active; + + /* ILT boundaries */ + struct ilt_cfg_pair first; + struct ilt_cfg_pair last; + struct ilt_cfg_pair p_size; + + /* ILT client blocks for PF */ + struct qed_ilt_cli_blk pf_blks[ILT_CLI_PF_BLOCKS]; + u32 pf_total_lines; +}; + +/* Per Path - + * ILT shadow table + * Protocol acquired CID lists + * PF start line in ILT + */ +struct qed_dma_mem { + dma_addr_t p_phys; + void *p_virt; + size_t size; +}; + +struct qed_cid_acquired_map { + u32 start_cid; + u32 max_count; + unsigned long *cid_map; +}; + +struct qed_cxt_mngr { + /* Per protocol configuration */ + struct qed_conn_type_cfg conn_cfg[MAX_CONN_TYPES]; + + /* computed ILT structure */ + struct qed_ilt_client_cfg clients[ILT_CLI_MAX]; + + /* Acquired CIDs */ + struct qed_cid_acquired_map acquired[MAX_CONN_TYPES]; + + /* ILT shadow table */ + struct qed_dma_mem *ilt_shadow; + u32 pf_start_line; +};
+ +static u32 qed_cxt_cdu_iids(struct qed_cxt_mngr *p_mngr) +{ + u32 type, pf_cids = 0; + + for (type = 0; type < MAX_CONN_TYPES; type++) + pf_cids += p_mngr->conn_cfg[type].cid_count; + + return pf_cids; +} + +static void qed_cxt_qm_iids(struct qed_hwfn *p_hwfn, + struct qed_qm_iids *iids) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + int type; + + for (type = 0; type < MAX_CONN_TYPES; type++) + iids->cids += p_mngr->conn_cfg[type].cid_count; + + DP_VERBOSE(p_hwfn, QED_MSG_ILT, "iids: CIDS %08x\n", iids->cids); +} + +/* set the iids count per protocol */ +static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, + enum protocol_type type, + u32 cid_count) +{ + struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; + struct qed_conn_type_cfg *p_conn = &p_mgr->conn_cfg[type]; + + p_conn->cid_count = roundup(cid_count, DQ_RANGE_ALIGN); +} + +static void qed_ilt_cli_blk_fill(struct qed_ilt_client_cfg *p_cli, + struct qed_ilt_cli_blk *p_blk, + u32 start_line, u32 total_size, + u32 elem_size) +{ + u32 ilt_size = ILT_PAGE_IN_BYTES(p_cli->p_size.val); + + /* verify that it's called only once for each block */ + if (p_blk->total_size) + return; + + p_blk->total_size = total_size; + p_blk->real_size_in_page = 0; + if (elem_size) + p_blk->real_size_in_page = (ilt_size / elem_size) * elem_size; + p_blk->start_line = start_line; +} + +static void qed_ilt_cli_adv_line(struct qed_hwfn *p_hwfn, + struct qed_ilt_client_cfg *p_cli, + struct qed_ilt_cli_blk *p_blk, + u32 *p_line, enum ilt_clients client_id) +{ + if (!p_blk->total_size) + return; + + if (!p_cli->active) + p_cli->first.val = *p_line; + + p_cli->active = true; + *p_line += DIV_ROUND_UP(p_blk->total_size, + p_blk->real_size_in_page); + p_cli->last.val = *p_line - 1; +
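+ /* A worked example of the advancement above (numbers are
+  * illustrative, not driver output): a block with total_size 0x6000
+  * and real_size_in_page 0x1000 consumes
+  * DIV_ROUND_UP(0x6000, 0x1000) = 6 lines, so *p_line grows by 6 and
+  * last.val ends up at first.val + 5.
+  */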
+ DP_VERBOSE(p_hwfn, QED_MSG_ILT, + "ILT[Client %d] - Lines: [%08x - %08x]. Block - Size %08x [Real %08x] Start line %d\n", + client_id, p_cli->first.val, + p_cli->last.val, p_blk->total_size, + p_blk->real_size_in_page, p_blk->start_line); +}
+ +int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + struct qed_ilt_client_cfg *p_cli; + struct qed_ilt_cli_blk *p_blk; + u32 curr_line, total, pf_cids; + struct qed_qm_iids qm_iids; + + memset(&qm_iids, 0, sizeof(qm_iids)); + + p_mngr->pf_start_line = RESC_START(p_hwfn, QED_ILT); + + DP_VERBOSE(p_hwfn, QED_MSG_ILT, + "hwfn [%d] - Set context manager starting line to be 0x%08x\n", + p_hwfn->my_id, p_hwfn->p_cxt_mngr->pf_start_line); + + /* CDUC */ + p_cli = &p_mngr->clients[ILT_CLI_CDUC]; + curr_line = p_mngr->pf_start_line; + p_cli->pf_total_lines = 0; + + /* get the counters for the CDUC and QM clients */ + pf_cids = qed_cxt_cdu_iids(p_mngr); + + p_blk = &p_cli->pf_blks[CDUC_BLK]; + + total = pf_cids * CONN_CXT_SIZE(p_hwfn); + + qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line, + total, CONN_CXT_SIZE(p_hwfn)); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, ILT_CLI_CDUC); + p_cli->pf_total_lines = curr_line - p_blk->start_line; + + /* QM */ + p_cli = &p_mngr->clients[ILT_CLI_QM]; + p_blk = &p_cli->pf_blks[0]; + + qed_cxt_qm_iids(p_hwfn, &qm_iids); + total = qed_qm_pf_mem_size(p_hwfn->rel_pf_id, qm_iids.cids, 0, 0, + p_hwfn->qm_info.num_pqs, 0); + + DP_VERBOSE(p_hwfn, QED_MSG_ILT, + "QM ILT Info, (cids=%d, num_pqs=%d, memory_size=%d)\n", + qm_iids.cids, p_hwfn->qm_info.num_pqs, total); + + qed_ilt_cli_blk_fill(p_cli, p_blk, + curr_line, total * 0x1000, + QM_PQ_ELEMENT_SIZE); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, ILT_CLI_QM); + p_cli->pf_total_lines = curr_line - p_blk->start_line; + + if (curr_line - p_hwfn->p_cxt_mngr->pf_start_line > + RESC_NUM(p_hwfn, QED_ILT)) { + DP_ERR(p_hwfn, "too many ilt lines...#lines=%d\n", + curr_line - p_hwfn->p_cxt_mngr->pf_start_line); + return -EINVAL; + } + + return 0; +}
+ +#define for_each_ilt_valid_client(pos, clients) \ + for (pos = 0; pos < ILT_CLI_MAX; pos++) + +/* Total number of ILT lines used by this PF */ +static u32 qed_cxt_ilt_shadow_size(struct qed_ilt_client_cfg *ilt_clients) +{ + u32 size = 0; + u32 i; + + for_each_ilt_valid_client(i, ilt_clients) { + if (!ilt_clients[i].active) + continue; + size += (ilt_clients[i].last.val - + ilt_clients[i].first.val + 1); + } + + return size; +} + +static void qed_ilt_shadow_free(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *p_cli = p_hwfn->p_cxt_mngr->clients; + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 ilt_size, i; + + ilt_size = qed_cxt_ilt_shadow_size(p_cli); + + for (i = 0; p_mngr->ilt_shadow && i < ilt_size; i++) { + struct qed_dma_mem *p_dma = &p_mngr->ilt_shadow[i]; + + if (p_dma->p_virt) + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + p_dma->size, p_dma->p_virt, + p_dma->p_phys); + p_dma->p_virt = NULL; + } + kfree(p_mngr->ilt_shadow); +} + +static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, + struct qed_ilt_cli_blk *p_blk, + enum ilt_clients ilt_client, + u32 start_line_offset) +{ + struct qed_dma_mem *ilt_shadow = p_hwfn->p_cxt_mngr->ilt_shadow; + u32 lines, line, sz_left; + + if (!p_blk->total_size) + return 0; + + sz_left = p_blk->total_size; + lines = DIV_ROUND_UP(sz_left, p_blk->real_size_in_page); + line = p_blk->start_line + start_line_offset - + p_hwfn->p_cxt_mngr->pf_start_line; + + for (; lines; lines--) { + dma_addr_t p_phys; + void *p_virt; + u32 size; + + size = min_t(u32, sz_left, + 
p_blk->real_size_in_page); + p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + size, + &p_phys, + GFP_KERNEL); + if (!p_virt) + return -ENOMEM; + memset(p_virt, 0, size); + + ilt_shadow[line].p_phys = p_phys; + ilt_shadow[line].p_virt = p_virt; + ilt_shadow[line].size = size; + + DP_VERBOSE(p_hwfn, QED_MSG_ILT, + "ILT shadow: Line [%d] Physical 0x%llx Virtual %p Size %d\n", + line, (u64)p_phys, p_virt, size); + + sz_left -= size; + line++; + } + + return 0; +} + +static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + struct qed_ilt_client_cfg *clients = p_mngr->clients; + struct qed_ilt_cli_blk *p_blk; + u32 size, i, j; + int rc; + + size = qed_cxt_ilt_shadow_size(clients); + p_mngr->ilt_shadow = kcalloc(size, sizeof(struct qed_dma_mem), + GFP_KERNEL); + if (!p_mngr->ilt_shadow) { + DP_NOTICE(p_hwfn, "Failed to allocate ilt shadow table\n"); + rc = -ENOMEM; + goto ilt_shadow_fail; + } + + DP_VERBOSE(p_hwfn, QED_MSG_ILT, + "Allocated 0x%x bytes for ilt shadow\n", + (u32)(size * sizeof(struct qed_dma_mem))); + + for_each_ilt_valid_client(i, clients) { + if (!clients[i].active) + continue; + for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) { + p_blk = &clients[i].pf_blks[j]; + rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, 0); + if (rc != 0) + goto ilt_shadow_fail; + } + } + + return 0; + +ilt_shadow_fail: + qed_ilt_shadow_free(p_hwfn); + return rc; +} + +static void qed_cid_map_free(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 type; + + for (type = 0; type < MAX_CONN_TYPES; type++) { + kfree(p_mngr->acquired[type].cid_map); + p_mngr->acquired[type].max_count = 0; + p_mngr->acquired[type].start_cid = 0; + } +} + +static int qed_cid_map_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 start_cid = 0; + u32 type; + + for (type = 0; type < MAX_CONN_TYPES; type++) { + u32 cid_cnt = p_hwfn->p_cxt_mngr->conn_cfg[type].cid_count; + u32 size; + + if (cid_cnt == 0) + continue; + + size = DIV_ROUND_UP(cid_cnt, + sizeof(unsigned long) * BITS_PER_BYTE) * + sizeof(unsigned long); + p_mngr->acquired[type].cid_map = kzalloc(size, GFP_KERNEL); + if (!p_mngr->acquired[type].cid_map) + goto cid_map_fail; + + p_mngr->acquired[type].max_count = cid_cnt; + p_mngr->acquired[type].start_cid = start_cid; + + p_hwfn->p_cxt_mngr->conn_cfg[type].cid_start = start_cid; + + DP_VERBOSE(p_hwfn, QED_MSG_CXT, + "Type %08x start: %08x count %08x\n", + type, p_mngr->acquired[type].start_cid, + p_mngr->acquired[type].max_count); + start_cid += cid_cnt; + } + + return 0; + +cid_map_fail: + qed_cid_map_free(p_hwfn); + return -ENOMEM; +} + +int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr; + u32 i; + + p_mngr = kzalloc(sizeof(*p_mngr), GFP_ATOMIC); + if (!p_mngr) { + DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_cxt_mngr'\n"); + return -ENOMEM; + } + + /* Initialize ILT client registers */ + p_mngr->clients[ILT_CLI_CDUC].first.reg = ILT_CFG_REG(CDUC, FIRST_ILT); + p_mngr->clients[ILT_CLI_CDUC].last.reg = ILT_CFG_REG(CDUC, LAST_ILT); + p_mngr->clients[ILT_CLI_CDUC].p_size.reg = ILT_CFG_REG(CDUC, P_SIZE); + + p_mngr->clients[ILT_CLI_QM].first.reg = ILT_CFG_REG(QM, FIRST_ILT); + p_mngr->clients[ILT_CLI_QM].last.reg = ILT_CFG_REG(QM, LAST_ILT); + p_mngr->clients[ILT_CLI_QM].p_size.reg = ILT_CFG_REG(QM, P_SIZE); + + /* default ILT page size for all clients is 32K */ + for (i = 0; i < ILT_CLI_MAX; i++) + p_mngr->clients[i].p_size.val = ILT_DEFAULT_HW_P_SIZE; + + /* Set 
the cxt mngr pointer prior to further allocations */ + p_hwfn->p_cxt_mngr = p_mngr; + + return 0; +} + +int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn) +{ + int rc; + + /* Allocate the ILT shadow table */ + rc = qed_ilt_shadow_alloc(p_hwfn); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate ilt memory\n"); + goto tables_alloc_fail; + } + + /* Allocate and initialize the acquired cids bitmaps */ + rc = qed_cid_map_alloc(p_hwfn); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate cid maps\n"); + goto tables_alloc_fail; + } + + return 0; + +tables_alloc_fail: + qed_cxt_mngr_free(p_hwfn); + return rc; +}
+ +void qed_cxt_mngr_free(struct qed_hwfn *p_hwfn) +{ + if (!p_hwfn->p_cxt_mngr) + return; + + qed_cid_map_free(p_hwfn); + qed_ilt_shadow_free(p_hwfn); + kfree(p_hwfn->p_cxt_mngr); + + p_hwfn->p_cxt_mngr = NULL; +} + +void qed_cxt_mngr_setup(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + int type; + + /* Reset acquired cids */ + for (type = 0; type < MAX_CONN_TYPES; type++) { + u32 cid_cnt = p_hwfn->p_cxt_mngr->conn_cfg[type].cid_count; + + if (cid_cnt == 0) + continue; + + memset(p_mngr->acquired[type].cid_map, 0, + DIV_ROUND_UP(cid_cnt, + sizeof(unsigned long) * BITS_PER_BYTE) * + sizeof(unsigned long)); + } +}
+ +/* CDU Common */ +#define CDUC_CXT_SIZE_SHIFT \ + CDU_REG_CID_ADDR_PARAMS_CONTEXT_SIZE_SHIFT + +#define CDUC_CXT_SIZE_MASK \ + (CDU_REG_CID_ADDR_PARAMS_CONTEXT_SIZE >> CDUC_CXT_SIZE_SHIFT) + +#define CDUC_BLOCK_WASTE_SHIFT \ + CDU_REG_CID_ADDR_PARAMS_BLOCK_WASTE_SHIFT + +#define CDUC_BLOCK_WASTE_MASK \ + (CDU_REG_CID_ADDR_PARAMS_BLOCK_WASTE >> CDUC_BLOCK_WASTE_SHIFT) + +#define CDUC_NCIB_SHIFT \ + CDU_REG_CID_ADDR_PARAMS_NCIB_SHIFT + +#define CDUC_NCIB_MASK \ + (CDU_REG_CID_ADDR_PARAMS_NCIB >> CDUC_NCIB_SHIFT) + +static void qed_cdu_init_common(struct qed_hwfn *p_hwfn) +{ + u32 page_sz, elems_per_page, block_waste, cxt_size, cdu_params = 0; + + /* CDUC - connection configuration */ + page_sz = p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUC].p_size.val; + cxt_size = CONN_CXT_SIZE(p_hwfn); + elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + block_waste = ILT_PAGE_IN_BYTES(page_sz) - elems_per_page * cxt_size; + + SET_FIELD(cdu_params, CDUC_CXT_SIZE, cxt_size); + SET_FIELD(cdu_params, CDUC_BLOCK_WASTE, block_waste); + SET_FIELD(cdu_params, CDUC_NCIB, elems_per_page); + STORE_RT_REG(p_hwfn, CDU_REG_CID_ADDR_PARAMS_RT_OFFSET, cdu_params); +}
+ +void qed_qm_init_pf(struct qed_hwfn *p_hwfn) +{ + struct qed_qm_pf_rt_init_params params; + struct qed_qm_info *qm_info = &p_hwfn->qm_info; + struct qed_qm_iids iids; + + memset(&iids, 0, sizeof(iids)); + qed_cxt_qm_iids(p_hwfn, &iids); + + memset(&params, 0, sizeof(params)); + params.port_id = p_hwfn->port_id; + params.pf_id = p_hwfn->rel_pf_id; + params.max_phys_tcs_per_port = qm_info->max_phys_tcs_per_port; + params.is_first_pf = p_hwfn->first_on_engine; + params.num_pf_cids = iids.cids; + params.start_pq = qm_info->start_pq; + params.num_pf_pqs = qm_info->num_pqs; + params.start_vport = qm_info->num_vports; + params.pf_wfq = qm_info->pf_wfq; + params.pf_rl = qm_info->pf_rl; + params.pq_params = qm_info->qm_pq_params; + params.vport_params = qm_info->qm_vport_params; + + qed_qm_pf_rt_init(p_hwfn, p_hwfn->p_main_ptt, &params); +}
+ +/* CM PF */ +static int qed_cm_init_pf(struct qed_hwfn *p_hwfn) +{ + union qed_qm_pq_params pq_params; + u16 pq; + + /* XCM pure-LB queue */ + memset(&pq_params, 0, sizeof(pq_params)); + pq_params.core.tc = LB_TC; + pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params);
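+ /* Illustrative note (an assumption, for clarity): qed_get_qm_pq() is
+  * expected to resolve the CORE/LB_TC request to the pure loopback
+  * queue prepared in qed_init_qm_info(), i.e. the value written to the
+  * XCM runtime register below is roughly
+  * qm_info->start_pq + qm_info->pure_lb_pq.
+  */
+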
STORE_RT_REG(p_hwfn, XCM_REG_CON_PHY_Q3_RT_OFFSET, pq); + + return 0; +} + +/* DQ PF */ +static void qed_dq_init_pf(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 dq_pf_max_cid = 0; + + dq_pf_max_cid += (p_mngr->conn_cfg[0].cid_count >> DQ_RANGE_SHIFT); + STORE_RT_REG(p_hwfn, DORQ_REG_PF_MAX_ICID_0_RT_OFFSET, dq_pf_max_cid); + + dq_pf_max_cid += (p_mngr->conn_cfg[1].cid_count >> DQ_RANGE_SHIFT); + STORE_RT_REG(p_hwfn, DORQ_REG_PF_MAX_ICID_1_RT_OFFSET, dq_pf_max_cid); + + dq_pf_max_cid += (p_mngr->conn_cfg[2].cid_count >> DQ_RANGE_SHIFT); + STORE_RT_REG(p_hwfn, DORQ_REG_PF_MAX_ICID_2_RT_OFFSET, dq_pf_max_cid); + + dq_pf_max_cid += (p_mngr->conn_cfg[3].cid_count >> DQ_RANGE_SHIFT); + STORE_RT_REG(p_hwfn, DORQ_REG_PF_MAX_ICID_3_RT_OFFSET, dq_pf_max_cid); + + dq_pf_max_cid += (p_mngr->conn_cfg[4].cid_count >> DQ_RANGE_SHIFT); + STORE_RT_REG(p_hwfn, DORQ_REG_PF_MAX_ICID_4_RT_OFFSET, dq_pf_max_cid); + + /* 5 - PF */ + dq_pf_max_cid += (p_mngr->conn_cfg[5].cid_count >> DQ_RANGE_SHIFT); + STORE_RT_REG(p_hwfn, DORQ_REG_PF_MAX_ICID_5_RT_OFFSET, dq_pf_max_cid); +} + +static void qed_ilt_bounds_init(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *ilt_clients; + int i; + + ilt_clients = p_hwfn->p_cxt_mngr->clients; + for_each_ilt_valid_client(i, ilt_clients) { + if (!ilt_clients[i].active) + continue; + STORE_RT_REG(p_hwfn, + ilt_clients[i].first.reg, + ilt_clients[i].first.val); + STORE_RT_REG(p_hwfn, + ilt_clients[i].last.reg, + ilt_clients[i].last.val); + STORE_RT_REG(p_hwfn, + ilt_clients[i].p_size.reg, + ilt_clients[i].p_size.val); + } +} + +/* ILT (PSWRQ2) PF */ +static void qed_ilt_init_pf(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *clients; + struct qed_cxt_mngr *p_mngr; + struct qed_dma_mem *p_shdw; + u32 line, rt_offst, i; + + qed_ilt_bounds_init(p_hwfn); + + p_mngr = p_hwfn->p_cxt_mngr; + p_shdw = p_mngr->ilt_shadow; + clients = p_hwfn->p_cxt_mngr->clients; + + for_each_ilt_valid_client(i, clients) { + if (!clients[i].active) + continue; + + /** Client's 1st val and RT array are absolute, ILT shadows' + * lines are relative. 
+ */ + line = clients[i].first.val - p_mngr->pf_start_line; + rt_offst = PSWRQ2_REG_ILT_MEMORY_RT_OFFSET + + clients[i].first.val * ILT_ENTRY_IN_REGS; + + for (; line <= clients[i].last.val - p_mngr->pf_start_line; + line++, rt_offst += ILT_ENTRY_IN_REGS) { + u64 ilt_hw_entry = 0; + + /* p_virt could be NULL in case of dynamic + * allocation + */ + if (p_shdw[line].p_virt) { + SET_FIELD(ilt_hw_entry, ILT_ENTRY_VALID, 1ULL); + SET_FIELD(ilt_hw_entry, ILT_ENTRY_PHY_ADDR, + (p_shdw[line].p_phys >> 12)); + + DP_VERBOSE(p_hwfn, QED_MSG_ILT, + "Setting RT[0x%08x] from ILT[0x%08x] [Client is %d] to Physical addr: 0x%llx\n", + rt_offst, line, i, + (u64)(p_shdw[line].p_phys >> 12)); + } + + STORE_RT_REG_AGG(p_hwfn, rt_offst, ilt_hw_entry); + } + } +}
+ +void qed_cxt_hw_init_common(struct qed_hwfn *p_hwfn) +{ + qed_cdu_init_common(p_hwfn); +} + +void qed_cxt_hw_init_pf(struct qed_hwfn *p_hwfn) +{ + qed_qm_init_pf(p_hwfn); + qed_cm_init_pf(p_hwfn); + qed_dq_init_pf(p_hwfn); + qed_ilt_init_pf(p_hwfn); +}
+ +int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, + enum protocol_type type, + u32 *p_cid) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 rel_cid; + + if (type >= MAX_CONN_TYPES || !p_mngr->acquired[type].cid_map) { + DP_NOTICE(p_hwfn, "Invalid protocol type %d", type); + return -EINVAL; + } + + rel_cid = find_first_zero_bit(p_mngr->acquired[type].cid_map, + p_mngr->acquired[type].max_count); + + if (rel_cid >= p_mngr->acquired[type].max_count) { + DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", + type); + return -EINVAL; + } + + __set_bit(rel_cid, p_mngr->acquired[type].cid_map); + + *p_cid = rel_cid + p_mngr->acquired[type].start_cid; + + return 0; +}
+ +static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn, + u32 cid, + enum protocol_type *p_type) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + struct qed_cid_acquired_map *p_map; + enum protocol_type p; + u32 rel_cid; + + /* Iterate over protocols and find matching cid range */ + for (p = 0; p < MAX_CONN_TYPES; p++) { + p_map = &p_mngr->acquired[p]; + + if (!p_map->cid_map) + continue; + if (cid >= p_map->start_cid && + cid < p_map->start_cid + p_map->max_count) + break; + } + *p_type = p; + + if (p == MAX_CONN_TYPES) { + DP_NOTICE(p_hwfn, "Invalid CID %d", cid); + return false; + } + + rel_cid = cid - p_map->start_cid; + if (!test_bit(rel_cid, p_map->cid_map)) { + DP_NOTICE(p_hwfn, "CID %d not acquired", cid); + return false; + } + return true; +}
+ +void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, + u32 cid) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + enum protocol_type type; + bool b_acquired; + u32 rel_cid; + + /* Test acquired and find matching per-protocol map */ + b_acquired = qed_cxt_test_cid_acquired(p_hwfn, cid, &type); + + if (!b_acquired) + return; + + rel_cid = cid - p_mngr->acquired[type].start_cid; + __clear_bit(rel_cid, p_mngr->acquired[type].cid_map); +}
+ +int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, + struct qed_cxt_info *p_info) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 conn_cxt_size, hw_p_size, cxts_per_p, line; + enum protocol_type type; + bool b_acquired; + + /* Test acquired and find matching per-protocol map */ + b_acquired = qed_cxt_test_cid_acquired(p_hwfn, p_info->iid, &type); + + if (!b_acquired) + return -EINVAL; + + /* set the protocol type */ + p_info->type = type; +
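+ /* The computation below maps an iid to its ILT shadow line. As an
+  * illustration, assuming the default 32K ILT page and an assumed
+  * aligned context size of 320 bytes (the real size is
+  * ALIGNED_TYPE_SIZE() of union conn_context):
+  * cxts_per_p = 32768 / 320 = 102, so iid 205 falls on shadow line 2,
+  * at byte offset (205 % 102) * 320 within that page.
+  */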
+ /* compute context virtual pointer */ + hw_p_size = p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUC].p_size.val; + + conn_cxt_size = CONN_CXT_SIZE(p_hwfn); + cxts_per_p = ILT_PAGE_IN_BYTES(hw_p_size) / conn_cxt_size; + line = p_info->iid / cxts_per_p; + + /* Make sure context is allocated (dynamic allocation) */ + if (!p_mngr->ilt_shadow[line].p_virt) + return -EINVAL; + + p_info->p_cxt = p_mngr->ilt_shadow[line].p_virt + + p_info->iid % cxts_per_p * conn_cxt_size; + + DP_VERBOSE(p_hwfn, (QED_MSG_ILT | QED_MSG_CXT), + "Accessing ILT shadow[%d]: CXT pointer is at %p (for iid %d)\n", + p_info->iid / cxts_per_p, p_info->p_cxt, p_info->iid); + + return 0; +}
+ +int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn) +{ + struct qed_eth_pf_params *p_params = &p_hwfn->pf_params.eth_pf_params; + + /* Set the number of required CORE connections */ + u32 core_cids = 1; /* SPQ */ + + qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids); + + qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH, + p_params->num_cons); + + return 0; +}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h new file mode 100644 index 000000000000..c8e1f5e5c42b --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -0,0 +1,139 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_CXT_H +#define _QED_CXT_H + +#include +#include +#include +#include "qed_hsi.h" +#include "qed.h" + +struct qed_cxt_info { + void *p_cxt; + u32 iid; + enum protocol_type type; +};
+ +/** + * @brief qed_cxt_acquire - Acquire a new cid of a specific protocol type + * + * @param p_hwfn + * @param type + * @param p_cid + * + * @return int + */ +int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, + enum protocol_type type, + u32 *p_cid); + +/** + * @brief qed_cxt_get_cid_info - Returns the context info for a specific cid + * + * + * @param p_hwfn + * @param p_info in/out + * + * @return int + */ +int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, + struct qed_cxt_info *p_info); + +enum qed_cxt_elem_type { + QED_ELEM_CXT, + QED_ELEM_TASK +}; + +/** + * @brief qed_cxt_set_pf_params - Set the PF params for cxt init + * + * @param p_hwfn + * + * @return int + */ +int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_cxt_cfg_ilt_compute - compute ILT init parameters + * + * @param p_hwfn + * + * @return int + */ +int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_cxt_mngr_alloc - Allocate and init the context manager struct + * + * @param p_hwfn + * + * @return int + */ +int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_cxt_mngr_free + * + * @param p_hwfn + */ +void qed_cxt_mngr_free(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_cxt_tables_alloc - Allocate ILT shadow, Searcher T2, acquired map + * + * @param p_hwfn + * + * @return int + */ +int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_cxt_mngr_setup - Reset the acquired CIDs + * + * @param p_hwfn + */ +void qed_cxt_mngr_setup(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_cxt_hw_init_common - Initialize ILT and DQ, common phase, per path. + * + * + * + * @param p_hwfn + */ +void qed_cxt_hw_init_common(struct qed_hwfn *p_hwfn); +
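+/* Typical call order for this API, as driven from qed_dev.c
+ * (qed_resc_alloc(), qed_resc_setup() and qed_resc_free(); a sketch,
+ * with error handling omitted):
+ *
+ *   qed_cxt_mngr_alloc(p_hwfn);       allocate the manager struct
+ *   qed_cxt_set_pf_params(p_hwfn);    set per-protocol CID counts
+ *   qed_cxt_cfg_ilt_compute(p_hwfn);  partition the PF's ILT lines
+ *   qed_cxt_tables_alloc(p_hwfn);     ILT shadow + acquired-CID bitmaps
+ *   qed_cxt_mngr_setup(p_hwfn);       reset the acquired CIDs
+ *   ...
+ *   qed_cxt_mngr_free(p_hwfn);
+ */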
+/** + * @brief qed_cxt_hw_init_pf - Initialize ILT and DQ, PF phase, per path. + * + * + * + * @param p_hwfn + */ +void qed_cxt_hw_init_pf(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_qm_init_pf - Initialize the QM PF phase, per path + * + * @param p_hwfn + */ + +void qed_qm_init_pf(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_cxt_release - Release a cid + * + * @param p_hwfn + * @param cid + */ +void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, + u32 cid); + +#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c new file mode 100644 index 000000000000..5b845220ae8c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -0,0 +1,1277 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_int.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +/* API common to all protocols */ +void qed_init_dp(struct qed_dev *cdev, + u32 dp_module, u8 dp_level) +{ + u32 i; + + cdev->dp_level = dp_level; + cdev->dp_module = dp_module; + for (i = 0; i < MAX_HWFNS_PER_DEVICE; i++) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + p_hwfn->dp_level = dp_level; + p_hwfn->dp_module = dp_module; + } +}
+ +void qed_init_struct(struct qed_dev *cdev) +{ + u8 i; + + for (i = 0; i < MAX_HWFNS_PER_DEVICE; i++) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + p_hwfn->cdev = cdev; + p_hwfn->my_id = i; + p_hwfn->b_active = false; + + mutex_init(&p_hwfn->dmae_info.mutex); + } + + /* hwfn 0 is always active */ + cdev->hwfns[0].b_active = true; + + /* set the default cache alignment to 128 */ + cdev->cache_shift = 7; +}
+ +static void qed_qm_info_free(struct qed_hwfn *p_hwfn) +{ + struct qed_qm_info *qm_info = &p_hwfn->qm_info; + + kfree(qm_info->qm_pq_params); + qm_info->qm_pq_params = NULL; + kfree(qm_info->qm_vport_params); + qm_info->qm_vport_params = NULL; + kfree(qm_info->qm_port_params); + qm_info->qm_port_params = NULL; +}
+ +void qed_resc_free(struct qed_dev *cdev) +{ + int i; + + kfree(cdev->fw_data); + cdev->fw_data = NULL; + + kfree(cdev->reset_stats); + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + qed_cxt_mngr_free(p_hwfn); + qed_qm_info_free(p_hwfn); + qed_spq_free(p_hwfn); + qed_eq_free(p_hwfn, p_hwfn->p_eq); + qed_consq_free(p_hwfn, p_hwfn->p_consq); + qed_int_free(p_hwfn); + qed_dmae_info_free(p_hwfn); + } +}
+ +static int qed_init_qm_info(struct qed_hwfn *p_hwfn) +{ + struct qed_qm_info *qm_info = &p_hwfn->qm_info; + struct init_qm_port_params *p_qm_port; + u8 num_vports, i, vport_id, num_ports; + u16 num_pqs, multi_cos_tcs = 1; + + memset(qm_info, 0, sizeof(*qm_info)); + + num_pqs = multi_cos_tcs + 1; /* The '1' is for pure-LB */ + num_vports = (u8)RESC_NUM(p_hwfn, QED_VPORT); + + /* Sanity checking that setup requires legal number of resources */ + if (num_pqs > RESC_NUM(p_hwfn, QED_PQ)) { + DP_ERR(p_hwfn, + "Need too many Physical queues - 0x%04x when only %04x are available\n", + num_pqs, RESC_NUM(p_hwfn, QED_PQ)); + return -EINVAL; + } +
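+ /* A worked example, with illustrative resource values: multi_cos_tcs
+  * is 1, so num_pqs is 2 - PQ 0 serves the non-offload TC and PQ 1
+  * (pure_lb_pq) serves pure loopback; if RESC_START(p_hwfn, QED_PQ)
+  * were 0x10, the absolute indices programmed later would be 0x10 and
+  * 0x11.
+  */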
+ /* PQs will be arranged as follows: First per-TC PQ then pure-LB queue. + */ + qm_info->qm_pq_params = kzalloc(sizeof(*qm_info->qm_pq_params) * + num_pqs, GFP_ATOMIC); + if (!qm_info->qm_pq_params) + goto alloc_err; + + qm_info->qm_vport_params = kzalloc(sizeof(*qm_info->qm_vport_params) * + num_vports, GFP_ATOMIC); + if (!qm_info->qm_vport_params) + goto alloc_err; + + qm_info->qm_port_params = kzalloc(sizeof(*qm_info->qm_port_params) * + MAX_NUM_PORTS, GFP_ATOMIC); + if (!qm_info->qm_port_params) + goto alloc_err; + + vport_id = (u8)RESC_START(p_hwfn, QED_VPORT); + + /* First init per-TC PQs */ + for (i = 0; i < multi_cos_tcs; i++) { + struct init_qm_pq_params *params = &qm_info->qm_pq_params[i]; + + params->vport_id = vport_id; + params->tc_id = p_hwfn->hw_info.non_offload_tc; + params->wrr_group = 1; + } + + /* Then init pure-LB PQ */ + qm_info->pure_lb_pq = i; + qm_info->qm_pq_params[i].vport_id = (u8)RESC_START(p_hwfn, QED_VPORT); + qm_info->qm_pq_params[i].tc_id = PURE_LB_TC; + qm_info->qm_pq_params[i].wrr_group = 1; + i++; + + qm_info->offload_pq = 0; + qm_info->num_pqs = num_pqs; + qm_info->num_vports = num_vports; + + /* Initialize qm port parameters */ + num_ports = p_hwfn->cdev->num_ports_in_engines; + for (i = 0; i < num_ports; i++) { + p_qm_port = &qm_info->qm_port_params[i]; + p_qm_port->active = 1; + p_qm_port->num_active_phys_tcs = 4; + p_qm_port->num_pbf_cmd_lines = PBF_MAX_CMD_LINES / num_ports; + p_qm_port->num_btb_blocks = BTB_MAX_BLOCKS / num_ports; + } + + qm_info->max_phys_tcs_per_port = NUM_OF_PHYS_TCS; + + qm_info->start_pq = (u16)RESC_START(p_hwfn, QED_PQ); + + qm_info->start_vport = (u8)RESC_START(p_hwfn, QED_VPORT); + + qm_info->pf_wfq = 0; + qm_info->pf_rl = 0; + qm_info->vport_rl_en = 1; + + return 0; + +alloc_err: + DP_NOTICE(p_hwfn, "Failed to allocate memory for QM params\n"); + kfree(qm_info->qm_pq_params); + kfree(qm_info->qm_vport_params); + kfree(qm_info->qm_port_params); + + return -ENOMEM; +}
+ +int qed_resc_alloc(struct qed_dev *cdev) +{ + struct qed_consq *p_consq; + struct qed_eq *p_eq; + int i, rc = 0; + + cdev->fw_data = kzalloc(sizeof(*cdev->fw_data), GFP_KERNEL); + if (!cdev->fw_data) + return -ENOMEM; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + /* First allocate the context manager structure */ + rc = qed_cxt_mngr_alloc(p_hwfn); + if (rc) + goto alloc_err; + + /* Set the HW cid/tid numbers (in the context manager) + * Must be done prior to any further computations. 
+ */ + rc = qed_cxt_set_pf_params(p_hwfn); + if (rc) + goto alloc_err; + + /* Prepare and process QM requirements */ + rc = qed_init_qm_info(p_hwfn); + if (rc) + goto alloc_err; + + /* Compute the ILT client partition */ + rc = qed_cxt_cfg_ilt_compute(p_hwfn); + if (rc) + goto alloc_err; + + /* CID map / ILT shadow table / T2 + * The table sizes are determined by the computations above + */ + rc = qed_cxt_tables_alloc(p_hwfn); + if (rc) + goto alloc_err; + + /* SPQ, must follow ILT because initializes SPQ context */ + rc = qed_spq_alloc(p_hwfn); + if (rc) + goto alloc_err; + + /* SP status block allocation */ + p_hwfn->p_dpc_ptt = qed_get_reserved_ptt(p_hwfn, + RESERVED_PTT_DPC); + + rc = qed_int_alloc(p_hwfn, p_hwfn->p_main_ptt); + if (rc) + goto alloc_err; + + /* EQ */ + p_eq = qed_eq_alloc(p_hwfn, 256); + + if (!p_eq) + goto alloc_err; + p_hwfn->p_eq = p_eq; + + p_consq = qed_consq_alloc(p_hwfn); + if (!p_consq) + goto alloc_err; + p_hwfn->p_consq = p_consq; + + /* DMA info initialization */ + rc = qed_dmae_info_alloc(p_hwfn); + if (rc) { + DP_NOTICE(p_hwfn, + "Failed to allocate memory for dmae_info structure\n"); + goto alloc_err; + } + } + + cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL); + if (!cdev->reset_stats) { + DP_NOTICE(cdev, "Failed to allocate reset statistics\n"); + goto alloc_err; + } + + return 0; + +alloc_err: + qed_resc_free(cdev); + return rc; +}
+ +void qed_resc_setup(struct qed_dev *cdev) +{ + int i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + qed_cxt_mngr_setup(p_hwfn); + qed_spq_setup(p_hwfn); + qed_eq_setup(p_hwfn, p_hwfn->p_eq); + qed_consq_setup(p_hwfn, p_hwfn->p_consq); + + /* Read shadow of current MFW mailbox */ + qed_mcp_read_mb(p_hwfn, p_hwfn->p_main_ptt); + memcpy(p_hwfn->mcp_info->mfw_mb_shadow, + p_hwfn->mcp_info->mfw_mb_cur, + p_hwfn->mcp_info->mfw_mb_length); + + qed_int_setup(p_hwfn, p_hwfn->p_main_ptt); + } +}
+ +#define FINAL_CLEANUP_CMD_OFFSET (0) +#define FINAL_CLEANUP_CMD (0x1) +#define FINAL_CLEANUP_VALID_OFFSET (6) +#define FINAL_CLEANUP_VFPF_ID_SHIFT (7) +#define FINAL_CLEANUP_COMP (0x2) +#define FINAL_CLEANUP_POLL_CNT (100) +#define FINAL_CLEANUP_POLL_TIME (10) +int qed_final_cleanup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 id) +{ + u32 command = 0, addr, count = FINAL_CLEANUP_POLL_CNT; + int rc = -EBUSY; + + addr = GTT_BAR0_MAP_REG_USDM_RAM + USTORM_FLR_FINAL_ACK_OFFSET; + + command |= FINAL_CLEANUP_CMD << FINAL_CLEANUP_CMD_OFFSET; + command |= 1 << FINAL_CLEANUP_VALID_OFFSET; + command |= id << FINAL_CLEANUP_VFPF_ID_SHIFT; + command |= FINAL_CLEANUP_COMP << SDM_OP_GEN_COMP_TYPE_SHIFT; + + /* Make sure notification is not set before initiating final cleanup */ + if (REG_RD(p_hwfn, addr)) { + DP_NOTICE( + p_hwfn, + "Unexpected; Found final cleanup notification before initiating final cleanup\n"); + REG_WR(p_hwfn, addr, 0); + } + + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "Sending final cleanup for PFVF[%d] [Command %08x]\n", + id, command); + + qed_wr(p_hwfn, p_ptt, XSDM_REG_OPERATION_GEN, command); + + /* Poll until completion */ + while (!REG_RD(p_hwfn, addr) && count--) + msleep(FINAL_CLEANUP_POLL_TIME); + + if (REG_RD(p_hwfn, addr)) + rc = 0; + else + DP_NOTICE(p_hwfn, + "Failed to receive FW final cleanup notification\n"); + + /* Cleanup afterwards */ + REG_WR(p_hwfn, addr, 0); + + return rc; +}
+ +static void qed_calc_hw_mode(struct qed_hwfn *p_hwfn) +{ + int hw_mode = 0; + + hw_mode = (1 << MODE_BB_A0); + + switch (p_hwfn->cdev->num_ports_in_engines) { + case 1: + hw_mode 
|= 1 << MODE_PORTS_PER_ENG_1; + break; + case 2: + hw_mode |= 1 << MODE_PORTS_PER_ENG_2; + break; + case 4: + hw_mode |= 1 << MODE_PORTS_PER_ENG_4; + break; + default: + DP_NOTICE(p_hwfn, "num_ports_in_engines = %d not supported\n", + p_hwfn->cdev->num_ports_in_engines); + return; + } + + switch (p_hwfn->cdev->mf_mode) { + case SF: + hw_mode |= 1 << MODE_SF; + break; + case MF_OVLAN: + hw_mode |= 1 << MODE_MF_SD; + break; + case MF_NPAR: + hw_mode |= 1 << MODE_MF_SI; + break; + default: + DP_NOTICE(p_hwfn, "Unsupported MF mode, init as SF\n"); + hw_mode |= 1 << MODE_SF; + } + + hw_mode |= 1 << MODE_ASIC; + + p_hwfn->hw_info.hw_mode = hw_mode; +}
+ +/* Init run time data for all PFs on an engine. */ +static void qed_init_cau_rt_data(struct qed_dev *cdev) +{ + u32 offset = CAU_REG_SB_VAR_MEMORY_RT_OFFSET; + int i, sb_id; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + struct qed_igu_info *p_igu_info; + struct qed_igu_block *p_block; + struct cau_sb_entry sb_entry; + + p_igu_info = p_hwfn->hw_info.p_igu_info; + + for (sb_id = 0; sb_id < QED_MAPPING_MEMORY_SIZE(cdev); + sb_id++) { + p_block = &p_igu_info->igu_map.igu_blocks[sb_id]; + if (!p_block->is_pf) + continue; + + qed_init_cau_sb_entry(p_hwfn, &sb_entry, + p_block->function_id, + 0, 0); + STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, + sb_entry); + } + } +}
+ +static int qed_hw_init_common(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + int hw_mode) +{ + struct qed_qm_info *qm_info = &p_hwfn->qm_info; + struct qed_qm_common_rt_init_params params; + struct qed_dev *cdev = p_hwfn->cdev; + int rc = 0; + + qed_init_cau_rt_data(cdev); + + /* Program GTT windows */ + qed_gtt_init(p_hwfn); + + if (p_hwfn->mcp_info) { + if (p_hwfn->mcp_info->func_info.bandwidth_max) + qm_info->pf_rl_en = 1; + if (p_hwfn->mcp_info->func_info.bandwidth_min) + qm_info->pf_wfq_en = 1; + } + + memset(&params, 0, sizeof(params)); + params.max_ports_per_engine = p_hwfn->cdev->num_ports_in_engines; + params.max_phys_tcs_per_port = qm_info->max_phys_tcs_per_port; + params.pf_rl_en = qm_info->pf_rl_en; + params.pf_wfq_en = qm_info->pf_wfq_en; + params.vport_rl_en = qm_info->vport_rl_en; + params.vport_wfq_en = qm_info->vport_wfq_en; + params.port_params = qm_info->qm_port_params; + + qed_qm_common_rt_init(p_hwfn, &params); + + qed_cxt_hw_init_common(p_hwfn); + + /* Close gate from NIG to BRB/Storm; By default they are open, but + * we close them to prevent NIG from passing data to reset blocks. + * Should have been done in the ENGINE phase, but init-tool lacks + * proper port-pretend capabilities. 
+ */ + qed_wr(p_hwfn, p_ptt, NIG_REG_RX_BRB_OUT_EN, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_STORM_OUT_EN, 0); + qed_port_pretend(p_hwfn, p_ptt, p_hwfn->port_id ^ 1); + qed_wr(p_hwfn, p_ptt, NIG_REG_RX_BRB_OUT_EN, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_STORM_OUT_EN, 0); + qed_port_unpretend(p_hwfn, p_ptt); + + rc = qed_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode); + if (rc != 0) + return rc; + + qed_wr(p_hwfn, p_ptt, PSWRQ2_REG_L2P_VALIDATE_VFID, 0); + qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_USE_CLIENTID_IN_TAG, 1); + + /* Disable relaxed ordering in the PCI config space */ + qed_wr(p_hwfn, p_ptt, 0x20b4, + qed_rd(p_hwfn, p_ptt, 0x20b4) & ~0x10); + + return rc; +}
+ +static int qed_hw_init_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + int hw_mode) +{ + int rc = 0; + + rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, + hw_mode); + return rc; +}
+ +static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + int hw_mode, + bool b_hw_start, + enum qed_int_mode int_mode, + bool allow_npar_tx_switch) +{ + u8 rel_pf_id = p_hwfn->rel_pf_id; + int rc = 0; + + if (p_hwfn->mcp_info) { + struct qed_mcp_function_info *p_info; + + p_info = &p_hwfn->mcp_info->func_info; + if (p_info->bandwidth_min) + p_hwfn->qm_info.pf_wfq = p_info->bandwidth_min; + + /* Update rate limit once we'll actually have a link */ + p_hwfn->qm_info.pf_rl = 100; + } + + qed_cxt_hw_init_pf(p_hwfn); + + qed_int_igu_init_rt(p_hwfn); + + /* Set VLAN in NIG if needed */ + if (hw_mode & (1 << MODE_MF_SD)) { + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring LLH_FUNC_TAG\n"); + STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1); + STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET, + p_hwfn->hw_info.ovlan); + } + + /* Enable classification by MAC if needed */ + if (hw_mode & (1 << MODE_MF_SI)) { + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "Configuring TAGMAC_CLS_TYPE\n"); + STORE_RT_REG(p_hwfn, + NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET, 1); + } + + /* Protocol Configuration */ + STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_TCP_RT_OFFSET, 0); + STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_FCOE_RT_OFFSET, 0); + STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_ROCE_RT_OFFSET, 0); + + /* Cleanup chip from previous driver if such remains exist */ + rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id); + if (rc != 0) + return rc; + + /* PF Init sequence */ + rc = qed_init_run(p_hwfn, p_ptt, PHASE_PF, rel_pf_id, hw_mode); + if (rc) + return rc; + + /* QM_PF Init sequence (may be invoked separately e.g. for DCB) */ + rc = qed_init_run(p_hwfn, p_ptt, PHASE_QM_PF, rel_pf_id, hw_mode); + if (rc) + return rc; + + /* Pure runtime initializations - directly to the HW */ + qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true); + + if (b_hw_start) { + /* enable interrupts */ + qed_int_igu_enable(p_hwfn, p_ptt, int_mode); + + /* send function start command */ + rc = qed_sp_pf_start(p_hwfn, p_hwfn->cdev->mf_mode); + if (rc) + DP_NOTICE(p_hwfn, "Function start ramrod failed\n"); + } + return rc; +}
+ +static int qed_change_pci_hwfn(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 enable) +{ + u32 delay_idx = 0, val, set_val = enable ? 
1 : 0; + + /* Change PF in PXP */ + qed_wr(p_hwfn, p_ptt, + PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val); + + /* wait until value is set - try for 1 second every 50us */ + for (delay_idx = 0; delay_idx < 20000; delay_idx++) { + val = qed_rd(p_hwfn, p_ptt, + PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER); + if (val == set_val) + break; + + usleep_range(50, 60); + } + + if (val != set_val) { + DP_NOTICE(p_hwfn, + "PFID_ENABLE_MASTER wasn't changed after a second\n"); + return -EAGAIN; + } + + return 0; +}
+ +static void qed_reset_mb_shadow(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_main_ptt) +{ + /* Read shadow of current MFW mailbox */ + qed_mcp_read_mb(p_hwfn, p_main_ptt); + memcpy(p_hwfn->mcp_info->mfw_mb_shadow, + p_hwfn->mcp_info->mfw_mb_cur, + p_hwfn->mcp_info->mfw_mb_length); +}
+ +int qed_hw_init(struct qed_dev *cdev, + bool b_hw_start, + enum qed_int_mode int_mode, + bool allow_npar_tx_switch, + const u8 *bin_fw_data) +{ + u32 load_code, param; + int rc, mfw_rc, i; + + rc = qed_init_fw_data(cdev, bin_fw_data); + if (rc != 0) + return rc; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + /* Enable DMAE in PXP */ + rc = qed_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true); + + qed_calc_hw_mode(p_hwfn); + + rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, + &load_code); + if (rc) { + DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n"); + return rc; + } + + qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt); + + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "Load request was sent. Resp:0x%x, Load code: 0x%x\n", + rc, load_code); + + p_hwfn->first_on_engine = (load_code == + FW_MSG_CODE_DRV_LOAD_ENGINE); + + switch (load_code) { + case FW_MSG_CODE_DRV_LOAD_ENGINE: + rc = qed_hw_init_common(p_hwfn, p_hwfn->p_main_ptt, + p_hwfn->hw_info.hw_mode); + if (rc) + break; + /* Fall through */ + case FW_MSG_CODE_DRV_LOAD_PORT: + rc = qed_hw_init_port(p_hwfn, p_hwfn->p_main_ptt, + p_hwfn->hw_info.hw_mode); + if (rc) + break; + + /* Fall through */ + case FW_MSG_CODE_DRV_LOAD_FUNCTION: + rc = qed_hw_init_pf(p_hwfn, p_hwfn->p_main_ptt, + p_hwfn->hw_info.hw_mode, + b_hw_start, int_mode, + allow_npar_tx_switch); + break; + default: + rc = -EINVAL; + break; + } + + if (rc) + DP_NOTICE(p_hwfn, + "init phase failed for loadcode 0x%x (rc %d)\n", + load_code, rc); + + /* ACK mfw regardless of success or failure of initialization */ + mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, + DRV_MSG_CODE_LOAD_DONE, + 0, &load_code, &param); + if (rc) + return rc; + if (mfw_rc) { + DP_NOTICE(p_hwfn, "Failed sending LOAD_DONE command\n"); + return mfw_rc; + } + + p_hwfn->hw_init_done = true; + } + + return 0; +}
+ +#define QED_HW_STOP_RETRY_LIMIT (10) +int qed_hw_stop(struct qed_dev *cdev) +{ + int rc = 0, t_rc; + int i, j; + + for_each_hwfn(cdev, j) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[j]; + struct qed_ptt *p_ptt = p_hwfn->p_main_ptt; + + DP_VERBOSE(p_hwfn, NETIF_MSG_IFDOWN, "Stopping hw/fw\n"); + + /* mark the hw as uninitialized... 
*/ + p_hwfn->hw_init_done = false; + + rc = qed_sp_pf_stop(p_hwfn); + if (rc) + return rc; + + qed_wr(p_hwfn, p_ptt, + NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1); + + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_TCP, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_UDP, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_FCOE, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_ROCE, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_OPENFLOW, 0x0); + + qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0); + qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0); + for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) { + if ((!qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_CONN)) && + (!qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_TASK))) + break; + + usleep_range(1000, 2000); + } + if (i == QED_HW_STOP_RETRY_LIMIT) + DP_NOTICE(p_hwfn, + "Timers linear scans are not over [Connection %02x Tasks %02x]\n", + (u8)qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_CONN), + (u8)qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_TASK)); + + /* Disable Attention Generation */ + qed_int_igu_disable_int(p_hwfn, p_ptt); + + qed_wr(p_hwfn, p_ptt, IGU_REG_LEADING_EDGE_LATCH, 0); + qed_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0); + + qed_int_igu_init_pure_rt(p_hwfn, p_ptt, false, true); + + /* Need to wait 1ms to guarantee SBs are cleared */ + usleep_range(1000, 2000); + } + + /* Disable DMAE in PXP - in CMT, this should only be done for + * first hw-function, and only after all transactions have + * stopped for all active hw-functions. + */ + t_rc = qed_change_pci_hwfn(&cdev->hwfns[0], + cdev->hwfns[0].p_main_ptt, + false); + if (t_rc != 0) + rc = t_rc; + + return rc; +} + +static int qed_reg_assert(struct qed_hwfn *hwfn, + struct qed_ptt *ptt, u32 reg, + bool expected) +{ + u32 assert_val = qed_rd(hwfn, ptt, reg); + + if (assert_val != expected) { + DP_NOTICE(hwfn, "Value at address 0x%x != 0x%08x\n", + reg, expected); + return -EINVAL; + } + + return 0; +} + +int qed_hw_reset(struct qed_dev *cdev) +{ + int rc = 0; + u32 unload_resp, unload_param; + int i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + DP_VERBOSE(p_hwfn, NETIF_MSG_IFDOWN, "Resetting hw/fw\n"); + + /* Check for incorrect states */ + qed_reg_assert(p_hwfn, p_hwfn->p_main_ptt, + QM_REG_USG_CNT_PF_TX, 0); + qed_reg_assert(p_hwfn, p_hwfn->p_main_ptt, + QM_REG_USG_CNT_PF_OTHER, 0); + + /* Disable PF in HW blocks */ + qed_wr(p_hwfn, p_hwfn->p_main_ptt, DORQ_REG_PF_DB_ENABLE, 0); + qed_wr(p_hwfn, p_hwfn->p_main_ptt, QM_REG_PF_EN, 0); + qed_wr(p_hwfn, p_hwfn->p_main_ptt, + TCFC_REG_STRONG_ENABLE_PF, 0); + qed_wr(p_hwfn, p_hwfn->p_main_ptt, + CCFC_REG_STRONG_ENABLE_PF, 0); + + /* Send unload command to MCP */ + rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, + DRV_MSG_CODE_UNLOAD_REQ, + DRV_MB_PARAM_UNLOAD_WOL_MCP, + &unload_resp, &unload_param); + if (rc) { + DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_REQ failed\n"); + unload_resp = FW_MSG_CODE_DRV_UNLOAD_ENGINE; + } + + rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, + DRV_MSG_CODE_UNLOAD_DONE, + 0, &unload_resp, &unload_param); + if (rc) { + DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_DONE failed\n"); + return rc; + } + } + + return rc; +} + +/* Free hwfn memory and resources acquired in hw_hwfn_prepare */ +static void qed_hw_hwfn_free(struct qed_hwfn *p_hwfn) +{ + qed_ptt_pool_free(p_hwfn); + kfree(p_hwfn->hw_info.p_igu_info); +} + +/* Setup bar access */ +static int qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn) +{ + int rc; + + /* Allocate PTT pool */ + rc = qed_ptt_pool_alloc(p_hwfn); + if (rc) + return rc; + + /* 
Allocate the main PTT */ + p_hwfn->p_main_ptt = qed_get_reserved_ptt(p_hwfn, RESERVED_PTT_MAIN); + + /* clear indirect access */ + qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_88_F0, 0); + qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_8C_F0, 0); + qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_90_F0, 0); + qed_wr(p_hwfn, p_hwfn->p_main_ptt, PGLUE_B_REG_PGL_ADDR_94_F0, 0); + + /* Clean Previous errors if such exist */ + qed_wr(p_hwfn, p_hwfn->p_main_ptt, + PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, + 1 << p_hwfn->abs_pf_id); + + /* enable internal target-read */ + qed_wr(p_hwfn, p_hwfn->p_main_ptt, + PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); + + return 0; +} + +static void get_function_id(struct qed_hwfn *p_hwfn) +{ + /* ME Register */ + p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR); + + p_hwfn->hw_info.concrete_fid = REG_RD(p_hwfn, PXP_PF_ME_CONCRETE_ADDR); + + p_hwfn->abs_pf_id = (p_hwfn->hw_info.concrete_fid >> 16) & 0xf; + p_hwfn->rel_pf_id = GET_FIELD(p_hwfn->hw_info.concrete_fid, + PXP_CONCRETE_FID_PFID); + p_hwfn->port_id = GET_FIELD(p_hwfn->hw_info.concrete_fid, + PXP_CONCRETE_FID_PORT); +} + +static void qed_hw_get_resc(struct qed_hwfn *p_hwfn) +{ + u32 *resc_start = p_hwfn->hw_info.resc_start; + u32 *resc_num = p_hwfn->hw_info.resc_num; + int num_funcs, i; + + num_funcs = IS_MF(p_hwfn) ? MAX_NUM_PFS_BB + : p_hwfn->cdev->num_ports_in_engines; + + resc_num[QED_SB] = min_t(u32, + (MAX_SB_PER_PATH_BB / num_funcs), + qed_int_get_num_sbs(p_hwfn, NULL)); + resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs; + resc_num[QED_PQ] = MAX_QM_TX_QUEUES_BB / num_funcs; + resc_num[QED_RL] = 8; + resc_num[QED_ILT] = 950; + + for (i = 0; i < QED_MAX_RESC; i++) + resc_start[i] = resc_num[i] * p_hwfn->rel_pf_id; + + DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, + "The numbers for each resource are:\n" + "SB = %d start = %d\n" + "VPORT = %d start = %d\n" + "PQ = %d start = %d\n" + "RL = %d start = %d\n" + "ILT = %d start = %d\n", + p_hwfn->hw_info.resc_num[QED_SB], + p_hwfn->hw_info.resc_start[QED_SB], + p_hwfn->hw_info.resc_num[QED_VPORT], + p_hwfn->hw_info.resc_start[QED_VPORT], + p_hwfn->hw_info.resc_num[QED_PQ], + p_hwfn->hw_info.resc_start[QED_PQ], + p_hwfn->hw_info.resc_num[QED_RL], + p_hwfn->hw_info.resc_start[QED_RL], + p_hwfn->hw_info.resc_num[QED_ILT], + p_hwfn->hw_info.resc_start[QED_ILT]); +} + +static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, nvm_cfg_addr; + u32 val; + + /* Read global nvm_cfg address */ + nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0); + + /* Verify MCP has initialized it */ + if (!nvm_cfg_addr) { + DP_NOTICE(p_hwfn, "Shared memory not initialized\n"); + return -EINVAL; + } + + /* Read nvm_cfg1 (Notice this is just offset, and not offsize (TBD) */ + nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4); + + /* Read Vendor Id / Device Id */ + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, glob) + + offsetof(struct nvm_cfg1_glob, pci_id); + p_hwfn->hw_info.vendor_id = qed_rd(p_hwfn, p_ptt, addr) & + NVM_CFG1_GLOB_VENDOR_ID_MASK; + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, func[MCP_PF_ID(p_hwfn)]) + + offsetof(struct nvm_cfg1_func, device_id); + val = qed_rd(p_hwfn, p_ptt, addr); + + if (IS_MF(p_hwfn)) { + p_hwfn->hw_info.device_id = + (val & NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_MASK) >> + NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_OFFSET; + } else { + p_hwfn->hw_info.device_id = + 
(val & NVM_CFG1_FUNC_VENDOR_DEVICE_ID_MASK) >> + NVM_CFG1_FUNC_VENDOR_DEVICE_ID_OFFSET; + } + + /* Read Multi-function information from shmem */ + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, glob) + + offsetof(struct nvm_cfg1_glob, generic_cont0); + + generic_cont0 = qed_rd(p_hwfn, p_ptt, addr); + + mf_mode = (generic_cont0 & NVM_CFG1_GLOB_MF_MODE_MASK) >> + NVM_CFG1_GLOB_MF_MODE_OFFSET; + + switch (mf_mode) { + case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED: + p_hwfn->cdev->mf_mode = MF_OVLAN; + break; + case NVM_CFG1_GLOB_MF_MODE_NPAR1_0: + p_hwfn->cdev->mf_mode = MF_NPAR; + break; + case NVM_CFG1_GLOB_MF_MODE_FORCED_SF: + p_hwfn->cdev->mf_mode = SF; + break; + } + DP_INFO(p_hwfn, "Multi function mode is %08x\n", + p_hwfn->cdev->mf_mode); + + return qed_mcp_fill_shmem_func_info(p_hwfn, p_ptt); +} + +static int +qed_get_hw_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_pci_personality personality) +{ + u32 port_mode; + int rc; + + /* Read the port mode */ + port_mode = qed_rd(p_hwfn, p_ptt, + CNIG_REG_NW_PORT_MODE_BB_B0); + + if (port_mode < 3) { + p_hwfn->cdev->num_ports_in_engines = 1; + } else if (port_mode <= 5) { + p_hwfn->cdev->num_ports_in_engines = 2; + } else { + DP_NOTICE(p_hwfn, "PORT MODE: %d not supported\n", + p_hwfn->cdev->num_ports_in_engines); + + /* Default num_ports_in_engines to something */ + p_hwfn->cdev->num_ports_in_engines = 1; + } + + qed_hw_get_nvm_info(p_hwfn, p_ptt); + + rc = qed_int_igu_read_cam(p_hwfn, p_ptt); + if (rc) + return rc; + + if (qed_mcp_is_init(p_hwfn)) + ether_addr_copy(p_hwfn->hw_info.hw_mac_addr, + p_hwfn->mcp_info->func_info.mac); + else + eth_random_addr(p_hwfn->hw_info.hw_mac_addr); + + if (qed_mcp_is_init(p_hwfn)) { + if (p_hwfn->mcp_info->func_info.ovlan != QED_MCP_VLAN_UNSET) + p_hwfn->hw_info.ovlan = + p_hwfn->mcp_info->func_info.ovlan; + + qed_mcp_cmd_port_init(p_hwfn, p_ptt); + } + + if (qed_mcp_is_init(p_hwfn)) { + enum qed_pci_personality protocol; + + protocol = p_hwfn->mcp_info->func_info.protocol; + p_hwfn->hw_info.personality = protocol; + } + + qed_hw_get_resc(p_hwfn); + + return rc; +} + +static void qed_get_dev_info(struct qed_dev *cdev) +{ + u32 tmp; + + cdev->chip_num = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + MISCS_REG_CHIP_NUM); + cdev->chip_rev = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + MISCS_REG_CHIP_REV); + MASK_FIELD(CHIP_REV, cdev->chip_rev); + + /* Learn number of HW-functions */ + tmp = qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + MISCS_REG_CMT_ENABLED_FOR_PAIR); + + if (tmp & (1 << cdev->hwfns[0].rel_pf_id)) { + DP_NOTICE(cdev->hwfns, "device in CMT mode\n"); + cdev->num_hwfns = 2; + } else { + cdev->num_hwfns = 1; + } + + cdev->chip_bond_id = qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + MISCS_REG_CHIP_TEST_REG) >> 4; + MASK_FIELD(CHIP_BOND_ID, cdev->chip_bond_id); + cdev->chip_metal = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + MISCS_REG_CHIP_METAL); + MASK_FIELD(CHIP_METAL, cdev->chip_metal); + + DP_INFO(cdev->hwfns, + "Chip details - Num: %04x Rev: %04x Bond id: %04x Metal: %04x\n", + cdev->chip_num, cdev->chip_rev, + cdev->chip_bond_id, cdev->chip_metal); +} + +static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, + void __iomem *p_regview, + void __iomem *p_doorbells, + enum qed_pci_personality personality) +{ + int rc = 0; + + /* Split PCI bars evenly between hwfns */ + p_hwfn->regview = p_regview; + p_hwfn->doorbells = p_doorbells; + + /* Validate that chip access is feasible */ + if (REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR) 
== 0xffffffff) { + DP_ERR(p_hwfn, + "Reading the ME register returns all Fs; Preventing further chip access\n"); + return -EINVAL; + } + + get_function_id(p_hwfn); + + rc = qed_hw_hwfn_prepare(p_hwfn); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to prepare hwfn's hw\n"); + goto err0; + } + + /* First hwfn learns basic information, e.g., number of hwfns */ + if (!p_hwfn->my_id) + qed_get_dev_info(p_hwfn->cdev); + + /* Initialize MCP structure */ + rc = qed_mcp_cmd_init(p_hwfn, p_hwfn->p_main_ptt); + if (rc) { + DP_NOTICE(p_hwfn, "Failed initializing mcp command\n"); + goto err1; + } + + /* Read the device configuration information from the HW and SHMEM */ + rc = qed_get_hw_info(p_hwfn, p_hwfn->p_main_ptt, personality); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to get HW information\n"); + goto err2; + } + + /* Allocate the init RT array and initialize the init-ops engine */ + rc = qed_init_alloc(p_hwfn); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate the init array\n"); + goto err2; + } + + return rc; +err2: + qed_mcp_free(p_hwfn); +err1: + qed_hw_hwfn_free(p_hwfn); +err0: + return rc; +} + +static u32 qed_hw_bar_size(struct qed_dev *cdev, + u8 bar_id) +{ + u32 size = pci_resource_len(cdev->pdev, (bar_id > 0) ? 2 : 0); + + return size / cdev->num_hwfns; +} + +int qed_hw_prepare(struct qed_dev *cdev, + int personality) +{ + int rc, i; + + /* Store the precompiled init data ptrs */ + qed_init_iro_array(cdev); + + /* Initialize the first hwfn - will learn number of hwfns */ + rc = qed_hw_prepare_single(&cdev->hwfns[0], cdev->regview, + cdev->doorbells, personality); + if (rc) + return rc; + + personality = cdev->hwfns[0].hw_info.personality; + + /* Initialize the rest of the hwfns */ + for (i = 1; i < cdev->num_hwfns; i++) { + void __iomem *p_regview, *p_doorbell; + + p_regview = cdev->regview + + i * qed_hw_bar_size(cdev, 0); + p_doorbell = cdev->doorbells + + i * qed_hw_bar_size(cdev, 1); + rc = qed_hw_prepare_single(&cdev->hwfns[i], p_regview, + p_doorbell, personality); + if (rc) { + /* Cleanup previously initialized hwfns */ + while (--i >= 0) { + qed_init_free(&cdev->hwfns[i]); + qed_mcp_free(&cdev->hwfns[i]); + qed_hw_hwfn_free(&cdev->hwfns[i]); + } + return rc; + } + } + + return 0; +} + +void qed_hw_remove(struct qed_dev *cdev) +{ + int i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + qed_init_free(p_hwfn); + qed_hw_hwfn_free(p_hwfn); + qed_mcp_free(p_hwfn); + } +} + +int qed_chain_alloc(struct qed_dev *cdev, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode, + u16 num_elems, + size_t elem_size, + struct qed_chain *p_chain) +{ + dma_addr_t p_pbl_phys = 0; + void *p_pbl_virt = NULL; + dma_addr_t p_phys = 0; + void *p_virt = NULL; + u16 page_cnt = 0; + size_t size; + + if (mode == QED_CHAIN_MODE_SINGLE) + page_cnt = 1; + else + page_cnt = QED_CHAIN_PAGE_CNT(num_elems, elem_size, mode); + + size = page_cnt * QED_CHAIN_PAGE_SIZE; + p_virt = dma_alloc_coherent(&cdev->pdev->dev, + size, &p_phys, GFP_KERNEL); + if (!p_virt) { + DP_NOTICE(cdev, "Failed to allocate chain mem\n"); + goto nomem; + } + + if (mode == QED_CHAIN_MODE_PBL) { + size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE; + p_pbl_virt = dma_alloc_coherent(&cdev->pdev->dev, + size, &p_pbl_phys, + GFP_KERNEL); + if (!p_pbl_virt) { + DP_NOTICE(cdev, "Failed to allocate chain pbl mem\n"); + goto nomem; + } + + qed_chain_pbl_init(p_chain, p_virt, p_phys, page_cnt, + (u8)elem_size, intended_use, + p_pbl_phys, p_pbl_virt); + } else { + qed_chain_init(p_chain, p_virt, p_phys, page_cnt, + 
(u8)elem_size, intended_use, mode); + } + + return 0; + +nomem: + dma_free_coherent(&cdev->pdev->dev, + page_cnt * QED_CHAIN_PAGE_SIZE, + p_virt, p_phys); + dma_free_coherent(&cdev->pdev->dev, + page_cnt * QED_CHAIN_PBL_ENTRY_SIZE, + p_pbl_virt, p_pbl_phys); + + return -ENOMEM; +} + +void qed_chain_free(struct qed_dev *cdev, + struct qed_chain *p_chain) +{ + size_t size; + + if (!p_chain->p_virt_addr) + return; + + if (p_chain->mode == QED_CHAIN_MODE_PBL) { + size = p_chain->page_cnt * QED_CHAIN_PBL_ENTRY_SIZE; + dma_free_coherent(&cdev->pdev->dev, size, + p_chain->pbl.p_virt_table, + p_chain->pbl.p_phys_table); + } + + size = p_chain->page_cnt * QED_CHAIN_PAGE_SIZE; + dma_free_coherent(&cdev->pdev->dev, size, + p_chain->p_virt_addr, + p_chain->p_phys_addr); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h new file mode 100644 index 000000000000..5051af5f378e --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -0,0 +1,222 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_DEV_API_H +#define _QED_DEV_API_H + +#include +#include +#include +#include +#include +#include "qed_int.h" + +/** + * @brief qed_init_dp - initialize the debug level + * + * @param cdev + * @param dp_module + * @param dp_level + */ +void qed_init_dp(struct qed_dev *cdev, + u32 dp_module, + u8 dp_level); + +/** + * @brief qed_init_struct - initialize the device structure to + * its defaults + * + * @param cdev + */ +void qed_init_struct(struct qed_dev *cdev); + +/** + * @brief qed_resc_free - + * + * @param cdev + */ +void qed_resc_free(struct qed_dev *cdev); + +/** + * @brief qed_resc_alloc - + * + * @param cdev + * + * @return int + */ +int qed_resc_alloc(struct qed_dev *cdev); + +/** + * @brief qed_resc_setup - + * + * @param cdev + */ +void qed_resc_setup(struct qed_dev *cdev); + +/** + * @brief qed_hw_init - + * + * @param cdev + * @param b_hw_start + * @param int_mode - interrupt mode [msix, inta, etc.] to use. + * @param allow_npar_tx_switch - npar tx switching to be used + * for vports configured for tx-switching. + * @param bin_fw_data - binary fw data pointer in binary fw file. + * Pass NULL if not using binary fw file. 
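+ * + * Illustrative call order (a sketch based only on the declarations in + * this header; QED_PCI_ETH and QED_INT_MODE_MSIX are assumed enumerator + * names, since only the enum types are referenced here): + * + * rc = qed_hw_prepare(cdev, QED_PCI_ETH); + * if (!rc) + * rc = qed_hw_init(cdev, true, QED_INT_MODE_MSIX, true, bin_fw_data); + * ... + * qed_hw_stop(cdev); + * qed_hw_remove(cdev);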
+ * + * @return int + */ +int qed_hw_init(struct qed_dev *cdev, + bool b_hw_start, + enum qed_int_mode int_mode, + bool allow_npar_tx_switch, + const u8 *bin_fw_data); + +/** + * @brief qed_hw_stop - + * + * @param cdev + * + * @return int + */ +int qed_hw_stop(struct qed_dev *cdev); + +/** + * @brief qed_hw_reset - + * + * @param cdev + * + * @return int + */ +int qed_hw_reset(struct qed_dev *cdev); + +/** + * @brief qed_hw_prepare - + * + * @param cdev + * @param personality - personality to initialize + * + * @return int + */ +int qed_hw_prepare(struct qed_dev *cdev, + int personality); + +/** + * @brief qed_hw_remove - + * + * @param cdev + */ +void qed_hw_remove(struct qed_dev *cdev); + +/** + * @brief qed_ptt_acquire - Allocate a PTT window + * + * Should be called at the entry point to the driver (at the beginning of an + * exported function) + * + * @param p_hwfn + * + * @return struct qed_ptt + */ +struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ptt_release - Release PTT Window + * + * Should be called at the end of a flow - at the end of the function that + * acquired the PTT. + * + * + * @param p_hwfn + * @param p_ptt + */ +void qed_ptt_release(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + +enum qed_dmae_address_type_t { + QED_DMAE_ADDRESS_HOST_VIRT, + QED_DMAE_ADDRESS_HOST_PHYS, + QED_DMAE_ADDRESS_GRC +}; + +/* value of flags If QED_DMAE_FLAG_RW_REPL_SRC flag is set and the + * source is a block of length DMAE_MAX_RW_SIZE and the + * destination is larger, the source block will be duplicated as + * many times as required to fill the destination block. This is + * used mostly to write a zeroed buffer to destination address + * using DMA + */ +#define QED_DMAE_FLAG_RW_REPL_SRC 0x00000001 +#define QED_DMAE_FLAG_COMPLETION_DST 0x00000008 + +struct qed_dmae_params { + u32 flags; /* consists of QED_DMAE_FLAG_* values */ +}; + +/** + * @brief qed_dmae_host2grc - copy data from source addr to + * dmae registers using the given ptt + * + * @param p_hwfn + * @param p_ptt + * @param source_addr + * @param grc_addr (dmae_data_offset) + * @param size_in_dwords + * @param flags (one of the flags defined above) + */ +int +qed_dmae_host2grc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u64 source_addr, + u32 grc_addr, + u32 size_in_dwords, + u32 flags); + +/** + * @brief qed_chain_alloc - Allocate and initialize a chain + * + * @param p_hwfn + * @param intended_use + * @param mode + * @param num_elems + * @param elem_size + * @param p_chain + * + * @return int + */ +int +qed_chain_alloc(struct qed_dev *cdev, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode, + u16 num_elems, + size_t elem_size, + struct qed_chain *p_chain); + +/** + * @brief qed_chain_free - Free chain DMA memory + * + * @param p_hwfn + * @param p_chain + */ +void qed_chain_free(struct qed_dev *cdev, + struct qed_chain *p_chain); + +/** + * *@brief Cleanup of previous driver remains prior to load + * + * @param p_hwfn + * @param p_ptt + * @param id - For PF, engine-relative. For VF, PF-relative. 
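+ * + * Illustrative calling pattern (a sketch assuming the PTT helpers + * declared above; error handling elided): + * + * p_ptt = qed_ptt_acquire(p_hwfn); + * if (p_ptt) { + * rc = qed_final_cleanup(p_hwfn, p_ptt, p_hwfn->rel_pf_id); + * qed_ptt_release(p_hwfn, p_ptt); + * }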
+ * + * @return int + */ +int qed_final_cleanup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 id); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h new file mode 100644 index 000000000000..61c15a57c267 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -0,0 +1,4966 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_HSI_H +#define _QED_HSI_H + +#include +#include +#include +#include +#include +#include +#include +#include + +struct qed_hwfn; +struct qed_ptt; +/********************************/ +/* Add include to common target */ +/********************************/ + +/* opcodes for the event ring */ +enum common_event_opcode { + COMMON_EVENT_PF_START, + COMMON_EVENT_PF_STOP, + COMMON_EVENT_RESERVED, + COMMON_EVENT_RESERVED2, + COMMON_EVENT_RESERVED3, + COMMON_EVENT_RESERVED4, + COMMON_EVENT_RESERVED5, + MAX_COMMON_EVENT_OPCODE +}; + +/* Common Ramrod Command IDs */ +enum common_ramrod_cmd_id { + COMMON_RAMROD_UNUSED, + COMMON_RAMROD_PF_START /* PF Function Start Ramrod */, + COMMON_RAMROD_PF_STOP /* PF Function Stop Ramrod */, + COMMON_RAMROD_RESERVED, + COMMON_RAMROD_RESERVED2, + COMMON_RAMROD_RESERVED3, + MAX_COMMON_RAMROD_CMD_ID +}; + +/* The core storm context for the Ystorm */ +struct ystorm_core_conn_st_ctx { + __le32 reserved[4]; +}; + +/* The core storm context for the Pstorm */ +struct pstorm_core_conn_st_ctx { + __le32 reserved[4]; +}; + +/* Core Slowpath Connection storm context of Xstorm */ +struct xstorm_core_conn_st_ctx { + __le32 spq_base_lo /* SPQ Ring Base Address low dword */; + __le32 spq_base_hi /* SPQ Ring Base Address high dword */; + struct regpair consolid_base_addr; + __le16 spq_cons /* SPQ Ring Consumer */; + __le16 consolid_cons /* Consolidation Ring Consumer */; + __le32 reserved0[55] /* Pad to 15 cycles */; +}; + +struct xstorm_core_conn_ag_ctx { + u8 reserved0 /* cdu_validation */; + u8 core_state /* state */; + u8 flags0; +#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED1_MASK 0x1 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED1_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED2_MASK 0x1 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED2_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM3_MASK 0x1 +#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED3_MASK 0x1 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED3_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED4_MASK 0x1 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED4_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED5_MASK 0x1 /* bit6 */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED5_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED6_MASK 0x1 /* bit7 */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED6_SHIFT 7 + u8 flags1; +#define XSTORM_CORE_CONN_AG_CTX_RESERVED7_MASK 0x1 /* bit8 */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED7_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED8_MASK 0x1 /* bit9 */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED8_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED9_MASK 0x1 /* bit10 */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED9_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_BIT11_MASK 0x1 /* bit11 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT11_SHIFT 3 +#define 
XSTORM_CORE_CONN_AG_CTX_BIT12_MASK 0x1 /* bit12 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT12_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_BIT13_MASK 0x1 /* bit13 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT13_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_TX_RULE_ACTIVE_MASK 0x1 /* bit14 */ +#define XSTORM_CORE_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE_MASK 0x1 /* bit15 */ +#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT 7 + u8 flags2; +#define XSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define XSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define XSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define XSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 +#define XSTORM_CORE_CONN_AG_CTX_CF3_SHIFT 6 + u8 flags3; +#define XSTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define XSTORM_CORE_CONN_AG_CTX_CF4_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define XSTORM_CORE_CONN_AG_CTX_CF5_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define XSTORM_CORE_CONN_AG_CTX_CF6_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define XSTORM_CORE_CONN_AG_CTX_CF7_SHIFT 6 + u8 flags4; +#define XSTORM_CORE_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define XSTORM_CORE_CONN_AG_CTX_CF8_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define XSTORM_CORE_CONN_AG_CTX_CF9_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define XSTORM_CORE_CONN_AG_CTX_CF10_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF11_MASK 0x3 /* cf11 */ +#define XSTORM_CORE_CONN_AG_CTX_CF11_SHIFT 6 + u8 flags5; +#define XSTORM_CORE_CONN_AG_CTX_CF12_MASK 0x3 /* cf12 */ +#define XSTORM_CORE_CONN_AG_CTX_CF12_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_CF13_MASK 0x3 /* cf13 */ +#define XSTORM_CORE_CONN_AG_CTX_CF13_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_CF14_MASK 0x3 /* cf14 */ +#define XSTORM_CORE_CONN_AG_CTX_CF14_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF15_MASK 0x3 /* cf15 */ +#define XSTORM_CORE_CONN_AG_CTX_CF15_SHIFT 6 + u8 flags6; +#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_MASK 0x3 /* cf16 */ +#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_CF17_MASK 0x3 +#define XSTORM_CORE_CONN_AG_CTX_CF17_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_MASK 0x3 /* cf18 */ +#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_MASK 0x3 /* cf19 */ +#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_SHIFT 6 + u8 flags7; +#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_MASK 0x3 /* cf20 */ +#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED10_MASK 0x3 /* cf21 */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED10_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_MASK 0x3 /* cf22 */ +#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define XSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define XSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 7 + u8 flags8; +#define XSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define XSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define XSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define XSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 
/* cf5en */ +#define XSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define XSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define XSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define XSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define XSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT 7 + u8 flags9; +#define XSTORM_CORE_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define XSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_CF11EN_MASK 0x1 /* cf11en */ +#define XSTORM_CORE_CONN_AG_CTX_CF11EN_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_CF12EN_MASK 0x1 /* cf12en */ +#define XSTORM_CORE_CONN_AG_CTX_CF12EN_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_CF13EN_MASK 0x1 /* cf13en */ +#define XSTORM_CORE_CONN_AG_CTX_CF13EN_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_CF14EN_MASK 0x1 /* cf14en */ +#define XSTORM_CORE_CONN_AG_CTX_CF14EN_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF15EN_MASK 0x1 /* cf15en */ +#define XSTORM_CORE_CONN_AG_CTX_CF15EN_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN_MASK 0x1 /* cf16en */ +#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_CF17EN_MASK 0x1 +#define XSTORM_CORE_CONN_AG_CTX_CF17EN_SHIFT 7 + u8 flags10; +#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN_MASK 0x1 /* cf18en */ +#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_EN_MASK 0x1 /* cf19en */ +#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_EN_MASK 0x1 /* cf20en */ +#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED11_MASK 0x1 /* cf21en */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED11_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_EN_MASK 0x1 /* cf22en */ +#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_CF23EN_MASK 0x1 /* cf23en */ +#define XSTORM_CORE_CONN_AG_CTX_CF23EN_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED12_MASK 0x1 /* rule0en */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED12_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED13_MASK 0x1 /* rule1en */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED13_SHIFT 7 + u8 flags11; +#define XSTORM_CORE_CONN_AG_CTX_RESERVED14_MASK 0x1 /* rule2en */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED14_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_RESERVED15_MASK 0x1 /* rule3en */ +#define XSTORM_CORE_CONN_AG_CTX_RESERVED15_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_TX_DEC_RULE_EN_MASK 0x1 /* rule4en */ +#define XSTORM_CORE_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED1_MASK 0x1 /* rule8en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED1_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_RULE9EN_MASK 0x1 /* rule9en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE9EN_SHIFT 7 + u8 flags12; +#define XSTORM_CORE_CONN_AG_CTX_RULE10EN_MASK 0x1 /* rule10en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE10EN_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_RULE11EN_MASK 0x1 /* rule11en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE11EN_SHIFT 
1 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED2_MASK 0x1 /* rule12en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED2_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED3_MASK 0x1 /* rule13en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED3_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_RULE14EN_MASK 0x1 /* rule14en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE14EN_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_RULE15EN_MASK 0x1 /* rule15en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE15EN_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_RULE16EN_MASK 0x1 /* rule16en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE16EN_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_RULE17EN_MASK 0x1 /* rule17en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE17EN_SHIFT 7 + u8 flags13; +#define XSTORM_CORE_CONN_AG_CTX_RULE18EN_MASK 0x1 /* rule18en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE18EN_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_RULE19EN_MASK 0x1 /* rule19en */ +#define XSTORM_CORE_CONN_AG_CTX_RULE19EN_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED4_MASK 0x1 /* rule20en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED4_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED5_MASK 0x1 /* rule21en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED5_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED6_MASK 0x1 /* rule22en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED6_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED7_MASK 0x1 /* rule23en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED7_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED8_MASK 0x1 /* rule24en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED8_SHIFT 6 +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED9_MASK 0x1 /* rule25en */ +#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED9_SHIFT 7 + u8 flags14; +#define XSTORM_CORE_CONN_AG_CTX_BIT16_MASK 0x1 /* bit16 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT16_SHIFT 0 +#define XSTORM_CORE_CONN_AG_CTX_BIT17_MASK 0x1 /* bit17 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT17_SHIFT 1 +#define XSTORM_CORE_CONN_AG_CTX_BIT18_MASK 0x1 /* bit18 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT18_SHIFT 2 +#define XSTORM_CORE_CONN_AG_CTX_BIT19_MASK 0x1 /* bit19 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT19_SHIFT 3 +#define XSTORM_CORE_CONN_AG_CTX_BIT20_MASK 0x1 /* bit20 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT20_SHIFT 4 +#define XSTORM_CORE_CONN_AG_CTX_BIT21_MASK 0x1 /* bit21 */ +#define XSTORM_CORE_CONN_AG_CTX_BIT21_SHIFT 5 +#define XSTORM_CORE_CONN_AG_CTX_CF23_MASK 0x3 /* cf23 */ +#define XSTORM_CORE_CONN_AG_CTX_CF23_SHIFT 6 + u8 byte2 /* byte2 */; + __le16 physical_q0 /* physical_q0 */; + __le16 consolid_prod /* physical_q1 */; + __le16 reserved16 /* physical_q2 */; + __le16 tx_bd_cons /* word3 */; + __le16 tx_bd_or_spq_prod /* word4 */; + __le16 word5 /* word5 */; + __le16 conn_dpi /* conn_dpi */; + u8 byte3 /* byte3 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + u8 byte6 /* byte6 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* cf_array0 */; + __le32 reg6 /* cf_array1 */; + __le16 word7 /* word7 */; + __le16 word8 /* word8 */; + __le16 word9 /* word9 */; + __le16 word10 /* word10 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + __le32 reg9 /* reg9 */; + u8 byte7 /* byte7 */; + u8 byte8 /* byte8 */; + u8 byte9 /* byte9 */; + u8 byte10 /* byte10 */; + u8 byte11 /* byte11 */; + u8 byte12 /* byte12 */; + u8 byte13 /* byte13 */; + u8 byte14 /* byte14 */; + u8 byte15 /* byte15 */; + u8 byte16 /* byte16 */; + __le16 word11 /* word11 */; + __le32 reg10 /* reg10 */; + __le32 reg11 /* 
reg11 */; + __le32 reg12 /* reg12 */; + __le32 reg13 /* reg13 */; + __le32 reg14 /* reg14 */; + __le32 reg15 /* reg15 */; + __le32 reg16 /* reg16 */; + __le32 reg17 /* reg17 */; + __le32 reg18 /* reg18 */; + __le32 reg19 /* reg19 */; + __le16 word12 /* word12 */; + __le16 word13 /* word13 */; + __le16 word14 /* word14 */; + __le16 word15 /* word15 */; +}; + +/* The core storm context for the Mstorm */ +struct mstorm_core_conn_st_ctx { + __le32 reserved[24]; +}; + +/* The core storm context for the Ustorm */ +struct ustorm_core_conn_st_ctx { + __le32 reserved[4]; +}; + +/* core connection context */ +struct core_conn_context { + struct ystorm_core_conn_st_ctx ystorm_st_context; + struct regpair ystorm_st_padding[2] /* padding */; + struct pstorm_core_conn_st_ctx pstorm_st_context; + struct regpair pstorm_st_padding[2]; + struct xstorm_core_conn_st_ctx xstorm_st_context; + struct xstorm_core_conn_ag_ctx xstorm_ag_context; + struct mstorm_core_conn_st_ctx mstorm_st_context; + struct regpair mstorm_st_padding[2]; + struct ustorm_core_conn_st_ctx ustorm_st_context; + struct regpair ustorm_st_padding[2] /* padding */; +}; + +/* Event Ring Next Page Address */ +struct event_ring_next_addr { + struct regpair addr /* Next Page Address */; + __le32 reserved[2] /* Reserved */; +}; + +union event_ring_element { + struct event_ring_entry entry /* Event Ring Entry */; + struct event_ring_next_addr next_addr; +}; + +enum personality_type { + PERSONALITY_RESERVED, + PERSONALITY_RESERVED2, + PERSONALITY_RDMA_AND_ETH /* Roce or Iwarp */, + PERSONALITY_RESERVED3, + PERSONALITY_ETH /* Ethernet */, + PERSONALITY_RESERVED4, + MAX_PERSONALITY_TYPE +}; + +struct pf_start_tunnel_config { + u8 set_vxlan_udp_port_flg; + u8 set_geneve_udp_port_flg; + u8 tx_enable_vxlan /* If set, enable VXLAN tunnel in TX path. */; + u8 tx_enable_l2geneve; + u8 tx_enable_ipgeneve; + u8 tx_enable_l2gre /* If set, enable l2 GRE tunnel in TX path. */; + u8 tx_enable_ipgre /* If set, enable IP GRE tunnel in TX path. */; + u8 tunnel_clss_vxlan /* Classification scheme for VXLAN tunnel. */; + u8 tunnel_clss_l2geneve; + u8 tunnel_clss_ipgeneve; + u8 tunnel_clss_l2gre; + u8 tunnel_clss_ipgre; + __le16 vxlan_udp_port /* VXLAN tunnel UDP destination port. */; + __le16 geneve_udp_port /* GENEVE tunnel UDP destination port. 
*/; +}; + +/* Ramrod data for PF start ramrod */ +struct pf_start_ramrod_data { + struct regpair event_ring_pbl_addr; + struct regpair consolid_q_pbl_addr; + struct pf_start_tunnel_config tunnel_config; + __le16 event_ring_sb_id; + u8 base_vf_id; + u8 num_vfs; + u8 event_ring_num_pages; + u8 event_ring_sb_index; + u8 path_id; + u8 warning_as_error; + u8 dont_log_ramrods; + u8 personality; + __le16 log_type_mask; + u8 mf_mode /* Multi function mode */; + u8 integ_phase /* Integration phase */; + u8 allow_npar_tx_switching; + u8 inner_to_outer_pri_map[8]; + u8 pri_map_valid; + u32 outer_tag; + u8 reserved0[4]; +}; + +enum ports_mode { + ENGX2_PORTX1 /* 2 engines x 1 port */, + ENGX2_PORTX2 /* 2 engines x 2 ports */, + ENGX1_PORTX1 /* 1 engine x 1 port */, + ENGX1_PORTX2 /* 1 engine x 2 ports */, + ENGX1_PORTX4 /* 1 engine x 4 ports */, + MAX_PORTS_MODE +}; + +/* Ramrod Header of SPQE */ +struct ramrod_header { + __le32 cid /* Slowpath Connection CID */; + u8 cmd_id /* Ramrod Cmd (Per Protocol Type) */; + u8 protocol_id /* Ramrod Protocol ID */; + __le16 echo /* Ramrod echo */; +}; + +/* Slowpath Element (SPQE) */ +struct slow_path_element { + struct ramrod_header hdr /* Ramrod Header */; + struct regpair data_ptr; +}; + +struct tstorm_per_port_stat { + struct regpair trunc_error_discard; + struct regpair mac_error_discard; + struct regpair mftag_filter_discard; + struct regpair eth_mac_filter_discard; + struct regpair ll2_mac_filter_discard; + struct regpair ll2_conn_disabled_discard; + struct regpair iscsi_irregular_pkt; + struct regpair fcoe_irregular_pkt; + struct regpair roce_irregular_pkt; + struct regpair eth_irregular_pkt; + struct regpair toe_irregular_pkt; + struct regpair preroce_irregular_pkt; +}; + +struct atten_status_block { + __le32 atten_bits; + __le32 atten_ack; + __le16 reserved0; + __le16 sb_index /* status block running index */; + __le32 reserved1; +}; + +enum block_addr { + GRCBASE_GRC = 0x50000, + GRCBASE_MISCS = 0x9000, + GRCBASE_MISC = 0x8000, + GRCBASE_DBU = 0xa000, + GRCBASE_PGLUE_B = 0x2a8000, + GRCBASE_CNIG = 0x218000, + GRCBASE_CPMU = 0x30000, + GRCBASE_NCSI = 0x40000, + GRCBASE_OPTE = 0x53000, + GRCBASE_BMB = 0x540000, + GRCBASE_PCIE = 0x54000, + GRCBASE_MCP = 0xe00000, + GRCBASE_MCP2 = 0x52000, + GRCBASE_PSWHST = 0x2a0000, + GRCBASE_PSWHST2 = 0x29e000, + GRCBASE_PSWRD = 0x29c000, + GRCBASE_PSWRD2 = 0x29d000, + GRCBASE_PSWWR = 0x29a000, + GRCBASE_PSWWR2 = 0x29b000, + GRCBASE_PSWRQ = 0x280000, + GRCBASE_PSWRQ2 = 0x240000, + GRCBASE_PGLCS = 0x0, + GRCBASE_PTU = 0x560000, + GRCBASE_DMAE = 0xc000, + GRCBASE_TCM = 0x1180000, + GRCBASE_MCM = 0x1200000, + GRCBASE_UCM = 0x1280000, + GRCBASE_XCM = 0x1000000, + GRCBASE_YCM = 0x1080000, + GRCBASE_PCM = 0x1100000, + GRCBASE_QM = 0x2f0000, + GRCBASE_TM = 0x2c0000, + GRCBASE_DORQ = 0x100000, + GRCBASE_BRB = 0x340000, + GRCBASE_SRC = 0x238000, + GRCBASE_PRS = 0x1f0000, + GRCBASE_TSDM = 0xfb0000, + GRCBASE_MSDM = 0xfc0000, + GRCBASE_USDM = 0xfd0000, + GRCBASE_XSDM = 0xf80000, + GRCBASE_YSDM = 0xf90000, + GRCBASE_PSDM = 0xfa0000, + GRCBASE_TSEM = 0x1700000, + GRCBASE_MSEM = 0x1800000, + GRCBASE_USEM = 0x1900000, + GRCBASE_XSEM = 0x1400000, + GRCBASE_YSEM = 0x1500000, + GRCBASE_PSEM = 0x1600000, + GRCBASE_RSS = 0x238800, + GRCBASE_TMLD = 0x4d0000, + GRCBASE_MULD = 0x4e0000, + GRCBASE_YULD = 0x4c8000, + GRCBASE_XYLD = 0x4c0000, + GRCBASE_PRM = 0x230000, + GRCBASE_PBF_PB1 = 0xda0000, + GRCBASE_PBF_PB2 = 0xda4000, + GRCBASE_RPB = 0x23c000, + GRCBASE_BTB = 0xdb0000, + GRCBASE_PBF = 0xd80000, + GRCBASE_RDIF = 0x300000, + GRCBASE_TDIF 
= 0x310000, + GRCBASE_CDU = 0x580000, + GRCBASE_CCFC = 0x2e0000, + GRCBASE_TCFC = 0x2d0000, + GRCBASE_IGU = 0x180000, + GRCBASE_CAU = 0x1c0000, + GRCBASE_UMAC = 0x51000, + GRCBASE_XMAC = 0x210000, + GRCBASE_DBG = 0x10000, + GRCBASE_NIG = 0x500000, + GRCBASE_WOL = 0x600000, + GRCBASE_BMBN = 0x610000, + GRCBASE_IPC = 0x20000, + GRCBASE_NWM = 0x800000, + GRCBASE_NWS = 0x700000, + GRCBASE_MS = 0x6a0000, + GRCBASE_PHY_PCIE = 0x618000, + GRCBASE_MISC_AEU = 0x8000, + GRCBASE_BAR0_MAP = 0x1c00000, + MAX_BLOCK_ADDR +}; + +enum block_id { + BLOCK_GRC, + BLOCK_MISCS, + BLOCK_MISC, + BLOCK_DBU, + BLOCK_PGLUE_B, + BLOCK_CNIG, + BLOCK_CPMU, + BLOCK_NCSI, + BLOCK_OPTE, + BLOCK_BMB, + BLOCK_PCIE, + BLOCK_MCP, + BLOCK_MCP2, + BLOCK_PSWHST, + BLOCK_PSWHST2, + BLOCK_PSWRD, + BLOCK_PSWRD2, + BLOCK_PSWWR, + BLOCK_PSWWR2, + BLOCK_PSWRQ, + BLOCK_PSWRQ2, + BLOCK_PGLCS, + BLOCK_PTU, + BLOCK_DMAE, + BLOCK_TCM, + BLOCK_MCM, + BLOCK_UCM, + BLOCK_XCM, + BLOCK_YCM, + BLOCK_PCM, + BLOCK_QM, + BLOCK_TM, + BLOCK_DORQ, + BLOCK_BRB, + BLOCK_SRC, + BLOCK_PRS, + BLOCK_TSDM, + BLOCK_MSDM, + BLOCK_USDM, + BLOCK_XSDM, + BLOCK_YSDM, + BLOCK_PSDM, + BLOCK_TSEM, + BLOCK_MSEM, + BLOCK_USEM, + BLOCK_XSEM, + BLOCK_YSEM, + BLOCK_PSEM, + BLOCK_RSS, + BLOCK_TMLD, + BLOCK_MULD, + BLOCK_YULD, + BLOCK_XYLD, + BLOCK_PRM, + BLOCK_PBF_PB1, + BLOCK_PBF_PB2, + BLOCK_RPB, + BLOCK_BTB, + BLOCK_PBF, + BLOCK_RDIF, + BLOCK_TDIF, + BLOCK_CDU, + BLOCK_CCFC, + BLOCK_TCFC, + BLOCK_IGU, + BLOCK_CAU, + BLOCK_UMAC, + BLOCK_XMAC, + BLOCK_DBG, + BLOCK_NIG, + BLOCK_WOL, + BLOCK_BMBN, + BLOCK_IPC, + BLOCK_NWM, + BLOCK_NWS, + BLOCK_MS, + BLOCK_PHY_PCIE, + BLOCK_MISC_AEU, + BLOCK_BAR0_MAP, + MAX_BLOCK_ID +}; + +enum command_type_bit { + IGU_COMMAND_TYPE_NOP = 0, + IGU_COMMAND_TYPE_SET = 1, + MAX_COMMAND_TYPE_BIT +}; + +struct dmae_cmd { + __le32 opcode; +#define DMAE_CMD_SRC_MASK 0x1 +#define DMAE_CMD_SRC_SHIFT 0 +#define DMAE_CMD_DST_MASK 0x3 +#define DMAE_CMD_DST_SHIFT 1 +#define DMAE_CMD_C_DST_MASK 0x1 +#define DMAE_CMD_C_DST_SHIFT 3 +#define DMAE_CMD_CRC_RESET_MASK 0x1 +#define DMAE_CMD_CRC_RESET_SHIFT 4 +#define DMAE_CMD_SRC_ADDR_RESET_MASK 0x1 +#define DMAE_CMD_SRC_ADDR_RESET_SHIFT 5 +#define DMAE_CMD_DST_ADDR_RESET_MASK 0x1 +#define DMAE_CMD_DST_ADDR_RESET_SHIFT 6 +#define DMAE_CMD_COMP_FUNC_MASK 0x1 +#define DMAE_CMD_COMP_FUNC_SHIFT 7 +#define DMAE_CMD_COMP_WORD_EN_MASK 0x1 +#define DMAE_CMD_COMP_WORD_EN_SHIFT 8 +#define DMAE_CMD_COMP_CRC_EN_MASK 0x1 +#define DMAE_CMD_COMP_CRC_EN_SHIFT 9 +#define DMAE_CMD_COMP_CRC_OFFSET_MASK 0x7 +#define DMAE_CMD_COMP_CRC_OFFSET_SHIFT 10 +#define DMAE_CMD_RESERVED1_MASK 0x1 +#define DMAE_CMD_RESERVED1_SHIFT 13 +#define DMAE_CMD_ENDIANITY_MODE_MASK 0x3 +#define DMAE_CMD_ENDIANITY_MODE_SHIFT 14 +#define DMAE_CMD_ERR_HANDLING_MASK 0x3 +#define DMAE_CMD_ERR_HANDLING_SHIFT 16 +#define DMAE_CMD_PORT_ID_MASK 0x3 +#define DMAE_CMD_PORT_ID_SHIFT 18 +#define DMAE_CMD_SRC_PF_ID_MASK 0xF +#define DMAE_CMD_SRC_PF_ID_SHIFT 20 +#define DMAE_CMD_DST_PF_ID_MASK 0xF +#define DMAE_CMD_DST_PF_ID_SHIFT 24 +#define DMAE_CMD_SRC_VF_ID_VALID_MASK 0x1 +#define DMAE_CMD_SRC_VF_ID_VALID_SHIFT 28 +#define DMAE_CMD_DST_VF_ID_VALID_MASK 0x1 +#define DMAE_CMD_DST_VF_ID_VALID_SHIFT 29 +#define DMAE_CMD_RESERVED2_MASK 0x3 +#define DMAE_CMD_RESERVED2_SHIFT 30 + __le32 src_addr_lo; + __le32 src_addr_hi; + __le32 dst_addr_lo; + __le32 dst_addr_hi; + __le16 length /* Length in DW */; + __le16 opcode_b; +#define DMAE_CMD_SRC_VF_ID_MASK 0xFF /* Source VF id */ +#define DMAE_CMD_SRC_VF_ID_SHIFT 0 +#define DMAE_CMD_DST_VF_ID_MASK 0xFF /* Destination VF id */ 
+#define DMAE_CMD_DST_VF_ID_SHIFT 8 + __le32 comp_addr_lo /* PCIe completion address low or grc address */; + __le32 comp_addr_hi; + __le32 comp_val /* Value to write to completion address */; + __le32 crc32 /* crc32 result */; + __le32 crc_32_c /* crc32_c result */; + __le16 crc16 /* crc16 result */; + __le16 crc16_c /* crc16_c result */; + __le16 crc10 /* crc_t10 result */; + __le16 reserved; + __le16 xsum16 /* checksum16 result */; + __le16 xsum8 /* checksum8 result */; +}; + +struct igu_cleanup { + __le32 sb_id_and_flags; +#define IGU_CLEANUP_RESERVED0_MASK 0x7FFFFFF +#define IGU_CLEANUP_RESERVED0_SHIFT 0 +#define IGU_CLEANUP_CLEANUP_SET_MASK 0x1 /* cleanup clear - 0, set - 1 */ +#define IGU_CLEANUP_CLEANUP_SET_SHIFT 27 +#define IGU_CLEANUP_CLEANUP_TYPE_MASK 0x7 +#define IGU_CLEANUP_CLEANUP_TYPE_SHIFT 28 +#define IGU_CLEANUP_COMMAND_TYPE_MASK 0x1 +#define IGU_CLEANUP_COMMAND_TYPE_SHIFT 31 + __le32 reserved1; +}; + +union igu_command { + struct igu_prod_cons_update prod_cons_update; + struct igu_cleanup cleanup; +}; + +struct igu_command_reg_ctrl { + __le16 opaque_fid; + __le16 igu_command_reg_ctrl_fields; +#define IGU_COMMAND_REG_CTRL_PXP_BAR_ADDR_MASK 0xFFF +#define IGU_COMMAND_REG_CTRL_PXP_BAR_ADDR_SHIFT 0 +#define IGU_COMMAND_REG_CTRL_RESERVED_MASK 0x7 +#define IGU_COMMAND_REG_CTRL_RESERVED_SHIFT 12 +#define IGU_COMMAND_REG_CTRL_COMMAND_TYPE_MASK 0x1 +#define IGU_COMMAND_REG_CTRL_COMMAND_TYPE_SHIFT 15 +}; + +struct igu_mapping_line { + __le32 igu_mapping_line_fields; +#define IGU_MAPPING_LINE_VALID_MASK 0x1 +#define IGU_MAPPING_LINE_VALID_SHIFT 0 +#define IGU_MAPPING_LINE_VECTOR_NUMBER_MASK 0xFF +#define IGU_MAPPING_LINE_VECTOR_NUMBER_SHIFT 1 +#define IGU_MAPPING_LINE_FUNCTION_NUMBER_MASK 0xFF +#define IGU_MAPPING_LINE_FUNCTION_NUMBER_SHIFT 9 +#define IGU_MAPPING_LINE_PF_VALID_MASK 0x1 /* PF-1, VF-0 */ +#define IGU_MAPPING_LINE_PF_VALID_SHIFT 17 +#define IGU_MAPPING_LINE_IPS_GROUP_MASK 0x3F +#define IGU_MAPPING_LINE_IPS_GROUP_SHIFT 18 +#define IGU_MAPPING_LINE_RESERVED_MASK 0xFF +#define IGU_MAPPING_LINE_RESERVED_SHIFT 24 +}; + +struct igu_msix_vector { + struct regpair address; + __le32 data; + __le32 msix_vector_fields; +#define IGU_MSIX_VECTOR_MASK_BIT_MASK 0x1 +#define IGU_MSIX_VECTOR_MASK_BIT_SHIFT 0 +#define IGU_MSIX_VECTOR_RESERVED0_MASK 0x7FFF +#define IGU_MSIX_VECTOR_RESERVED0_SHIFT 1 +#define IGU_MSIX_VECTOR_STEERING_TAG_MASK 0xFF +#define IGU_MSIX_VECTOR_STEERING_TAG_SHIFT 16 +#define IGU_MSIX_VECTOR_RESERVED1_MASK 0xFF +#define IGU_MSIX_VECTOR_RESERVED1_SHIFT 24 +}; + +enum init_modes { + MODE_BB_A0, + MODE_RESERVED, + MODE_RESERVED2, + MODE_ASIC, + MODE_RESERVED3, + MODE_RESERVED4, + MODE_RESERVED5, + MODE_SF, + MODE_MF_SD, + MODE_MF_SI, + MODE_PORTS_PER_ENG_1, + MODE_PORTS_PER_ENG_2, + MODE_PORTS_PER_ENG_4, + MODE_40G, + MODE_100G, + MODE_EAGLE_ENG1_WORKAROUND, + MAX_INIT_MODES +}; + +enum init_phases { + PHASE_ENGINE, + PHASE_PORT, + PHASE_PF, + PHASE_RESERVED, + PHASE_QM_PF, + MAX_INIT_PHASES +}; + +struct mstorm_core_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define MSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define MSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define MSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define MSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define MSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ +#define MSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 +#define MSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ +#define MSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 +#define MSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* 
cf2 */ +#define MSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define MSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define MSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 +#define MSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define MSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 +#define MSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define MSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 +#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 3 +#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 4 +#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 5 +#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 6 +#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 7 + __le16 word0 /* word0 */; + __le16 word1 /* word1 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; +}; + +/* per encapsulation type enabling flags */ +struct prs_reg_encapsulation_type_en { + u8 flags; +#define PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GRE_ENABLE_MASK 0x1 +#define PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GRE_ENABLE_SHIFT 0 +#define PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GRE_ENABLE_MASK 0x1 +#define PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GRE_ENABLE_SHIFT 1 +#define PRS_REG_ENCAPSULATION_TYPE_EN_VXLAN_ENABLE_MASK 0x1 +#define PRS_REG_ENCAPSULATION_TYPE_EN_VXLAN_ENABLE_SHIFT 2 +#define PRS_REG_ENCAPSULATION_TYPE_EN_T_TAG_ENABLE_MASK 0x1 +#define PRS_REG_ENCAPSULATION_TYPE_EN_T_TAG_ENABLE_SHIFT 3 +#define PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GENEVE_ENABLE_MASK 0x1 +#define PRS_REG_ENCAPSULATION_TYPE_EN_ETH_OVER_GENEVE_ENABLE_SHIFT 4 +#define PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GENEVE_ENABLE_MASK 0x1 +#define PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GENEVE_ENABLE_SHIFT 5 +#define PRS_REG_ENCAPSULATION_TYPE_EN_RESERVED_MASK 0x3 +#define PRS_REG_ENCAPSULATION_TYPE_EN_RESERVED_SHIFT 6 +}; + +enum pxp_tph_st_hint { + TPH_ST_HINT_BIDIR /* Read/Write access by Host and Device */, + TPH_ST_HINT_REQUESTER /* Read/Write access by Device */, + TPH_ST_HINT_TARGET, + TPH_ST_HINT_TARGET_PRIO, + MAX_PXP_TPH_ST_HINT +}; + +/* QM hardware structure of enable bypass credit mask */ +struct qm_rf_bypass_mask { + u8 flags; +#define QM_RF_BYPASS_MASK_LINEVOQ_MASK 0x1 +#define QM_RF_BYPASS_MASK_LINEVOQ_SHIFT 0 +#define QM_RF_BYPASS_MASK_RESERVED0_MASK 0x1 +#define QM_RF_BYPASS_MASK_RESERVED0_SHIFT 1 +#define QM_RF_BYPASS_MASK_PFWFQ_MASK 0x1 +#define QM_RF_BYPASS_MASK_PFWFQ_SHIFT 2 +#define QM_RF_BYPASS_MASK_VPWFQ_MASK 0x1 +#define QM_RF_BYPASS_MASK_VPWFQ_SHIFT 3 +#define QM_RF_BYPASS_MASK_PFRL_MASK 0x1 +#define QM_RF_BYPASS_MASK_PFRL_SHIFT 4 +#define QM_RF_BYPASS_MASK_VPQCNRL_MASK 0x1 +#define QM_RF_BYPASS_MASK_VPQCNRL_SHIFT 5 +#define QM_RF_BYPASS_MASK_FWPAUSE_MASK 0x1 +#define QM_RF_BYPASS_MASK_FWPAUSE_SHIFT 6 +#define QM_RF_BYPASS_MASK_RESERVED1_MASK 0x1 +#define QM_RF_BYPASS_MASK_RESERVED1_SHIFT 7 +}; + +/* QM hardware structure of opportunistic credit mask */ +struct qm_rf_opportunistic_mask { + __le16 flags; +#define QM_RF_OPPORTUNISTIC_MASK_LINEVOQ_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_LINEVOQ_SHIFT 0 +#define QM_RF_OPPORTUNISTIC_MASK_BYTEVOQ_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_BYTEVOQ_SHIFT 1 +#define QM_RF_OPPORTUNISTIC_MASK_PFWFQ_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_PFWFQ_SHIFT 2 +#define QM_RF_OPPORTUNISTIC_MASK_VPWFQ_MASK 0x1 +#define 
QM_RF_OPPORTUNISTIC_MASK_VPWFQ_SHIFT 3 +#define QM_RF_OPPORTUNISTIC_MASK_PFRL_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_PFRL_SHIFT 4 +#define QM_RF_OPPORTUNISTIC_MASK_VPQCNRL_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_VPQCNRL_SHIFT 5 +#define QM_RF_OPPORTUNISTIC_MASK_FWPAUSE_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_FWPAUSE_SHIFT 6 +#define QM_RF_OPPORTUNISTIC_MASK_RESERVED0_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_RESERVED0_SHIFT 7 +#define QM_RF_OPPORTUNISTIC_MASK_QUEUEEMPTY_MASK 0x1 +#define QM_RF_OPPORTUNISTIC_MASK_QUEUEEMPTY_SHIFT 8 +#define QM_RF_OPPORTUNISTIC_MASK_RESERVED1_MASK 0x7F +#define QM_RF_OPPORTUNISTIC_MASK_RESERVED1_SHIFT 9 +}; + +/* QM hardware structure of QM map memory */ +struct qm_rf_pq_map { + u32 reg; +#define QM_RF_PQ_MAP_PQ_VALID_MASK 0x1 /* PQ active */ +#define QM_RF_PQ_MAP_PQ_VALID_SHIFT 0 +#define QM_RF_PQ_MAP_RL_ID_MASK 0xFF /* RL ID */ +#define QM_RF_PQ_MAP_RL_ID_SHIFT 1 +#define QM_RF_PQ_MAP_VP_PQ_ID_MASK 0x1FF +#define QM_RF_PQ_MAP_VP_PQ_ID_SHIFT 9 +#define QM_RF_PQ_MAP_VOQ_MASK 0x1F /* VOQ */ +#define QM_RF_PQ_MAP_VOQ_SHIFT 18 +#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_MASK 0x3 /* WRR weight */ +#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_SHIFT 23 +#define QM_RF_PQ_MAP_RL_VALID_MASK 0x1 /* RL active */ +#define QM_RF_PQ_MAP_RL_VALID_SHIFT 25 +#define QM_RF_PQ_MAP_RESERVED_MASK 0x3F +#define QM_RF_PQ_MAP_RESERVED_SHIFT 26 +}; + +/* SDM operation gen command (generate aggregative interrupt) */ +struct sdm_op_gen { + __le32 command; +#define SDM_OP_GEN_COMP_PARAM_MASK 0xFFFF /* completion parameters 0-15 */ +#define SDM_OP_GEN_COMP_PARAM_SHIFT 0 +#define SDM_OP_GEN_COMP_TYPE_MASK 0xF /* completion type 16-19 */ +#define SDM_OP_GEN_COMP_TYPE_SHIFT 16 +#define SDM_OP_GEN_RESERVED_MASK 0xFFF /* reserved 20-31 */ +#define SDM_OP_GEN_RESERVED_SHIFT 20 +}; + +struct tstorm_core_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 6 + u8 flags1; +#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT 6 + u8 flags2; +#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT 6 + u8 flags3; +#define 
TSTORM_CORE_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 7 + u8 flags4; +#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags5; +#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* reg5 */; + __le32 reg6 /* reg6 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* word0 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + __le16 word1 /* word1 */; + __le16 word2 /* conn_dpi */; + __le16 word3 /* word3 */; + __le32 reg9 /* reg9 */; + __le32 reg10 /* reg10 */; +}; + +struct ustorm_core_conn_ag_ctx { + u8 reserved /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define USTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define USTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define USTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define USTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define USTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define 
USTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define USTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define USTORM_CORE_CONN_AG_CTX_CF3_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define USTORM_CORE_CONN_AG_CTX_CF4_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define USTORM_CORE_CONN_AG_CTX_CF5_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define USTORM_CORE_CONN_AG_CTX_CF6_SHIFT 6 + u8 flags2; +#define USTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define USTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define USTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define USTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define USTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define USTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define USTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 5 +#define USTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define USTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 6 +#define USTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define USTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags3; +#define USTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define USTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define USTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define USTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define USTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define USTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define USTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define USTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define USTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define USTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define USTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define USTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* conn_dpi */; + __le16 word1 /* word1 */; + __le32 rx_producers /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le16 word2 /* word2 */; + __le16 word3 /* word3 */; +}; + +struct ystorm_core_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define YSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define YSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define YSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define YSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define YSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ +#define YSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 +#define YSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ +#define YSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 +#define YSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ +#define YSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define YSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define YSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 +#define YSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define YSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 +#define YSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define YSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 +#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* 
rule0en */ +#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 3 +#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 4 +#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 5 +#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 6 +#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* word0 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le16 word1 /* word1 */; + __le16 word2 /* word2 */; + __le16 word3 /* word3 */; + __le16 word4 /* word4 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; +}; + +/*********************************** Init ************************************/ + +/* Width of GRC address in bits (addresses are specified in dwords) */ +#define GRC_ADDR_BITS 23 +#define MAX_GRC_ADDR ((1 << GRC_ADDR_BITS) - 1) + +/* indicates an init that should be applied to any phase ID */ +#define ANY_PHASE_ID 0xffff + +/* init pattern size in bytes */ +#define INIT_PATTERN_SIZE_BITS 4 +#define MAX_INIT_PATTERN_SIZE BIT(INIT_PATTERN_SIZE_BITS) + +/* Max size in dwords of a zipped array */ +#define MAX_ZIPPED_SIZE 8192 + +/* Global PXP window */ +#define NUM_OF_PXP_WIN 19 +#define PXP_WIN_DWORD_SIZE_BITS 10 +#define PXP_WIN_DWORD_SIZE BIT(PXP_WIN_DWORD_SIZE_BITS) +#define PXP_WIN_BYTE_SIZE_BITS (PXP_WIN_DWORD_SIZE_BITS + 2) +#define PXP_WIN_BYTE_SIZE (PXP_WIN_DWORD_SIZE * 4) + +/********************************* GRC Dump **********************************/ + +/* width of GRC dump register sequence length in bits */ +#define DUMP_SEQ_LEN_BITS 8 +#define DUMP_SEQ_LEN_MAX_VAL ((1 << DUMP_SEQ_LEN_BITS) - 1) + +/* width of GRC dump memory length in bits */ +#define DUMP_MEM_LEN_BITS 18 +#define DUMP_MEM_LEN_MAX_VAL ((1 << DUMP_MEM_LEN_BITS) - 1) + +/* width of register type ID in bits */ +#define REG_TYPE_ID_BITS 6 +#define REG_TYPE_ID_MAX_VAL ((1 << REG_TYPE_ID_BITS) - 1) + +/* width of block ID in bits */ +#define BLOCK_ID_BITS 8 +#define BLOCK_ID_MAX_VAL ((1 << BLOCK_ID_BITS) - 1) + +/******************************** Idle Check *********************************/ + +/* max number of idle check predicate immediates */ +#define MAX_IDLE_CHK_PRED_IMM 3 + +/* max number of idle check argument registers */ +#define MAX_IDLE_CHK_READ_REGS 3 + +/* max number of idle check loops */ +#define MAX_IDLE_CHK_LOOPS 0x10000 + +/* max idle check address increment */ +#define MAX_IDLE_CHK_INCREMENT 0x10000 + +/* indicates an undefined idle check line index */ +#define IDLE_CHK_UNDEFINED_LINE_IDX 0xffffff + +/* max number of register values following the idle check header */ +#define IDLE_CHK_MAX_DUMP_REGS 2 + +/* arguments for IDLE_CHK_MACRO_TYPE_QM_RD_WR */ +#define IDLE_CHK_QM_RD_WR_PTR 0 +#define IDLE_CHK_QM_RD_WR_BANK 1 + +/**************************************/ +/* HSI Functions constants and macros */ +/**************************************/ + +/* Number of VLAN priorities */ +#define NUM_OF_VLAN_PRIORITIES 8 + +/* the MCP Trace meta data signature is duplicated in the perl script that + * generates the NVRAM images. 
+
+/* the MCP Trace meta data signature is duplicated in the perl script that
+ * generates the NVRAM images.
+ */
+#define MCP_TRACE_META_IMAGE_SIGNATURE 0x669955aa
+
+/* Binary buffer header */
+struct bin_buffer_hdr {
+ u32 offset;
+ u32 length /* buffer length in bytes */;
+};
+
+/* binary buffer types */
+enum bin_buffer_type {
+ BIN_BUF_FW_VER_INFO /* fw_ver_info struct */,
+ BIN_BUF_INIT_CMD /* init commands */,
+ BIN_BUF_INIT_VAL /* init data */,
+ BIN_BUF_INIT_MODE_TREE /* init modes tree */,
+ BIN_BUF_IRO /* internal RAM offsets array */,
+ MAX_BIN_BUFFER_TYPE
+};
+
+/* Chip IDs */
+enum chip_ids {
+ CHIP_BB_A0 /* BB A0 chip ID */,
+ CHIP_BB_B0 /* BB B0 chip ID */,
+ CHIP_K2 /* AH chip ID */,
+ MAX_CHIP_IDS
+};
+
+enum idle_chk_severity_types {
+ IDLE_CHK_SEVERITY_ERROR /* idle check failure should cause an error */,
+ IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC,
+ IDLE_CHK_SEVERITY_WARNING,
+ MAX_IDLE_CHK_SEVERITY_TYPES
+};
+
+struct init_array_raw_hdr {
+ __le32 data;
+#define INIT_ARRAY_RAW_HDR_TYPE_MASK 0xF
+#define INIT_ARRAY_RAW_HDR_TYPE_SHIFT 0
+#define INIT_ARRAY_RAW_HDR_PARAMS_MASK 0xFFFFFFF /* init array params */
+#define INIT_ARRAY_RAW_HDR_PARAMS_SHIFT 4
+};
+
+struct init_array_standard_hdr {
+ __le32 data;
+#define INIT_ARRAY_STANDARD_HDR_TYPE_MASK 0xF
+#define INIT_ARRAY_STANDARD_HDR_TYPE_SHIFT 0
+#define INIT_ARRAY_STANDARD_HDR_SIZE_MASK 0xFFFFFFF
+#define INIT_ARRAY_STANDARD_HDR_SIZE_SHIFT 4
+};
+
+struct init_array_zipped_hdr {
+ __le32 data;
+#define INIT_ARRAY_ZIPPED_HDR_TYPE_MASK 0xF
+#define INIT_ARRAY_ZIPPED_HDR_TYPE_SHIFT 0
+#define INIT_ARRAY_ZIPPED_HDR_ZIPPED_SIZE_MASK 0xFFFFFFF
+#define INIT_ARRAY_ZIPPED_HDR_ZIPPED_SIZE_SHIFT 4
+};
+
+struct init_array_pattern_hdr {
+ __le32 data;
+#define INIT_ARRAY_PATTERN_HDR_TYPE_MASK 0xF
+#define INIT_ARRAY_PATTERN_HDR_TYPE_SHIFT 0
+#define INIT_ARRAY_PATTERN_HDR_PATTERN_SIZE_MASK 0xF
+#define INIT_ARRAY_PATTERN_HDR_PATTERN_SIZE_SHIFT 4
+#define INIT_ARRAY_PATTERN_HDR_REPETITIONS_MASK 0xFFFFFF
+#define INIT_ARRAY_PATTERN_HDR_REPETITIONS_SHIFT 8
+};
+
+union init_array_hdr {
+ struct init_array_raw_hdr raw /* raw init array header */;
+ struct init_array_standard_hdr standard;
+ struct init_array_zipped_hdr zipped /* zipped init array header */;
+ struct init_array_pattern_hdr pattern /* pattern init array header */;
+};
+
+enum init_array_types {
+ INIT_ARR_STANDARD /* standard init array */,
+ INIT_ARR_ZIPPED /* zipped init array */,
+ INIT_ARR_PATTERN /* a repeated pattern */,
+ MAX_INIT_ARRAY_TYPES
+};
+
+/* init operation: callback */
+struct init_callback_op {
+ __le32 op_data;
+#define INIT_CALLBACK_OP_OP_MASK 0xF
+#define INIT_CALLBACK_OP_OP_SHIFT 0
+#define INIT_CALLBACK_OP_RESERVED_MASK 0xFFFFFFF
+#define INIT_CALLBACK_OP_RESERVED_SHIFT 4
+ __le16 callback_id /* Callback ID */;
+ __le16 block_id /* Block ID */;
+};
+
+/* init comparison types */
+enum init_comparison_types {
+ INIT_COMPARISON_EQ /* equal */,
+ INIT_COMPARISON_OR /* bitwise or */,
+ INIT_COMPARISON_AND /* bitwise and */,
+ MAX_INIT_COMPARISON_TYPES
+};
+
+/* init operation: delay */
+struct init_delay_op {
+ __le32 op_data;
+#define INIT_DELAY_OP_OP_MASK 0xF
+#define INIT_DELAY_OP_OP_SHIFT 0
+#define INIT_DELAY_OP_RESERVED_MASK 0xFFFFFFF
+#define INIT_DELAY_OP_RESERVED_SHIFT 4
+ __le32 delay /* delay in us */;
+};
+
+/* init operation: if_mode */
+struct init_if_mode_op {
+ __le32 op_data;
+#define INIT_IF_MODE_OP_OP_MASK 0xF
+#define INIT_IF_MODE_OP_OP_SHIFT 0
+#define INIT_IF_MODE_OP_RESERVED1_MASK 0xFFF
+#define INIT_IF_MODE_OP_RESERVED1_SHIFT 4
+#define
INIT_IF_MODE_OP_CMD_OFFSET_MASK 0xFFFF +#define INIT_IF_MODE_OP_CMD_OFFSET_SHIFT 16 + __le16 reserved2; + __le16 modes_buf_offset; +}; + +/* init operation: if_phase */ +struct init_if_phase_op { + __le32 op_data; +#define INIT_IF_PHASE_OP_OP_MASK 0xF +#define INIT_IF_PHASE_OP_OP_SHIFT 0 +#define INIT_IF_PHASE_OP_DMAE_ENABLE_MASK 0x1 +#define INIT_IF_PHASE_OP_DMAE_ENABLE_SHIFT 4 +#define INIT_IF_PHASE_OP_RESERVED1_MASK 0x7FF +#define INIT_IF_PHASE_OP_RESERVED1_SHIFT 5 +#define INIT_IF_PHASE_OP_CMD_OFFSET_MASK 0xFFFF +#define INIT_IF_PHASE_OP_CMD_OFFSET_SHIFT 16 + __le32 phase_data; +#define INIT_IF_PHASE_OP_PHASE_MASK 0xFF /* Init phase */ +#define INIT_IF_PHASE_OP_PHASE_SHIFT 0 +#define INIT_IF_PHASE_OP_RESERVED2_MASK 0xFF +#define INIT_IF_PHASE_OP_RESERVED2_SHIFT 8 +#define INIT_IF_PHASE_OP_PHASE_ID_MASK 0xFFFF /* Init phase ID */ +#define INIT_IF_PHASE_OP_PHASE_ID_SHIFT 16 +}; + +/* init mode operators */ +enum init_mode_ops { + INIT_MODE_OP_NOT /* init mode not operator */, + INIT_MODE_OP_OR /* init mode or operator */, + INIT_MODE_OP_AND /* init mode and operator */, + MAX_INIT_MODE_OPS +}; + +/* init operation: raw */ +struct init_raw_op { + __le32 op_data; +#define INIT_RAW_OP_OP_MASK 0xF +#define INIT_RAW_OP_OP_SHIFT 0 +#define INIT_RAW_OP_PARAM1_MASK 0xFFFFFFF /* init param 1 */ +#define INIT_RAW_OP_PARAM1_SHIFT 4 + __le32 param2 /* Init param 2 */; +}; + +/* init array params */ +struct init_op_array_params { + __le16 size /* array size in dwords */; + __le16 offset /* array start offset in dwords */; +}; + +/* Write init operation arguments */ +union init_write_args { + __le32 inline_val; + __le32 zeros_count; + __le32 array_offset; + struct init_op_array_params runtime; +}; + +/* init operation: write */ +struct init_write_op { + __le32 data; +#define INIT_WRITE_OP_OP_MASK 0xF +#define INIT_WRITE_OP_OP_SHIFT 0 +#define INIT_WRITE_OP_SOURCE_MASK 0x7 +#define INIT_WRITE_OP_SOURCE_SHIFT 4 +#define INIT_WRITE_OP_RESERVED_MASK 0x1 +#define INIT_WRITE_OP_RESERVED_SHIFT 7 +#define INIT_WRITE_OP_WIDE_BUS_MASK 0x1 +#define INIT_WRITE_OP_WIDE_BUS_SHIFT 8 +#define INIT_WRITE_OP_ADDRESS_MASK 0x7FFFFF +#define INIT_WRITE_OP_ADDRESS_SHIFT 9 + union init_write_args args /* Write init operation arguments */; +}; + +/* init operation: read */ +struct init_read_op { + __le32 op_data; +#define INIT_READ_OP_OP_MASK 0xF +#define INIT_READ_OP_OP_SHIFT 0 +#define INIT_READ_OP_POLL_COMP_MASK 0x7 +#define INIT_READ_OP_POLL_COMP_SHIFT 4 +#define INIT_READ_OP_RESERVED_MASK 0x1 +#define INIT_READ_OP_RESERVED_SHIFT 7 +#define INIT_READ_OP_POLL_MASK 0x1 +#define INIT_READ_OP_POLL_SHIFT 8 +#define INIT_READ_OP_ADDRESS_MASK 0x7FFFFF +#define INIT_READ_OP_ADDRESS_SHIFT 9 + __le32 expected_val; +}; + +/* Init operations union */ +union init_op { + struct init_raw_op raw /* raw init operation */; + struct init_write_op write /* write init operation */; + struct init_read_op read /* read init operation */; + struct init_if_mode_op if_mode /* if_mode init operation */; + struct init_if_phase_op if_phase /* if_phase init operation */; + struct init_callback_op callback /* callback init operation */; + struct init_delay_op delay /* delay init operation */; +}; + +/* Init command operation types */ +enum init_op_types { + INIT_OP_READ /* GRC read init command */, + INIT_OP_WRITE /* GRC write init command */, + INIT_OP_IF_MODE, + INIT_OP_IF_PHASE, + INIT_OP_DELAY /* delay init command */, + INIT_OP_CALLBACK /* callback init command */, + MAX_INIT_OP_TYPES +}; + +/* init source types */ +enum init_source_types { + 
INIT_SRC_INLINE /* init value is included in the init command */,
+ INIT_SRC_ZEROS /* init value is all zeros */,
+ INIT_SRC_ARRAY /* init value is an array of values */,
+ INIT_SRC_RUNTIME /* init value is provided during runtime */,
+ MAX_INIT_SOURCE_TYPES
+};
+
+/* Internal RAM Offsets macro data */
+struct iro {
+ u32 base /* RAM field offset */;
+ u16 m1 /* multiplier 1 */;
+ u16 m2 /* multiplier 2 */;
+ u16 m3 /* multiplier 3 */;
+ u16 size /* RAM field size */;
+};
+
+/* QM per-port init parameters */
+struct init_qm_port_params {
+ u8 active /* Indicates if this port is active */;
+ u8 num_active_phys_tcs;
+ u16 num_pbf_cmd_lines;
+ u16 num_btb_blocks;
+ __le16 reserved;
+};
+
+/* QM per-PQ init parameters */
+struct init_qm_pq_params {
+ u8 vport_id /* VPORT ID */;
+ u8 tc_id /* TC ID */;
+ u8 wrr_group /* WRR group */;
+ u8 reserved;
+};
+
+/* QM per-vport init parameters */
+struct init_qm_vport_params {
+ u32 vport_rl;
+ u16 vport_wfq;
+ u16 first_tx_pq_id[NUM_OF_TCS];
+};
+
+/* Win 2 */
+#define GTT_BAR0_MAP_REG_IGU_CMD \
+ 0x00f000UL
+/* Win 3 */
+#define GTT_BAR0_MAP_REG_TSDM_RAM \
+ 0x010000UL
+/* Win 4 */
+#define GTT_BAR0_MAP_REG_MSDM_RAM \
+ 0x011000UL
+/* Win 5 */
+#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 \
+ 0x012000UL
+/* Win 6 */
+#define GTT_BAR0_MAP_REG_USDM_RAM \
+ 0x013000UL
+/* Win 7 */
+#define GTT_BAR0_MAP_REG_USDM_RAM_1024 \
+ 0x014000UL
+/* Win 8 */
+#define GTT_BAR0_MAP_REG_USDM_RAM_2048 \
+ 0x015000UL
+/* Win 9 */
+#define GTT_BAR0_MAP_REG_XSDM_RAM \
+ 0x016000UL
+/* Win 10 */
+#define GTT_BAR0_MAP_REG_YSDM_RAM \
+ 0x017000UL
+/* Win 11 */
+#define GTT_BAR0_MAP_REG_PSDM_RAM \
+ 0x018000UL
+
+/**
+ * @brief qed_qm_pf_mem_size - prepare QM ILT sizes
+ *
+ * Returns the required host memory size in 4KB units.
+ * Must be called before all QM init HSI functions.
+ *
+ * @param pf_id - physical function ID
+ * @param num_pf_cids - number of connections used by this PF
+ * @param num_vf_cids - number of connections used by VFs of this PF
+ * @param num_tids - number of tasks used by this PF
+ * @param num_pf_pqs - number of PQs used by this PF
+ * @param num_vf_pqs - number of PQs used by VFs of this PF
+ *
+ * @return The required host memory size in 4KB units.
+ */
+u32 qed_qm_pf_mem_size(u8 pf_id,
+ u32 num_pf_cids,
+ u32 num_vf_cids,
+ u32 num_tids,
+ u16 num_pf_pqs,
+ u16 num_vf_pqs);
+
+struct qed_qm_common_rt_init_params {
+ u8 max_ports_per_engine;
+ u8 max_phys_tcs_per_port;
+ bool pf_rl_en;
+ bool pf_wfq_en;
+ bool vport_rl_en;
+ bool vport_wfq_en;
+ struct init_qm_port_params *port_params;
+};
+
+/**
+ * @brief qed_qm_common_rt_init - Prepare QM runtime init values for the
+ * engine phase.
+ *
+ * @param p_hwfn
+ * @param max_ports_per_engine - max number of ports per engine in HW
+ * @param max_phys_tcs_per_port - max number of physical TCs per port in HW
+ * @param pf_rl_en - enable per-PF rate limiters
+ * @param pf_wfq_en - enable per-PF WFQ
+ * @param vport_rl_en - enable per-VPORT rate limiters
+ * @param vport_wfq_en - enable per-VPORT WFQ
+ * @param port_params - array of size MAX_NUM_PORTS with
+ * parameters for each port
+ *
+ * @return 0 on success, -1 on error.
+ */
+int qed_qm_common_rt_init(
+ struct qed_hwfn *p_hwfn,
+ struct qed_qm_common_rt_init_params *p_params);
+
+struct qed_qm_pf_rt_init_params {
+ u8 port_id;
+ u8 pf_id;
+ u8 max_phys_tcs_per_port;
+ bool is_first_pf;
+ u32 num_pf_cids;
+ u32 num_vf_cids;
+ u32 num_tids;
+ u16 start_pq;
+ u16 num_pf_pqs;
+ u16 num_vf_pqs;
+ u8 start_vport;
+ u8 num_vports;
+ u8 pf_wfq;
+ u32 pf_rl;
+ struct init_qm_pq_params *pq_params;
+ struct init_qm_vport_params *vport_params;
+};
+
+int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_qm_pf_rt_init_params *p_params);
+
+/**
+ * @brief qed_init_pf_rl - Initializes the rate limit of the specified PF
+ *
+ * @param p_hwfn
+ * @param p_ptt - ptt window used for writing the registers
+ * @param pf_id - PF ID
+ * @param pf_rl - rate limit in Mb/sec units
+ *
+ * @return 0 on success, -1 on error.
+ */
+int qed_init_pf_rl(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 pf_id,
+ u32 pf_rl);
+
+/**
+ * @brief qed_init_vport_rl - Initializes the rate limit of the specified VPORT
+ *
+ * @param p_hwfn
+ * @param p_ptt - ptt window used for writing the registers
+ * @param vport_id - VPORT ID
+ * @param vport_rl - rate limit in Mb/sec units
+ *
+ * @return 0 on success, -1 on error.
+ */
+int qed_init_vport_rl(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 vport_id,
+ u32 vport_rl);
+
+/**
+ * @brief qed_send_qm_stop_cmd - Sends a stop command to the QM
+ *
+ * @param p_hwfn
+ * @param p_ptt - ptt window used for writing the registers
+ * @param is_release_cmd - true for release, false for stop.
+ * @param is_tx_pq - true for Tx PQs, false for Other PQs.
+ * @param start_pq - first PQ ID to stop
+ * @param num_pqs - Number of PQs to stop, starting from start_pq.
+ *
+ * @return bool, true if successful, false if timeout occurred while waiting
+ * for QM command done.
+ */
+bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ bool is_release_cmd,
+ bool is_tx_pq,
+ u16 start_pq,
+ u16 num_pqs);
+
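Taken together, these prototypes describe the QM bring-up flow: size the required host memory with qed_qm_pf_mem_size(), feed runtime init values through qed_qm_common_rt_init() and qed_qm_pf_rt_init(), and stop PQs later via qed_send_qm_stop_cmd(). One detail worth a sketch is the unit convention: qed_qm_pf_mem_size() returns 4KB units, so a caller converts before allocating. A minimal standalone illustration of that conversion follows; the stub merely stands in for the real firmware helper, and the value it returns is invented for this example.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;
typedef uint16_t u16;
typedef uint8_t u8;

/* stand-in stub for the helper declared above; the real
 * implementation lives in the qed init code
 */
static u32 qed_qm_pf_mem_size(u8 pf_id, u32 num_pf_cids, u32 num_vf_cids,
			      u32 num_tids, u16 num_pf_pqs, u16 num_vf_pqs)
{
	(void)pf_id; (void)num_pf_cids; (void)num_vf_cids;
	(void)num_tids; (void)num_pf_pqs; (void)num_vf_pqs;
	return 8; /* pretend the QM needs 8 x 4KB for this PF */
}

int main(void)
{
	u32 units = qed_qm_pf_mem_size(0, 1024, 0, 0, 16, 0);

	/* the return value is in 4KB units; convert before allocating */
	printf("QM PF memory: %u units = %u bytes\n",
	       (unsigned)units, (unsigned)(units * 4096));
	return 0;
}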
+/* Ystorm flow control mode. Use enum fw_flow_ctrl_mode */
+#define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base)
+#define YSTORM_FLOW_CONTROL_MODE_SIZE (IRO[0].size)
+/* Tstorm port statistics */
+#define TSTORM_PORT_STAT_OFFSET(port_id) (IRO[1].base + \
+ ((port_id) * \
+ IRO[1].m1))
+#define TSTORM_PORT_STAT_SIZE (IRO[1].size)
+/* Ustorm VF-PF Channel ready flag */
+#define USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) (IRO[2].base + \
+ ((vf_id) * \
+ IRO[2].m1))
+#define USTORM_VF_PF_CHANNEL_READY_SIZE (IRO[2].size)
+/* Ustorm Final flr cleanup ack */
+#define USTORM_FLR_FINAL_ACK_OFFSET (IRO[3].base)
+#define USTORM_FLR_FINAL_ACK_SIZE (IRO[3].size)
+/* Ustorm Event ring consumer */
+#define USTORM_EQE_CONS_OFFSET(pf_id) (IRO[4].base + \
+ ((pf_id) * \
+ IRO[4].m1))
+#define USTORM_EQE_CONS_SIZE (IRO[4].size)
+/* Ustorm Completion ring consumer */
+#define USTORM_CQ_CONS_OFFSET(global_queue_id) (IRO[5].base + \
+ ((global_queue_id) * \
+ IRO[5].m1))
+#define USTORM_CQ_CONS_SIZE (IRO[5].size)
+/* Xstorm Integration Test Data */
+#define XSTORM_INTEG_TEST_DATA_OFFSET (IRO[6].base)
+#define XSTORM_INTEG_TEST_DATA_SIZE (IRO[6].size)
+/* Ystorm Integration Test Data */
+#define YSTORM_INTEG_TEST_DATA_OFFSET (IRO[7].base)
+#define YSTORM_INTEG_TEST_DATA_SIZE (IRO[7].size)
+/* Pstorm Integration Test Data */
+#define PSTORM_INTEG_TEST_DATA_OFFSET (IRO[8].base)
+#define PSTORM_INTEG_TEST_DATA_SIZE (IRO[8].size)
+/* Tstorm Integration Test Data */
+#define TSTORM_INTEG_TEST_DATA_OFFSET (IRO[9].base)
+#define TSTORM_INTEG_TEST_DATA_SIZE (IRO[9].size)
+/* Mstorm Integration Test Data */
+#define MSTORM_INTEG_TEST_DATA_OFFSET (IRO[10].base)
+#define MSTORM_INTEG_TEST_DATA_SIZE (IRO[10].size)
+/* Ustorm Integration Test Data */
+#define USTORM_INTEG_TEST_DATA_OFFSET (IRO[11].base)
+#define USTORM_INTEG_TEST_DATA_SIZE (IRO[11].size)
+/* Tstorm producers */
+#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) (IRO[12].base + \
+ ((core_rx_queue_id) * \
+ IRO[12].m1))
+#define TSTORM_LL2_RX_PRODS_SIZE (IRO[12].size)
+/* Tstorm LiteL2 queue statistics */
+#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_q_id) (IRO[13].base + \
+ ((core_rx_q_id) * \
+ IRO[13].m1))
+#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[13].size)
+/* Ustorm LiteL2 queue statistics */
+#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_q_id) (IRO[14].base + \
+ ((core_rx_q_id) * \
+ IRO[14].m1))
+#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[14].size)
+/* Pstorm LiteL2 queue statistics */
+#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_txst_id) (IRO[15].base + \
+ ((core_txst_id) * \
+ IRO[15].m1))
+#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[15].size)
+/* Mstorm queue statistics */
+#define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[16].base + \
+ ((stat_counter_id) * \
+ IRO[16].m1))
+#define MSTORM_QUEUE_STAT_SIZE (IRO[16].size)
+/* Mstorm producers */
+#define MSTORM_PRODS_OFFSET(queue_id) (IRO[17].base + \
+ ((queue_id) * \
+ IRO[17].m1))
+#define MSTORM_PRODS_SIZE (IRO[17].size)
+/* TPA aggregation timeout in us resolution (on ASIC) */
+#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[18].base)
+#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[18].size)
+/* Ustorm queue statistics */
+#define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[19].base + \
+ ((stat_counter_id) * \
+ IRO[19].m1))
+#define USTORM_QUEUE_STAT_SIZE (IRO[19].size)
+/* Ustorm queue zone */
+#define USTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[20].base + \
+ ((queue_id) * \
+ IRO[20].m1))
+#define USTORM_ETH_QUEUE_ZONE_SIZE (IRO[20].size)
+/* Pstorm queue statistics */
+#define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[21].base + \
+ ((stat_counter_id) * \
+ IRO[21].m1))
+#define PSTORM_QUEUE_STAT_SIZE (IRO[21].size)
+/* Tstorm last parser message */
+#define TSTORM_ETH_PRS_INPUT_OFFSET(pf_id) (IRO[22].base + \
+ ((pf_id) * \
+ IRO[22].m1))
+#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[22].size)
+/* Ystorm queue zone */
+#define YSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[23].base + \
+ ((queue_id) * \
+ IRO[23].m1))
+#define YSTORM_ETH_QUEUE_ZONE_SIZE (IRO[23].size)
+/* Ystorm cqe producer */
+#define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[24].base + \
+ ((rss_id) * \
+ IRO[24].m1))
+#define YSTORM_TOE_CQ_PROD_SIZE (IRO[24].size)
+/* Ustorm cqe producer */
+#define USTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[25].base + \
+ ((rss_id) * \
+ IRO[25].m1))
+#define USTORM_TOE_CQ_PROD_SIZE (IRO[25].size)
+/* Ustorm grq producer */
+#define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) (IRO[26].base + \
+ ((pf_id) * \
+ IRO[26].m1))
+#define USTORM_TOE_GRQ_PROD_SIZE (IRO[26].size)
+/* Tstorm cmdq-cons of given command queue-id */
+#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) (IRO[27].base + \
+ ((cmdq_queue_id) * \
+ IRO[27].m1))
+#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[27].size)
+/* Mstorm rq-cons of given queue-id */
+#define MSTORM_SCSI_RQ_CONS_OFFSET(rq_queue_id) (IRO[28].base + \
+ ((rq_queue_id) * \
+ IRO[28].m1))
+#define MSTORM_SCSI_RQ_CONS_SIZE (IRO[28].size)
+/* Pstorm RoCE statistics */
+#define PSTORM_ROCE_STAT_OFFSET(stat_counter_id) (IRO[29].base + \
+ ((stat_counter_id) * \
+ IRO[29].m1))
+#define PSTORM_ROCE_STAT_SIZE (IRO[29].size)
+/* Tstorm RoCE statistics */
+#define TSTORM_ROCE_STAT_OFFSET(stat_counter_id) (IRO[30].base + \
+ ((stat_counter_id) * \
+ IRO[30].m1))
+#define TSTORM_ROCE_STAT_SIZE (IRO[30].size)
+
+static const struct iro iro_arr[31] = {
+ { 0x10, 0x0, 0x0, 0x0, 0x8 },
+ { 0x4448, 0x60, 0x0, 0x0, 0x60 },
+ { 0x498, 0x8, 0x0, 0x0, 0x4 },
+ { 0x494, 0x0, 0x0, 0x0, 0x4 },
+ { 0x10, 0x8, 0x0, 0x0, 0x2 },
+ { 0x90, 0x8, 0x0, 0x0, 0x2 },
+ { 0x4540, 0x0, 0x0, 0x0, 0xf8 },
+ { 0x39e0, 0x0, 0x0, 0x0, 0xf8 },
+ { 0x2598, 0x0, 0x0, 0x0, 0xf8 },
+ { 0x4350, 0x0, 0x0, 0x0, 0xf8 },
+ { 0x52d0, 0x0, 0x0, 0x0, 0xf8 },
+ { 0x7a48, 0x0, 0x0, 0x0, 0xf8 },
+ { 0x100, 0x8, 0x0, 0x0, 0x8 },
+ { 0x5808, 0x10, 0x0, 0x0, 0x10 },
+ { 0xb100, 0x30, 0x0, 0x0, 0x30 },
+ { 0x95c0, 0x30, 0x0, 0x0, 0x30 },
+ { 0x54f8, 0x40, 0x0, 0x0, 0x40 },
+ { 0x200, 0x10, 0x0, 0x0, 0x8 },
+ { 0x9e70, 0x0, 0x0, 0x0, 0x4 },
+ { 0x7ca0, 0x40, 0x0, 0x0, 0x30 },
+ { 0xd00, 0x8, 0x0, 0x0, 0x8 },
+ { 0x2790, 0x80, 0x0, 0x0, 0x38 },
+ { 0xa520, 0xf0, 0x0, 0x0, 0xf0 },
+ { 0x80, 0x8, 0x0, 0x0, 0x8 },
+ { 0xac0, 0x8, 0x0, 0x0, 0x8 },
+ { 0x2580, 0x8, 0x0, 0x0, 0x8 },
+ { 0x2500, 0x8, 0x0, 0x0, 0x8 },
+ { 0x440, 0x8, 0x0, 0x0, 0x2 },
+ { 0x1800, 0x8, 0x0, 0x0, 0x2 },
+ { 0x27c8, 0x80, 0x0, 0x0, 0x10 },
+ { 0x4710, 0x10, 0x0, 0x0, 0x10 },
+};
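All of the *_OFFSET/*_SIZE macros above reduce to one pattern: an iro_arr entry supplies a base RAM offset, a per-index stride in m1 (m2/m3 are spare multipliers for further dimensions), and the field size. A self-contained sketch of that computation follows, reusing the first two iro_arr rows; treat it as an illustration of the arithmetic, not as driver code.

#include <stdint.h>
#include <stdio.h>

/* mirrors struct iro above */
struct iro {
	uint32_t base;       /* RAM field offset */
	uint16_t m1, m2, m3; /* per-dimension multipliers */
	uint16_t size;       /* RAM field size */
};

static const struct iro iro_arr[] = {
	{ 0x10, 0x0, 0x0, 0x0, 0x8 },     /* [0] Ystorm flow control mode */
	{ 0x4448, 0x60, 0x0, 0x0, 0x60 }, /* [1] Tstorm port statistics */
};

#define IRO iro_arr
#define TSTORM_PORT_STAT_OFFSET(port_id) \
	(IRO[1].base + ((port_id) * IRO[1].m1))

int main(void)
{
	/* port 2 -> 0x4448 + 2 * 0x60 = 0x4508 */
	printf("0x%x\n", (unsigned)TSTORM_PORT_STAT_OFFSET(2));
	return 0;
}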
+
+/* Runtime array offsets */
+#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0
+#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1
+#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2
+#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3
+#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4
+#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5
+#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6
+#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7
+#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8
+#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9
+#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10
+#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11
+#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12
+#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13
+#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14
+#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15
+#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16
+#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 17
+#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 18
+#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 19
+#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 20
+#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 21
+#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 22
+#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 23
+#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 760
+#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736
+#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1496
+#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736
+#define CAU_REG_PI_MEMORY_RT_OFFSET 2232
+#define CAU_REG_PI_MEMORY_RT_SIZE 4416
+#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6648
+#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6649
+#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6650
+#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6651
+#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6652
+#define PRS_REG_SEARCH_TCP_RT_OFFSET 6653
+#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6654
+#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6655
+#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6656
+#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6657
+#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6658
+#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6659
+#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6660
+#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6661
+#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6662
+#define SRC_REG_FIRSTFREE_RT_OFFSET 6663
+#define SRC_REG_FIRSTFREE_RT_SIZE 2
+#define SRC_REG_LASTFREE_RT_OFFSET 6665
+#define SRC_REG_LASTFREE_RT_SIZE 2
+#define SRC_REG_COUNTFREE_RT_OFFSET 6667
+#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6668
+#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6669
+#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6670
+#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6671
+#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6672
+#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6673
+#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6674
+#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6675
+#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6676
+#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6677
+#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6678
+#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6679
+#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6680
+#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6681
+#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6682
+#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6683
+#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6684
+#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6685
+#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6686
+#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6687
+#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6688
+#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6689
+#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6690
+#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6691
+#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6692
+#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6693
+#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6694
+#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6695
+#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6696
+#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6697
+#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6698
+#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6699
+#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6700
+#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6701
+#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000
+#define
PGLUE_REG_B_VF_BASE_RT_OFFSET 28701 +#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28702 +#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28703 +#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28704 +#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28705 +#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28706 +#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28707 +#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28708 +#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28709 +#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28710 +#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28711 +#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 +#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29127 +#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 +#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29639 +#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29640 +#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29641 +#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29642 +#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29643 +#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29644 +#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29645 +#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29646 +#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29647 +#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29648 +#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29649 +#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29650 +#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29651 +#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29652 +#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29653 +#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29654 +#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29655 +#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29656 +#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29657 +#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29658 +#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29659 +#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29660 +#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29661 +#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29662 +#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29663 +#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29664 +#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29665 +#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29666 +#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29667 +#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29668 +#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29669 +#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29670 +#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29671 +#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29672 +#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29673 +#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29674 +#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29675 +#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29676 +#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29677 +#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29678 +#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29679 +#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29680 +#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29681 +#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29682 +#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29683 +#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29684 +#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29685 +#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29686 +#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29687 +#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29688 +#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29689 +#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29690 +#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29691 +#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29692 +#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29693 +#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29694 +#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29695 +#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29696 
+#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29697 +#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29698 +#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29699 +#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29700 +#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29701 +#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29702 +#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29703 +#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29704 +#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29705 +#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29706 +#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 +#define QM_REG_VOQCRDLINE_RT_OFFSET 29834 +#define QM_REG_VOQCRDLINE_RT_SIZE 20 +#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29854 +#define QM_REG_VOQINITCRDLINE_RT_SIZE 20 +#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29874 +#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29875 +#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29876 +#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29877 +#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29878 +#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29879 +#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29880 +#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29881 +#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29882 +#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29883 +#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29884 +#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29885 +#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29886 +#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29887 +#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29888 +#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29889 +#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29890 +#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29891 +#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29892 +#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29893 +#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29894 +#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29895 +#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29896 +#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29897 +#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29898 +#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29899 +#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29900 +#define QM_REG_PQTX2PF_0_RT_OFFSET 29901 +#define QM_REG_PQTX2PF_1_RT_OFFSET 29902 +#define QM_REG_PQTX2PF_2_RT_OFFSET 29903 +#define QM_REG_PQTX2PF_3_RT_OFFSET 29904 +#define QM_REG_PQTX2PF_4_RT_OFFSET 29905 +#define QM_REG_PQTX2PF_5_RT_OFFSET 29906 +#define QM_REG_PQTX2PF_6_RT_OFFSET 29907 +#define QM_REG_PQTX2PF_7_RT_OFFSET 29908 +#define QM_REG_PQTX2PF_8_RT_OFFSET 29909 +#define QM_REG_PQTX2PF_9_RT_OFFSET 29910 +#define QM_REG_PQTX2PF_10_RT_OFFSET 29911 +#define QM_REG_PQTX2PF_11_RT_OFFSET 29912 +#define QM_REG_PQTX2PF_12_RT_OFFSET 29913 +#define QM_REG_PQTX2PF_13_RT_OFFSET 29914 +#define QM_REG_PQTX2PF_14_RT_OFFSET 29915 +#define QM_REG_PQTX2PF_15_RT_OFFSET 29916 +#define QM_REG_PQTX2PF_16_RT_OFFSET 29917 +#define QM_REG_PQTX2PF_17_RT_OFFSET 29918 +#define QM_REG_PQTX2PF_18_RT_OFFSET 29919 +#define QM_REG_PQTX2PF_19_RT_OFFSET 29920 +#define QM_REG_PQTX2PF_20_RT_OFFSET 29921 +#define QM_REG_PQTX2PF_21_RT_OFFSET 29922 +#define QM_REG_PQTX2PF_22_RT_OFFSET 29923 +#define QM_REG_PQTX2PF_23_RT_OFFSET 29924 +#define QM_REG_PQTX2PF_24_RT_OFFSET 29925 +#define QM_REG_PQTX2PF_25_RT_OFFSET 29926 +#define QM_REG_PQTX2PF_26_RT_OFFSET 29927 +#define QM_REG_PQTX2PF_27_RT_OFFSET 29928 +#define QM_REG_PQTX2PF_28_RT_OFFSET 29929 +#define QM_REG_PQTX2PF_29_RT_OFFSET 29930 +#define QM_REG_PQTX2PF_30_RT_OFFSET 29931 +#define QM_REG_PQTX2PF_31_RT_OFFSET 29932 +#define QM_REG_PQTX2PF_32_RT_OFFSET 29933 +#define QM_REG_PQTX2PF_33_RT_OFFSET 29934 +#define QM_REG_PQTX2PF_34_RT_OFFSET 29935 +#define 
QM_REG_PQTX2PF_35_RT_OFFSET 29936 +#define QM_REG_PQTX2PF_36_RT_OFFSET 29937 +#define QM_REG_PQTX2PF_37_RT_OFFSET 29938 +#define QM_REG_PQTX2PF_38_RT_OFFSET 29939 +#define QM_REG_PQTX2PF_39_RT_OFFSET 29940 +#define QM_REG_PQTX2PF_40_RT_OFFSET 29941 +#define QM_REG_PQTX2PF_41_RT_OFFSET 29942 +#define QM_REG_PQTX2PF_42_RT_OFFSET 29943 +#define QM_REG_PQTX2PF_43_RT_OFFSET 29944 +#define QM_REG_PQTX2PF_44_RT_OFFSET 29945 +#define QM_REG_PQTX2PF_45_RT_OFFSET 29946 +#define QM_REG_PQTX2PF_46_RT_OFFSET 29947 +#define QM_REG_PQTX2PF_47_RT_OFFSET 29948 +#define QM_REG_PQTX2PF_48_RT_OFFSET 29949 +#define QM_REG_PQTX2PF_49_RT_OFFSET 29950 +#define QM_REG_PQTX2PF_50_RT_OFFSET 29951 +#define QM_REG_PQTX2PF_51_RT_OFFSET 29952 +#define QM_REG_PQTX2PF_52_RT_OFFSET 29953 +#define QM_REG_PQTX2PF_53_RT_OFFSET 29954 +#define QM_REG_PQTX2PF_54_RT_OFFSET 29955 +#define QM_REG_PQTX2PF_55_RT_OFFSET 29956 +#define QM_REG_PQTX2PF_56_RT_OFFSET 29957 +#define QM_REG_PQTX2PF_57_RT_OFFSET 29958 +#define QM_REG_PQTX2PF_58_RT_OFFSET 29959 +#define QM_REG_PQTX2PF_59_RT_OFFSET 29960 +#define QM_REG_PQTX2PF_60_RT_OFFSET 29961 +#define QM_REG_PQTX2PF_61_RT_OFFSET 29962 +#define QM_REG_PQTX2PF_62_RT_OFFSET 29963 +#define QM_REG_PQTX2PF_63_RT_OFFSET 29964 +#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29965 +#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29966 +#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29967 +#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29968 +#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29969 +#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29970 +#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29971 +#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29972 +#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29973 +#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29974 +#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29975 +#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29976 +#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29977 +#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29978 +#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29979 +#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29980 +#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29981 +#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29982 +#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29983 +#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29984 +#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29985 +#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29986 +#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29987 +#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29988 +#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29989 +#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29990 +#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29991 +#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29992 +#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29993 +#define QM_REG_RLGLBLINCVAL_RT_SIZE 256 +#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30249 +#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 +#define QM_REG_RLGLBLCRD_RT_OFFSET 30505 +#define QM_REG_RLGLBLCRD_RT_SIZE 256 +#define QM_REG_RLGLBLENABLE_RT_OFFSET 30761 +#define QM_REG_RLPFPERIOD_RT_OFFSET 30762 +#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30763 +#define QM_REG_RLPFINCVAL_RT_OFFSET 30764 +#define QM_REG_RLPFINCVAL_RT_SIZE 16 +#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30780 +#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 +#define QM_REG_RLPFCRD_RT_OFFSET 30796 +#define QM_REG_RLPFCRD_RT_SIZE 16 +#define QM_REG_RLPFENABLE_RT_OFFSET 30812 +#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30813 +#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30814 +#define QM_REG_WFQPFWEIGHT_RT_SIZE 16 +#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30830 +#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 +#define QM_REG_WFQPFCRD_RT_OFFSET 30846 +#define QM_REG_WFQPFCRD_RT_SIZE 160 +#define 
QM_REG_WFQPFENABLE_RT_OFFSET 31006 +#define QM_REG_WFQVPENABLE_RT_OFFSET 31007 +#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31008 +#define QM_REG_BASEADDRTXPQ_RT_SIZE 512 +#define QM_REG_TXPQMAP_RT_OFFSET 31520 +#define QM_REG_TXPQMAP_RT_SIZE 512 +#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32032 +#define QM_REG_WFQVPWEIGHT_RT_SIZE 512 +#define QM_REG_WFQVPUPPERBOUND_RT_OFFSET 32544 +#define QM_REG_WFQVPUPPERBOUND_RT_SIZE 512 +#define QM_REG_WFQVPCRD_RT_OFFSET 33056 +#define QM_REG_WFQVPCRD_RT_SIZE 512 +#define QM_REG_WFQVPMAP_RT_OFFSET 33568 +#define QM_REG_WFQVPMAP_RT_SIZE 512 +#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 34080 +#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 +#define NIG_REG_LLH_CLS_TYPE_DUALMODE_RT_OFFSET 34240 +#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 34241 +#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 34242 +#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 34243 +#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 34244 +#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 34245 +#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 34246 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 34247 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 34251 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 34255 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 34259 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 34260 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 34292 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 34308 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 34324 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 34340 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 +#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 34356 +#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 34357 +#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 34358 +#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 34359 +#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 34360 +#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 34361 +#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 34362 +#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 34363 +#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 34364 +#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 34365 +#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 34366 +#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 34367 +#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 34368 +#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 34369 +#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 34370 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 34371 +#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 34372 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 34373 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 34374 +#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 34375 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 34376 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 34377 +#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 34378 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 34379 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 34380 +#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 34381 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 34382 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 34383 +#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 34384 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 
34385 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 34386 +#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 34387 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 34388 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 34389 +#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 34390 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 34391 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 34392 +#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 34393 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 34394 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 34395 +#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 34396 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 34397 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 34398 +#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 34399 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 34400 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 34401 +#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 34402 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 34403 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 34404 +#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 34405 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 34406 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 34407 +#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 34408 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 34409 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 34410 +#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 34411 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 34412 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 34413 +#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 34414 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 34415 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 34416 +#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 34417 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 34418 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 34419 +#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 34420 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 34421 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 34422 +#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 34423 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 34424 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 34425 +#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 34426 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 34427 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 34428 +#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 34429 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 34430 +#define XCM_REG_CON_PHY_Q3_RT_OFFSET 34431 + +#define RUNTIME_ARRAY_SIZE 34432 + +/* The eth storm context for the Ystorm */ +struct ystorm_eth_conn_st_ctx { + __le32 reserved[4]; +}; + +/* The eth storm context for the Pstorm */ +struct pstorm_eth_conn_st_ctx { + __le32 reserved[8]; +}; + +/* The eth storm context for the Xstorm */ +struct xstorm_eth_conn_st_ctx { + __le32 reserved[60]; +}; + +struct xstorm_eth_conn_ag_ctx { + u8 reserved0 /* cdu_validation */; + u8 eth_state /* state */; + u8 flags0; +#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED1_MASK 0x1 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED1_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED2_MASK 0x1 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED2_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM3_MASK 0x1 +#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM3_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED3_MASK 0x1 /* bit4 
*/ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED3_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED4_MASK 0x1 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED4_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED5_MASK 0x1 /* bit6 */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED5_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED6_MASK 0x1 /* bit7 */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED6_SHIFT 7 + u8 flags1; +#define XSTORM_ETH_CONN_AG_CTX_RESERVED7_MASK 0x1 /* bit8 */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED7_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED8_MASK 0x1 /* bit9 */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED8_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED9_MASK 0x1 /* bit10 */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED9_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_BIT11_MASK 0x1 /* bit11 */ +#define XSTORM_ETH_CONN_AG_CTX_BIT11_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_BIT12_MASK 0x1 /* bit12 */ +#define XSTORM_ETH_CONN_AG_CTX_BIT12_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_BIT13_MASK 0x1 /* bit13 */ +#define XSTORM_ETH_CONN_AG_CTX_BIT13_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_TX_RULE_ACTIVE_MASK 0x1 /* bit14 */ +#define XSTORM_ETH_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_ACTIVE_MASK 0x1 /* bit15 */ +#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT 7 + u8 flags2; +#define XSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define XSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define XSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define XSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 +#define XSTORM_ETH_CONN_AG_CTX_CF3_SHIFT 6 + u8 flags3; +#define XSTORM_ETH_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define XSTORM_ETH_CONN_AG_CTX_CF4_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define XSTORM_ETH_CONN_AG_CTX_CF5_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define XSTORM_ETH_CONN_AG_CTX_CF6_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define XSTORM_ETH_CONN_AG_CTX_CF7_SHIFT 6 + u8 flags4; +#define XSTORM_ETH_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define XSTORM_ETH_CONN_AG_CTX_CF8_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define XSTORM_ETH_CONN_AG_CTX_CF9_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define XSTORM_ETH_CONN_AG_CTX_CF10_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_CF11_MASK 0x3 /* cf11 */ +#define XSTORM_ETH_CONN_AG_CTX_CF11_SHIFT 6 + u8 flags5; +#define XSTORM_ETH_CONN_AG_CTX_CF12_MASK 0x3 /* cf12 */ +#define XSTORM_ETH_CONN_AG_CTX_CF12_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_CF13_MASK 0x3 /* cf13 */ +#define XSTORM_ETH_CONN_AG_CTX_CF13_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_CF14_MASK 0x3 /* cf14 */ +#define XSTORM_ETH_CONN_AG_CTX_CF14_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_CF15_MASK 0x3 /* cf15 */ +#define XSTORM_ETH_CONN_AG_CTX_CF15_SHIFT 6 + u8 flags6; +#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK 0x3 /* cf16 */ +#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_MASK 0x3 +#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_MASK 0x3 /* cf18 */ +#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_MASK 0x3 /* cf19 */ +#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_SHIFT 6 + u8 flags7; +#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_MASK 0x3 /* cf20 */ +#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_SHIFT 0 +#define 
XSTORM_ETH_CONN_AG_CTX_RESERVED10_MASK 0x3 /* cf21 */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED10_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_MASK 0x3 /* cf22 */ +#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define XSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define XSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 7 + u8 flags8; +#define XSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define XSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define XSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define XSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define XSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define XSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define XSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define XSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define XSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT 7 + u8 flags9; +#define XSTORM_ETH_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define XSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_CF11EN_MASK 0x1 /* cf11en */ +#define XSTORM_ETH_CONN_AG_CTX_CF11EN_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_CF12EN_MASK 0x1 /* cf12en */ +#define XSTORM_ETH_CONN_AG_CTX_CF12EN_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_CF13EN_MASK 0x1 /* cf13en */ +#define XSTORM_ETH_CONN_AG_CTX_CF13EN_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_CF14EN_MASK 0x1 /* cf14en */ +#define XSTORM_ETH_CONN_AG_CTX_CF14EN_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_CF15EN_MASK 0x1 /* cf15en */ +#define XSTORM_ETH_CONN_AG_CTX_CF15EN_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK 0x1 /* cf16en */ +#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK 0x1 +#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT 7 + u8 flags10; +#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_EN_MASK 0x1 /* cf18en */ +#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_EN_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_EN_MASK 0x1 /* cf19en */ +#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_EN_MASK 0x1 /* cf20en */ +#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED11_MASK 0x1 /* cf21en */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED11_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_EN_MASK 0x1 /* cf22en */ +#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_EN_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK 0x1 /* cf23en */ +#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED12_MASK 0x1 /* rule0en */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED12_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED13_MASK 0x1 /* rule1en */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED13_SHIFT 7 + u8 flags11; +#define XSTORM_ETH_CONN_AG_CTX_RESERVED14_MASK 0x1 /* rule2en */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED14_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_RESERVED15_MASK 0x1 /* rule3en */ +#define XSTORM_ETH_CONN_AG_CTX_RESERVED15_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_TX_DEC_RULE_EN_MASK 0x1 /* rule4en */ +#define XSTORM_ETH_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT 2 
+#define XSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED1_MASK 0x1 /* rule8en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED1_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_RULE9EN_MASK 0x1 /* rule9en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE9EN_SHIFT 7 + u8 flags12; +#define XSTORM_ETH_CONN_AG_CTX_RULE10EN_MASK 0x1 /* rule10en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE10EN_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_RULE11EN_MASK 0x1 /* rule11en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE11EN_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED2_MASK 0x1 /* rule12en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED2_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED3_MASK 0x1 /* rule13en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED3_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_RULE14EN_MASK 0x1 /* rule14en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE14EN_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_RULE15EN_MASK 0x1 /* rule15en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE15EN_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_RULE16EN_MASK 0x1 /* rule16en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE16EN_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_RULE17EN_MASK 0x1 /* rule17en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE17EN_SHIFT 7 + u8 flags13; +#define XSTORM_ETH_CONN_AG_CTX_RULE18EN_MASK 0x1 /* rule18en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE18EN_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_RULE19EN_MASK 0x1 /* rule19en */ +#define XSTORM_ETH_CONN_AG_CTX_RULE19EN_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED4_MASK 0x1 /* rule20en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED4_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED5_MASK 0x1 /* rule21en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED5_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED6_MASK 0x1 /* rule22en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED6_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED7_MASK 0x1 /* rule23en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED7_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED8_MASK 0x1 /* rule24en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED8_SHIFT 6 +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED9_MASK 0x1 /* rule25en */ +#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED9_SHIFT 7 + u8 flags14; +#define XSTORM_ETH_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK 0x1 /* bit16 */ +#define XSTORM_ETH_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT 0 +#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK 0x1 /* bit17 */ +#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT 1 +#define XSTORM_ETH_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK 0x1 /* bit18 */ +#define XSTORM_ETH_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT 2 +#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK 0x1 /* bit19 */ +#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT 3 +#define XSTORM_ETH_CONN_AG_CTX_L2_EDPM_ENABLE_MASK 0x1 /* bit20 */ +#define XSTORM_ETH_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT 4 +#define XSTORM_ETH_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK 0x1 /* bit21 */ +#define XSTORM_ETH_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT 5 +#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_MASK 0x3 /* cf23 */ +#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_SHIFT 6 + u8 edpm_event_id /* byte2 */; + __le16 physical_q0 /* physical_q0 */; + __le16 word1 /* physical_q1 */; + __le16 edpm_num_bds /* physical_q2 */; + __le16 tx_bd_cons 
/* word3 */; + __le16 tx_bd_prod /* word4 */; + __le16 go_to_bd_cons /* word5 */; + __le16 conn_dpi /* conn_dpi */; + u8 byte3 /* byte3 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + u8 byte6 /* byte6 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* cf_array0 */; + __le32 reg6 /* cf_array1 */; + __le16 word7 /* word7 */; + __le16 word8 /* word8 */; + __le16 word9 /* word9 */; + __le16 word10 /* word10 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + __le32 reg9 /* reg9 */; + u8 byte7 /* byte7 */; + u8 byte8 /* byte8 */; + u8 byte9 /* byte9 */; + u8 byte10 /* byte10 */; + u8 byte11 /* byte11 */; + u8 byte12 /* byte12 */; + u8 byte13 /* byte13 */; + u8 byte14 /* byte14 */; + u8 byte15 /* byte15 */; + u8 byte16 /* byte16 */; + __le16 word11 /* word11 */; + __le32 reg10 /* reg10 */; + __le32 reg11 /* reg11 */; + __le32 reg12 /* reg12 */; + __le32 reg13 /* reg13 */; + __le32 reg14 /* reg14 */; + __le32 reg15 /* reg15 */; + __le32 reg16 /* reg16 */; + __le32 reg17 /* reg17 */; + __le32 reg18 /* reg18 */; + __le32 reg19 /* reg19 */; + __le16 word12 /* word12 */; + __le16 word13 /* word13 */; + __le16 word14 /* word14 */; + __le16 word15 /* word15 */; +}; + +/* The eth storm context for the Tstorm */ +struct tstorm_eth_conn_st_ctx { + __le32 reserved[4]; +}; + +/* The eth storm context for the Mstorm */ +struct mstorm_eth_conn_st_ctx { + __le32 reserved[8]; +}; + +/* The eth storm context for the Ustorm */ +struct ustorm_eth_conn_st_ctx { + __le32 reserved[40]; +}; + +/* eth connection context */ +struct eth_conn_context { + struct ystorm_eth_conn_st_ctx ystorm_st_context; + struct regpair ystorm_st_padding[2] /* padding */; + struct pstorm_eth_conn_st_ctx pstorm_st_context; + struct regpair pstorm_st_padding[2] /* padding */; + struct xstorm_eth_conn_st_ctx xstorm_st_context; + struct xstorm_eth_conn_ag_ctx xstorm_ag_context; + struct tstorm_eth_conn_st_ctx tstorm_st_context; + struct regpair tstorm_st_padding[2] /* padding */; + struct mstorm_eth_conn_st_ctx mstorm_st_context; + struct ustorm_eth_conn_st_ctx ustorm_st_context; +}; + +struct mstorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 /* exist_in_qm0 */ +#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 +#define MSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define MSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ +#define MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 2 +#define MSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ +#define MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 4 +#define MSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ +#define MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 0 +#define MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 1 +#define MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 +#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 3 +#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 4 +#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 5 +#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define 
MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 6 +#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 7 + __le16 word0 /* word0 */; + __le16 word1 /* word1 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; +}; + +struct tstorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define TSTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT2_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT3_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT4_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT5_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 6 + u8 flags1; +#define TSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define TSTORM_ETH_CONN_AG_CTX_CF3_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define TSTORM_ETH_CONN_AG_CTX_CF4_SHIFT 6 + u8 flags2; +#define TSTORM_ETH_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define TSTORM_ETH_CONN_AG_CTX_CF5_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define TSTORM_ETH_CONN_AG_CTX_CF6_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define TSTORM_ETH_CONN_AG_CTX_CF7_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define TSTORM_ETH_CONN_AG_CTX_CF8_SHIFT 6 + u8 flags3; +#define TSTORM_ETH_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define TSTORM_ETH_CONN_AG_CTX_CF9_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define TSTORM_ETH_CONN_AG_CTX_CF10_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define TSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define TSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define TSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define TSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 7 + u8 flags4; +#define TSTORM_ETH_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define TSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define TSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define TSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define TSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define TSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define TSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define TSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags5; +#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define 
TSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_MASK 0x1 /* rule6en */ +#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* reg5 */; + __le32 reg6 /* reg6 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 rx_bd_cons /* word0 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + __le16 rx_bd_prod /* word1 */; + __le16 word2 /* conn_dpi */; + __le16 word3 /* word3 */; + __le32 reg9 /* reg9 */; + __le32 reg10 /* reg10 */; +}; + +struct ustorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define USTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 +#define USTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 +#define USTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define USTORM_ETH_CONN_AG_CTX_CF0_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define USTORM_ETH_CONN_AG_CTX_CF1_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define USTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define USTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 +#define USTORM_ETH_CONN_AG_CTX_CF3_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_MASK 0x3 /* cf4 */ +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_MASK 0x3 /* cf5 */ +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK 0x3 /* cf6 */ +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT 6 + u8 flags2; +#define USTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define USTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define USTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define USTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define USTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_MASK 0x1 /* cf4en */ +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_MASK 0x1 /* cf5en */ +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_SHIFT 5 +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK 0x1 /* cf6en */ +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT 6 +#define USTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define USTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags3; +#define USTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define USTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define USTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define 
USTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define USTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define USTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define USTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define USTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define USTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define USTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define USTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define USTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define USTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* conn_dpi */; + __le16 tx_bd_cons /* word1 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le16 tx_drv_bd_cons /* word2 */; + __le16 rx_drv_cqe_cons /* word3 */; +}; + +struct xstorm_eth_hw_conn_ag_ctx { + u8 reserved0 /* cdu_validation */; + u8 eth_state /* state */; + u8 flags0; +#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_SHIFT 7 + u8 flags1; +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT 7 + u8 flags2; +#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_SHIFT 6 + u8 flags3; +#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_MASK 0x3 +#define 
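All multi-byte members in these contexts (__le16/__le32) are little-endian as written by the firmware, so host code must convert on access. A sketch using glibc's <endian.h> helpers in place of the kernel's cpu_to_le16()/le16_to_cpu(); the two-field struct is a hypothetical stand-in for the producer/consumer words above:

#include <endian.h>	/* htole16()/le16toh(); kernel code uses cpu_to_le16()/le16_to_cpu() */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical mirror of two of the little-endian words in ustorm_eth_conn_ag_ctx */
struct ustorm_tail_sketch {
	uint16_t word0;		/* conn_dpi */
	uint16_t tx_bd_cons;	/* word1 */
};

int main(void)
{
	struct ustorm_tail_sketch ctx;

	ctx.tx_bd_cons = htole16(0x1234);	/* firmware view: little-endian */
	printf("tx_bd_cons=%u\n", (unsigned int)le16toh(ctx.tx_bd_cons));
	return 0;
}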
XSTORM_ETH_HW_CONN_AG_CTX_CF7_SHIFT 6 + u8 flags4; +#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_SHIFT 6 + u8 flags5; +#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_SHIFT 6 + u8 flags6; +#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_SHIFT 6 + u8 flags7; +#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_MASK 0x3 +#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_SHIFT 7 + u8 flags8; +#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_SHIFT 7 + u8 flags9; +#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT 7 + u8 flags10; +#define 
XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_SHIFT 7 + u8 flags11; +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_SHIFT 7 + u8 flags12; +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_SHIFT 7 + u8 flags13; +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_SHIFT 0 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_SHIFT 1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_SHIFT 2 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_SHIFT 3 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_SHIFT 4 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_SHIFT 5 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_SHIFT 6 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_MASK 0x1 +#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_SHIFT 7 + u8 
flags14;
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT 0
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT 1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT 2
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT 3
+#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT 4
+#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK 0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT 5
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_MASK 0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_SHIFT 6
+	u8 edpm_event_id /* byte2 */;
+	__le16 physical_q0 /* physical_q0 */;
+	__le16 word1 /* physical_q1 */;
+	__le16 edpm_num_bds /* physical_q2 */;
+	__le16 tx_bd_cons /* word3 */;
+	__le16 tx_bd_prod /* word4 */;
+	__le16 go_to_bd_cons /* word5 */;
+	__le16 conn_dpi /* conn_dpi */;
+};
+
+#define VF_MAX_STATIC 192 /* In case of K2 */
+
+#define MCP_GLOB_PATH_MAX 2
+#define MCP_PORT_MAX 2 /* Global */
+#define MCP_GLOB_PORT_MAX 4 /* Global */
+#define MCP_GLOB_FUNC_MAX 16 /* Global */
+
+typedef u32 offsize_t; /* In DWORDS !!! */
+/* Offset from the beginning of the MCP scratchpad */
+#define OFFSIZE_OFFSET_SHIFT 0
+#define OFFSIZE_OFFSET_MASK 0x0000ffff
+/* Size of specific element (not the whole array if any) */
+#define OFFSIZE_SIZE_SHIFT 16
+#define OFFSIZE_SIZE_MASK 0xffff0000
+
+/* SECTION_OFFSET calculates the offset in bytes out of offsize */
+#define SECTION_OFFSET(_offsize) ((((_offsize & \
+				     OFFSIZE_OFFSET_MASK) >> \
+				    OFFSIZE_OFFSET_SHIFT) << 2))
+
+/* QED_SECTION_SIZE calculates the size in bytes out of offsize */
+#define QED_SECTION_SIZE(_offsize) (((_offsize & \
+				      OFFSIZE_SIZE_MASK) >> \
+				     OFFSIZE_SIZE_SHIFT) << 2)
+
+/* SECTION_ADDR returns the GRC addr of a section, given offsize and index
+ * within section.
+ */
+#define SECTION_ADDR(_offsize, idx) (MCP_REG_SCRATCH + \
+				     SECTION_OFFSET(_offsize) + \
+				     (QED_SECTION_SIZE(_offsize) * idx))
+
+/* SECTION_OFFSIZE_ADDR returns the GRC addr to the offsize address.
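To make the offsize encoding concrete: the low 16 bits carry the dword offset from the start of the MCP scratchpad and the high 16 bits carry the per-element size in dwords, which is why both macros end in << 2 (dwords to bytes). A worked example, under the assumption that MCP_REG_SCRATCH is a GRC register address defined elsewhere (the value below is a stand-in):

#include <stdint.h>
#include <stdio.h>

#define MCP_REG_SCRATCH 0xe20000u	/* stand-in; the real address comes from the register file */

/* Copied from above, with extra parentheses around _offsize for standalone use */
#define OFFSIZE_OFFSET_SHIFT 0
#define OFFSIZE_OFFSET_MASK  0x0000ffff
#define OFFSIZE_SIZE_SHIFT   16
#define OFFSIZE_SIZE_MASK    0xffff0000
#define SECTION_OFFSET(_offsize) \
	((((_offsize) & OFFSIZE_OFFSET_MASK) >> OFFSIZE_OFFSET_SHIFT) << 2)
#define QED_SECTION_SIZE(_offsize) \
	((((_offsize) & OFFSIZE_SIZE_MASK) >> OFFSIZE_SIZE_SHIFT) << 2)
#define SECTION_ADDR(_offsize, idx) \
	(MCP_REG_SCRATCH + SECTION_OFFSET(_offsize) + (QED_SECTION_SIZE(_offsize) * (idx)))

int main(void)
{
	uint32_t offsize = (0x30u << 16) | 0x100u;	/* 0x30-dword elements at dword 0x100 */

	/* 0x100 dwords = 0x400 bytes offset; 0x30 dwords = 0xc0 bytes per element */
	printf("offset=0x%x size=0x%x elem2=0x%x\n",
	       (unsigned int)SECTION_OFFSET(offsize),
	       (unsigned int)QED_SECTION_SIZE(offsize),
	       (unsigned int)SECTION_ADDR(offsize, 2));
	return 0;
}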
+ * Use offsetof, since the OFFSETUP collide with the firmware definition + */ +#define SECTION_OFFSIZE_ADDR(_pub_base, _section) (_pub_base + \ + offsetof(struct \ + mcp_public_data, \ + sections[_section])) +/* PHY configuration */ +struct pmm_phy_cfg { + u32 speed; +#define PMM_SPEED_AUTONEG 0 + + u32 pause; /* bitmask */ +#define PMM_PAUSE_NONE 0x0 +#define PMM_PAUSE_AUTONEG 0x1 +#define PMM_PAUSE_RX 0x2 +#define PMM_PAUSE_TX 0x4 + + u32 adv_speed; /* Default should be the speed_cap_mask */ + u32 loopback_mode; +#define PMM_LOOPBACK_NONE 0 +#define PMM_LOOPBACK_INT_PHY 1 +#define PMM_LOOPBACK_EXT_PHY 2 +#define PMM_LOOPBACK_EXT 3 +#define PMM_LOOPBACK_MAC 4 + + /* features */ + u32 feature_config_flags; +}; + +struct port_mf_cfg { + u32 dynamic_cfg; /* device control channel */ +#define PORT_MF_CFG_OV_TAG_MASK 0x0000ffff +#define PORT_MF_CFG_OV_TAG_SHIFT 0 +#define PORT_MF_CFG_OV_TAG_DEFAULT PORT_MF_CFG_OV_TAG_MASK + + u32 reserved[1]; +}; + +/* DO NOT add new fields in the middle + * MUST be synced with struct pmm_stats_map + */ +struct pmm_stats { + u64 r64; /* 0x00 (Offset 0x00 ) RX 64-byte frame counter*/ + u64 r127; /* 0x01 (Offset 0x08 ) RX 65 to 127 byte frame counter*/ + u64 r255; + u64 r511; + u64 r1023; + u64 r1518; + u64 r1522; + u64 r2047; + u64 r4095; + u64 r9216; + u64 r16383; + u64 rfcs; /* 0x0F (Offset 0x58 ) RX FCS error frame counter*/ + u64 rxcf; /* 0x10 (Offset 0x60 ) RX control frame counter*/ + u64 rxpf; /* 0x11 (Offset 0x68 ) RX pause frame counter*/ + u64 rxpp; /* 0x12 (Offset 0x70 ) RX PFC frame counter*/ + u64 raln; /* 0x16 (Offset 0x78 ) RX alignment error counter*/ + u64 rfcr; /* 0x19 (Offset 0x80 ) RX false carrier counter */ + u64 rovr; /* 0x1A (Offset 0x88 ) RX oversized frame counter*/ + u64 rjbr; /* 0x1B (Offset 0x90 ) RX jabber frame counter */ + u64 rund; /* 0x34 (Offset 0x98 ) RX undersized frame counter */ + u64 rfrg; /* 0x35 (Offset 0xa0 ) RX fragment counter */ + u64 t64; /* 0x40 (Offset 0xa8 ) TX 64-byte frame counter */ + u64 t127; + u64 t255; + u64 t511; + u64 t1023; + u64 t1518; + u64 t2047; + u64 t4095; + u64 t9216; + u64 t16383; + u64 txpf; /* 0x50 (Offset 0xf8 ) TX pause frame counter */ + u64 txpp; /* 0x51 (Offset 0x100) TX PFC frame counter */ + u64 tlpiec; + u64 tncl; + u64 rbyte; /* 0x3d (Offset 0x118) RX byte counter */ + u64 rxuca; /* 0x0c (Offset 0x120) RX UC frame counter */ + u64 rxmca; /* 0x0d (Offset 0x128) RX MC frame counter */ + u64 rxbca; /* 0x0e (Offset 0x130) RX BC frame counter */ + u64 rxpok; + u64 tbyte; /* 0x6f (Offset 0x140) TX byte counter */ + u64 txuca; /* 0x4d (Offset 0x148) TX UC frame counter */ + u64 txmca; /* 0x4e (Offset 0x150) TX MC frame counter */ + u64 txbca; /* 0x4f (Offset 0x158) TX BC frame counter */ + u64 txcf; /* 0x54 (Offset 0x160) TX control frame counter */ +}; + +struct brb_stats { + u64 brb_truncate[8]; + u64 brb_discard[8]; +}; + +struct port_stats { + struct brb_stats brb; + struct pmm_stats pmm; +}; + +#define CMT_TEAM0 0 +#define CMT_TEAM1 1 +#define CMT_TEAM_MAX 2 + +struct couple_mode_teaming { + u8 port_cmt[MCP_GLOB_PORT_MAX]; +#define PORT_CMT_IN_TEAM BIT(0) + +#define PORT_CMT_PORT_ROLE BIT(1) +#define PORT_CMT_PORT_INACTIVE (0 << 1) +#define PORT_CMT_PORT_ACTIVE BIT(1) + +#define PORT_CMT_TEAM_MASK BIT(2) +#define PORT_CMT_TEAM0 (0 << 2) +#define PORT_CMT_TEAM1 BIT(2) +}; + +/************************************** +* LLDP and DCBX HSI structures +**************************************/ +#define LLDP_CHASSIS_ID_STAT_LEN 4 +#define LLDP_PORT_ID_STAT_LEN 4 +#define DCBX_MAX_APP_PROTOCOL 
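As a usage sketch for pmm_phy_cfg, a request for autonegotiated speed with full flow control would be filled in roughly as follows (the struct mirror and fill_phy_cfg() are illustrative; the mailbox command that consumes the config appears further down):

#include <stdint.h>
#include <string.h>

/* Values copied from pmm_phy_cfg above */
#define PMM_SPEED_AUTONEG 0
#define PMM_PAUSE_AUTONEG 0x1
#define PMM_PAUSE_RX      0x2
#define PMM_PAUSE_TX      0x4
#define PMM_LOOPBACK_NONE 0

struct pmm_phy_cfg_sketch {	/* field-for-field mirror of pmm_phy_cfg */
	uint32_t speed;
	uint32_t pause;
	uint32_t adv_speed;
	uint32_t loopback_mode;
	uint32_t feature_config_flags;
};

static void fill_phy_cfg(struct pmm_phy_cfg_sketch *cfg, uint32_t speed_cap_mask)
{
	memset(cfg, 0, sizeof(*cfg));
	cfg->speed = PMM_SPEED_AUTONEG;
	cfg->pause = PMM_PAUSE_AUTONEG | PMM_PAUSE_RX | PMM_PAUSE_TX;
	cfg->adv_speed = speed_cap_mask;	/* "default should be the speed_cap_mask" */
	cfg->loopback_mode = PMM_LOOPBACK_NONE;
}

int main(void)
{
	struct pmm_phy_cfg_sketch cfg;

	fill_phy_cfg(&cfg, 0xff);	/* sample capability mask */
	return 0;
}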
32 +#define MAX_SYSTEM_LLDP_TLV_DATA 32 + +enum lldp_agent_e { + LLDP_NEAREST_BRIDGE = 0, + LLDP_NEAREST_NON_TPMR_BRIDGE, + LLDP_NEAREST_CUSTOMER_BRIDGE, + LLDP_MAX_LLDP_AGENTS +}; + +struct lldp_config_params_s { + u32 config; +#define LLDP_CONFIG_TX_INTERVAL_MASK 0x000000ff +#define LLDP_CONFIG_TX_INTERVAL_SHIFT 0 +#define LLDP_CONFIG_HOLD_MASK 0x00000f00 +#define LLDP_CONFIG_HOLD_SHIFT 8 +#define LLDP_CONFIG_MAX_CREDIT_MASK 0x0000f000 +#define LLDP_CONFIG_MAX_CREDIT_SHIFT 12 +#define LLDP_CONFIG_ENABLE_RX_MASK 0x40000000 +#define LLDP_CONFIG_ENABLE_RX_SHIFT 30 +#define LLDP_CONFIG_ENABLE_TX_MASK 0x80000000 +#define LLDP_CONFIG_ENABLE_TX_SHIFT 31 + u32 local_chassis_id[LLDP_CHASSIS_ID_STAT_LEN]; + u32 local_port_id[LLDP_PORT_ID_STAT_LEN]; +}; + +struct lldp_status_params_s { + u32 prefix_seq_num; + u32 status; /* TBD */ + + /* Holds remote Chassis ID TLV header, subtype and 9B of payload. */ + u32 peer_chassis_id[LLDP_CHASSIS_ID_STAT_LEN]; + + /* Holds remote Port ID TLV header, subtype and 9B of payload. */ + u32 peer_port_id[LLDP_PORT_ID_STAT_LEN]; + u32 suffix_seq_num; +}; + +struct dcbx_ets_feature { + u32 flags; +#define DCBX_ETS_ENABLED_MASK 0x00000001 +#define DCBX_ETS_ENABLED_SHIFT 0 +#define DCBX_ETS_WILLING_MASK 0x00000002 +#define DCBX_ETS_WILLING_SHIFT 1 +#define DCBX_ETS_ERROR_MASK 0x00000004 +#define DCBX_ETS_ERROR_SHIFT 2 +#define DCBX_ETS_CBS_MASK 0x00000008 +#define DCBX_ETS_CBS_SHIFT 3 +#define DCBX_ETS_MAX_TCS_MASK 0x000000f0 +#define DCBX_ETS_MAX_TCS_SHIFT 4 + u32 pri_tc_tbl[1]; +#define DCBX_ISCSI_OOO_TC 4 +#define NIG_ETS_ISCSI_OOO_CLIENT_OFFSET (DCBX_ISCSI_OOO_TC + 1) + u32 tc_bw_tbl[2]; + u32 tc_tsa_tbl[2]; +#define DCBX_ETS_TSA_STRICT 0 +#define DCBX_ETS_TSA_CBS 1 +#define DCBX_ETS_TSA_ETS 2 +}; + +struct dcbx_app_priority_entry { + u32 entry; +#define DCBX_APP_PRI_MAP_MASK 0x000000ff +#define DCBX_APP_PRI_MAP_SHIFT 0 +#define DCBX_APP_PRI_0 0x01 +#define DCBX_APP_PRI_1 0x02 +#define DCBX_APP_PRI_2 0x04 +#define DCBX_APP_PRI_3 0x08 +#define DCBX_APP_PRI_4 0x10 +#define DCBX_APP_PRI_5 0x20 +#define DCBX_APP_PRI_6 0x40 +#define DCBX_APP_PRI_7 0x80 +#define DCBX_APP_SF_MASK 0x00000300 +#define DCBX_APP_SF_SHIFT 8 +#define DCBX_APP_SF_ETHTYPE 0 +#define DCBX_APP_SF_PORT 1 +#define DCBX_APP_PROTOCOL_ID_MASK 0xffff0000 +#define DCBX_APP_PROTOCOL_ID_SHIFT 16 +}; + +/* FW structure in BE */ +struct dcbx_app_priority_feature { + u32 flags; +#define DCBX_APP_ENABLED_MASK 0x00000001 +#define DCBX_APP_ENABLED_SHIFT 0 +#define DCBX_APP_WILLING_MASK 0x00000002 +#define DCBX_APP_WILLING_SHIFT 1 +#define DCBX_APP_ERROR_MASK 0x00000004 +#define DCBX_APP_ERROR_SHIFT 2 +/* Not in use + * #define DCBX_APP_DEFAULT_PRI_MASK 0x00000f00 + * #define DCBX_APP_DEFAULT_PRI_SHIFT 8 + */ +#define DCBX_APP_MAX_TCS_MASK 0x0000f000 +#define DCBX_APP_MAX_TCS_SHIFT 12 +#define DCBX_APP_NUM_ENTRIES_MASK 0x00ff0000 +#define DCBX_APP_NUM_ENTRIES_SHIFT 16 + struct dcbx_app_priority_entry app_pri_tbl[DCBX_MAX_APP_PROTOCOL]; +}; + +/* FW structure in BE */ +struct dcbx_features { + /* PG feature */ + struct dcbx_ets_feature ets; + + /* PFC feature */ + u32 pfc; +#define DCBX_PFC_PRI_EN_BITMAP_MASK 0x000000ff +#define DCBX_PFC_PRI_EN_BITMAP_SHIFT 0 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_0 0x01 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_1 0x02 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_2 0x04 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_3 0x08 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_4 0x10 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_5 0x20 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_6 0x40 +#define DCBX_PFC_PRI_EN_BITMAP_PRI_7 0x80 + +#define 
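Each dcbx_app_priority_entry packs a priority bitmap, a selector (ethtype vs. port), and a 16-bit protocol ID into one dword. A decode sketch; the FCoE ethtype used as sample data is not part of this patch, and since the surrounding struct is documented as big-endian on the firmware side a real driver converts the dword first:

#include <stdint.h>
#include <stdio.h>

/* Values copied from dcbx_app_priority_entry above */
#define DCBX_APP_PRI_MAP_MASK      0x000000ff
#define DCBX_APP_SF_MASK           0x00000300
#define DCBX_APP_SF_SHIFT          8
#define DCBX_APP_SF_ETHTYPE        0
#define DCBX_APP_PROTOCOL_ID_MASK  0xffff0000
#define DCBX_APP_PROTOCOL_ID_SHIFT 16

int main(void)
{
	/* Sample: FCoE ethtype 0x8906 mapped to priority 3 (bit 3 of the map) */
	uint32_t entry = (0x8906u << DCBX_APP_PROTOCOL_ID_SHIFT) |
			 (DCBX_APP_SF_ETHTYPE << DCBX_APP_SF_SHIFT) | 0x08u;

	printf("proto=0x%04x sf=%u pri_map=0x%02x\n",
	       (unsigned int)((entry & DCBX_APP_PROTOCOL_ID_MASK) >> DCBX_APP_PROTOCOL_ID_SHIFT),
	       (unsigned int)((entry & DCBX_APP_SF_MASK) >> DCBX_APP_SF_SHIFT),
	       (unsigned int)(entry & DCBX_APP_PRI_MAP_MASK));
	return 0;
}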
DCBX_PFC_FLAGS_MASK 0x0000ff00 +#define DCBX_PFC_FLAGS_SHIFT 8 +#define DCBX_PFC_CAPS_MASK 0x00000f00 +#define DCBX_PFC_CAPS_SHIFT 8 +#define DCBX_PFC_MBC_MASK 0x00004000 +#define DCBX_PFC_MBC_SHIFT 14 +#define DCBX_PFC_WILLING_MASK 0x00008000 +#define DCBX_PFC_WILLING_SHIFT 15 +#define DCBX_PFC_ENABLED_MASK 0x00010000 +#define DCBX_PFC_ENABLED_SHIFT 16 +#define DCBX_PFC_ERROR_MASK 0x00020000 +#define DCBX_PFC_ERROR_SHIFT 17 + + /* APP feature */ + struct dcbx_app_priority_feature app; +}; + +struct dcbx_local_params { + u32 config; +#define DCBX_CONFIG_VERSION_MASK 0x00000003 +#define DCBX_CONFIG_VERSION_SHIFT 0 +#define DCBX_CONFIG_VERSION_DISABLED 0 +#define DCBX_CONFIG_VERSION_IEEE 1 +#define DCBX_CONFIG_VERSION_CEE 2 + + u32 flags; + struct dcbx_features features; +}; + +struct dcbx_mib { + u32 prefix_seq_num; + u32 flags; + struct dcbx_features features; + u32 suffix_seq_num; +}; + +struct lldp_system_tlvs_buffer_s { + u16 valid; + u16 length; + u32 data[MAX_SYSTEM_LLDP_TLV_DATA]; +}; + +/**************************************/ +/* */ +/* P U B L I C G L O B A L */ +/* */ +/**************************************/ +struct public_global { + u32 max_path; +#define MAX_PATH_BIG_BEAR 2 +#define MAX_PATH_K2 1 + u32 max_ports; +#define MODE_1P 1 +#define MODE_2P 2 +#define MODE_3P 3 +#define MODE_4P 4 + u32 debug_mb_offset; + u32 phymod_dbg_mb_offset; + struct couple_mode_teaming cmt; + s32 internal_temperature; + u32 mfw_ver; + u32 running_bundle_id; +}; + +/**************************************/ +/* */ +/* P U B L I C P A T H */ +/* */ +/**************************************/ + +/**************************************************************************** +* Shared Memory 2 Region * +****************************************************************************/ +/* The fw_flr_ack is actually built in the following way: */ +/* 8 bit: PF ack */ +/* 128 bit: VF ack */ +/* 8 bit: ios_dis_ack */ +/* In order to maintain endianity in the mailbox hsi, we want to keep using */ +/* u32. The fw must have the VF right after the PF since this is how it */ +/* access arrays(it expects always the VF to reside after the PF, and that */ +/* makes the calculation much easier for it. 
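The prefix_seq_num/suffix_seq_num pair that brackets dcbx_mib (and lldp_status_params_s above) looks like a torn-read guard: if the MFW rewrites the MIB while the driver is copying it, the two sequence numbers disagree and the read can be retried. A hedged sketch of such a loop; read_mib_from_shmem() is hypothetical:

#include <stdint.h>

struct dcbx_mib_sketch {	/* abbreviated mirror of dcbx_mib */
	uint32_t prefix_seq_num;
	/* flags + dcbx_features elided */
	uint32_t suffix_seq_num;
};

void read_mib_from_shmem(struct dcbx_mib_sketch *out);	/* hypothetical accessor */

/* Re-read until prefix and suffix agree, i.e. the MFW did not rewrite the
 * MIB mid-copy. A production loop would also bound the retries.
 */
void read_dcbx_mib_stable(struct dcbx_mib_sketch *mib)
{
	do {
		read_mib_from_shmem(mib);
	} while (mib->prefix_seq_num != mib->suffix_seq_num);
}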
) */ +/* In order to answer both limitations, and keep the struct small, the code */ +/* will abuse the structure defined here to achieve the actual partition */ +/* above */ +/****************************************************************************/ +struct fw_flr_mb { + u32 aggint; + u32 opgen_addr; + u32 accum_ack; /* 0..15:PF, 16..207:VF, 256..271:IOV_DIS */ +#define ACCUM_ACK_PF_BASE 0 +#define ACCUM_ACK_PF_SHIFT 0 + +#define ACCUM_ACK_VF_BASE 8 +#define ACCUM_ACK_VF_SHIFT 3 + +#define ACCUM_ACK_IOV_DIS_BASE 256 +#define ACCUM_ACK_IOV_DIS_SHIFT 8 +}; + +struct public_path { + struct fw_flr_mb flr_mb; + u32 mcp_vf_disabled[VF_MAX_STATIC / 32]; + + u32 process_kill; +#define PROCESS_KILL_COUNTER_MASK 0x0000ffff +#define PROCESS_KILL_COUNTER_SHIFT 0 +#define PROCESS_KILL_GLOB_AEU_BIT_MASK 0xffff0000 +#define PROCESS_KILL_GLOB_AEU_BIT_SHIFT 16 +#define GLOBAL_AEU_BIT(aeu_reg_id, aeu_bit) (aeu_reg_id * 32 + aeu_bit) +}; + +/**************************************/ +/* */ +/* P U B L I C P O R T */ +/* */ +/**************************************/ + +/**************************************************************************** +* Driver <-> FW Mailbox * +****************************************************************************/ + +struct public_port { + u32 validity_map; /* 0x0 (4*2 = 0x8) */ + + /* validity bits */ +#define MCP_VALIDITY_PCI_CFG 0x00100000 +#define MCP_VALIDITY_MB 0x00200000 +#define MCP_VALIDITY_DEV_INFO 0x00400000 +#define MCP_VALIDITY_RESERVED 0x00000007 + + /* One licensing bit should be set */ +#define MCP_VALIDITY_LIC_KEY_IN_EFFECT_MASK 0x00000038 +#define MCP_VALIDITY_LIC_MANUF_KEY_IN_EFFECT 0x00000008 +#define MCP_VALIDITY_LIC_UPGRADE_KEY_IN_EFFECT 0x00000010 +#define MCP_VALIDITY_LIC_NO_KEY_IN_EFFECT 0x00000020 + + /* Active MFW */ +#define MCP_VALIDITY_ACTIVE_MFW_UNKNOWN 0x00000000 +#define MCP_VALIDITY_ACTIVE_MFW_MASK 0x000001c0 +#define MCP_VALIDITY_ACTIVE_MFW_NCSI 0x00000040 +#define MCP_VALIDITY_ACTIVE_MFW_NONE 0x000001c0 + + u32 link_status; +#define LINK_STATUS_LINK_UP \ + 0x00000001 +#define LINK_STATUS_SPEED_AND_DUPLEX_MASK 0x0000001e +#define LINK_STATUS_SPEED_AND_DUPLEX_1000THD BIT(1) +#define LINK_STATUS_SPEED_AND_DUPLEX_1000TFD (2 << 1) +#define LINK_STATUS_SPEED_AND_DUPLEX_10G (3 << 1) +#define LINK_STATUS_SPEED_AND_DUPLEX_20G (4 << 1) +#define LINK_STATUS_SPEED_AND_DUPLEX_40G (5 << 1) +#define LINK_STATUS_SPEED_AND_DUPLEX_50G (6 << 1) +#define LINK_STATUS_SPEED_AND_DUPLEX_100G (7 << 1) +#define LINK_STATUS_SPEED_AND_DUPLEX_25G (8 << 1) + +#define LINK_STATUS_AUTO_NEGOTIATE_ENABLED 0x00000020 + +#define LINK_STATUS_AUTO_NEGOTIATE_COMPLETE 0x00000040 +#define LINK_STATUS_PARALLEL_DETECTION_USED 0x00000080 + +#define LINK_STATUS_PFC_ENABLED \ + 0x00000100 +#define LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE 0x00000200 +#define LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE 0x00000400 +#define LINK_STATUS_LINK_PARTNER_10G_CAPABLE 0x00000800 +#define LINK_STATUS_LINK_PARTNER_20G_CAPABLE 0x00001000 +#define LINK_STATUS_LINK_PARTNER_40G_CAPABLE 0x00002000 +#define LINK_STATUS_LINK_PARTNER_50G_CAPABLE 0x00004000 +#define LINK_STATUS_LINK_PARTNER_100G_CAPABLE 0x00008000 +#define LINK_STATUS_LINK_PARTNER_25G_CAPABLE 0x00010000 + +#define LINK_STATUS_LINK_PARTNER_FLOW_CONTROL_MASK 0x000C0000 +#define LINK_STATUS_LINK_PARTNER_NOT_PAUSE_CAPABLE (0 << 18) +#define LINK_STATUS_LINK_PARTNER_SYMMETRIC_PAUSE BIT(18) +#define LINK_STATUS_LINK_PARTNER_ASYMMETRIC_PAUSE (2 << 18) +#define LINK_STATUS_LINK_PARTNER_BOTH_PAUSE (3 << 18) + +#define LINK_STATUS_SFP_TX_FAULT \ + 
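Decoding public_port.link_status is mask-and-compare; the speed/duplex code occupies bits 1..4. A sketch covering two of the speed values from the list above:

#include <stdint.h>
#include <stdio.h>

/* Values copied from the link_status defines above (two speeds shown) */
#define LINK_STATUS_LINK_UP               0x00000001
#define LINK_STATUS_SPEED_AND_DUPLEX_MASK 0x0000001e
#define LINK_STATUS_SPEED_AND_DUPLEX_10G  (3 << 1)
#define LINK_STATUS_SPEED_AND_DUPLEX_40G  (5 << 1)

static unsigned int link_speed_mbps(uint32_t link_status)
{
	if (!(link_status & LINK_STATUS_LINK_UP))
		return 0;

	switch (link_status & LINK_STATUS_SPEED_AND_DUPLEX_MASK) {
	case LINK_STATUS_SPEED_AND_DUPLEX_10G: return 10000;
	case LINK_STATUS_SPEED_AND_DUPLEX_40G: return 40000;
	/* remaining speeds elided; see the full define list above */
	default: return 0;
	}
}

int main(void)
{
	printf("%u Mbps\n", link_speed_mbps(LINK_STATUS_LINK_UP |
					   LINK_STATUS_SPEED_AND_DUPLEX_40G));
	return 0;
}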
0x00100000 +#define LINK_STATUS_TX_FLOW_CONTROL_ENABLED 0x00200000 +#define LINK_STATUS_RX_FLOW_CONTROL_ENABLED 0x00400000 + + u32 link_status1; + u32 ext_phy_fw_version; + u32 drv_phy_cfg_addr; + + u32 port_stx; + + u32 stat_nig_timer; + + struct port_mf_cfg port_mf_config; + struct port_stats stats; + + u32 media_type; +#define MEDIA_UNSPECIFIED 0x0 +#define MEDIA_SFPP_10G_FIBER 0x1 +#define MEDIA_XFP_FIBER 0x2 +#define MEDIA_DA_TWINAX 0x3 +#define MEDIA_BASE_T 0x4 +#define MEDIA_SFP_1G_FIBER 0x5 +#define MEDIA_KR 0xf0 +#define MEDIA_NOT_PRESENT 0xff + + u32 lfa_status; +#define LFA_LINK_FLAP_REASON_OFFSET 0 +#define LFA_LINK_FLAP_REASON_MASK 0x000000ff +#define LFA_NO_REASON (0 << 0) +#define LFA_LINK_DOWN BIT(0) +#define LFA_FORCE_INIT BIT(1) +#define LFA_LOOPBACK_MISMATCH BIT(2) +#define LFA_SPEED_MISMATCH BIT(3) +#define LFA_FLOW_CTRL_MISMATCH BIT(4) +#define LFA_ADV_SPEED_MISMATCH BIT(5) +#define LINK_FLAP_AVOIDANCE_COUNT_OFFSET 8 +#define LINK_FLAP_AVOIDANCE_COUNT_MASK 0x0000ff00 +#define LINK_FLAP_COUNT_OFFSET 16 +#define LINK_FLAP_COUNT_MASK 0x00ff0000 + + u32 link_change_count; + + /* LLDP params */ + struct lldp_config_params_s lldp_config_params[ + LLDP_MAX_LLDP_AGENTS]; + struct lldp_status_params_s lldp_status_params[ + LLDP_MAX_LLDP_AGENTS]; + struct lldp_system_tlvs_buffer_s system_lldp_tlvs_buf; + + /* DCBX related MIB */ + struct dcbx_local_params local_admin_dcbx_mib; + struct dcbx_mib remote_dcbx_mib; + struct dcbx_mib operational_dcbx_mib; +}; + +/**************************************/ +/* */ +/* P U B L I C F U N C */ +/* */ +/**************************************/ + +struct public_func { + u32 iscsi_boot_signature; + u32 iscsi_boot_block_offset; + + u32 reserved[8]; + + u32 config; + + /* E/R/I/D */ + /* function 0 of each port cannot be hidden */ +#define FUNC_MF_CFG_FUNC_HIDE 0x00000001 +#define FUNC_MF_CFG_PAUSE_ON_HOST_RING 0x00000002 +#define FUNC_MF_CFG_PAUSE_ON_HOST_RING_SHIFT 0x00000001 + +#define FUNC_MF_CFG_PROTOCOL_MASK 0x000000f0 +#define FUNC_MF_CFG_PROTOCOL_SHIFT 4 +#define FUNC_MF_CFG_PROTOCOL_ETHERNET 0x00000000 +#define FUNC_MF_CFG_PROTOCOL_ISCSI 0x00000010 +#define FUNC_MF_CFG_PROTOCOL_FCOE 0x00000020 +#define FUNC_MF_CFG_PROTOCOL_ROCE 0x00000030 +#define FUNC_MF_CFG_PROTOCOL_MAX 0x00000030 + + /* MINBW, MAXBW */ + /* value range - 0..100, increments in 1 % */ +#define FUNC_MF_CFG_MIN_BW_MASK 0x0000ff00 +#define FUNC_MF_CFG_MIN_BW_SHIFT 8 +#define FUNC_MF_CFG_MIN_BW_DEFAULT 0x00000000 +#define FUNC_MF_CFG_MAX_BW_MASK 0x00ff0000 +#define FUNC_MF_CFG_MAX_BW_SHIFT 16 +#define FUNC_MF_CFG_MAX_BW_DEFAULT 0x00640000 + + u32 status; +#define FUNC_STATUS_VLINK_DOWN 0x00000001 + + u32 mac_upper; /* MAC */ +#define FUNC_MF_CFG_UPPERMAC_MASK 0x0000ffff +#define FUNC_MF_CFG_UPPERMAC_SHIFT 0 +#define FUNC_MF_CFG_UPPERMAC_DEFAULT FUNC_MF_CFG_UPPERMAC_MASK + u32 mac_lower; +#define FUNC_MF_CFG_LOWERMAC_DEFAULT 0xffffffff + + u32 fcoe_wwn_port_name_upper; + u32 fcoe_wwn_port_name_lower; + + u32 fcoe_wwn_node_name_upper; + u32 fcoe_wwn_node_name_lower; + + u32 ovlan_stag; /* tags */ +#define FUNC_MF_CFG_OV_STAG_MASK 0x0000ffff +#define FUNC_MF_CFG_OV_STAG_SHIFT 0 +#define FUNC_MF_CFG_OV_STAG_DEFAULT FUNC_MF_CFG_OV_STAG_MASK + + u32 pf_allocation; /* vf per pf */ + + u32 preserve_data; /* Will be used bt CCM */ + + u32 driver_last_activity_ts; + + u32 drv_ack_vf_disabled[VF_MAX_STATIC / 32]; /* 0x0044 */ + + u32 drv_id; +#define DRV_ID_PDA_COMP_VER_MASK 0x0000ffff +#define DRV_ID_PDA_COMP_VER_SHIFT 0 + +#define DRV_ID_MCP_HSI_VER_MASK 0x00ff0000 +#define 
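The MINBW/MAXBW fields in public_func.config are plain percentages (0..100) packed into the dword, with FUNC_MF_CFG_MAX_BW_DEFAULT encoding 0x64 = 100%. Extraction is the usual mask-and-shift:

#include <stdint.h>
#include <stdio.h>

/* Values copied from public_func.config above */
#define FUNC_MF_CFG_MIN_BW_MASK    0x0000ff00
#define FUNC_MF_CFG_MIN_BW_SHIFT   8
#define FUNC_MF_CFG_MAX_BW_MASK    0x00ff0000
#define FUNC_MF_CFG_MAX_BW_SHIFT   16
#define FUNC_MF_CFG_MAX_BW_DEFAULT 0x00640000	/* 0x64 == 100% */

int main(void)
{
	uint32_t config = FUNC_MF_CFG_MAX_BW_DEFAULT |
			  (10u << FUNC_MF_CFG_MIN_BW_SHIFT);	/* min 10%, max 100% */

	printf("min=%u%% max=%u%%\n",
	       (unsigned int)((config & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT),
	       (unsigned int)((config & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT));
	return 0;
}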
DRV_ID_MCP_HSI_VER_SHIFT 16 +#define DRV_ID_MCP_HSI_VER_CURRENT BIT(DRV_ID_MCP_HSI_VER_SHIFT) + +#define DRV_ID_DRV_TYPE_MASK 0xff000000 +#define DRV_ID_DRV_TYPE_SHIFT 24 +#define DRV_ID_DRV_TYPE_UNKNOWN (0 << DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_LINUX BIT(DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_WINDOWS (2 << DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_DIAG (3 << DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_PREBOOT (4 << DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_SOLARIS (5 << DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_VMWARE (6 << DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_FREEBSD (7 << DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_AIX (8 << DRV_ID_DRV_TYPE_SHIFT) +}; + +/**************************************/ +/* */ +/* P U B L I C M B */ +/* */ +/**************************************/ +/* This is the only section that the driver can write to, and each */ +/* Basically each driver request to set feature parameters, + * will be done using a different command, which will be linked + * to a specific data structure from the union below. + * For huge strucuture, the common blank structure should be used. + */ + +struct mcp_mac { + u32 mac_upper; /* Upper 16 bits are always zeroes */ + u32 mac_lower; +}; + +struct mcp_val64 { + u32 lo; + u32 hi; +}; + +struct mcp_file_att { + u32 nvm_start_addr; + u32 len; +}; + +#define MCP_DRV_VER_STR_SIZE 16 +#define MCP_DRV_VER_STR_SIZE_DWORD (MCP_DRV_VER_STR_SIZE / sizeof(u32)) +#define MCP_DRV_NVM_BUF_LEN 32 +struct drv_version_stc { + u32 version; + u8 name[MCP_DRV_VER_STR_SIZE - 4]; +}; + +union drv_union_data { + u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD]; + struct mcp_mac wol_mac; + + struct pmm_phy_cfg drv_phy_cfg; + + struct mcp_val64 val64; /* For PHY / AVS commands */ + + u8 raw_data[MCP_DRV_NVM_BUF_LEN]; + + struct mcp_file_att file_att; + + u32 ack_vf_disabled[VF_MAX_STATIC / 32]; + + struct drv_version_stc drv_version; +}; + +struct public_drv_mb { + u32 drv_mb_header; +#define DRV_MSG_CODE_MASK 0xffff0000 +#define DRV_MSG_CODE_LOAD_REQ 0x10000000 +#define DRV_MSG_CODE_LOAD_DONE 0x11000000 +#define DRV_MSG_CODE_UNLOAD_REQ 0x20000000 +#define DRV_MSG_CODE_UNLOAD_DONE 0x21000000 +#define DRV_MSG_CODE_INIT_PHY 0x22000000 + /* Params - FORCE - Reinitialize the link regardless of LFA */ + /* - DONT_CARE - Don't flap the link if up */ +#define DRV_MSG_CODE_LINK_RESET 0x23000000 + +#define DRV_MSG_CODE_SET_LLDP 0x24000000 +#define DRV_MSG_CODE_SET_DCBX 0x25000000 + +#define DRV_MSG_CODE_NIG_DRAIN 0x30000000 + +#define DRV_MSG_CODE_INITIATE_FLR 0x02000000 +#define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000 +#define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000 +#define DRV_MSG_CODE_NVM_PUT_FILE_BEGIN 0x00010000 +#define DRV_MSG_CODE_NVM_PUT_FILE_DATA 0x00020000 +#define DRV_MSG_CODE_NVM_GET_FILE_ATT 0x00030000 +#define DRV_MSG_CODE_NVM_READ_NVRAM 0x00050000 +#define DRV_MSG_CODE_NVM_WRITE_NVRAM 0x00060000 +#define DRV_MSG_CODE_NVM_DEL_FILE 0x00080000 +#define DRV_MSG_CODE_MCP_RESET 0x00090000 +#define DRV_MSG_CODE_SET_SECURE_MODE 0x000a0000 +#define DRV_MSG_CODE_PHY_RAW_READ 0x000b0000 +#define DRV_MSG_CODE_PHY_RAW_WRITE 0x000c0000 +#define DRV_MSG_CODE_PHY_CORE_READ 0x000d0000 +#define DRV_MSG_CODE_PHY_CORE_WRITE 0x000e0000 +#define DRV_MSG_CODE_SET_VERSION 0x000f0000 + +#define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff + + u32 drv_mb_param; + + /* UNLOAD_REQ params */ +#define DRV_MB_PARAM_UNLOAD_WOL_UNKNOWN 0x00000000 +#define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 +#define DRV_MB_PARAM_UNLOAD_WOL_DISABLED 0x00000002 +#define 
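mcp_mac splits a station address into two high bytes in mac_upper (upper 16 bits zero, per its comment) and four low bytes in mac_lower. A sketch of reassembling the wire-order bytes; the byte ordering shown is an assumption based on that layout:

#include <stdint.h>
#include <stdio.h>

/* Assumed byte order: mac_upper holds bytes 0-1, mac_lower bytes 2-5 */
static void mcp_mac_to_bytes(uint32_t mac_upper, uint32_t mac_lower, uint8_t mac[6])
{
	mac[0] = mac_upper >> 8;
	mac[1] = mac_upper;
	mac[2] = mac_lower >> 24;
	mac[3] = mac_lower >> 16;
	mac[4] = mac_lower >> 8;
	mac[5] = mac_lower;
}

int main(void)
{
	uint8_t mac[6];

	mcp_mac_to_bytes(0x0000000e, 0x1e123456, mac);
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;
}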
DRV_MB_PARAM_UNLOAD_WOL_ENABLED 0x00000003 + + /* UNLOAD_DONE_params */ +#define DRV_MB_PARAM_UNLOAD_NON_D3_POWER 0x00000001 + + /* INIT_PHY params */ +#define DRV_MB_PARAM_INIT_PHY_FORCE 0x00000001 +#define DRV_MB_PARAM_INIT_PHY_DONT_CARE 0x00000002 + + /* LLDP / DCBX params*/ +#define DRV_MB_PARAM_LLDP_SEND_MASK 0x00000001 +#define DRV_MB_PARAM_LLDP_SEND_SHIFT 0 +#define DRV_MB_PARAM_LLDP_AGENT_MASK 0x00000006 +#define DRV_MB_PARAM_LLDP_AGENT_SHIFT 1 +#define DRV_MB_PARAM_DCBX_NOTIFY_MASK 0x00000008 +#define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3 + +#define DRV_MB_PARAM_NIG_DRAIN_PERIOD_MS_MASK 0x000000FF +#define DRV_MB_PARAM_NIG_DRAIN_PERIOD_MS_SHIFT 0 + +#define DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_MFW 0x1 +#define DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_IMAGE 0x2 + +#define DRV_MB_PARAM_NVM_OFFSET_SHIFT 0 +#define DRV_MB_PARAM_NVM_OFFSET_MASK 0x00FFFFFF +#define DRV_MB_PARAM_NVM_LEN_SHIFT 24 +#define DRV_MB_PARAM_NVM_LEN_MASK 0xFF000000 + +#define DRV_MB_PARAM_PHY_ADDR_SHIFT 0 +#define DRV_MB_PARAM_PHY_ADDR_MASK 0x1FF0FFFF +#define DRV_MB_PARAM_PHY_LANE_SHIFT 16 +#define DRV_MB_PARAM_PHY_LANE_MASK 0x000F0000 +#define DRV_MB_PARAM_PHY_SELECT_PORT_SHIFT 29 +#define DRV_MB_PARAM_PHY_SELECT_PORT_MASK 0x20000000 +#define DRV_MB_PARAM_PHY_PORT_SHIFT 30 +#define DRV_MB_PARAM_PHY_PORT_MASK 0xc0000000 + +/* configure vf MSIX params*/ +#define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT 0 +#define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK 0x000000FF +#define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 +#define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK 0x0000FF00 + + u32 fw_mb_header; +#define FW_MSG_CODE_MASK 0xffff0000 +#define FW_MSG_CODE_DRV_LOAD_ENGINE 0x10100000 +#define FW_MSG_CODE_DRV_LOAD_PORT 0x10110000 +#define FW_MSG_CODE_DRV_LOAD_FUNCTION 0x10120000 +#define FW_MSG_CODE_DRV_LOAD_REFUSED_PDA 0x10200000 +#define FW_MSG_CODE_DRV_LOAD_REFUSED_HSI 0x10210000 +#define FW_MSG_CODE_DRV_LOAD_REFUSED_DIAG 0x10220000 +#define FW_MSG_CODE_DRV_LOAD_DONE 0x11100000 +#define FW_MSG_CODE_DRV_UNLOAD_ENGINE 0x20110000 +#define FW_MSG_CODE_DRV_UNLOAD_PORT 0x20120000 +#define FW_MSG_CODE_DRV_UNLOAD_FUNCTION 0x20130000 +#define FW_MSG_CODE_DRV_UNLOAD_DONE 0x21100000 +#define FW_MSG_CODE_INIT_PHY_DONE 0x21200000 +#define FW_MSG_CODE_INIT_PHY_ERR_INVALID_ARGS 0x21300000 +#define FW_MSG_CODE_LINK_RESET_DONE 0x23000000 +#define FW_MSG_CODE_SET_LLDP_DONE 0x24000000 +#define FW_MSG_CODE_SET_LLDP_UNSUPPORTED_AGENT 0x24010000 +#define FW_MSG_CODE_SET_DCBX_DONE 0x25000000 +#define FW_MSG_CODE_NIG_DRAIN_DONE 0x30000000 +#define FW_MSG_CODE_VF_DISABLED_DONE 0xb0000000 +#define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE 0xb0010000 +#define FW_MSG_CODE_FLR_ACK 0x02000000 +#define FW_MSG_CODE_FLR_NACK 0x02100000 + +#define FW_MSG_CODE_NVM_OK 0x00010000 +#define FW_MSG_CODE_NVM_INVALID_MODE 0x00020000 +#define FW_MSG_CODE_NVM_PREV_CMD_WAS_NOT_FINISHED 0x00030000 +#define FW_MSG_CODE_NVM_FAILED_TO_ALLOCATE_PAGE 0x00040000 +#define FW_MSG_CODE_NVM_INVALID_DIR_FOUND 0x00050000 +#define FW_MSG_CODE_NVM_PAGE_NOT_FOUND 0x00060000 +#define FW_MSG_CODE_NVM_FAILED_PARSING_BNDLE_HEADER 0x00070000 +#define FW_MSG_CODE_NVM_FAILED_PARSING_IMAGE_HEADER 0x00080000 +#define FW_MSG_CODE_NVM_PARSING_OUT_OF_SYNC 0x00090000 +#define FW_MSG_CODE_NVM_FAILED_UPDATING_DIR 0x000a0000 +#define FW_MSG_CODE_NVM_FAILED_TO_FREE_PAGE 0x000b0000 +#define FW_MSG_CODE_NVM_FILE_NOT_FOUND 0x000c0000 +#define FW_MSG_CODE_NVM_OPERATION_FAILED 0x000d0000 +#define FW_MSG_CODE_NVM_FAILED_UNALIGNED 0x000e0000 +#define FW_MSG_CODE_NVM_BAD_OFFSET 0x000f0000 +#define FW_MSG_CODE_NVM_BAD_SIGNATURE 0x00100000 
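The driver-to-MFW handshake framing implied by these defines: the command goes in the upper 16 bits of drv_mb_header with a rolling sequence number in the lower 16 (DRV_MSG_SEQ_NUMBER_MASK), and the MFW answers in fw_mb_header with an FW_MSG_CODE_* plus, per the matching FW_MSG_SEQ_NUMBER_MASK, an echoed sequence number. A hedged sketch; the shmem accessors are hypothetical and completion polling is elided:

#include <stdint.h>

/* Values copied from public_drv_mb above */
#define DRV_MSG_CODE_MASK       0xffff0000
#define DRV_MSG_CODE_LOAD_REQ   0x10000000
#define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff
#define FW_MSG_CODE_MASK        0xffff0000
#define FW_MSG_SEQ_NUMBER_MASK  0x0000ffff

uint32_t shmem_read_fw_mb_header(void);		/* hypothetical shmem accessors */
void shmem_write_drv_mb_header(uint32_t hdr);

/* Issue LOAD_REQ under sequence number `seq`; returns the FW_MSG_CODE_* reply,
 * or 0 when the reply carries a stale sequence number.
 */
uint32_t mcp_load_req(uint16_t seq)
{
	uint32_t reply;

	shmem_write_drv_mb_header(DRV_MSG_CODE_LOAD_REQ |
				  (seq & DRV_MSG_SEQ_NUMBER_MASK));
	reply = shmem_read_fw_mb_header();	/* completion polling elided */
	if ((reply & FW_MSG_SEQ_NUMBER_MASK) != seq)
		return 0;
	return reply & FW_MSG_CODE_MASK;
}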
+#define FW_MSG_CODE_NVM_FILE_READ_ONLY 0x00200000 +#define FW_MSG_CODE_NVM_UNKNOWN_FILE 0x00300000 +#define FW_MSG_CODE_NVM_PUT_FILE_FINISH_OK 0x00400000 +#define FW_MSG_CODE_MCP_RESET_REJECT 0x00600000 +#define FW_MSG_CODE_PHY_OK 0x00110000 +#define FW_MSG_CODE_PHY_ERROR 0x00120000 +#define FW_MSG_CODE_SET_SECURE_MODE_ERROR 0x00130000 +#define FW_MSG_CODE_SET_SECURE_MODE_OK 0x00140000 +#define FW_MSG_MODE_PHY_PRIVILEGE_ERROR 0x00150000 + +#define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff + + u32 fw_mb_param; + + u32 drv_pulse_mb; +#define DRV_PULSE_SEQ_MASK 0x00007fff +#define DRV_PULSE_SYSTEM_TIME_MASK 0xffff0000 +#define DRV_PULSE_ALWAYS_ALIVE 0x00008000 + u32 mcp_pulse_mb; +#define MCP_PULSE_SEQ_MASK 0x00007fff +#define MCP_PULSE_ALWAYS_ALIVE 0x00008000 +#define MCP_EVENT_MASK 0xffff0000 +#define MCP_EVENT_OTHER_DRIVER_RESET_REQ 0x00010000 + + union drv_union_data union_data; +}; + +/* MFW - DRV MB */ +/********************************************************************** +* Description +* Incremental Aggregative +* 8-bit MFW counter per message +* 8-bit ack-counter per message +* Capabilities +* Provides up to 256 aggregative message per type +* Provides 4 message types in dword +* Message type pointers to byte offset +* Backward Compatibility by using sizeof for the counters. +* No lock requires for 32bit messages +* Limitations: +* In case of messages greater than 32bit, a dedicated mechanism(e.g lock) +* is required to prevent data corruption. +**********************************************************************/ +enum MFW_DRV_MSG_TYPE { + MFW_DRV_MSG_LINK_CHANGE, + MFW_DRV_MSG_FLR_FW_ACK_FAILED, + MFW_DRV_MSG_VF_DISABLED, + MFW_DRV_MSG_LLDP_DATA_UPDATED, + MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED, + MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED, + MFW_DRV_MSG_ERROR_RECOVERY, + MFW_DRV_MSG_MAX +}; + +#define MFW_DRV_MSG_MAX_DWORDS(msgs) (((msgs - 1) >> 2) + 1) +#define MFW_DRV_MSG_DWORD(msg_id) (msg_id >> 2) +#define MFW_DRV_MSG_OFFSET(msg_id) ((msg_id & 0x3) << 3) +#define MFW_DRV_MSG_MASK(msg_id) (0xff << MFW_DRV_MSG_OFFSET(msg_id)) + +struct public_mfw_mb { + u32 sup_msgs; + u32 msg[MFW_DRV_MSG_MAX_DWORDS(MFW_DRV_MSG_MAX)]; + u32 ack[MFW_DRV_MSG_MAX_DWORDS(MFW_DRV_MSG_MAX)]; +}; + +/**************************************/ +/* */ +/* P U B L I C D A T A */ +/* */ +/**************************************/ +enum public_sections { + PUBLIC_DRV_MB, /* Points to the first drv_mb of path0 */ + PUBLIC_MFW_MB, /* Points to the first mfw_mb of path0 */ + PUBLIC_GLOBAL, + PUBLIC_PATH, + PUBLIC_PORT, + PUBLIC_FUNC, + PUBLIC_MAX_SECTIONS +}; + +struct drv_ver_info_stc { + u32 ver; + u8 name[32]; +}; + +struct mcp_public_data { + /* The sections fields is an array */ + u32 num_sections; + offsize_t sections[PUBLIC_MAX_SECTIONS]; + struct public_drv_mb drv_mb[MCP_GLOB_FUNC_MAX]; + struct public_mfw_mb mfw_mb[MCP_GLOB_FUNC_MAX]; + struct public_global global; + struct public_path path[MCP_GLOB_PATH_MAX]; + struct public_port port[MCP_GLOB_PORT_MAX]; + struct public_func func[MCP_GLOB_FUNC_MAX]; + struct drv_ver_info_stc drv_info; +}; + +struct nvm_cfg_mac_address { + u32 mac_addr_hi; +#define NVM_CFG_MAC_ADDRESS_HI_MASK 0x0000FFFF +#define NVM_CFG_MAC_ADDRESS_HI_OFFSET 0 + + u32 mac_addr_lo; +}; + +/****************************************** +* nvm_cfg1 structs +******************************************/ + +struct nvm_cfg1_glob { + u32 generic_cont0; /* 0x0 */ +#define NVM_CFG1_GLOB_BOARD_SWAP_MASK 0x0000000F +#define NVM_CFG1_GLOB_BOARD_SWAP_OFFSET 0 +#define NVM_CFG1_GLOB_BOARD_SWAP_NONE 0x0 +#define 
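The MFW_DRV_MSG_* helpers locate a message's 8-bit counter inside the msg[]/ack[] dword arrays: four counters per dword, MFW_DRV_MSG_DWORD picks the array index, MFW_DRV_MSG_OFFSET the bit position, MFW_DRV_MSG_MASK the whole byte. Per the comment block above, a message is outstanding while its msg[] counter differs from its ack[] counter; a sketch (enum names shortened for the example):

#include <stdint.h>
#include <stdio.h>

/* Macros copied from above */
#define MFW_DRV_MSG_MAX_DWORDS(msgs) ((((msgs) - 1) >> 2) + 1)
#define MFW_DRV_MSG_DWORD(msg_id)    ((msg_id) >> 2)
#define MFW_DRV_MSG_OFFSET(msg_id)   (((msg_id) & 0x3) << 3)
#define MFW_DRV_MSG_MASK(msg_id)     (0xff << MFW_DRV_MSG_OFFSET(msg_id))

#define MSG_VF_DISABLED 2	/* MFW_DRV_MSG_VF_DISABLED from the enum above */
#define MSG_MAX         7	/* MFW_DRV_MSG_MAX */

int main(void)
{
	uint32_t msg[MFW_DRV_MSG_MAX_DWORDS(MSG_MAX)] = { 0 };
	uint32_t ack[MFW_DRV_MSG_MAX_DWORDS(MSG_MAX)] = { 0 };
	int dw = MFW_DRV_MSG_DWORD(MSG_VF_DISABLED);

	msg[dw] += 1u << MFW_DRV_MSG_OFFSET(MSG_VF_DISABLED);	/* MFW bumps its counter */

	if ((msg[dw] ^ ack[dw]) & MFW_DRV_MSG_MASK(MSG_VF_DISABLED))
		printf("VF_DISABLED pending\n");	/* counters differ: message outstanding */
	return 0;
}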
NVM_CFG1_GLOB_BOARD_SWAP_PATH 0x1 +#define NVM_CFG1_GLOB_BOARD_SWAP_PORT 0x2 +#define NVM_CFG1_GLOB_BOARD_SWAP_BOTH 0x3 +#define NVM_CFG1_GLOB_MF_MODE_MASK 0x00000FF0 +#define NVM_CFG1_GLOB_MF_MODE_OFFSET 4 +#define NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED 0x0 +#define NVM_CFG1_GLOB_MF_MODE_FORCED_SF 0x1 +#define NVM_CFG1_GLOB_MF_MODE_SPIO4 0x2 +#define NVM_CFG1_GLOB_MF_MODE_NPAR1_0 0x3 +#define NVM_CFG1_GLOB_MF_MODE_NPAR1_5 0x4 +#define NVM_CFG1_GLOB_MF_MODE_NPAR2_0 0x5 +#define NVM_CFG1_GLOB_MF_MODE_BD 0x6 +#define NVM_CFG1_GLOB_MF_MODE_UFP 0x7 +#define NVM_CFG1_GLOB_FAN_FAILURE_ENFORCEMENT_MASK 0x00001000 +#define NVM_CFG1_GLOB_FAN_FAILURE_ENFORCEMENT_OFFSET 12 +#define NVM_CFG1_GLOB_FAN_FAILURE_ENFORCEMENT_DISABLED 0x0 +#define NVM_CFG1_GLOB_FAN_FAILURE_ENFORCEMENT_ENABLED 0x1 +#define NVM_CFG1_GLOB_AVS_MARGIN_LOW_MASK 0x001FE000 +#define NVM_CFG1_GLOB_AVS_MARGIN_LOW_OFFSET 13 +#define NVM_CFG1_GLOB_AVS_MARGIN_HIGH_MASK 0x1FE00000 +#define NVM_CFG1_GLOB_AVS_MARGIN_HIGH_OFFSET 21 +#define NVM_CFG1_GLOB_ENABLE_SRIOV_MASK 0x20000000 +#define NVM_CFG1_GLOB_ENABLE_SRIOV_OFFSET 29 +#define NVM_CFG1_GLOB_ENABLE_SRIOV_DISABLED 0x0 +#define NVM_CFG1_GLOB_ENABLE_SRIOV_ENABLED 0x1 +#define NVM_CFG1_GLOB_ENABLE_ATC_MASK 0x40000000 +#define NVM_CFG1_GLOB_ENABLE_ATC_OFFSET 30 +#define NVM_CFG1_GLOB_ENABLE_ATC_DISABLED 0x0 +#define NVM_CFG1_GLOB_ENABLE_ATC_ENABLED 0x1 +#define NVM_CFG1_GLOB_CLOCK_SLOWDOWN_MASK 0x80000000 +#define NVM_CFG1_GLOB_CLOCK_SLOWDOWN_OFFSET 31 +#define NVM_CFG1_GLOB_CLOCK_SLOWDOWN_DISABLED 0x0 +#define NVM_CFG1_GLOB_CLOCK_SLOWDOWN_ENABLED 0x1 + + u32 engineering_change[3]; /* 0x4 */ + + u32 manufacturing_id; /* 0x10 */ + + u32 serial_number[4]; /* 0x14 */ + + u32 pcie_cfg; /* 0x24 */ +#define NVM_CFG1_GLOB_PCI_GEN_MASK 0x00000003 +#define NVM_CFG1_GLOB_PCI_GEN_OFFSET 0 +#define NVM_CFG1_GLOB_PCI_GEN_PCI_GEN1 0x0 +#define NVM_CFG1_GLOB_PCI_GEN_PCI_GEN2 0x1 +#define NVM_CFG1_GLOB_PCI_GEN_PCI_GEN3 0x2 +#define NVM_CFG1_GLOB_BEACON_WOL_ENABLED_MASK 0x00000004 +#define NVM_CFG1_GLOB_BEACON_WOL_ENABLED_OFFSET 2 +#define NVM_CFG1_GLOB_BEACON_WOL_ENABLED_DISABLED 0x0 +#define NVM_CFG1_GLOB_BEACON_WOL_ENABLED_ENABLED 0x1 +#define NVM_CFG1_GLOB_ASPM_SUPPORT_MASK 0x00000018 +#define NVM_CFG1_GLOB_ASPM_SUPPORT_OFFSET 3 +#define NVM_CFG1_GLOB_ASPM_SUPPORT_L0S_L1_ENABLED 0x0 +#define NVM_CFG1_GLOB_ASPM_SUPPORT_L0S_DISABLED 0x1 +#define NVM_CFG1_GLOB_ASPM_SUPPORT_L1_DISABLED 0x2 +#define NVM_CFG1_GLOB_ASPM_SUPPORT_L0S_L1_DISABLED 0x3 +#define NVM_CFG1_GLOB_PREVENT_PCIE_L1_MENTRY_MASK 0x00000020 +#define NVM_CFG1_GLOB_PREVENT_PCIE_L1_MENTRY_OFFSET 5 +#define NVM_CFG1_GLOB_PREVENT_PCIE_L1_MENTRY_DISABLED 0x0 +#define NVM_CFG1_GLOB_PREVENT_PCIE_L1_MENTRY_ENABLED 0x1 +#define NVM_CFG1_GLOB_PCIE_G2_TX_AMPLITUDE_MASK 0x000003C0 +#define NVM_CFG1_GLOB_PCIE_G2_TX_AMPLITUDE_OFFSET 6 +#define NVM_CFG1_GLOB_PCIE_PREEMPHASIS_MASK 0x00001C00 +#define NVM_CFG1_GLOB_PCIE_PREEMPHASIS_OFFSET 10 +#define NVM_CFG1_GLOB_PCIE_PREEMPHASIS_HW 0x0 +#define NVM_CFG1_GLOB_PCIE_PREEMPHASIS_0DB 0x1 +#define NVM_CFG1_GLOB_PCIE_PREEMPHASIS_3_5DB 0x2 +#define NVM_CFG1_GLOB_PCIE_PREEMPHASIS_6_0DB 0x3 +#define NVM_CFG1_GLOB_WWN_NODE_PREFIX0_MASK 0x001FE000 +#define NVM_CFG1_GLOB_WWN_NODE_PREFIX0_OFFSET 13 +#define NVM_CFG1_GLOB_WWN_NODE_PREFIX1_MASK 0x1FE00000 +#define NVM_CFG1_GLOB_WWN_NODE_PREFIX1_OFFSET 21 +#define NVM_CFG1_GLOB_NCSI_PACKAGE_ID_MASK 0x60000000 +#define NVM_CFG1_GLOB_NCSI_PACKAGE_ID_OFFSET 29 + + u32 mgmt_traffic; /* 0x28 */ +#define NVM_CFG1_GLOB_RESERVED60_MASK 0x00000001 +#define 
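The nvm_cfg1 words use the same packing discipline but name the bit position *_OFFSET rather than *_SHIFT. Pulling the multi-function mode out of generic_cont0, for example:

#include <stdint.h>
#include <stdio.h>

/* Values copied from nvm_cfg1_glob.generic_cont0 above */
#define NVM_CFG1_GLOB_MF_MODE_MASK    0x00000FF0
#define NVM_CFG1_GLOB_MF_MODE_OFFSET  4
#define NVM_CFG1_GLOB_MF_MODE_NPAR1_0 0x3

int main(void)
{
	uint32_t generic_cont0 = NVM_CFG1_GLOB_MF_MODE_NPAR1_0 << NVM_CFG1_GLOB_MF_MODE_OFFSET;
	uint32_t mf_mode = (generic_cont0 & NVM_CFG1_GLOB_MF_MODE_MASK) >>
			   NVM_CFG1_GLOB_MF_MODE_OFFSET;

	printf("mf_mode=0x%x (NPAR1.0)\n", (unsigned int)mf_mode);
	return 0;
}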
NVM_CFG1_GLOB_RESERVED60_OFFSET 0 +#define NVM_CFG1_GLOB_RESERVED60_100KHZ 0x0 +#define NVM_CFG1_GLOB_RESERVED60_400KHZ 0x1 +#define NVM_CFG1_GLOB_WWN_PORT_PREFIX0_MASK 0x000001FE +#define NVM_CFG1_GLOB_WWN_PORT_PREFIX0_OFFSET 1 +#define NVM_CFG1_GLOB_WWN_PORT_PREFIX1_MASK 0x0001FE00 +#define NVM_CFG1_GLOB_WWN_PORT_PREFIX1_OFFSET 9 +#define NVM_CFG1_GLOB_SMBUS_ADDRESS_MASK 0x01FE0000 +#define NVM_CFG1_GLOB_SMBUS_ADDRESS_OFFSET 17 +#define NVM_CFG1_GLOB_SIDEBAND_MODE_MASK 0x06000000 +#define NVM_CFG1_GLOB_SIDEBAND_MODE_OFFSET 25 +#define NVM_CFG1_GLOB_SIDEBAND_MODE_DISABLED 0x0 +#define NVM_CFG1_GLOB_SIDEBAND_MODE_RMII 0x1 +#define NVM_CFG1_GLOB_SIDEBAND_MODE_SGMII 0x2 + + u32 core_cfg; /* 0x2C */ +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_MASK 0x000000FF +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_OFFSET 0 +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_2X40G 0x0 +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_2X50G 0x1 +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_1X100G 0x2 +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_4X10G_F 0x3 +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_4X10G_E 0x4 +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_4X20G 0x5 +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_1X40G 0xB +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_2X25G 0xC +#define NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_1X25G 0xD +#define NVM_CFG1_GLOB_EAGLE_ENFORCE_TX_FIR_CFG_MASK 0x00000100 +#define NVM_CFG1_GLOB_EAGLE_ENFORCE_TX_FIR_CFG_OFFSET 8 +#define NVM_CFG1_GLOB_EAGLE_ENFORCE_TX_FIR_CFG_DISABLED 0x0 +#define NVM_CFG1_GLOB_EAGLE_ENFORCE_TX_FIR_CFG_ENABLED 0x1 +#define NVM_CFG1_GLOB_FALCON_ENFORCE_TX_FIR_CFG_MASK 0x00000200 +#define NVM_CFG1_GLOB_FALCON_ENFORCE_TX_FIR_CFG_OFFSET 9 +#define NVM_CFG1_GLOB_FALCON_ENFORCE_TX_FIR_CFG_DISABLED 0x0 +#define NVM_CFG1_GLOB_FALCON_ENFORCE_TX_FIR_CFG_ENABLED 0x1 +#define NVM_CFG1_GLOB_EAGLE_CORE_ADDR_MASK 0x0003FC00 +#define NVM_CFG1_GLOB_EAGLE_CORE_ADDR_OFFSET 10 +#define NVM_CFG1_GLOB_FALCON_CORE_ADDR_MASK 0x03FC0000 +#define NVM_CFG1_GLOB_FALCON_CORE_ADDR_OFFSET 18 +#define NVM_CFG1_GLOB_AVS_MODE_MASK 0x1C000000 +#define NVM_CFG1_GLOB_AVS_MODE_OFFSET 26 +#define NVM_CFG1_GLOB_AVS_MODE_CLOSE_LOOP 0x0 +#define NVM_CFG1_GLOB_AVS_MODE_OPEN_LOOP 0x1 +#define NVM_CFG1_GLOB_AVS_MODE_DISABLED 0x3 +#define NVM_CFG1_GLOB_OVERRIDE_SECURE_MODE_MASK 0x60000000 +#define NVM_CFG1_GLOB_OVERRIDE_SECURE_MODE_OFFSET 29 +#define NVM_CFG1_GLOB_OVERRIDE_SECURE_MODE_DISABLED 0x0 +#define NVM_CFG1_GLOB_OVERRIDE_SECURE_MODE_ENABLED 0x1 + + u32 e_lane_cfg1; /* 0x30 */ +#define NVM_CFG1_GLOB_RX_LANE0_SWAP_MASK 0x0000000F +#define NVM_CFG1_GLOB_RX_LANE0_SWAP_OFFSET 0 +#define NVM_CFG1_GLOB_RX_LANE1_SWAP_MASK 0x000000F0 +#define NVM_CFG1_GLOB_RX_LANE1_SWAP_OFFSET 4 +#define NVM_CFG1_GLOB_RX_LANE2_SWAP_MASK 0x00000F00 +#define NVM_CFG1_GLOB_RX_LANE2_SWAP_OFFSET 8 +#define NVM_CFG1_GLOB_RX_LANE3_SWAP_MASK 0x0000F000 +#define NVM_CFG1_GLOB_RX_LANE3_SWAP_OFFSET 12 +#define NVM_CFG1_GLOB_TX_LANE0_SWAP_MASK 0x000F0000 +#define NVM_CFG1_GLOB_TX_LANE0_SWAP_OFFSET 16 +#define NVM_CFG1_GLOB_TX_LANE1_SWAP_MASK 0x00F00000 +#define NVM_CFG1_GLOB_TX_LANE1_SWAP_OFFSET 20 +#define NVM_CFG1_GLOB_TX_LANE2_SWAP_MASK 0x0F000000 +#define NVM_CFG1_GLOB_TX_LANE2_SWAP_OFFSET 24 +#define NVM_CFG1_GLOB_TX_LANE3_SWAP_MASK 0xF0000000 +#define NVM_CFG1_GLOB_TX_LANE3_SWAP_OFFSET 28 + + u32 e_lane_cfg2; /* 0x34 */ +#define NVM_CFG1_GLOB_RX_LANE0_POL_FLIP_MASK 0x00000001 +#define NVM_CFG1_GLOB_RX_LANE0_POL_FLIP_OFFSET 0 +#define NVM_CFG1_GLOB_RX_LANE1_POL_FLIP_MASK 0x00000002 +#define NVM_CFG1_GLOB_RX_LANE1_POL_FLIP_OFFSET 1 +#define 
NVM_CFG1_GLOB_RX_LANE2_POL_FLIP_MASK 0x00000004 +#define NVM_CFG1_GLOB_RX_LANE2_POL_FLIP_OFFSET 2 +#define NVM_CFG1_GLOB_RX_LANE3_POL_FLIP_MASK 0x00000008 +#define NVM_CFG1_GLOB_RX_LANE3_POL_FLIP_OFFSET 3 +#define NVM_CFG1_GLOB_TX_LANE0_POL_FLIP_MASK 0x00000010 +#define NVM_CFG1_GLOB_TX_LANE0_POL_FLIP_OFFSET 4 +#define NVM_CFG1_GLOB_TX_LANE1_POL_FLIP_MASK 0x00000020 +#define NVM_CFG1_GLOB_TX_LANE1_POL_FLIP_OFFSET 5 +#define NVM_CFG1_GLOB_TX_LANE2_POL_FLIP_MASK 0x00000040 +#define NVM_CFG1_GLOB_TX_LANE2_POL_FLIP_OFFSET 6 +#define NVM_CFG1_GLOB_TX_LANE3_POL_FLIP_MASK 0x00000080 +#define NVM_CFG1_GLOB_TX_LANE3_POL_FLIP_OFFSET 7 +#define NVM_CFG1_GLOB_SMBUS_MODE_MASK 0x00000F00 +#define NVM_CFG1_GLOB_SMBUS_MODE_OFFSET 8 +#define NVM_CFG1_GLOB_SMBUS_MODE_DISABLED 0x0 +#define NVM_CFG1_GLOB_SMBUS_MODE_100KHZ 0x1 +#define NVM_CFG1_GLOB_SMBUS_MODE_400KHZ 0x2 +#define NVM_CFG1_GLOB_NCSI_MASK 0x0000F000 +#define NVM_CFG1_GLOB_NCSI_OFFSET 12 +#define NVM_CFG1_GLOB_NCSI_DISABLED 0x0 +#define NVM_CFG1_GLOB_NCSI_ENABLED 0x1 + + u32 f_lane_cfg1; /* 0x38 */ +#define NVM_CFG1_GLOB_RX_LANE0_SWAP_MASK 0x0000000F +#define NVM_CFG1_GLOB_RX_LANE0_SWAP_OFFSET 0 +#define NVM_CFG1_GLOB_RX_LANE1_SWAP_MASK 0x000000F0 +#define NVM_CFG1_GLOB_RX_LANE1_SWAP_OFFSET 4 +#define NVM_CFG1_GLOB_RX_LANE2_SWAP_MASK 0x00000F00 +#define NVM_CFG1_GLOB_RX_LANE2_SWAP_OFFSET 8 +#define NVM_CFG1_GLOB_RX_LANE3_SWAP_MASK 0x0000F000 +#define NVM_CFG1_GLOB_RX_LANE3_SWAP_OFFSET 12 +#define NVM_CFG1_GLOB_TX_LANE0_SWAP_MASK 0x000F0000 +#define NVM_CFG1_GLOB_TX_LANE0_SWAP_OFFSET 16 +#define NVM_CFG1_GLOB_TX_LANE1_SWAP_MASK 0x00F00000 +#define NVM_CFG1_GLOB_TX_LANE1_SWAP_OFFSET 20 +#define NVM_CFG1_GLOB_TX_LANE2_SWAP_MASK 0x0F000000 +#define NVM_CFG1_GLOB_TX_LANE2_SWAP_OFFSET 24 +#define NVM_CFG1_GLOB_TX_LANE3_SWAP_MASK 0xF0000000 +#define NVM_CFG1_GLOB_TX_LANE3_SWAP_OFFSET 28 + + u32 f_lane_cfg2; /* 0x3C */ +#define NVM_CFG1_GLOB_RX_LANE0_POL_FLIP_MASK 0x00000001 +#define NVM_CFG1_GLOB_RX_LANE0_POL_FLIP_OFFSET 0 +#define NVM_CFG1_GLOB_RX_LANE1_POL_FLIP_MASK 0x00000002 +#define NVM_CFG1_GLOB_RX_LANE1_POL_FLIP_OFFSET 1 +#define NVM_CFG1_GLOB_RX_LANE2_POL_FLIP_MASK 0x00000004 +#define NVM_CFG1_GLOB_RX_LANE2_POL_FLIP_OFFSET 2 +#define NVM_CFG1_GLOB_RX_LANE3_POL_FLIP_MASK 0x00000008 +#define NVM_CFG1_GLOB_RX_LANE3_POL_FLIP_OFFSET 3 +#define NVM_CFG1_GLOB_TX_LANE0_POL_FLIP_MASK 0x00000010 +#define NVM_CFG1_GLOB_TX_LANE0_POL_FLIP_OFFSET 4 +#define NVM_CFG1_GLOB_TX_LANE1_POL_FLIP_MASK 0x00000020 +#define NVM_CFG1_GLOB_TX_LANE1_POL_FLIP_OFFSET 5 +#define NVM_CFG1_GLOB_TX_LANE2_POL_FLIP_MASK 0x00000040 +#define NVM_CFG1_GLOB_TX_LANE2_POL_FLIP_OFFSET 6 +#define NVM_CFG1_GLOB_TX_LANE3_POL_FLIP_MASK 0x00000080 +#define NVM_CFG1_GLOB_TX_LANE3_POL_FLIP_OFFSET 7 + + u32 eagle_preemphasis; /* 0x40 */ +#define NVM_CFG1_GLOB_LANE0_PREEMP_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_PREEMP_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_PREEMP_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_PREEMP_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_PREEMP_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_PREEMP_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_PREEMP_MASK 0xFF000000 +#define NVM_CFG1_GLOB_LANE3_PREEMP_OFFSET 24 + + u32 eagle_driver_current; /* 0x44 */ +#define NVM_CFG1_GLOB_LANE0_AMP_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_AMP_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_AMP_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_AMP_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_AMP_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_AMP_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_AMP_MASK 0xFF000000 +#define 
NVM_CFG1_GLOB_LANE3_AMP_OFFSET 24 + + u32 falcon_preemphasis; /* 0x48 */ +#define NVM_CFG1_GLOB_LANE0_PREEMP_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_PREEMP_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_PREEMP_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_PREEMP_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_PREEMP_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_PREEMP_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_PREEMP_MASK 0xFF000000 +#define NVM_CFG1_GLOB_LANE3_PREEMP_OFFSET 24 + + u32 falcon_driver_current; /* 0x4C */ +#define NVM_CFG1_GLOB_LANE0_AMP_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_AMP_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_AMP_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_AMP_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_AMP_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_AMP_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_AMP_MASK 0xFF000000 +#define NVM_CFG1_GLOB_LANE3_AMP_OFFSET 24 + + u32 pci_id; /* 0x50 */ +#define NVM_CFG1_GLOB_VENDOR_ID_MASK 0x0000FFFF +#define NVM_CFG1_GLOB_VENDOR_ID_OFFSET 0 + + u32 pci_subsys_id; /* 0x54 */ +#define NVM_CFG1_GLOB_SUBSYSTEM_VENDOR_ID_MASK 0x0000FFFF +#define NVM_CFG1_GLOB_SUBSYSTEM_VENDOR_ID_OFFSET 0 +#define NVM_CFG1_GLOB_SUBSYSTEM_DEVICE_ID_MASK 0xFFFF0000 +#define NVM_CFG1_GLOB_SUBSYSTEM_DEVICE_ID_OFFSET 16 + + u32 bar; /* 0x58 */ +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_MASK 0x0000000F +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_OFFSET 0 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_DISABLED 0x0 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_2K 0x1 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_4K 0x2 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_8K 0x3 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_16K 0x4 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_32K 0x5 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_64K 0x6 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_128K 0x7 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_256K 0x8 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_512K 0x9 +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_1M 0xA +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_2M 0xB +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_4M 0xC +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_8M 0xD +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_16M 0xE +#define NVM_CFG1_GLOB_EXPANSION_ROM_SIZE_32M 0xF +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_MASK 0x000000F0 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_OFFSET 4 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_DISABLED 0x0 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_4K 0x1 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_8K 0x2 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_16K 0x3 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_32K 0x4 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_64K 0x5 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_128K 0x6 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_256K 0x7 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_512K 0x8 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_1M 0x9 +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_2M 0xA +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_4M 0xB +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_8M 0xC +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_16M 0xD +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_32M 0xE +#define NVM_CFG1_GLOB_VF_PCI_BAR2_SIZE_64M 0xF +#define NVM_CFG1_GLOB_BAR2_SIZE_MASK 0x00000F00 +#define NVM_CFG1_GLOB_BAR2_SIZE_OFFSET 8 +#define NVM_CFG1_GLOB_BAR2_SIZE_DISABLED 0x0 +#define NVM_CFG1_GLOB_BAR2_SIZE_64K 0x1 +#define NVM_CFG1_GLOB_BAR2_SIZE_128K 0x2 +#define NVM_CFG1_GLOB_BAR2_SIZE_256K 0x3 +#define NVM_CFG1_GLOB_BAR2_SIZE_512K 0x4 +#define NVM_CFG1_GLOB_BAR2_SIZE_1M 0x5 +#define NVM_CFG1_GLOB_BAR2_SIZE_2M 0x6 +#define NVM_CFG1_GLOB_BAR2_SIZE_4M 0x7 +#define NVM_CFG1_GLOB_BAR2_SIZE_8M 0x8 +#define 
NVM_CFG1_GLOB_BAR2_SIZE_16M 0x9 +#define NVM_CFG1_GLOB_BAR2_SIZE_32M 0xA +#define NVM_CFG1_GLOB_BAR2_SIZE_64M 0xB +#define NVM_CFG1_GLOB_BAR2_SIZE_128M 0xC +#define NVM_CFG1_GLOB_BAR2_SIZE_256M 0xD +#define NVM_CFG1_GLOB_BAR2_SIZE_512M 0xE +#define NVM_CFG1_GLOB_BAR2_SIZE_1G 0xF + + u32 eagle_txfir_main; /* 0x5C */ +#define NVM_CFG1_GLOB_LANE0_TXFIR_MAIN_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_TXFIR_MAIN_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_TXFIR_MAIN_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_TXFIR_MAIN_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_TXFIR_MAIN_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_TXFIR_MAIN_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_TXFIR_MAIN_MASK 0xFF000000 +#define NVM_CFG1_GLOB_LANE3_TXFIR_MAIN_OFFSET 24 + + u32 eagle_txfir_post; /* 0x60 */ +#define NVM_CFG1_GLOB_LANE0_TXFIR_POST_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_TXFIR_POST_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_TXFIR_POST_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_TXFIR_POST_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_TXFIR_POST_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_TXFIR_POST_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_TXFIR_POST_MASK 0xFF000000 +#define NVM_CFG1_GLOB_LANE3_TXFIR_POST_OFFSET 24 + + u32 falcon_txfir_main; /* 0x64 */ +#define NVM_CFG1_GLOB_LANE0_TXFIR_MAIN_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_TXFIR_MAIN_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_TXFIR_MAIN_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_TXFIR_MAIN_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_TXFIR_MAIN_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_TXFIR_MAIN_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_TXFIR_MAIN_MASK 0xFF000000 +#define NVM_CFG1_GLOB_LANE3_TXFIR_MAIN_OFFSET 24 + + u32 falcon_txfir_post; /* 0x68 */ +#define NVM_CFG1_GLOB_LANE0_TXFIR_POST_MASK 0x000000FF +#define NVM_CFG1_GLOB_LANE0_TXFIR_POST_OFFSET 0 +#define NVM_CFG1_GLOB_LANE1_TXFIR_POST_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_LANE1_TXFIR_POST_OFFSET 8 +#define NVM_CFG1_GLOB_LANE2_TXFIR_POST_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_LANE2_TXFIR_POST_OFFSET 16 +#define NVM_CFG1_GLOB_LANE3_TXFIR_POST_MASK 0xFF000000 +#define NVM_CFG1_GLOB_LANE3_TXFIR_POST_OFFSET 24 + + u32 manufacture_ver; /* 0x6C */ +#define NVM_CFG1_GLOB_MANUF0_VER_MASK 0x0000003F +#define NVM_CFG1_GLOB_MANUF0_VER_OFFSET 0 +#define NVM_CFG1_GLOB_MANUF1_VER_MASK 0x00000FC0 +#define NVM_CFG1_GLOB_MANUF1_VER_OFFSET 6 +#define NVM_CFG1_GLOB_MANUF2_VER_MASK 0x0003F000 +#define NVM_CFG1_GLOB_MANUF2_VER_OFFSET 12 +#define NVM_CFG1_GLOB_MANUF3_VER_MASK 0x00FC0000 +#define NVM_CFG1_GLOB_MANUF3_VER_OFFSET 18 +#define NVM_CFG1_GLOB_MANUF4_VER_MASK 0x3F000000 +#define NVM_CFG1_GLOB_MANUF4_VER_OFFSET 24 + + u32 manufacture_time; /* 0x70 */ +#define NVM_CFG1_GLOB_MANUF0_TIME_MASK 0x0000003F +#define NVM_CFG1_GLOB_MANUF0_TIME_OFFSET 0 +#define NVM_CFG1_GLOB_MANUF1_TIME_MASK 0x00000FC0 +#define NVM_CFG1_GLOB_MANUF1_TIME_OFFSET 6 +#define NVM_CFG1_GLOB_MANUF2_TIME_MASK 0x0003F000 +#define NVM_CFG1_GLOB_MANUF2_TIME_OFFSET 12 + + u32 led_global_settings; /* 0x74 */ +#define NVM_CFG1_GLOB_LED_SWAP_0_MASK 0x0000000F +#define NVM_CFG1_GLOB_LED_SWAP_0_OFFSET 0 +#define NVM_CFG1_GLOB_LED_SWAP_1_MASK 0x000000F0 +#define NVM_CFG1_GLOB_LED_SWAP_1_OFFSET 4 +#define NVM_CFG1_GLOB_LED_SWAP_2_MASK 0x00000F00 +#define NVM_CFG1_GLOB_LED_SWAP_2_OFFSET 8 +#define NVM_CFG1_GLOB_LED_SWAP_3_MASK 0x0000F000 +#define NVM_CFG1_GLOB_LED_SWAP_3_OFFSET 12 + + u32 generic_cont1; /* 0x78 */ +#define NVM_CFG1_GLOB_AVS_DAC_CODE_MASK 0x000003FF +#define NVM_CFG1_GLOB_AVS_DAC_CODE_OFFSET 0 + + u32 mbi_version; /* 0x7C */ +#define 
NVM_CFG1_GLOB_MBI_VERSION_0_MASK 0x000000FF +#define NVM_CFG1_GLOB_MBI_VERSION_0_OFFSET 0 +#define NVM_CFG1_GLOB_MBI_VERSION_1_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_MBI_VERSION_1_OFFSET 8 +#define NVM_CFG1_GLOB_MBI_VERSION_2_MASK 0x00FF0000 +#define NVM_CFG1_GLOB_MBI_VERSION_2_OFFSET 16 + + u32 mbi_date; /* 0x80 */ + + u32 misc_sig; /* 0x84 */ + + /* Define the GPIO mapping to switch i2c mux */ +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO_0_MASK 0x000000FF +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO_0_OFFSET 0 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO_1_MASK 0x0000FF00 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO_1_OFFSET 8 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__NA 0x0 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO0 0x1 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO1 0x2 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO2 0x3 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO3 0x4 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO4 0x5 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO5 0x6 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO6 0x7 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO7 0x8 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO8 0x9 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO9 0xA +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO10 0xB +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO11 0xC +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO12 0xD +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO13 0xE +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO14 0xF +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO15 0x10 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO16 0x11 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO17 0x12 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO18 0x13 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO19 0x14 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO20 0x15 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO21 0x16 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO22 0x17 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO23 0x18 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO24 0x19 +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO25 0x1A +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO26 0x1B +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO27 0x1C +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO28 0x1D +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO29 0x1E +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO30 0x1F +#define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO31 0x20 + + u32 reserved[46]; /* 0x88 */ +}; + +struct nvm_cfg1_path { + u32 reserved[30]; /* 0x0 */ +}; + +struct nvm_cfg1_port { + u32 power_dissipated; /* 0x0 */ +#define NVM_CFG1_PORT_POWER_DIS_D0_MASK 0x000000FF +#define NVM_CFG1_PORT_POWER_DIS_D0_OFFSET 0 +#define NVM_CFG1_PORT_POWER_DIS_D1_MASK 0x0000FF00 +#define NVM_CFG1_PORT_POWER_DIS_D1_OFFSET 8 +#define NVM_CFG1_PORT_POWER_DIS_D2_MASK 0x00FF0000 +#define NVM_CFG1_PORT_POWER_DIS_D2_OFFSET 16 +#define NVM_CFG1_PORT_POWER_DIS_D3_MASK 0xFF000000 +#define NVM_CFG1_PORT_POWER_DIS_D3_OFFSET 24 + + u32 power_consumed; /* 0x4 */ +#define NVM_CFG1_PORT_POWER_CONS_D0_MASK 0x000000FF +#define NVM_CFG1_PORT_POWER_CONS_D0_OFFSET 0 +#define NVM_CFG1_PORT_POWER_CONS_D1_MASK 0x0000FF00 +#define NVM_CFG1_PORT_POWER_CONS_D1_OFFSET 8 +#define NVM_CFG1_PORT_POWER_CONS_D2_MASK 0x00FF0000 +#define NVM_CFG1_PORT_POWER_CONS_D2_OFFSET 16 +#define NVM_CFG1_PORT_POWER_CONS_D3_MASK 0xFF000000 +#define NVM_CFG1_PORT_POWER_CONS_D3_OFFSET 24 + + u32 generic_cont0; /* 0x8 */ +#define NVM_CFG1_PORT_LED_MODE_MASK 0x000000FF +#define NVM_CFG1_PORT_LED_MODE_OFFSET 0 +#define NVM_CFG1_PORT_LED_MODE_MAC1 0x0 +#define NVM_CFG1_PORT_LED_MODE_PHY1 0x1 +#define 
NVM_CFG1_PORT_LED_MODE_PHY2 0x2 +#define NVM_CFG1_PORT_LED_MODE_PHY3 0x3 +#define NVM_CFG1_PORT_LED_MODE_MAC2 0x4 +#define NVM_CFG1_PORT_LED_MODE_PHY4 0x5 +#define NVM_CFG1_PORT_LED_MODE_PHY5 0x6 +#define NVM_CFG1_PORT_LED_MODE_PHY6 0x7 +#define NVM_CFG1_PORT_LED_MODE_MAC3 0x8 +#define NVM_CFG1_PORT_LED_MODE_PHY7 0x9 +#define NVM_CFG1_PORT_LED_MODE_PHY8 0xA +#define NVM_CFG1_PORT_LED_MODE_PHY9 0xB +#define NVM_CFG1_PORT_LED_MODE_MAC4 0xC +#define NVM_CFG1_PORT_LED_MODE_PHY10 0xD +#define NVM_CFG1_PORT_LED_MODE_PHY11 0xE +#define NVM_CFG1_PORT_LED_MODE_PHY12 0xF +#define NVM_CFG1_PORT_ROCE_PRIORITY_MASK 0x0000FF00 +#define NVM_CFG1_PORT_ROCE_PRIORITY_OFFSET 8 +#define NVM_CFG1_PORT_DCBX_MODE_MASK 0x000F0000 +#define NVM_CFG1_PORT_DCBX_MODE_OFFSET 16 +#define NVM_CFG1_PORT_DCBX_MODE_DISABLED 0x0 +#define NVM_CFG1_PORT_DCBX_MODE_IEEE 0x1 +#define NVM_CFG1_PORT_DCBX_MODE_CEE 0x2 +#define NVM_CFG1_PORT_DCBX_MODE_DYNAMIC 0x3 + + u32 pcie_cfg; /* 0xC */ +#define NVM_CFG1_PORT_RESERVED15_MASK 0x00000007 +#define NVM_CFG1_PORT_RESERVED15_OFFSET 0 + + u32 features; /* 0x10 */ +#define NVM_CFG1_PORT_ENABLE_WOL_ON_ACPI_PATTERN_MASK 0x00000001 +#define NVM_CFG1_PORT_ENABLE_WOL_ON_ACPI_PATTERN_OFFSET 0 +#define NVM_CFG1_PORT_ENABLE_WOL_ON_ACPI_PATTERN_DISABLED 0x0 +#define NVM_CFG1_PORT_ENABLE_WOL_ON_ACPI_PATTERN_ENABLED 0x1 +#define NVM_CFG1_PORT_MAGIC_PACKET_WOL_MASK 0x00000002 +#define NVM_CFG1_PORT_MAGIC_PACKET_WOL_OFFSET 1 +#define NVM_CFG1_PORT_MAGIC_PACKET_WOL_DISABLED 0x0 +#define NVM_CFG1_PORT_MAGIC_PACKET_WOL_ENABLED 0x1 + + u32 speed_cap_mask; /* 0x14 */ +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK 0x0000FFFF +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_OFFSET 0 +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G 0x1 +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G 0x2 +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G 0x8 +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G 0x10 +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G 0x20 +#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_100G 0x40 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_MASK 0xFFFF0000 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_OFFSET 16 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_1G 0x1 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_10G 0x2 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_25G 0x8 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_40G 0x10 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_50G 0x20 +#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_100G 0x40 + + u32 link_settings; /* 0x18 */ +#define NVM_CFG1_PORT_DRV_LINK_SPEED_MASK 0x0000000F +#define NVM_CFG1_PORT_DRV_LINK_SPEED_OFFSET 0 +#define NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG 0x0 +#define NVM_CFG1_PORT_DRV_LINK_SPEED_1G 0x1 +#define NVM_CFG1_PORT_DRV_LINK_SPEED_10G 0x2 +#define NVM_CFG1_PORT_DRV_LINK_SPEED_25G 0x4 +#define NVM_CFG1_PORT_DRV_LINK_SPEED_40G 0x5 +#define NVM_CFG1_PORT_DRV_LINK_SPEED_50G 0x6 +#define NVM_CFG1_PORT_DRV_LINK_SPEED_100G 0x7 +#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK 0x00000070 +#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_OFFSET 4 +#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_AUTONEG 0x1 +#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_RX 0x2 +#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX 0x4 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_MASK 0x00000780 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_OFFSET 7 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_AUTONEG 0x0 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_1G 0x1 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_10G 0x2 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_25G 0x4 +#define 
NVM_CFG1_PORT_MFW_LINK_SPEED_40G 0x5 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_50G 0x6 +#define NVM_CFG1_PORT_MFW_LINK_SPEED_100G 0x7 +#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_MASK 0x00003800 +#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_OFFSET 11 +#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_AUTONEG 0x1 +#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_RX 0x2 +#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_TX 0x4 +#define NVM_CFG1_PORT_OPTIC_MODULE_VENDOR_ENFORCEMENT_MASK 0x00004000 +#define NVM_CFG1_PORT_OPTIC_MODULE_VENDOR_ENFORCEMENT_OFFSET 14 +#define NVM_CFG1_PORT_OPTIC_MODULE_VENDOR_ENFORCEMENT_DISABLED 0x0 +#define NVM_CFG1_PORT_OPTIC_MODULE_VENDOR_ENFORCEMENT_ENABLED 0x1 + + u32 phy_cfg; /* 0x1C */ +#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_MASK 0x0000FFFF +#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_OFFSET 0 +#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_HIGIG 0x1 +#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_SCRAMBLER 0x2 +#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_FIBER 0x4 +#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_DISABLE_CL72_AN 0x8 +#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_DISABLE_FEC_AN 0x10 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_MASK 0x00FF0000 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_OFFSET 16 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_BYPASS 0x0 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_KR 0x2 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_KR2 0x3 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_KR4 0x4 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XFI 0x8 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_SFI 0x9 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_1000X 0xB +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_SGMII 0xC +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLAUI 0xD +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CAUI 0xE +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLPPI 0xF +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CPPI 0x10 +#define NVM_CFG1_PORT_AN_MODE_MASK 0xFF000000 +#define NVM_CFG1_PORT_AN_MODE_OFFSET 24 +#define NVM_CFG1_PORT_AN_MODE_NONE 0x0 +#define NVM_CFG1_PORT_AN_MODE_CL73 0x1 +#define NVM_CFG1_PORT_AN_MODE_CL37 0x2 +#define NVM_CFG1_PORT_AN_MODE_CL73_BAM 0x3 +#define NVM_CFG1_PORT_AN_MODE_CL37_BAM 0x4 +#define NVM_CFG1_PORT_AN_MODE_HPAM 0x5 +#define NVM_CFG1_PORT_AN_MODE_SGMII 0x6 + + u32 mgmt_traffic; /* 0x20 */ +#define NVM_CFG1_PORT_RESERVED61_MASK 0x0000000F +#define NVM_CFG1_PORT_RESERVED61_OFFSET 0 +#define NVM_CFG1_PORT_RESERVED61_DISABLED 0x0 +#define NVM_CFG1_PORT_RESERVED61_NCSI_OVER_RMII 0x1 +#define NVM_CFG1_PORT_RESERVED61_NCSI_OVER_SMBUS 0x2 + + u32 ext_phy; /* 0x24 */ +#define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_MASK 0x000000FF +#define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_OFFSET 0 +#define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_NONE 0x0 +#define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_BCM84844 0x1 +#define NVM_CFG1_PORT_EXTERNAL_PHY_ADDRESS_MASK 0x0000FF00 +#define NVM_CFG1_PORT_EXTERNAL_PHY_ADDRESS_OFFSET 8 + + u32 mba_cfg1; /* 0x28 */ +#define NVM_CFG1_PORT_MBA_MASK 0x00000001 +#define NVM_CFG1_PORT_MBA_OFFSET 0 +#define NVM_CFG1_PORT_MBA_DISABLED 0x0 +#define NVM_CFG1_PORT_MBA_ENABLED 0x1 +#define NVM_CFG1_PORT_MBA_BOOT_TYPE_MASK 0x00000006 +#define NVM_CFG1_PORT_MBA_BOOT_TYPE_OFFSET 1 +#define NVM_CFG1_PORT_MBA_BOOT_TYPE_AUTO 0x0 +#define NVM_CFG1_PORT_MBA_BOOT_TYPE_BBS 0x1 +#define NVM_CFG1_PORT_MBA_BOOT_TYPE_INT18H 0x2 +#define NVM_CFG1_PORT_MBA_BOOT_TYPE_INT19H 0x3 +#define NVM_CFG1_PORT_MBA_DELAY_TIME_MASK 0x00000078 +#define NVM_CFG1_PORT_MBA_DELAY_TIME_OFFSET 3 +#define NVM_CFG1_PORT_MBA_SETUP_HOT_KEY_MASK 0x00000080 +#define NVM_CFG1_PORT_MBA_SETUP_HOT_KEY_OFFSET 7 +#define 
NVM_CFG1_PORT_MBA_SETUP_HOT_KEY_CTRL_S 0x0 +#define NVM_CFG1_PORT_MBA_SETUP_HOT_KEY_CTRL_B 0x1 +#define NVM_CFG1_PORT_MBA_HIDE_SETUP_PROMPT_MASK 0x00000100 +#define NVM_CFG1_PORT_MBA_HIDE_SETUP_PROMPT_OFFSET 8 +#define NVM_CFG1_PORT_MBA_HIDE_SETUP_PROMPT_DISABLED 0x0 +#define NVM_CFG1_PORT_MBA_HIDE_SETUP_PROMPT_ENABLED 0x1 +#define NVM_CFG1_PORT_RESERVED5_MASK 0x0001FE00 +#define NVM_CFG1_PORT_RESERVED5_OFFSET 9 +#define NVM_CFG1_PORT_RESERVED5_DISABLED 0x0 +#define NVM_CFG1_PORT_RESERVED5_2K 0x1 +#define NVM_CFG1_PORT_RESERVED5_4K 0x2 +#define NVM_CFG1_PORT_RESERVED5_8K 0x3 +#define NVM_CFG1_PORT_RESERVED5_16K 0x4 +#define NVM_CFG1_PORT_RESERVED5_32K 0x5 +#define NVM_CFG1_PORT_RESERVED5_64K 0x6 +#define NVM_CFG1_PORT_RESERVED5_128K 0x7 +#define NVM_CFG1_PORT_RESERVED5_256K 0x8 +#define NVM_CFG1_PORT_RESERVED5_512K 0x9 +#define NVM_CFG1_PORT_RESERVED5_1M 0xA +#define NVM_CFG1_PORT_RESERVED5_2M 0xB +#define NVM_CFG1_PORT_RESERVED5_4M 0xC +#define NVM_CFG1_PORT_RESERVED5_8M 0xD +#define NVM_CFG1_PORT_RESERVED5_16M 0xE +#define NVM_CFG1_PORT_RESERVED5_32M 0xF +#define NVM_CFG1_PORT_MBA_LINK_SPEED_MASK 0x001E0000 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_OFFSET 17 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_AUTONEG 0x0 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_1G 0x1 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_10G 0x2 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_25G 0x4 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_40G 0x5 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_50G 0x6 +#define NVM_CFG1_PORT_MBA_LINK_SPEED_100G 0x7 +#define NVM_CFG1_PORT_MBA_BOOT_RETRY_COUNT_MASK 0x00E00000 +#define NVM_CFG1_PORT_MBA_BOOT_RETRY_COUNT_OFFSET 21 + + u32 mba_cfg2; /* 0x2C */ +#define NVM_CFG1_PORT_MBA_VLAN_VALUE_MASK 0x0000FFFF +#define NVM_CFG1_PORT_MBA_VLAN_VALUE_OFFSET 0 +#define NVM_CFG1_PORT_MBA_VLAN_MASK 0x00010000 +#define NVM_CFG1_PORT_MBA_VLAN_OFFSET 16 + + u32 vf_cfg; /* 0x30 */ +#define NVM_CFG1_PORT_RESERVED8_MASK 0x0000FFFF +#define NVM_CFG1_PORT_RESERVED8_OFFSET 0 +#define NVM_CFG1_PORT_RESERVED6_MASK 0x000F0000 +#define NVM_CFG1_PORT_RESERVED6_OFFSET 16 +#define NVM_CFG1_PORT_RESERVED6_DISABLED 0x0 +#define NVM_CFG1_PORT_RESERVED6_4K 0x1 +#define NVM_CFG1_PORT_RESERVED6_8K 0x2 +#define NVM_CFG1_PORT_RESERVED6_16K 0x3 +#define NVM_CFG1_PORT_RESERVED6_32K 0x4 +#define NVM_CFG1_PORT_RESERVED6_64K 0x5 +#define NVM_CFG1_PORT_RESERVED6_128K 0x6 +#define NVM_CFG1_PORT_RESERVED6_256K 0x7 +#define NVM_CFG1_PORT_RESERVED6_512K 0x8 +#define NVM_CFG1_PORT_RESERVED6_1M 0x9 +#define NVM_CFG1_PORT_RESERVED6_2M 0xA +#define NVM_CFG1_PORT_RESERVED6_4M 0xB +#define NVM_CFG1_PORT_RESERVED6_8M 0xC +#define NVM_CFG1_PORT_RESERVED6_16M 0xD +#define NVM_CFG1_PORT_RESERVED6_32M 0xE +#define NVM_CFG1_PORT_RESERVED6_64M 0xF + + struct nvm_cfg_mac_address lldp_mac_address; /* 0x34 */ + + u32 led_port_settings; /* 0x3C */ +#define NVM_CFG1_PORT_LANE_LED_SPD_0_SEL_MASK 0x000000FF +#define NVM_CFG1_PORT_LANE_LED_SPD_0_SEL_OFFSET 0 +#define NVM_CFG1_PORT_LANE_LED_SPD_1_SEL_MASK 0x0000FF00 +#define NVM_CFG1_PORT_LANE_LED_SPD_1_SEL_OFFSET 8 +#define NVM_CFG1_PORT_LANE_LED_SPD_2_SEL_MASK 0x00FF0000 +#define NVM_CFG1_PORT_LANE_LED_SPD_2_SEL_OFFSET 16 +#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_1G 0x1 +#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_10G 0x2 +#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_25G 0x8 +#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_40G 0x10 +#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_50G 0x20 +#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_100G 0x40 + + u32 transceiver_00; /* 0x40 */ + + /* Define for mapping of transceiver signal module absent */ +#define 
NVM_CFG1_PORT_TRANS_MODULE_ABS_MASK 0x000000FF +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_OFFSET 0 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_NA 0x0 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO0 0x1 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO1 0x2 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO2 0x3 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO3 0x4 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO4 0x5 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO5 0x6 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO6 0x7 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO7 0x8 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO8 0x9 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO9 0xA +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO10 0xB +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO11 0xC +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO12 0xD +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO13 0xE +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO14 0xF +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO15 0x10 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO16 0x11 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO17 0x12 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO18 0x13 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO19 0x14 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO20 0x15 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO21 0x16 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO22 0x17 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO23 0x18 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO24 0x19 +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO25 0x1A +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO26 0x1B +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO27 0x1C +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO28 0x1D +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO29 0x1E +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO30 0x1F +#define NVM_CFG1_PORT_TRANS_MODULE_ABS_GPIO31 0x20 + /* Define the GPIO mux settings to switch i2c mux to this port */ +#define NVM_CFG1_PORT_I2C_MUX_SEL_VALUE_0_MASK 0x00000F00 +#define NVM_CFG1_PORT_I2C_MUX_SEL_VALUE_0_OFFSET 8 +#define NVM_CFG1_PORT_I2C_MUX_SEL_VALUE_1_MASK 0x0000F000 +#define NVM_CFG1_PORT_I2C_MUX_SEL_VALUE_1_OFFSET 12 + + u32 reserved[133]; /* 0x44 */ +}; + +struct nvm_cfg1_func { + struct nvm_cfg_mac_address mac_address; /* 0x0 */ + + u32 rsrv1; /* 0x8 */ +#define NVM_CFG1_FUNC_RESERVED1_MASK 0x0000FFFF +#define NVM_CFG1_FUNC_RESERVED1_OFFSET 0 +#define NVM_CFG1_FUNC_RESERVED2_MASK 0xFFFF0000 +#define NVM_CFG1_FUNC_RESERVED2_OFFSET 16 + + u32 rsrv2; /* 0xC */ +#define NVM_CFG1_FUNC_RESERVED3_MASK 0x0000FFFF +#define NVM_CFG1_FUNC_RESERVED3_OFFSET 0 +#define NVM_CFG1_FUNC_RESERVED4_MASK 0xFFFF0000 +#define NVM_CFG1_FUNC_RESERVED4_OFFSET 16 + + u32 device_id; /* 0x10 */ +#define NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_MASK 0x0000FFFF +#define NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_OFFSET 0 +#define NVM_CFG1_FUNC_VENDOR_DEVICE_ID_MASK 0xFFFF0000 +#define NVM_CFG1_FUNC_VENDOR_DEVICE_ID_OFFSET 16 + + u32 cmn_cfg; /* 0x14 */ +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_MASK 0x00000007 +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_OFFSET 0 +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_PXE 0x0 +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_RPL 0x1 +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_BOOTP 0x2 +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_ISCSI_BOOT 0x3 +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_FCOE_BOOT 0x4 +#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_NONE 0x7 +#define NVM_CFG1_FUNC_VF_PCI_DEVICE_ID_MASK 0x0007FFF8 +#define NVM_CFG1_FUNC_VF_PCI_DEVICE_ID_OFFSET 3 +#define NVM_CFG1_FUNC_PERSONALITY_MASK 0x00780000 +#define NVM_CFG1_FUNC_PERSONALITY_OFFSET 19 +#define NVM_CFG1_FUNC_PERSONALITY_ETHERNET 0x0 +#define 
NVM_CFG1_FUNC_PERSONALITY_ISCSI 0x1 +#define NVM_CFG1_FUNC_PERSONALITY_FCOE 0x2 +#define NVM_CFG1_FUNC_PERSONALITY_ROCE 0x3 +#define NVM_CFG1_FUNC_BANDWIDTH_WEIGHT_MASK 0x7F800000 +#define NVM_CFG1_FUNC_BANDWIDTH_WEIGHT_OFFSET 23 +#define NVM_CFG1_FUNC_PAUSE_ON_HOST_RING_MASK 0x80000000 +#define NVM_CFG1_FUNC_PAUSE_ON_HOST_RING_OFFSET 31 +#define NVM_CFG1_FUNC_PAUSE_ON_HOST_RING_DISABLED 0x0 +#define NVM_CFG1_FUNC_PAUSE_ON_HOST_RING_ENABLED 0x1 + + u32 pci_cfg; /* 0x18 */ +#define NVM_CFG1_FUNC_NUMBER_OF_VFS_PER_PF_MASK 0x0000007F +#define NVM_CFG1_FUNC_NUMBER_OF_VFS_PER_PF_OFFSET 0 +#define NVM_CFG1_FUNC_RESERVESD12_MASK 0x00003F80 +#define NVM_CFG1_FUNC_RESERVESD12_OFFSET 7 +#define NVM_CFG1_FUNC_BAR1_SIZE_MASK 0x0003C000 +#define NVM_CFG1_FUNC_BAR1_SIZE_OFFSET 14 +#define NVM_CFG1_FUNC_BAR1_SIZE_DISABLED 0x0 +#define NVM_CFG1_FUNC_BAR1_SIZE_64K 0x1 +#define NVM_CFG1_FUNC_BAR1_SIZE_128K 0x2 +#define NVM_CFG1_FUNC_BAR1_SIZE_256K 0x3 +#define NVM_CFG1_FUNC_BAR1_SIZE_512K 0x4 +#define NVM_CFG1_FUNC_BAR1_SIZE_1M 0x5 +#define NVM_CFG1_FUNC_BAR1_SIZE_2M 0x6 +#define NVM_CFG1_FUNC_BAR1_SIZE_4M 0x7 +#define NVM_CFG1_FUNC_BAR1_SIZE_8M 0x8 +#define NVM_CFG1_FUNC_BAR1_SIZE_16M 0x9 +#define NVM_CFG1_FUNC_BAR1_SIZE_32M 0xA +#define NVM_CFG1_FUNC_BAR1_SIZE_64M 0xB +#define NVM_CFG1_FUNC_BAR1_SIZE_128M 0xC +#define NVM_CFG1_FUNC_BAR1_SIZE_256M 0xD +#define NVM_CFG1_FUNC_BAR1_SIZE_512M 0xE +#define NVM_CFG1_FUNC_BAR1_SIZE_1G 0xF +#define NVM_CFG1_FUNC_MAX_BANDWIDTH_MASK 0x03FC0000 +#define NVM_CFG1_FUNC_MAX_BANDWIDTH_OFFSET 18 + + struct nvm_cfg_mac_address fcoe_node_wwn_mac_addr; /* 0x1C */ + + struct nvm_cfg_mac_address fcoe_port_wwn_mac_addr; /* 0x24 */ + + u32 reserved[9]; /* 0x2C */ +}; + +struct nvm_cfg1 { + struct nvm_cfg1_glob glob; /* 0x0 */ + + struct nvm_cfg1_path path[MCP_GLOB_PATH_MAX]; /* 0x140 */ + + struct nvm_cfg1_port port[MCP_GLOB_PORT_MAX]; /* 0x230 */ + + struct nvm_cfg1_func func[MCP_GLOB_FUNC_MAX]; /* 0xB90 */ +}; + +/****************************************** +* nvm_cfg structs +******************************************/ + +enum nvm_cfg_sections { + NVM_CFG_SECTION_NVM_CFG1, + NVM_CFG_SECTION_MAX +}; + +struct nvm_cfg { + u32 num_sections; + u32 sections_offset[NVM_CFG_SECTION_MAX]; + struct nvm_cfg1 cfg1; +}; + +#define PORT_0 0 +#define PORT_1 1 +#define PORT_2 2 +#define PORT_3 3 + +extern struct spad_layout g_spad; + +#define MCP_SPAD_SIZE 0x00028000 /* 160 KB */ + +#define SPAD_OFFSET(addr) (((u32)addr - (u32)CPU_SPAD_BASE)) + +#define TO_OFFSIZE(_offset, _size) \ + (u32)((((u32)(_offset) >> 2) << OFFSIZE_OFFSET_SHIFT) | \ + (((u32)(_size) >> 2) << OFFSIZE_SIZE_SHIFT)) + +enum spad_sections { + SPAD_SECTION_TRACE, + SPAD_SECTION_NVM_CFG, + SPAD_SECTION_PUBLIC, + SPAD_SECTION_PRIVATE, + SPAD_SECTION_MAX +}; + +struct spad_layout { + struct nvm_cfg nvm_cfg; + struct mcp_public_data public_data; +}; + +#define CRC_MAGIC_VALUE 0xDEBB20E3 +#define CRC32_POLYNOMIAL 0xEDB88320 +#define NVM_CRC_SIZE (sizeof(u32)) + +enum nvm_sw_arbitrator { + NVM_SW_ARB_HOST, + NVM_SW_ARB_MCP, + NVM_SW_ARB_UART, + NVM_SW_ARB_RESERVED +}; + +/**************************************************************************** +* Boot Strap Region * +****************************************************************************/ +struct legacy_bootstrap_region { + u32 magic_value; +#define NVM_MAGIC_VALUE 0x669955aa + u32 sram_start_addr; + u32 code_len; /* boot code length (in dwords) */ + u32 code_start_addr; + u32 crc; /* 32-bit CRC */ +}; + 
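The nvm_cfg bit-fields above all follow a single convention: each field has a *_MASK/*_OFFSET pair, and the named values are defined against the shifted-down field, so a consumer extracts a field as (dword & MASK) >> OFFSET. The minimal sketch below illustrates that convention; it is not part of the patch, and nvm_get_field() and nvm_port_dcbx_is_ieee() are hypothetical helper names used only for illustration.

/* Hedged sketch (not from this patch): decode a MASK/OFFSET-style field
 * from an NVM config dword. Each mask already covers the whole field, so
 * masking before the shift is sufficient.
 */
static inline u32 nvm_get_field(u32 val, u32 mask, u32 offset)
{
	return (val & mask) >> offset;
}

/* Usage example: check whether a port's DCBX mode is IEEE, using the
 * generic_cont0 defines from this header.
 */
static bool nvm_port_dcbx_is_ieee(const struct nvm_cfg1_port *port)
{
	u32 mode = nvm_get_field(port->generic_cont0,
				 NVM_CFG1_PORT_DCBX_MODE_MASK,
				 NVM_CFG1_PORT_DCBX_MODE_OFFSET);

	return mode == NVM_CFG1_PORT_DCBX_MODE_IEEE;
}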
+/**************************************************************************** +* Directories Region * +****************************************************************************/ +struct nvm_code_entry { + u32 image_type; /* Image type */ + u32 nvm_start_addr; /* NVM address of the image */ + u32 len; /* Include CRC */ + u32 sram_start_addr; + u32 sram_run_addr; /* Relevant in case of MIM only */ +}; + +enum nvm_image_type { + NVM_TYPE_TIM1 = 0x01, + NVM_TYPE_TIM2 = 0x02, + NVM_TYPE_MIM1 = 0x03, + NVM_TYPE_MIM2 = 0x04, + NVM_TYPE_MBA = 0x05, + NVM_TYPE_MODULES_PN = 0x06, + NVM_TYPE_VPD = 0x07, + NVM_TYPE_MFW_TRACE1 = 0x08, + NVM_TYPE_MFW_TRACE2 = 0x09, + NVM_TYPE_NVM_CFG1 = 0x0a, + NVM_TYPE_L2B = 0x0b, + NVM_TYPE_DIR1 = 0x0c, + NVM_TYPE_EAGLE_FW1 = 0x0d, + NVM_TYPE_FALCON_FW1 = 0x0e, + NVM_TYPE_PCIE_FW1 = 0x0f, + NVM_TYPE_HW_SET = 0x10, + NVM_TYPE_LIM = 0x11, + NVM_TYPE_AVS_FW1 = 0x12, + NVM_TYPE_DIR2 = 0x13, + NVM_TYPE_CCM = 0x14, + NVM_TYPE_EAGLE_FW2 = 0x15, + NVM_TYPE_FALCON_FW2 = 0x16, + NVM_TYPE_PCIE_FW2 = 0x17, + NVM_TYPE_AVS_FW2 = 0x18, + + NVM_TYPE_MAX, +}; + +#define MAX_NVM_DIR_ENTRIES 200 + +struct nvm_dir { + s32 seq; +#define NVM_DIR_NEXT_MFW_MASK 0x00000001 +#define NVM_DIR_SEQ_MASK 0xfffffffe +#define NVM_DIR_NEXT_MFW(seq) ((seq) & NVM_DIR_NEXT_MFW_MASK) + +#define IS_DIR_SEQ_VALID(seq) ((seq & NVM_DIR_SEQ_MASK) != NVM_DIR_SEQ_MASK) + + u32 num_images; + u32 rsrv; + struct nvm_code_entry code[1]; /* Up to MAX_NVM_DIR_ENTRIES */ +}; + +#define NVM_DIR_SIZE(_num_images) (sizeof(struct nvm_dir) + \ + (_num_images - \ + 1) * sizeof(struct nvm_code_entry) + \ + NVM_CRC_SIZE) + +struct nvm_vpd_image { + u32 format_revision; +#define VPD_IMAGE_VERSION 1 + + /* This array length depends on the number of VPD fields */ + u8 vpd_data[1]; +}; + +/**************************************************************************** +* NVRAM FULL MAP * +****************************************************************************/ +#define DIR_ID_1 (0) +#define DIR_ID_2 (1) +#define MAX_DIR_IDS (2) + +#define MFW_BUNDLE_1 (0) +#define MFW_BUNDLE_2 (1) +#define MAX_MFW_BUNDLES (2) + +#define FLASH_PAGE_SIZE 0x1000 +#define NVM_DIR_MAX_SIZE (FLASH_PAGE_SIZE) /* 4Kb */ +#define ASIC_MIM_MAX_SIZE (300 * FLASH_PAGE_SIZE) /* 1.2Mb */ +#define FPGA_MIM_MAX_SIZE (25 * FLASH_PAGE_SIZE) /* 60Kb */ + +#define LIM_MAX_SIZE ((2 * \ + FLASH_PAGE_SIZE) - \ + sizeof(struct legacy_bootstrap_region) - \ + NVM_RSV_SIZE) +#define LIM_OFFSET (NVM_OFFSET(lim_image)) +#define NVM_RSV_SIZE (44) +#define MIM_MAX_SIZE(is_asic) ((is_asic) ? ASIC_MIM_MAX_SIZE : \ + FPGA_MIM_MAX_SIZE) +#define MIM_OFFSET(idx, is_asic) (NVM_OFFSET(dir[MAX_MFW_BUNDLES]) + \ + ((idx == \ + NVM_TYPE_MIM2) ? 
MIM_MAX_SIZE(is_asic) : 0)) +#define NVM_FIXED_AREA_SIZE(is_asic) (sizeof(struct nvm_image) + \ + MIM_MAX_SIZE(is_asic) * 2) + +union nvm_dir_union { + struct nvm_dir dir; + u8 page[FLASH_PAGE_SIZE]; +}; + +/* Address + * +-------------------+ 0x000000 + * | Bootstrap: | + * | magic_number | + * | sram_start_addr | + * | code_len | + * | code_start_addr | + * | crc | + * +-------------------+ 0x000014 + * | rsrv | + * +-------------------+ 0x000040 + * | LIM | + * +-------------------+ 0x002000 + * | Dir1 | + * +-------------------+ 0x003000 + * | Dir2 | + * +-------------------+ 0x004000 + * | MIM1 | + * +-------------------+ 0x130000 + * | MIM2 | + * +-------------------+ 0x25C000 + * | Rest Images: | + * | TIM1/2 | + * | MFW_TRACE1/2 | + * | Eagle/Falcon FW | + * | PCIE/AVS FW | + * | MBA/CCM/L2B | + * | VPD | + * | optic_modules | + * | ... | + * +-------------------+ 0x400000 + */ +struct nvm_image { +/*********** !!! FIXED SECTIONS !!! DO NOT MODIFY !!! **********************/ + /* NVM Offset (size) */ + struct legacy_bootstrap_region bootstrap; + u8 rsrv[NVM_RSV_SIZE]; + u8 lim_image[LIM_MAX_SIZE]; + union nvm_dir_union dir[MAX_MFW_BUNDLES]; + + /* MIM1_IMAGE 0x004000 (0x12c000) */ + /* MIM2_IMAGE 0x130000 (0x12c000) */ +/*********** !!! FIXED SECTIONS !!! DO NOT MODIFY !!! **********************/ +}; /* 0x134 */ + +#define NVM_OFFSET(f) ((u32_t)((int_ptr_t)(&(((struct nvm_image *)0)->f)))) + +struct hw_set_info { + u32 reg_type; +#define GRC_REG_TYPE 1 +#define PHY_REG_TYPE 2 +#define PCI_REG_TYPE 4 + + u32 bank_num; + u32 pf_num; + u32 operation; +#define READ_OP 1 +#define WRITE_OP 2 +#define RMW_SET_OP 3 +#define RMW_CLR_OP 4 + + u32 reg_addr; + u32 reg_data; + + u32 reset_type; +#define POR_RESET_TYPE BIT(0) +#define HARD_RESET_TYPE BIT(1) +#define CORE_RESET_TYPE BIT(2) +#define MCP_RESET_TYPE BIT(3) +#define PERSET_ASSERT BIT(4) +#define PERSET_DEASSERT BIT(5) +}; + +struct hw_set_image { + u32 format_version; +#define HW_SET_IMAGE_VERSION 1 + u32 no_hw_sets; + + /* This array length depends on the no_hw_sets */ + struct hw_set_info hw_sets[1]; +}; + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c new file mode 100644 index 000000000000..ffa99273b353 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -0,0 +1,776 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_reg_addr.h" + +#define QED_BAR_ACQUIRE_TIMEOUT 1000 + +/* Invalid values */ +#define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1)) + +struct qed_ptt { + struct list_head list_entry; + unsigned int idx; + struct pxp_ptt_entry pxp; +}; + +struct qed_ptt_pool { + struct list_head free_list; + spinlock_t lock; /* ptt synchronized access */ + struct qed_ptt ptts[PXP_EXTERNAL_BAR_PF_WINDOW_NUM]; +}; + +int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), + GFP_ATOMIC); + int i; + + if (!p_pool) + return -ENOMEM; + + INIT_LIST_HEAD(&p_pool->free_list); + for (i = 0; i < PXP_EXTERNAL_BAR_PF_WINDOW_NUM; i++) { + p_pool->ptts[i].idx = i; + p_pool->ptts[i].pxp.offset = QED_BAR_INVALID_OFFSET; + p_pool->ptts[i].pxp.pretend.control = 0; + if (i >= RESERVED_PTT_MAX) + list_add(&p_pool->ptts[i].list_entry, + &p_pool->free_list); + } + + p_hwfn->p_ptt_pool = p_pool; + spin_lock_init(&p_pool->lock); + + return 0; +} + +void qed_ptt_invalidate(struct qed_hwfn *p_hwfn) +{ + struct qed_ptt *p_ptt; + int i; + + for (i = 0; i < PXP_EXTERNAL_BAR_PF_WINDOW_NUM; i++) { + p_ptt = &p_hwfn->p_ptt_pool->ptts[i]; + p_ptt->pxp.offset = QED_BAR_INVALID_OFFSET; + } +} + +void qed_ptt_pool_free(struct qed_hwfn *p_hwfn) +{ + kfree(p_hwfn->p_ptt_pool); + p_hwfn->p_ptt_pool = NULL; +} + +struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn) +{ + struct qed_ptt *p_ptt; + unsigned int i; + + /* Take the free PTT from the list */ + for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) { + spin_lock_bh(&p_hwfn->p_ptt_pool->lock); + + if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) { + p_ptt = list_first_entry(&p_hwfn->p_ptt_pool->free_list, + struct qed_ptt, list_entry); + list_del(&p_ptt->list_entry); + + spin_unlock_bh(&p_hwfn->p_ptt_pool->lock); + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "allocated ptt %d\n", p_ptt->idx); + return p_ptt; + } + + spin_unlock_bh(&p_hwfn->p_ptt_pool->lock); + usleep_range(1000, 2000); + } + + DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n"); + return NULL; +} + +void qed_ptt_release(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + spin_lock_bh(&p_hwfn->p_ptt_pool->lock); + list_add(&p_ptt->list_entry, &p_hwfn->p_ptt_pool->free_list); + spin_unlock_bh(&p_hwfn->p_ptt_pool->lock); +} + +u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + /* The HW is using DWORDS and we need to translate it to Bytes */ + return le32_to_cpu(p_ptt->pxp.offset) << 2; +} + +static u32 qed_ptt_config_addr(struct qed_ptt *p_ptt) +{ + return PXP_PF_WINDOW_ADMIN_PER_PF_START + + p_ptt->idx * sizeof(struct pxp_ptt_entry); +} + +u32 qed_ptt_get_bar_addr(struct qed_ptt *p_ptt) +{ + return PXP_EXTERNAL_BAR_PF_WINDOW_START + + p_ptt->idx * PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE; +} + +void qed_ptt_set_win(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 new_hw_addr) +{ + u32 prev_hw_addr; + + prev_hw_addr = qed_ptt_get_hw_addr(p_hwfn, p_ptt); + + if (new_hw_addr == prev_hw_addr) + return; + + /* Update PTT entry in admin window */ + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "Updating PTT entry %d to offset 0x%x\n", + p_ptt->idx, new_hw_addr); + + /* The HW is using DWORDS and the address is in Bytes */ + p_ptt->pxp.offset = cpu_to_le32(new_hw_addr >> 2); + + REG_WR(p_hwfn, + qed_ptt_config_addr(p_ptt) + + offsetof(struct 
pxp_ptt_entry, offset), + le32_to_cpu(p_ptt->pxp.offset)); +} + +static u32 qed_set_ptt(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 hw_addr) +{ + u32 win_hw_addr = qed_ptt_get_hw_addr(p_hwfn, p_ptt); + u32 offset; + + offset = hw_addr - win_hw_addr; + + /* Verify the address is within the window */ + if (hw_addr < win_hw_addr || + offset >= PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE) { + qed_ptt_set_win(p_hwfn, p_ptt, hw_addr); + offset = 0; + } + + return qed_ptt_get_bar_addr(p_ptt) + offset; +} + +struct qed_ptt *qed_get_reserved_ptt(struct qed_hwfn *p_hwfn, + enum reserved_ptts ptt_idx) +{ + if (ptt_idx >= RESERVED_PTT_MAX) { + DP_NOTICE(p_hwfn, + "Requested PTT %d is out of range\n", ptt_idx); + return NULL; + } + + return &p_hwfn->p_ptt_pool->ptts[ptt_idx]; +} + +void qed_wr(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 hw_addr, u32 val) +{ + u32 bar_addr = qed_set_ptt(p_hwfn, p_ptt, hw_addr); + + REG_WR(p_hwfn, bar_addr, val); + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "bar_addr 0x%x, hw_addr 0x%x, val 0x%x\n", + bar_addr, hw_addr, val); +} + +u32 qed_rd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 hw_addr) +{ + u32 bar_addr = qed_set_ptt(p_hwfn, p_ptt, hw_addr); + u32 val = REG_RD(p_hwfn, bar_addr); + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "bar_addr 0x%x, hw_addr 0x%x, val 0x%x\n", + bar_addr, hw_addr, val); + + return val; +} + +static void qed_memcpy_hw(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + void *addr, + u32 hw_addr, + size_t n, + bool to_device) +{ + u32 dw_count, *host_addr, hw_offset; + size_t quota, done = 0; + u32 __iomem *reg_addr; + + while (done < n) { + quota = min_t(size_t, n - done, + PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE); + + qed_ptt_set_win(p_hwfn, p_ptt, hw_addr + done); + hw_offset = qed_ptt_get_bar_addr(p_ptt); + + dw_count = quota / 4; + host_addr = (u32 *)((u8 *)addr + done); + reg_addr = (u32 __iomem *)REG_ADDR(p_hwfn, hw_offset); + if (to_device) + while (dw_count--) + DIRECT_REG_WR(reg_addr++, *host_addr++); + else + while (dw_count--) + *host_addr++ = DIRECT_REG_RD(reg_addr++); + + done += quota; + } +} + +void qed_memcpy_from(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + void *dest, u32 hw_addr, size_t n) +{ + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "hw_addr 0x%x, dest %p hw_addr 0x%x, size %lu\n", + hw_addr, dest, hw_addr, (unsigned long)n); + + qed_memcpy_hw(p_hwfn, p_ptt, dest, hw_addr, n, false); +} + +void qed_memcpy_to(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 hw_addr, void *src, size_t n) +{ + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "hw_addr 0x%x, hw_addr 0x%x, src %p size %lu\n", + hw_addr, hw_addr, src, (unsigned long)n); + + qed_memcpy_hw(p_hwfn, p_ptt, src, hw_addr, n, true); +} + +void qed_fid_pretend(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 fid) +{ + u16 control = 0; + + SET_FIELD(control, PXP_PRETEND_CMD_IS_CONCRETE, 1); + SET_FIELD(control, PXP_PRETEND_CMD_PRETEND_FUNCTION, 1); + + /* Every pretend undoes previous pretends, including + * previous port pretend. 
+ */ + SET_FIELD(control, PXP_PRETEND_CMD_PORT, 0); + SET_FIELD(control, PXP_PRETEND_CMD_USE_PORT, 0); + SET_FIELD(control, PXP_PRETEND_CMD_PRETEND_PORT, 1); + + if (!GET_FIELD(fid, PXP_CONCRETE_FID_VFVALID)) + fid = GET_FIELD(fid, PXP_CONCRETE_FID_PFID); + + p_ptt->pxp.pretend.control = cpu_to_le16(control); + p_ptt->pxp.pretend.fid.concrete_fid.fid = cpu_to_le16(fid); + + REG_WR(p_hwfn, + qed_ptt_config_addr(p_ptt) + + offsetof(struct pxp_ptt_entry, pretend), + *(u32 *)&p_ptt->pxp.pretend); +} + +void qed_port_pretend(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 port_id) +{ + u16 control = 0; + + SET_FIELD(control, PXP_PRETEND_CMD_PORT, port_id); + SET_FIELD(control, PXP_PRETEND_CMD_USE_PORT, 1); + SET_FIELD(control, PXP_PRETEND_CMD_PRETEND_PORT, 1); + + p_ptt->pxp.pretend.control = cpu_to_le16(control); + + REG_WR(p_hwfn, + qed_ptt_config_addr(p_ptt) + + offsetof(struct pxp_ptt_entry, pretend), + *(u32 *)&p_ptt->pxp.pretend); +} + +void qed_port_unpretend(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u16 control = 0; + + SET_FIELD(control, PXP_PRETEND_CMD_PORT, 0); + SET_FIELD(control, PXP_PRETEND_CMD_USE_PORT, 0); + SET_FIELD(control, PXP_PRETEND_CMD_PRETEND_PORT, 1); + + p_ptt->pxp.pretend.control = cpu_to_le16(control); + + REG_WR(p_hwfn, + qed_ptt_config_addr(p_ptt) + + offsetof(struct pxp_ptt_entry, pretend), + *(u32 *)&p_ptt->pxp.pretend); +} + +/* DMAE */ +static void qed_dmae_opcode(struct qed_hwfn *p_hwfn, + const u8 is_src_type_grc, + const u8 is_dst_type_grc, + struct qed_dmae_params *p_params) +{ + u32 opcode = 0; + u16 opcodeB = 0; + + /* Whether the source is the PCIe or the GRC. + * 0- The source is the PCIe + * 1- The source is the GRC. + */ + opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC + : DMAE_CMD_SRC_MASK_PCIE) << + DMAE_CMD_SRC_SHIFT; + opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_SRC_PF_ID_MASK) << + DMAE_CMD_SRC_PF_ID_SHIFT); + + /* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */ + opcode |= (is_dst_type_grc ? 
DMAE_CMD_DST_MASK_GRC + : DMAE_CMD_DST_MASK_PCIE) << + DMAE_CMD_DST_SHIFT; + opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_DST_PF_ID_MASK) << + DMAE_CMD_DST_PF_ID_SHIFT); + + /* Whether to write a completion word to the completion destination: + * 0-Do not write a completion word + * 1-Write the completion word + */ + opcode |= (DMAE_CMD_COMP_WORD_EN_MASK << DMAE_CMD_COMP_WORD_EN_SHIFT); + opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK << + DMAE_CMD_SRC_ADDR_RESET_SHIFT); + + if (p_params->flags & QED_DMAE_FLAG_COMPLETION_DST) + opcode |= (1 << DMAE_CMD_COMP_FUNC_SHIFT); + + opcode |= (DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT); + + opcode |= ((p_hwfn->port_id) << DMAE_CMD_PORT_ID_SHIFT); + + /* reset source address in next go */ + opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK << + DMAE_CMD_SRC_ADDR_RESET_SHIFT); + + /* reset dest address in next go */ + opcode |= (DMAE_CMD_DST_ADDR_RESET_MASK << + DMAE_CMD_DST_ADDR_RESET_SHIFT); + + opcodeB |= (DMAE_CMD_SRC_VF_ID_MASK << + DMAE_CMD_SRC_VF_ID_SHIFT); + + opcodeB |= (DMAE_CMD_DST_VF_ID_MASK << + DMAE_CMD_DST_VF_ID_SHIFT); + + p_hwfn->dmae_info.p_dmae_cmd->opcode = cpu_to_le32(opcode); + p_hwfn->dmae_info.p_dmae_cmd->opcode_b = cpu_to_le16(opcodeB); +} + +u32 qed_dmae_idx_to_go_cmd(u8 idx) +{ + /* All the DMAE 'go' registers form an array in internal memory */ + return DMAE_REG_GO_C0 + (idx << 2); +} + +static int +qed_dmae_post_command(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dmae_cmd *command = p_hwfn->dmae_info.p_dmae_cmd; + u8 idx_cmd = p_hwfn->dmae_info.channel, i; + int qed_status = 0; + + /* verify address is not NULL */ + if ((((command->dst_addr_lo == 0) && (command->dst_addr_hi == 0)) || + ((command->src_addr_lo == 0) && (command->src_addr_hi == 0)))) { + DP_NOTICE(p_hwfn, + "source or destination address 0 idx_cmd=%d\n" + "opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n", + idx_cmd, + le32_to_cpu(command->opcode), + le16_to_cpu(command->opcode_b), + le16_to_cpu(command->length), + le32_to_cpu(command->src_addr_hi), + le32_to_cpu(command->src_addr_lo), + le32_to_cpu(command->dst_addr_hi), + le32_to_cpu(command->dst_addr_lo)); + + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, + NETIF_MSG_HW, + "Posting DMAE command [idx %d]: opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n", + idx_cmd, + le32_to_cpu(command->opcode), + le16_to_cpu(command->opcode_b), + le16_to_cpu(command->length), + le32_to_cpu(command->src_addr_hi), + le32_to_cpu(command->src_addr_lo), + le32_to_cpu(command->dst_addr_hi), + le32_to_cpu(command->dst_addr_lo)); + + /* Copy the command to DMAE - need to do it before every call + * for source/dest address no reset. + * The first 9 DWs are the command registers, the 10th DW is the + * GO register, and the rest are result registers + * (which are read only by the client). + */ + for (i = 0; i < DMAE_CMD_SIZE; i++) { + u32 data = (i < DMAE_CMD_SIZE_TO_FILL) ? 
+ *(((u32 *)command) + i) : 0; + + qed_wr(p_hwfn, p_ptt, + DMAE_REG_CMD_MEM + + (idx_cmd * DMAE_CMD_SIZE * sizeof(u32)) + + (i * sizeof(u32)), data); + } + + qed_wr(p_hwfn, p_ptt, + qed_dmae_idx_to_go_cmd(idx_cmd), + DMAE_GO_VALUE); + + return qed_status; +} + +int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn) +{ + dma_addr_t *p_addr = &p_hwfn->dmae_info.completion_word_phys_addr; + struct dmae_cmd **p_cmd = &p_hwfn->dmae_info.p_dmae_cmd; + u32 **p_buff = &p_hwfn->dmae_info.p_intermediate_buffer; + u32 **p_comp = &p_hwfn->dmae_info.p_completion_word; + + *p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(u32), + p_addr, + GFP_KERNEL); + if (!*p_comp) { + DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n"); + goto err; + } + + p_addr = &p_hwfn->dmae_info.dmae_cmd_phys_addr; + *p_cmd = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct dmae_cmd), + p_addr, GFP_KERNEL); + if (!*p_cmd) { + DP_NOTICE(p_hwfn, "Failed to allocate `struct dmae_cmd'\n"); + goto err; + } + + p_addr = &p_hwfn->dmae_info.intermediate_buffer_phys_addr; + *p_buff = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(u32) * DMAE_MAX_RW_SIZE, + p_addr, GFP_KERNEL); + if (!*p_buff) { + DP_NOTICE(p_hwfn, "Failed to allocate `intermediate_buffer'\n"); + goto err; + } + + p_hwfn->dmae_info.channel = p_hwfn->rel_pf_id; + + return 0; +err: + qed_dmae_info_free(p_hwfn); + return -ENOMEM; +} + +void qed_dmae_info_free(struct qed_hwfn *p_hwfn) +{ + dma_addr_t p_phys; + + /* Just make sure no one is in the middle */ + mutex_lock(&p_hwfn->dmae_info.mutex); + + if (p_hwfn->dmae_info.p_completion_word) { + p_phys = p_hwfn->dmae_info.completion_word_phys_addr; + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(u32), + p_hwfn->dmae_info.p_completion_word, + p_phys); + p_hwfn->dmae_info.p_completion_word = NULL; + } + + if (p_hwfn->dmae_info.p_dmae_cmd) { + p_phys = p_hwfn->dmae_info.dmae_cmd_phys_addr; + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct dmae_cmd), + p_hwfn->dmae_info.p_dmae_cmd, + p_phys); + p_hwfn->dmae_info.p_dmae_cmd = NULL; + } + + if (p_hwfn->dmae_info.p_intermediate_buffer) { + p_phys = p_hwfn->dmae_info.intermediate_buffer_phys_addr; + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(u32) * DMAE_MAX_RW_SIZE, + p_hwfn->dmae_info.p_intermediate_buffer, + p_phys); + p_hwfn->dmae_info.p_intermediate_buffer = NULL; + } + + mutex_unlock(&p_hwfn->dmae_info.mutex); +} + +static int qed_dmae_operation_wait(struct qed_hwfn *p_hwfn) +{ + u32 wait_cnt = 0; + u32 wait_cnt_limit = 10000; + + int qed_status = 0; + + barrier(); + while (*p_hwfn->dmae_info.p_completion_word != DMAE_COMPLETION_VAL) { + udelay(DMAE_MIN_WAIT_TIME); + if (++wait_cnt > wait_cnt_limit) { + DP_NOTICE(p_hwfn->cdev, + "Timed-out waiting for operation to complete. 
Completion word is 0x%08x expected 0x%08x.\n", + *p_hwfn->dmae_info.p_completion_word, + DMAE_COMPLETION_VAL); + qed_status = -EBUSY; + break; + } + + /* to sync the completion_word since we are not + * using the volatile keyword for p_completion_word + */ + barrier(); + } + + if (qed_status == 0) + *p_hwfn->dmae_info.p_completion_word = 0; + + return qed_status; +} + +static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u64 src_addr, + u64 dst_addr, + u8 src_type, + u8 dst_type, + u32 length) +{ + dma_addr_t phys = p_hwfn->dmae_info.intermediate_buffer_phys_addr; + struct dmae_cmd *cmd = p_hwfn->dmae_info.p_dmae_cmd; + int qed_status = 0; + + switch (src_type) { + case QED_DMAE_ADDRESS_GRC: + case QED_DMAE_ADDRESS_HOST_PHYS: + cmd->src_addr_hi = cpu_to_le32(upper_32_bits(src_addr)); + cmd->src_addr_lo = cpu_to_le32(lower_32_bits(src_addr)); + break; + /* for virtual source addresses we use the intermediate buffer. */ + case QED_DMAE_ADDRESS_HOST_VIRT: + cmd->src_addr_hi = cpu_to_le32(upper_32_bits(phys)); + cmd->src_addr_lo = cpu_to_le32(lower_32_bits(phys)); + memcpy(&p_hwfn->dmae_info.p_intermediate_buffer[0], + (void *)(uintptr_t)src_addr, + length * sizeof(u32)); + break; + default: + return -EINVAL; + } + + switch (dst_type) { + case QED_DMAE_ADDRESS_GRC: + case QED_DMAE_ADDRESS_HOST_PHYS: + cmd->dst_addr_hi = cpu_to_le32(upper_32_bits(dst_addr)); + cmd->dst_addr_lo = cpu_to_le32(lower_32_bits(dst_addr)); + break; + /* for virtual source addresses we use the intermediate buffer. */ + case QED_DMAE_ADDRESS_HOST_VIRT: + cmd->dst_addr_hi = cpu_to_le32(upper_32_bits(phys)); + cmd->dst_addr_lo = cpu_to_le32(lower_32_bits(phys)); + break; + default: + return -EINVAL; + } + + cmd->length = cpu_to_le16((u16)length); + + qed_dmae_post_command(p_hwfn, p_ptt); + + qed_status = qed_dmae_operation_wait(p_hwfn); + + if (qed_status) { + DP_NOTICE(p_hwfn, + "qed_dmae_host2grc: Wait Failed. 
source_addr 0x%llx, grc_addr 0x%llx, size_in_dwords 0x%x\n", + src_addr, + dst_addr, + length); + return qed_status; + } + + if (dst_type == QED_DMAE_ADDRESS_HOST_VIRT) + memcpy((void *)(uintptr_t)(dst_addr), + &p_hwfn->dmae_info.p_intermediate_buffer[0], + length * sizeof(u32)); + + return 0; +} + +static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u64 src_addr, u64 dst_addr, + u8 src_type, u8 dst_type, + u32 size_in_dwords, + struct qed_dmae_params *p_params) +{ + dma_addr_t phys = p_hwfn->dmae_info.completion_word_phys_addr; + u16 length_cur = 0, i = 0, cnt_split = 0, length_mod = 0; + struct dmae_cmd *cmd = p_hwfn->dmae_info.p_dmae_cmd; + u64 src_addr_split = 0, dst_addr_split = 0; + u16 length_limit = DMAE_MAX_RW_SIZE; + int qed_status = 0; + u32 offset = 0; + + qed_dmae_opcode(p_hwfn, + (src_type == QED_DMAE_ADDRESS_GRC), + (dst_type == QED_DMAE_ADDRESS_GRC), + p_params); + + cmd->comp_addr_lo = cpu_to_le32(lower_32_bits(phys)); + cmd->comp_addr_hi = cpu_to_le32(upper_32_bits(phys)); + cmd->comp_val = cpu_to_le32(DMAE_COMPLETION_VAL); + + /* Check if the grc_addr is valid like < MAX_GRC_OFFSET */ + cnt_split = size_in_dwords / length_limit; + length_mod = size_in_dwords % length_limit; + + src_addr_split = src_addr; + dst_addr_split = dst_addr; + + for (i = 0; i <= cnt_split; i++) { + offset = length_limit * i; + + if (!(p_params->flags & QED_DMAE_FLAG_RW_REPL_SRC)) { + if (src_type == QED_DMAE_ADDRESS_GRC) + src_addr_split = src_addr + offset; + else + src_addr_split = src_addr + (offset * 4); + } + + if (dst_type == QED_DMAE_ADDRESS_GRC) + dst_addr_split = dst_addr + offset; + else + dst_addr_split = dst_addr + (offset * 4); + + length_cur = (cnt_split == i) ? length_mod : length_limit; + + /* might be zero on last iteration */ + if (!length_cur) + continue; + + qed_status = qed_dmae_execute_sub_operation(p_hwfn, + p_ptt, + src_addr_split, + dst_addr_split, + src_type, + dst_type, + length_cur); + if (qed_status) { + DP_NOTICE(p_hwfn, + "qed_dmae_execute_sub_operation Failed with error 0x%x. 
source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n", + qed_status, + src_addr, + dst_addr, + length_cur); + break; + } + } + + return qed_status; +} + +int qed_dmae_host2grc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u64 source_addr, + u32 grc_addr, + u32 size_in_dwords, + u32 flags) +{ + u32 grc_addr_in_dw = grc_addr / sizeof(u32); + struct qed_dmae_params params; + int rc; + + memset(&params, 0, sizeof(struct qed_dmae_params)); + params.flags = flags; + + mutex_lock(&p_hwfn->dmae_info.mutex); + + rc = qed_dmae_execute_command(p_hwfn, p_ptt, source_addr, + grc_addr_in_dw, + QED_DMAE_ADDRESS_HOST_VIRT, + QED_DMAE_ADDRESS_GRC, + size_in_dwords, &params); + + mutex_unlock(&p_hwfn->dmae_info.mutex); + + return rc; +} + +u16 qed_get_qm_pq(struct qed_hwfn *p_hwfn, + enum protocol_type proto, + union qed_qm_pq_params *p_params) +{ + u16 pq_id = 0; + + if ((proto == PROTOCOLID_CORE || proto == PROTOCOLID_ETH) && + !p_params) { + DP_NOTICE(p_hwfn, + "Protocol %d received NULL PQ params\n", + proto); + return 0; + } + + switch (proto) { + case PROTOCOLID_CORE: + if (p_params->core.tc == LB_TC) + pq_id = p_hwfn->qm_info.pure_lb_pq; + else + pq_id = p_hwfn->qm_info.offload_pq; + break; + case PROTOCOLID_ETH: + pq_id = p_params->eth.tc; + break; + default: + pq_id = 0; + } + + pq_id = CM_TX_PQ_BASE + pq_id + RESC_START(p_hwfn, QED_PQ); + + return pq_id; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h new file mode 100644 index 000000000000..e56d433793be --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h @@ -0,0 +1,263 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_HW_H +#define _QED_HW_H + +#include +#include +#include +#include +#include "qed.h" +#include "qed_dev_api.h" + +/* Forward declaration */ +struct qed_ptt; + +enum reserved_ptts { + RESERVED_PTT_EDIAG, + RESERVED_PTT_USER_SPACE, + RESERVED_PTT_MAIN, + RESERVED_PTT_DPC, + RESERVED_PTT_MAX +}; + +enum _dmae_cmd_dst_mask { + DMAE_CMD_DST_MASK_NONE = 0, + DMAE_CMD_DST_MASK_PCIE = 1, + DMAE_CMD_DST_MASK_GRC = 2 +}; + +enum _dmae_cmd_src_mask { + DMAE_CMD_SRC_MASK_PCIE = 0, + DMAE_CMD_SRC_MASK_GRC = 1 +}; + +enum _dmae_cmd_crc_mask { + DMAE_CMD_COMP_CRC_EN_MASK_NONE = 0, + DMAE_CMD_COMP_CRC_EN_MASK_SET = 1 +}; + +/* definitions for DMA constants */ +#define DMAE_GO_VALUE 0x1 + +#define DMAE_COMPLETION_VAL 0xD1AE +#define DMAE_CMD_ENDIANITY 0x2 + +#define DMAE_CMD_SIZE 14 +#define DMAE_CMD_SIZE_TO_FILL (DMAE_CMD_SIZE - 5) +#define DMAE_MIN_WAIT_TIME 0x2 +#define DMAE_MAX_CLIENTS 32 + +/** + * @brief qed_gtt_init - Initialize GTT windows + * + * @param p_hwfn + */ +void qed_gtt_init(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ptt_invalidate - Forces all ptt entries to be re-configured + * + * @param p_hwfn + */ +void qed_ptt_invalidate(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ptt_pool_alloc - Allocate and initialize PTT pool + * + * @param p_hwfn + * + * @return struct _qed_status - success (0), negative - error. 
+ */ +int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ptt_pool_free - + * + * @param p_hwfn + */ +void qed_ptt_pool_free(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ptt_get_hw_addr - Get PTT's GRC/HW address + * + * @param p_hwfn + * @param p_ptt + * + * @return u32 + */ +u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + +/** + * @brief qed_ptt_get_bar_addr - Get PTT's external BAR address + * + * @param p_hwfn + * @param p_ptt + * + * @return u32 + */ +u32 qed_ptt_get_bar_addr(struct qed_ptt *p_ptt); + +/** + * @brief qed_ptt_set_win - Set PTT Window's GRC BAR address + * + * @param p_hwfn + * @param new_hw_addr + * @param p_ptt + */ +void qed_ptt_set_win(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 new_hw_addr); + +/** + * @brief qed_get_reserved_ptt - Get a specific reserved PTT + * + * @param p_hwfn + * @param ptt_idx + * + * @return struct qed_ptt * + */ +struct qed_ptt *qed_get_reserved_ptt(struct qed_hwfn *p_hwfn, + enum reserved_ptts ptt_idx); + +/** + * @brief qed_wr - Write value to BAR using the given ptt + * + * @param p_hwfn + * @param p_ptt + * @param val + * @param hw_addr + */ +void qed_wr(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 hw_addr, + u32 val); + +/** + * @brief qed_rd - Read value from BAR using the given ptt + * + * @param p_hwfn + * @param p_ptt + * @param val + * @param hw_addr + */ +u32 qed_rd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 hw_addr); + +/** + * @brief qed_memcpy_from - copy n bytes from BAR using the given + * ptt + * + * @param p_hwfn + * @param p_ptt + * @param dest + * @param hw_addr + * @param n + */ +void qed_memcpy_from(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + void *dest, + u32 hw_addr, + size_t n); + +/** + * @brief qed_memcpy_to - copy n bytes to BAR using the given + * ptt + * + * @param p_hwfn + * @param p_ptt + * @param hw_addr + * @param src + * @param n + */ +void qed_memcpy_to(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 hw_addr, + void *src, + size_t n); +/** + * @brief qed_fid_pretend - pretend to another function when + * accessing the ptt window. There is no way to unpretend + * a function. The only way to cancel a pretend is to + * pretend back to the original function. + * + * @param p_hwfn + * @param p_ptt + * @param fid - fid field of pxp_pretend structure. Can contain + * either pf / vf, port/path fields are don't care. + */ +void qed_fid_pretend(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 fid); + +/** + * @brief qed_port_pretend - pretend to another port when + * accessing the ptt window + * + * @param p_hwfn + * @param p_ptt + * @param port_id - the port to pretend to + */ +void qed_port_pretend(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 port_id); + +/** + * @brief qed_port_unpretend - cancel any previously set port + * pretend + * + * @param p_hwfn + * @param p_ptt + */ +void qed_port_unpretend(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + +/** + * @brief qed_dmae_idx_to_go_cmd - map the idx to dmae cmd + * this is declared here since other files will require it. + * @param idx + */ +u32 qed_dmae_idx_to_go_cmd(u8 idx); + +/** + * @brief qed_dmae_info_alloc - Init the dmae_info structure + * which is part of p_hwfn. 
+ * @param p_hwfn + */ +int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_dmae_info_free - Free the dmae_info structure + * which is part of p_hwfn + * + * @param p_hwfn + */ +void qed_dmae_info_free(struct qed_hwfn *p_hwfn); + +union qed_qm_pq_params { + struct { + u8 tc; + } core; + + struct { + u8 is_vf; + u8 vf_id; + u8 tc; + } eth; +}; + +u16 qed_get_qm_pq(struct qed_hwfn *p_hwfn, + enum protocol_type proto, + union qed_qm_pq_params *params); + +int qed_init_fw_data(struct qed_dev *cdev, + const u8 *fw_data); +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c new file mode 100644 index 000000000000..0b21a553cc7d --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c @@ -0,0 +1,798 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_reg_addr.h" + +enum cminterface { + MCM_SEC, + MCM_PRI, + UCM_SEC, + UCM_PRI, + TCM_SEC, + TCM_PRI, + YCM_SEC, + YCM_PRI, + XCM_SEC, + XCM_PRI, + NUM_OF_CM_INTERFACES +}; + +/* general constants */ +#define QM_PQ_ELEMENT_SIZE 4 /* in bytes */ +#define QM_PQ_MEM_4KB(pq_size) (pq_size ? DIV_ROUND_UP((pq_size + 1) * \ + QM_PQ_ELEMENT_SIZE, \ + 0x1000) : 0) +#define QM_PQ_SIZE_256B(pq_size) (pq_size ? DIV_ROUND_UP(pq_size, \ + 0x100) - 1 : 0) +#define QM_INVALID_PQ_ID 0xffff +/* feature enable */ +#define QM_BYPASS_EN 1 +#define QM_BYTE_CRD_EN 1 +/* other PQ constants */ +#define QM_OTHER_PQS_PER_PF 4 +/* WFQ constants */ +#define QM_WFQ_UPPER_BOUND 6250000 +#define QM_WFQ_VP_PQ_VOQ_SHIFT 0 +#define QM_WFQ_VP_PQ_PF_SHIFT 5 +#define QM_WFQ_INC_VAL(weight) ((weight) * 0x9000) +#define QM_WFQ_MAX_INC_VAL 4375000 +#define QM_WFQ_INIT_CRD(inc_val) (2 * (inc_val)) +/* RL constants */ +#define QM_RL_UPPER_BOUND 6250000 +#define QM_RL_PERIOD 5 /* in us */ +#define QM_RL_PERIOD_CLK_25M (25 * QM_RL_PERIOD) +#define QM_RL_INC_VAL(rate) max_t(u32, \ + (((rate ? 
rate : 1000000) \ + * QM_RL_PERIOD) / 8), 1) +#define QM_RL_MAX_INC_VAL 4375000 +/* AFullOprtnstcCrdMask constants */ +#define QM_OPPOR_LINE_VOQ_DEF 1 +#define QM_OPPOR_FW_STOP_DEF 0 +#define QM_OPPOR_PQ_EMPTY_DEF 1 +#define EAGLE_WORKAROUND_TC 7 +/* Command Queue constants */ +#define PBF_CMDQ_PURE_LB_LINES 150 +#define PBF_CMDQ_EAGLE_WORKAROUND_LINES 8 +#define PBF_CMDQ_LINES_RT_OFFSET(voq) ( \ + PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET + voq * \ + (PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET - \ + PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET)) +#define PBF_BTB_GUARANTEED_RT_OFFSET(voq) ( \ + PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET + voq * \ + (PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET - \ + PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET)) +#define QM_VOQ_LINE_CRD(pbf_cmd_lines) ((((pbf_cmd_lines) - \ + 4) * \ + 2) | QM_LINE_CRD_REG_SIGN_BIT) +/* BTB: blocks constants (block size = 256B) */ +#define BTB_JUMBO_PKT_BLOCKS 38 +#define BTB_HEADROOM_BLOCKS BTB_JUMBO_PKT_BLOCKS +#define BTB_EAGLE_WORKAROUND_BLOCKS 4 +#define BTB_PURE_LB_FACTOR 10 +#define BTB_PURE_LB_RATIO 7 +/* QM stop command constants */ +#define QM_STOP_PQ_MASK_WIDTH 32 +#define QM_STOP_CMD_ADDR 0x2 +#define QM_STOP_CMD_STRUCT_SIZE 2 +#define QM_STOP_CMD_PAUSE_MASK_OFFSET 0 +#define QM_STOP_CMD_PAUSE_MASK_SHIFT 0 +#define QM_STOP_CMD_PAUSE_MASK_MASK -1 +#define QM_STOP_CMD_GROUP_ID_OFFSET 1 +#define QM_STOP_CMD_GROUP_ID_SHIFT 16 +#define QM_STOP_CMD_GROUP_ID_MASK 15 +#define QM_STOP_CMD_PQ_TYPE_OFFSET 1 +#define QM_STOP_CMD_PQ_TYPE_SHIFT 24 +#define QM_STOP_CMD_PQ_TYPE_MASK 1 +#define QM_STOP_CMD_MAX_POLL_COUNT 100 +#define QM_STOP_CMD_POLL_PERIOD_US 500 +/* QM command macros */ +#define QM_CMD_STRUCT_SIZE(cmd) cmd ## \ + _STRUCT_SIZE +#define QM_CMD_SET_FIELD(var, cmd, field, \ + value) SET_FIELD(var[cmd ## _ ## field ## \ + _OFFSET], \ + cmd ## _ ## field, \ + value) +/* QM: VOQ macros */ +#define PHYS_VOQ(port, tc, max_phy_tcs_pr_port) ((port) * \ + (max_phy_tcs_pr_port) \ + + (tc)) +#define LB_VOQ(port) ( \ + MAX_PHYS_VOQS + (port)) +#define VOQ(port, tc, max_phy_tcs_pr_port) \ + ((tc) < \ + LB_TC ? PHYS_VOQ(port, \ + tc, \ + max_phy_tcs_pr_port) \ + : LB_VOQ(port)) +/******************** INTERNAL IMPLEMENTATION *********************/ +/* Prepare PF RL enable/disable runtime init values */ +static void qed_enable_pf_rl(struct qed_hwfn *p_hwfn, + bool pf_rl_en) +{ + STORE_RT_REG(p_hwfn, QM_REG_RLPFENABLE_RT_OFFSET, pf_rl_en ? 1 : 0); + if (pf_rl_en) { + /* enable RLs for all VOQs */ + STORE_RT_REG(p_hwfn, QM_REG_RLPFVOQENABLE_RT_OFFSET, + (1 << MAX_NUM_VOQS) - 1); + /* write RL period */ + STORE_RT_REG(p_hwfn, + QM_REG_RLPFPERIOD_RT_OFFSET, + QM_RL_PERIOD_CLK_25M); + STORE_RT_REG(p_hwfn, + QM_REG_RLPFPERIODTIMER_RT_OFFSET, + QM_RL_PERIOD_CLK_25M); + /* set credit threshold for QM bypass flow */ + if (QM_BYPASS_EN) + STORE_RT_REG(p_hwfn, + QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET, + QM_RL_UPPER_BOUND); + } +} + +/* Prepare PF WFQ enable/disable runtime init values */ +static void qed_enable_pf_wfq(struct qed_hwfn *p_hwfn, + bool pf_wfq_en) +{ + STORE_RT_REG(p_hwfn, QM_REG_WFQPFENABLE_RT_OFFSET, pf_wfq_en ? 1 : 0); + /* set credit threshold for QM bypass flow */ + if (pf_wfq_en && QM_BYPASS_EN) + STORE_RT_REG(p_hwfn, + QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET, + QM_WFQ_UPPER_BOUND); +} + +/* Prepare VPORT RL enable/disable runtime init values */ +static void qed_enable_vport_rl(struct qed_hwfn *p_hwfn, + bool vport_rl_en) +{ + STORE_RT_REG(p_hwfn, QM_REG_RLGLBLENABLE_RT_OFFSET, + vport_rl_en ? 
1 : 0); + if (vport_rl_en) { + /* write RL period (use timer 0 only) */ + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLPERIOD_0_RT_OFFSET, + QM_RL_PERIOD_CLK_25M); + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET, + QM_RL_PERIOD_CLK_25M); + /* set credit threshold for QM bypass flow */ + if (QM_BYPASS_EN) + STORE_RT_REG(p_hwfn, + QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET, + QM_RL_UPPER_BOUND); + } +} + +/* Prepare VPORT WFQ enable/disable runtime init values */ +static void qed_enable_vport_wfq(struct qed_hwfn *p_hwfn, + bool vport_wfq_en) +{ + STORE_RT_REG(p_hwfn, QM_REG_WFQVPENABLE_RT_OFFSET, + vport_wfq_en ? 1 : 0); + /* set credit threshold for QM bypass flow */ + if (vport_wfq_en && QM_BYPASS_EN) + STORE_RT_REG(p_hwfn, + QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET, + QM_WFQ_UPPER_BOUND); +} + +/* Prepare runtime init values to allocate PBF command queue lines for + * the specified VOQ + */ +static void qed_cmdq_lines_voq_rt_init(struct qed_hwfn *p_hwfn, + u8 voq, + u16 cmdq_lines) +{ + u32 qm_line_crd; + + /* In A0 - Limit the size of pbf queue so that only 511 commands with + * the minimum size of 4 (FCoE minimum size) + */ + bool is_bb_a0 = QED_IS_BB_A0(p_hwfn->cdev); + + if (is_bb_a0) + cmdq_lines = min_t(u32, cmdq_lines, 1022); + qm_line_crd = QM_VOQ_LINE_CRD(cmdq_lines); + OVERWRITE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(voq), + (u32)cmdq_lines); + STORE_RT_REG(p_hwfn, QM_REG_VOQCRDLINE_RT_OFFSET + voq, qm_line_crd); + STORE_RT_REG(p_hwfn, QM_REG_VOQINITCRDLINE_RT_OFFSET + voq, + qm_line_crd); +} + +/* Prepare runtime init values to allocate PBF command queue lines. */ +static void qed_cmdq_lines_rt_init( + struct qed_hwfn *p_hwfn, + u8 max_ports_per_engine, + u8 max_phys_tcs_per_port, + struct init_qm_port_params port_params[MAX_NUM_PORTS]) +{ + u8 tc, voq, port_id; + + /* clear PBF lines for all VOQs */ + for (voq = 0; voq < MAX_NUM_VOQS; voq++) + STORE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(voq), 0); + for (port_id = 0; port_id < max_ports_per_engine; port_id++) { + if (port_params[port_id].active) { + u16 phys_lines, phys_lines_per_tc; + u8 phys_tcs = port_params[port_id].num_active_phys_tcs; + + /* find #lines to divide between the active + * physical TCs. + */ + phys_lines = port_params[port_id].num_pbf_cmd_lines - + PBF_CMDQ_PURE_LB_LINES; + /* find #lines per active physical TC */ + phys_lines_per_tc = phys_lines / phys_tcs; + /* init registers per active TC */ + for (tc = 0; tc < phys_tcs; tc++) { + voq = PHYS_VOQ(port_id, tc, + max_phys_tcs_per_port); + qed_cmdq_lines_voq_rt_init(p_hwfn, voq, + phys_lines_per_tc); + } + /* init registers for pure LB TC */ + qed_cmdq_lines_voq_rt_init(p_hwfn, LB_VOQ(port_id), + PBF_CMDQ_PURE_LB_LINES); + } + } +} + +static void qed_btb_blocks_rt_init( + struct qed_hwfn *p_hwfn, + u8 max_ports_per_engine, + u8 max_phys_tcs_per_port, + struct init_qm_port_params port_params[MAX_NUM_PORTS]) +{ + u32 usable_blocks, pure_lb_blocks, phys_blocks; + u8 tc, voq, port_id; + + for (port_id = 0; port_id < max_ports_per_engine; port_id++) { + u32 temp; + u8 phys_tcs; + + if (!port_params[port_id].active) + continue; + + phys_tcs = port_params[port_id].num_active_phys_tcs; + + /* subtract headroom blocks */ + usable_blocks = port_params[port_id].num_btb_blocks - + BTB_HEADROOM_BLOCKS; + + /* find blocks per physical TC. use factor to avoid + * floating arithmethic. 
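+ * For example, with usable_blocks = 1000 and phys_tcs = 4 the
+ * first step gives (1000 * 10) / (4 * 10 + 7) = 212, the second
+ * 212 / 10 = 21 blocks, which the max_t() below then raises to
+ * the BTB_JUMBO_PKT_BLOCKS (38) floor.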
+ */ + pure_lb_blocks = (usable_blocks * BTB_PURE_LB_FACTOR) / + (phys_tcs * BTB_PURE_LB_FACTOR + + BTB_PURE_LB_RATIO); + pure_lb_blocks = max_t(u32, BTB_JUMBO_PKT_BLOCKS, + pure_lb_blocks / BTB_PURE_LB_FACTOR); + phys_blocks = (usable_blocks - pure_lb_blocks) / phys_tcs; + + /* init physical TCs */ + for (tc = 0; tc < phys_tcs; tc++) { + voq = PHYS_VOQ(port_id, tc, max_phys_tcs_per_port); + STORE_RT_REG(p_hwfn, PBF_BTB_GUARANTEED_RT_OFFSET(voq), + phys_blocks); + } + + /* init pure LB TC */ + temp = LB_VOQ(port_id); + STORE_RT_REG(p_hwfn, PBF_BTB_GUARANTEED_RT_OFFSET(temp), + pure_lb_blocks); + } +} + +/* Prepare Tx PQ mapping runtime init values for the specified PF */ +static void qed_tx_pq_map_rt_init( + struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_qm_pf_rt_init_params *p_params, + u32 base_mem_addr_4kb) +{ + struct init_qm_vport_params *vport_params = p_params->vport_params; + u16 num_pqs = p_params->num_pf_pqs + p_params->num_vf_pqs; + u16 first_pq_group = p_params->start_pq / QM_PF_QUEUE_GROUP_SIZE; + u16 last_pq_group = (p_params->start_pq + num_pqs - 1) / + QM_PF_QUEUE_GROUP_SIZE; + bool is_bb_a0 = QED_IS_BB_A0(p_hwfn->cdev); + u16 i, pq_id, pq_group; + + /* a bit per Tx PQ indicating if the PQ is associated with a VF */ + u32 tx_pq_vf_mask[MAX_QM_TX_QUEUES / QM_PF_QUEUE_GROUP_SIZE] = { 0 }; + u32 tx_pq_vf_mask_width = is_bb_a0 ? 32 : QM_PF_QUEUE_GROUP_SIZE; + u32 num_tx_pq_vf_masks = MAX_QM_TX_QUEUES / tx_pq_vf_mask_width; + u32 pq_mem_4kb = QM_PQ_MEM_4KB(p_params->num_pf_cids); + u32 vport_pq_mem_4kb = QM_PQ_MEM_4KB(p_params->num_vf_cids); + u32 mem_addr_4kb = base_mem_addr_4kb; + + /* set mapping from PQ group to PF */ + for (pq_group = first_pq_group; pq_group <= last_pq_group; pq_group++) + STORE_RT_REG(p_hwfn, QM_REG_PQTX2PF_0_RT_OFFSET + pq_group, + (u32)(p_params->pf_id)); + /* set PQ sizes */ + STORE_RT_REG(p_hwfn, QM_REG_MAXPQSIZE_0_RT_OFFSET, + QM_PQ_SIZE_256B(p_params->num_pf_cids)); + STORE_RT_REG(p_hwfn, QM_REG_MAXPQSIZE_1_RT_OFFSET, + QM_PQ_SIZE_256B(p_params->num_vf_cids)); + + /* go over all Tx PQs */ + for (i = 0, pq_id = p_params->start_pq; i < num_pqs; i++, pq_id++) { + u8 voq = VOQ(p_params->port_id, p_params->pq_params[i].tc_id, + p_params->max_phys_tcs_per_port); + bool is_vf_pq = (i >= p_params->num_pf_pqs); + struct qm_rf_pq_map tx_pq_map; + + /* update first Tx PQ of VPORT/TC */ + u8 vport_id_in_pf = p_params->pq_params[i].vport_id - + p_params->start_vport; + u16 *pq_ids = &vport_params[vport_id_in_pf].first_tx_pq_id[0]; + u16 first_tx_pq_id = pq_ids[p_params->pq_params[i].tc_id]; + + if (first_tx_pq_id == QM_INVALID_PQ_ID) { + /* create new VP PQ */ + pq_ids[p_params->pq_params[i].tc_id] = pq_id; + first_tx_pq_id = pq_id; + /* map VP PQ to VOQ and PF */ + STORE_RT_REG(p_hwfn, + QM_REG_WFQVPMAP_RT_OFFSET + + first_tx_pq_id, + (voq << QM_WFQ_VP_PQ_VOQ_SHIFT) | + (p_params->pf_id << + QM_WFQ_VP_PQ_PF_SHIFT)); + } + /* fill PQ map entry */ + memset(&tx_pq_map, 0, sizeof(tx_pq_map)); + SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_PQ_VALID, 1); + SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_RL_VALID, + is_vf_pq ? 1 : 0); + SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_VP_PQ_ID, first_tx_pq_id); + SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_RL_ID, + is_vf_pq ? 
p_params->pq_params[i].vport_id : 0); + SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_VOQ, voq); + SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_WRR_WEIGHT_GROUP, + p_params->pq_params[i].wrr_group); + /* write PQ map entry to CAM */ + STORE_RT_REG(p_hwfn, QM_REG_TXPQMAP_RT_OFFSET + pq_id, + *((u32 *)&tx_pq_map)); + /* set base address */ + STORE_RT_REG(p_hwfn, + QM_REG_BASEADDRTXPQ_RT_OFFSET + pq_id, + mem_addr_4kb); + /* check if VF PQ */ + if (is_vf_pq) { + /* if PQ is associated with a VF, add indication + * to PQ VF mask + */ + tx_pq_vf_mask[pq_id / tx_pq_vf_mask_width] |= + (1 << (pq_id % tx_pq_vf_mask_width)); + mem_addr_4kb += vport_pq_mem_4kb; + } else { + mem_addr_4kb += pq_mem_4kb; + } + } + + /* store Tx PQ VF mask to size select register */ + for (i = 0; i < num_tx_pq_vf_masks; i++) { + if (tx_pq_vf_mask[i]) { + if (is_bb_a0) { + u32 curr_mask = 0, addr; + + addr = QM_REG_MAXPQSIZETXSEL_0 + (i * 4); + if (!p_params->is_first_pf) + curr_mask = qed_rd(p_hwfn, p_ptt, + addr); + + addr = QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET + i; + + STORE_RT_REG(p_hwfn, addr, + curr_mask | tx_pq_vf_mask[i]); + } else { + u32 addr; + + addr = QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET + i; + STORE_RT_REG(p_hwfn, addr, + tx_pq_vf_mask[i]); + } + } + } +} + +/* Prepare Other PQ mapping runtime init values for the specified PF */ +static void qed_other_pq_map_rt_init(struct qed_hwfn *p_hwfn, + u8 port_id, + u8 pf_id, + u32 num_pf_cids, + u32 num_tids, + u32 base_mem_addr_4kb) +{ + u16 i, pq_id; + + /* a single other PQ group is used in each PF, + * where PQ group i is used in PF i. + */ + u16 pq_group = pf_id; + u32 pq_size = num_pf_cids + num_tids; + u32 pq_mem_4kb = QM_PQ_MEM_4KB(pq_size); + u32 mem_addr_4kb = base_mem_addr_4kb; + + /* map PQ group to PF */ + STORE_RT_REG(p_hwfn, QM_REG_PQOTHER2PF_0_RT_OFFSET + pq_group, + (u32)(pf_id)); + /* set PQ sizes */ + STORE_RT_REG(p_hwfn, QM_REG_MAXPQSIZE_2_RT_OFFSET, + QM_PQ_SIZE_256B(pq_size)); + /* set base address */ + for (i = 0, pq_id = pf_id * QM_PF_QUEUE_GROUP_SIZE; + i < QM_OTHER_PQS_PER_PF; i++, pq_id++) { + STORE_RT_REG(p_hwfn, + QM_REG_BASEADDROTHERPQ_RT_OFFSET + pq_id, + mem_addr_4kb); + mem_addr_4kb += pq_mem_4kb; + } +} + +/* Prepare PF WFQ runtime init values for the specified PF. + * Return -1 on error. + */ +static int qed_pf_wfq_rt_init(struct qed_hwfn *p_hwfn, + struct qed_qm_pf_rt_init_params *p_params) +{ + u16 num_tx_pqs = p_params->num_pf_pqs + p_params->num_vf_pqs; + u32 crd_reg_offset; + u32 inc_val; + u16 i; + + if (p_params->pf_id < MAX_NUM_PFS_BB) + crd_reg_offset = QM_REG_WFQPFCRD_RT_OFFSET; + else + crd_reg_offset = QM_REG_WFQPFCRD_MSB_RT_OFFSET + + (p_params->pf_id % MAX_NUM_PFS_BB); + + inc_val = QM_WFQ_INC_VAL(p_params->pf_wfq); + if (inc_val > QM_WFQ_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, "Invalid PF WFQ weight configuration"); + return -1; + } + STORE_RT_REG(p_hwfn, QM_REG_WFQPFWEIGHT_RT_OFFSET + p_params->pf_id, + inc_val); + STORE_RT_REG(p_hwfn, + QM_REG_WFQPFUPPERBOUND_RT_OFFSET + p_params->pf_id, + QM_WFQ_UPPER_BOUND | QM_WFQ_CRD_REG_SIGN_BIT); + + for (i = 0; i < num_tx_pqs; i++) { + u8 voq = VOQ(p_params->port_id, p_params->pq_params[i].tc_id, + p_params->max_phys_tcs_per_port); + + OVERWRITE_RT_REG(p_hwfn, + crd_reg_offset + voq * MAX_NUM_PFS_BB, + QM_WFQ_INIT_CRD(inc_val) | + QM_WFQ_CRD_REG_SIGN_BIT); + } + + return 0; +} + +/* Prepare PF RL runtime init values for the specified PF. + * Return -1 on error. 
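+ * The per-PF credit increment is QM_RL_INC_VAL(rate), i.e.
+ * max(((rate ? rate : 1000000) * QM_RL_PERIOD) / 8, 1). Assuming
+ * the rate is expressed in megabits per second, this is the byte
+ * credit granted every QM_RL_PERIOD (5 us) interval: e.g.
+ * rate = 1000 yields (1000 * 5) / 8 = 625 byte-credits per
+ * period, and a rate of 0 falls back to the 1000000 default.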
+ */ +static int qed_pf_rl_rt_init(struct qed_hwfn *p_hwfn, + u8 pf_id, + u32 pf_rl) +{ + u32 inc_val = QM_RL_INC_VAL(pf_rl); + + if (inc_val > QM_RL_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, "Invalid PF rate limit configuration"); + return -1; + } + STORE_RT_REG(p_hwfn, QM_REG_RLPFCRD_RT_OFFSET + pf_id, + QM_RL_CRD_REG_SIGN_BIT); + STORE_RT_REG(p_hwfn, QM_REG_RLPFUPPERBOUND_RT_OFFSET + pf_id, + QM_RL_UPPER_BOUND | QM_RL_CRD_REG_SIGN_BIT); + STORE_RT_REG(p_hwfn, QM_REG_RLPFINCVAL_RT_OFFSET + pf_id, inc_val); + return 0; +} + +/* Prepare VPORT WFQ runtime init values for the specified VPORTs. + * Return -1 on error. + */ +static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn, + u8 start_vport, + u8 num_vports, + struct init_qm_vport_params *vport_params) +{ + u8 tc, i, vport_id; + u32 inc_val; + + /* go over all PF VPORTs */ + for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) { + u32 temp = QM_REG_WFQVPUPPERBOUND_RT_OFFSET; + u16 *pq_ids = &vport_params[i].first_tx_pq_id[0]; + + if (!vport_params[i].vport_wfq) + continue; + + inc_val = QM_WFQ_INC_VAL(vport_params[i].vport_wfq); + if (inc_val > QM_WFQ_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, + "Invalid VPORT WFQ weight configuration"); + return -1; + } + + /* each VPORT can have several VPORT PQ IDs for + * different TCs + */ + for (tc = 0; tc < NUM_OF_TCS; tc++) { + u16 vport_pq_id = pq_ids[tc]; + + if (vport_pq_id != QM_INVALID_PQ_ID) { + STORE_RT_REG(p_hwfn, + QM_REG_WFQVPWEIGHT_RT_OFFSET + + vport_pq_id, inc_val); + STORE_RT_REG(p_hwfn, temp + vport_pq_id, + QM_WFQ_UPPER_BOUND | + QM_WFQ_CRD_REG_SIGN_BIT); + STORE_RT_REG(p_hwfn, + QM_REG_WFQVPCRD_RT_OFFSET + + vport_pq_id, + QM_WFQ_INIT_CRD(inc_val) | + QM_WFQ_CRD_REG_SIGN_BIT); + } + } + } + + return 0; +} + +static int qed_vport_rl_rt_init(struct qed_hwfn *p_hwfn, + u8 start_vport, + u8 num_vports, + struct init_qm_vport_params *vport_params) +{ + u8 i, vport_id; + + /* go over all PF VPORTs */ + for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) { + u32 inc_val = QM_RL_INC_VAL(vport_params[i].vport_rl); + + if (inc_val > QM_RL_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, + "Invalid VPORT rate-limit configuration"); + return -1; + } + + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLCRD_RT_OFFSET + vport_id, + QM_RL_CRD_REG_SIGN_BIT); + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLUPPERBOUND_RT_OFFSET + vport_id, + QM_RL_UPPER_BOUND | QM_RL_CRD_REG_SIGN_BIT); + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLINCVAL_RT_OFFSET + vport_id, + inc_val); + } + + return 0; +} + +static bool qed_poll_on_qm_cmd_ready(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 reg_val, i; + + for (i = 0, reg_val = 0; i < QM_STOP_CMD_MAX_POLL_COUNT && reg_val == 0; + i++) { + udelay(QM_STOP_CMD_POLL_PERIOD_US); + reg_val = qed_rd(p_hwfn, p_ptt, QM_REG_SDMCMDREADY); + } + + /* check if timeout while waiting for SDM command ready */ + if (i == QM_STOP_CMD_MAX_POLL_COUNT) { + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "Timeout when waiting for QM SDM command ready signal\n"); + return false; + } + + return true; +} + +static bool qed_send_qm_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd_addr, + u32 cmd_data_lsb, + u32 cmd_data_msb) +{ + if (!qed_poll_on_qm_cmd_ready(p_hwfn, p_ptt)) + return false; + + qed_wr(p_hwfn, p_ptt, QM_REG_SDMCMDADDR, cmd_addr); + qed_wr(p_hwfn, p_ptt, QM_REG_SDMCMDDATALSB, cmd_data_lsb); + qed_wr(p_hwfn, p_ptt, QM_REG_SDMCMDDATAMSB, cmd_data_msb); + qed_wr(p_hwfn, p_ptt, QM_REG_SDMCMDGO, 1); + qed_wr(p_hwfn, p_ptt, QM_REG_SDMCMDGO, 0); + + return qed_poll_on_qm_cmd_ready(p_hwfn, 
p_ptt); +} + +/******************** INTERFACE IMPLEMENTATION *********************/ +u32 qed_qm_pf_mem_size(u8 pf_id, + u32 num_pf_cids, + u32 num_vf_cids, + u32 num_tids, + u16 num_pf_pqs, + u16 num_vf_pqs) +{ + return QM_PQ_MEM_4KB(num_pf_cids) * num_pf_pqs + + QM_PQ_MEM_4KB(num_vf_cids) * num_vf_pqs + + QM_PQ_MEM_4KB(num_pf_cids + num_tids) * QM_OTHER_PQS_PER_PF; +} + +int qed_qm_common_rt_init( + struct qed_hwfn *p_hwfn, + struct qed_qm_common_rt_init_params *p_params) +{ + /* init AFullOprtnstcCrdMask */ + u32 mask = (QM_OPPOR_LINE_VOQ_DEF << + QM_RF_OPPORTUNISTIC_MASK_LINEVOQ_SHIFT) | + (QM_BYTE_CRD_EN << QM_RF_OPPORTUNISTIC_MASK_BYTEVOQ_SHIFT) | + (p_params->pf_wfq_en << + QM_RF_OPPORTUNISTIC_MASK_PFWFQ_SHIFT) | + (p_params->vport_wfq_en << + QM_RF_OPPORTUNISTIC_MASK_VPWFQ_SHIFT) | + (p_params->pf_rl_en << + QM_RF_OPPORTUNISTIC_MASK_PFRL_SHIFT) | + (p_params->vport_rl_en << + QM_RF_OPPORTUNISTIC_MASK_VPQCNRL_SHIFT) | + (QM_OPPOR_FW_STOP_DEF << + QM_RF_OPPORTUNISTIC_MASK_FWPAUSE_SHIFT) | + (QM_OPPOR_PQ_EMPTY_DEF << + QM_RF_OPPORTUNISTIC_MASK_QUEUEEMPTY_SHIFT); + + STORE_RT_REG(p_hwfn, QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET, mask); + qed_enable_pf_rl(p_hwfn, p_params->pf_rl_en); + qed_enable_pf_wfq(p_hwfn, p_params->pf_wfq_en); + qed_enable_vport_rl(p_hwfn, p_params->vport_rl_en); + qed_enable_vport_wfq(p_hwfn, p_params->vport_wfq_en); + qed_cmdq_lines_rt_init(p_hwfn, + p_params->max_ports_per_engine, + p_params->max_phys_tcs_per_port, + p_params->port_params); + qed_btb_blocks_rt_init(p_hwfn, + p_params->max_ports_per_engine, + p_params->max_phys_tcs_per_port, + p_params->port_params); + return 0; +} + +int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_qm_pf_rt_init_params *p_params) +{ + struct init_qm_vport_params *vport_params = p_params->vport_params; + u32 other_mem_size_4kb = QM_PQ_MEM_4KB(p_params->num_pf_cids + + p_params->num_tids) * + QM_OTHER_PQS_PER_PF; + u8 tc, i; + + /* clear first Tx PQ ID array for each VPORT */ + for (i = 0; i < p_params->num_vports; i++) + for (tc = 0; tc < NUM_OF_TCS; tc++) + vport_params[i].first_tx_pq_id[tc] = QM_INVALID_PQ_ID; + + /* map Other PQs (if any) */ + qed_other_pq_map_rt_init(p_hwfn, p_params->port_id, p_params->pf_id, + p_params->num_pf_cids, p_params->num_tids, 0); + + /* map Tx PQs */ + qed_tx_pq_map_rt_init(p_hwfn, p_ptt, p_params, other_mem_size_4kb); + + if (p_params->pf_wfq) + if (qed_pf_wfq_rt_init(p_hwfn, p_params)) + return -1; + + if (qed_pf_rl_rt_init(p_hwfn, p_params->pf_id, p_params->pf_rl)) + return -1; + + if (qed_vp_wfq_rt_init(p_hwfn, p_params->start_vport, + p_params->num_vports, vport_params)) + return -1; + + if (qed_vport_rl_rt_init(p_hwfn, p_params->start_vport, + p_params->num_vports, vport_params)) + return -1; + + return 0; +} + +int qed_init_pf_rl(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 pf_id, + u32 pf_rl) +{ + u32 inc_val = QM_RL_INC_VAL(pf_rl); + + if (inc_val > QM_RL_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, "Invalid PF rate limit configuration"); + return -1; + } + + qed_wr(p_hwfn, p_ptt, + QM_REG_RLPFCRD + pf_id * 4, + QM_RL_CRD_REG_SIGN_BIT); + qed_wr(p_hwfn, p_ptt, QM_REG_RLPFINCVAL + pf_id * 4, inc_val); + + return 0; +} + +int qed_init_vport_rl(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 vport_id, + u32 vport_rl) +{ + u32 inc_val = QM_RL_INC_VAL(vport_rl); + + if (inc_val > QM_RL_MAX_INC_VAL) { + DP_NOTICE(p_hwfn, "Invalid VPORT rate-limit configuration"); + return -1; + } + + qed_wr(p_hwfn, p_ptt, + QM_REG_RLGLBLCRD + vport_id * 4, + 
QM_RL_CRD_REG_SIGN_BIT); + qed_wr(p_hwfn, p_ptt, QM_REG_RLGLBLINCVAL + vport_id * 4, inc_val); + + return 0; +} + +bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool is_release_cmd, + bool is_tx_pq, + u16 start_pq, + u16 num_pqs) +{ + u32 cmd_arr[QM_CMD_STRUCT_SIZE(QM_STOP_CMD)] = { 0 }; + u32 pq_mask = 0, last_pq = start_pq + num_pqs - 1, pq_id; + + /* set command's PQ type */ + QM_CMD_SET_FIELD(cmd_arr, QM_STOP_CMD, PQ_TYPE, is_tx_pq ? 0 : 1); + + for (pq_id = start_pq; pq_id <= last_pq; pq_id++) { + /* set PQ bit in mask (stop command only) */ + if (!is_release_cmd) + pq_mask |= (1 << (pq_id % QM_STOP_PQ_MASK_WIDTH)); + + /* if last PQ or end of PQ mask, write command */ + if ((pq_id == last_pq) || + (pq_id % QM_STOP_PQ_MASK_WIDTH == + (QM_STOP_PQ_MASK_WIDTH - 1))) { + QM_CMD_SET_FIELD(cmd_arr, QM_STOP_CMD, + PAUSE_MASK, pq_mask); + QM_CMD_SET_FIELD(cmd_arr, QM_STOP_CMD, + GROUP_ID, + pq_id / QM_STOP_PQ_MASK_WIDTH); + if (!qed_send_qm_cmd(p_hwfn, p_ptt, QM_STOP_CMD_ADDR, + cmd_arr[0], cmd_arr[1])) + return false; + pq_mask = 0; + } + } + + return true; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c new file mode 100644 index 000000000000..796f1390e598 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -0,0 +1,531 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_reg_addr.h" + +#define QED_INIT_MAX_POLL_COUNT 100 +#define QED_INIT_POLL_PERIOD_US 500 + +static u32 pxp_global_win[] = { + 0, + 0, + 0x1c02, /* win 2: addr=0x1c02000, size=4096 bytes */ + 0x1c80, /* win 3: addr=0x1c80000, size=4096 bytes */ + 0x1d00, /* win 4: addr=0x1d00000, size=4096 bytes */ + 0x1d01, /* win 5: addr=0x1d01000, size=4096 bytes */ + 0x1d80, /* win 6: addr=0x1d80000, size=4096 bytes */ + 0x1d81, /* win 7: addr=0x1d81000, size=4096 bytes */ + 0x1d82, /* win 8: addr=0x1d82000, size=4096 bytes */ + 0x1e00, /* win 9: addr=0x1e00000, size=4096 bytes */ + 0x1e80, /* win 10: addr=0x1e80000, size=4096 bytes */ + 0x1f00, /* win 11: addr=0x1f00000, size=4096 bytes */ + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + +void qed_init_iro_array(struct qed_dev *cdev) +{ + cdev->iro_arr = iro_arr; +} + +/* Runtime configuration helpers */ +void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn) +{ + int i; + + for (i = 0; i < RUNTIME_ARRAY_SIZE; i++) + p_hwfn->rt_data[i].b_valid = false; +} + +void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, + u32 rt_offset, + u32 val) +{ + p_hwfn->rt_data[rt_offset].init_val = val; + p_hwfn->rt_data[rt_offset].b_valid = true; +} + +void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn, + u32 rt_offset, + u32 *val, + size_t size) +{ + size_t i; + + for (i = 0; i < size / sizeof(u32); i++) { + p_hwfn->rt_data[rt_offset + i].init_val = val[i]; + p_hwfn->rt_data[rt_offset + i].b_valid = true; + } +} + +static void qed_init_rt(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 addr, + u32 rt_offset, + u32 size) +{ + struct qed_rt_data *rt_data = p_hwfn->rt_data + rt_offset; + u32 i; + + for (i = 0; i < size; i++) { + if (!rt_data[i].b_valid) + continue; + qed_wr(p_hwfn, p_ptt, addr + (i << 2), 
rt_data[i].init_val); + } +} + +int qed_init_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_rt_data *rt_data; + + rt_data = kzalloc(sizeof(*rt_data) * RUNTIME_ARRAY_SIZE, GFP_ATOMIC); + if (!rt_data) + return -ENOMEM; + + p_hwfn->rt_data = rt_data; + + return 0; +} + +void qed_init_free(struct qed_hwfn *p_hwfn) +{ + kfree(p_hwfn->rt_data); + p_hwfn->rt_data = NULL; +} + +static int qed_init_array_dmae(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 addr, + u32 dmae_data_offset, + u32 size, + const u32 *buf, + bool b_must_dmae, + bool b_can_dmae) +{ + int rc = 0; + + /* Perform DMAE only for lengthy enough sections or for wide-bus */ + if (!b_can_dmae || (!b_must_dmae && (size < 16))) { + const u32 *data = buf + dmae_data_offset; + u32 i; + + for (i = 0; i < size; i++) + qed_wr(p_hwfn, p_ptt, addr + (i << 2), data[i]); + } else { + rc = qed_dmae_host2grc(p_hwfn, p_ptt, + (uintptr_t)(buf + dmae_data_offset), + addr, size, 0); + } + + return rc; +} + +static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 addr, + u32 fill, + u32 fill_count) +{ + static u32 zero_buffer[DMAE_MAX_RW_SIZE]; + + memset(zero_buffer, 0, sizeof(u32) * DMAE_MAX_RW_SIZE); + + /* invoke the DMAE virtual/physical buffer API with + * 1. DMAE init channel + * 2. addr, + * 3. p_hwfb->temp_data, + * 4. fill_count + */ + + return qed_dmae_host2grc(p_hwfn, p_ptt, + (uintptr_t)(&zero_buffer[0]), + addr, fill_count, + QED_DMAE_FLAG_RW_REPL_SRC); +} + +static void qed_init_fill(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 addr, + u32 fill, + u32 fill_count) +{ + u32 i; + + for (i = 0; i < fill_count; i++, addr += sizeof(u32)) + qed_wr(p_hwfn, p_ptt, addr, fill); +} + +static int qed_init_cmd_array(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct init_write_op *cmd, + bool b_must_dmae, + bool b_can_dmae) +{ + u32 data = le32_to_cpu(cmd->data); + u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; + u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset); + u32 offset, output_len, input_len, max_size; + struct qed_dev *cdev = p_hwfn->cdev; + union init_array_hdr *hdr; + const u32 *array_data; + int rc = 0; + u32 size; + + array_data = cdev->fw_data->arr_data; + + hdr = (union init_array_hdr *)(array_data + + dmae_array_offset); + data = le32_to_cpu(hdr->raw.data); + switch (GET_FIELD(data, INIT_ARRAY_RAW_HDR_TYPE)) { + case INIT_ARR_ZIPPED: + offset = dmae_array_offset + 1; + input_len = GET_FIELD(data, + INIT_ARRAY_ZIPPED_HDR_ZIPPED_SIZE); + max_size = MAX_ZIPPED_SIZE * 4; + memset(p_hwfn->unzip_buf, 0, max_size); + + output_len = qed_unzip_data(p_hwfn, input_len, + (u8 *)&array_data[offset], + max_size, (u8 *)p_hwfn->unzip_buf); + if (output_len) { + rc = qed_init_array_dmae(p_hwfn, p_ptt, addr, 0, + output_len, + p_hwfn->unzip_buf, + b_must_dmae, b_can_dmae); + } else { + DP_NOTICE(p_hwfn, "Failed to unzip dmae data\n"); + rc = -EINVAL; + } + break; + case INIT_ARR_PATTERN: + { + u32 repeats = GET_FIELD(data, + INIT_ARRAY_PATTERN_HDR_REPETITIONS); + u32 i; + + size = GET_FIELD(data, INIT_ARRAY_PATTERN_HDR_PATTERN_SIZE); + + for (i = 0; i < repeats; i++, addr += size << 2) { + rc = qed_init_array_dmae(p_hwfn, p_ptt, addr, + dmae_array_offset + 1, + size, array_data, + b_must_dmae, b_can_dmae); + if (rc) + break; + } + break; + } + case INIT_ARR_STANDARD: + size = GET_FIELD(data, INIT_ARRAY_STANDARD_HDR_SIZE); + rc = qed_init_array_dmae(p_hwfn, p_ptt, addr, + dmae_array_offset + 1, + size, array_data, + b_must_dmae, b_can_dmae); + break; + } + + return rc; +} + 
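+/* A worked example of the INIT_ARR_PATTERN case above: with size = 2
+ * and repeats = 3, the same two-word pattern starting at
+ * dmae_array_offset + 1 is written three times, at addr, addr + 8 and
+ * addr + 16, since addr advances by size << 2 bytes per repetition.
+ */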
+/* init_ops write command */ +static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct init_write_op *cmd, + bool b_can_dmae) +{ + u32 data = le32_to_cpu(cmd->data); + u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; + bool b_must_dmae = GET_FIELD(data, INIT_WRITE_OP_WIDE_BUS); + union init_write_args *arg = &cmd->args; + int rc = 0; + + /* Sanitize */ + if (b_must_dmae && !b_can_dmae) { + DP_NOTICE(p_hwfn, + "Need to write to %08x for Wide-bus but DMAE isn't allowed\n", + addr); + return -EINVAL; + } + + switch (GET_FIELD(data, INIT_WRITE_OP_SOURCE)) { + case INIT_SRC_INLINE: + qed_wr(p_hwfn, p_ptt, addr, + le32_to_cpu(arg->inline_val)); + break; + case INIT_SRC_ZEROS: + if (b_must_dmae || + (b_can_dmae && (le32_to_cpu(arg->zeros_count) >= 64))) + rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, + le32_to_cpu(arg->zeros_count)); + else + qed_init_fill(p_hwfn, p_ptt, addr, 0, + le32_to_cpu(arg->zeros_count)); + break; + case INIT_SRC_ARRAY: + rc = qed_init_cmd_array(p_hwfn, p_ptt, cmd, + b_must_dmae, b_can_dmae); + break; + case INIT_SRC_RUNTIME: + qed_init_rt(p_hwfn, p_ptt, addr, + le16_to_cpu(arg->runtime.offset), + le16_to_cpu(arg->runtime.size)); + break; + } + + return rc; +} + +static inline bool comp_eq(u32 val, u32 expected_val) +{ + return val == expected_val; +} + +static inline bool comp_and(u32 val, u32 expected_val) +{ + return (val & expected_val) == expected_val; +} + +static inline bool comp_or(u32 val, u32 expected_val) +{ + return (val | expected_val) > 0; +} + +/* init_ops read/poll commands */ +static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct init_read_op *cmd) +{ + u32 data = le32_to_cpu(cmd->op_data); + u32 addr = GET_FIELD(data, INIT_READ_OP_ADDRESS) << 2; + + bool (*comp_check)(u32 val, + u32 expected_val); + u32 delay = QED_INIT_POLL_PERIOD_US, val; + + val = qed_rd(p_hwfn, p_ptt, addr); + + data = le32_to_cpu(cmd->op_data); + if (GET_FIELD(data, INIT_READ_OP_POLL)) { + int i; + + switch (GET_FIELD(data, INIT_READ_OP_POLL_COMP)) { + case INIT_COMPARISON_EQ: + comp_check = comp_eq; + break; + case INIT_COMPARISON_OR: + comp_check = comp_or; + break; + case INIT_COMPARISON_AND: + comp_check = comp_and; + break; + default: + comp_check = NULL; + DP_ERR(p_hwfn, "Invalid poll comparison type %08x\n", + data); + return; + } + + for (i = 0; + i < QED_INIT_MAX_POLL_COUNT && + !comp_check(val, le32_to_cpu(cmd->expected_val)); + i++) { + udelay(delay); + val = qed_rd(p_hwfn, p_ptt, addr); + } + + if (i == QED_INIT_MAX_POLL_COUNT) + DP_ERR(p_hwfn, + "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n", + addr, le32_to_cpu(cmd->expected_val), + val, data); + } +} + +/* init_ops callbacks entry point */ +static void qed_init_cmd_cb(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct init_callback_op *p_cmd) +{ + DP_NOTICE(p_hwfn, "Currently init values have no need of callbacks\n"); +} + +static u8 qed_init_cmd_mode_match(struct qed_hwfn *p_hwfn, + u16 *offset, + int modes) +{ + struct qed_dev *cdev = p_hwfn->cdev; + const u8 *modes_tree_buf; + u8 arg1, arg2, tree_val; + + modes_tree_buf = cdev->fw_data->modes_tree_buf; + tree_val = modes_tree_buf[(*offset)++]; + switch (tree_val) { + case INIT_MODE_OP_NOT: + return qed_init_cmd_mode_match(p_hwfn, offset, modes) ^ 1; + case INIT_MODE_OP_OR: + arg1 = qed_init_cmd_mode_match(p_hwfn, offset, modes); + arg2 = qed_init_cmd_mode_match(p_hwfn, offset, modes); + return arg1 | arg2; + case INIT_MODE_OP_AND: + arg1 = 
qed_init_cmd_mode_match(p_hwfn, offset, modes); + arg2 = qed_init_cmd_mode_match(p_hwfn, offset, modes); + return arg1 & arg2; + default: + tree_val -= MAX_INIT_MODE_OPS; + return (modes & (1 << tree_val)) ? 1 : 0; + } +} + +static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn, + struct init_if_mode_op *p_cmd, + int modes) +{ + u16 offset = le16_to_cpu(p_cmd->modes_buf_offset); + + if (qed_init_cmd_mode_match(p_hwfn, &offset, modes)) + return 0; + else + return GET_FIELD(le32_to_cpu(p_cmd->op_data), + INIT_IF_MODE_OP_CMD_OFFSET); +} + +static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn, + struct init_if_phase_op *p_cmd, + u32 phase, + u32 phase_id) +{ + u32 data = le32_to_cpu(p_cmd->phase_data); + u32 op_data = le32_to_cpu(p_cmd->op_data); + + if (!(GET_FIELD(data, INIT_IF_PHASE_OP_PHASE) == phase && + (GET_FIELD(data, INIT_IF_PHASE_OP_PHASE_ID) == ANY_PHASE_ID || + GET_FIELD(data, INIT_IF_PHASE_OP_PHASE_ID) == phase_id))) + return GET_FIELD(op_data, INIT_IF_PHASE_OP_CMD_OFFSET); + else + return 0; +} + +int qed_init_run(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + int phase, + int phase_id, + int modes) +{ + struct qed_dev *cdev = p_hwfn->cdev; + u32 cmd_num, num_init_ops; + union init_op *init_ops; + bool b_dmae = false; + int rc = 0; + + num_init_ops = cdev->fw_data->init_ops_size; + init_ops = cdev->fw_data->init_ops; + + p_hwfn->unzip_buf = kzalloc(MAX_ZIPPED_SIZE * 4, GFP_ATOMIC); + if (!p_hwfn->unzip_buf) { + DP_NOTICE(p_hwfn, "Failed to allocate unzip buffer\n"); + return -ENOMEM; + } + + for (cmd_num = 0; cmd_num < num_init_ops; cmd_num++) { + union init_op *cmd = &init_ops[cmd_num]; + u32 data = le32_to_cpu(cmd->raw.op_data); + + switch (GET_FIELD(data, INIT_CALLBACK_OP_OP)) { + case INIT_OP_WRITE: + rc = qed_init_cmd_wr(p_hwfn, p_ptt, &cmd->write, + b_dmae); + break; + case INIT_OP_READ: + qed_init_cmd_rd(p_hwfn, p_ptt, &cmd->read); + break; + case INIT_OP_IF_MODE: + cmd_num += qed_init_cmd_mode(p_hwfn, &cmd->if_mode, + modes); + break; + case INIT_OP_IF_PHASE: + cmd_num += qed_init_cmd_phase(p_hwfn, &cmd->if_phase, + phase, phase_id); + b_dmae = GET_FIELD(data, INIT_IF_PHASE_OP_DMAE_ENABLE); + break; + case INIT_OP_DELAY: + /* qed_init_run is always invoked from + * sleep-able context + */ + udelay(le32_to_cpu(cmd->delay.delay)); + break; + + case INIT_OP_CALLBACK: + qed_init_cmd_cb(p_hwfn, p_ptt, &cmd->callback); + break; + } + + if (rc) + break; + } + + kfree(p_hwfn->unzip_buf); + return rc; +} + +void qed_gtt_init(struct qed_hwfn *p_hwfn) +{ + u32 gtt_base; + u32 i; + + /* Set the global windows */ + gtt_base = PXP_PF_WINDOW_ADMIN_START + PXP_PF_WINDOW_ADMIN_GLOBAL_START; + + for (i = 0; i < ARRAY_SIZE(pxp_global_win); i++) + if (pxp_global_win[i]) + REG_WR(p_hwfn, gtt_base + i * PXP_GLOBAL_ENTRY_SIZE, + pxp_global_win[i]); +} + +int qed_init_fw_data(struct qed_dev *cdev, + const u8 *data) +{ + struct qed_fw_data *fw = cdev->fw_data; + struct bin_buffer_hdr *buf_hdr; + u32 offset, len; + + if (!data) { + DP_NOTICE(cdev, "Invalid fw data\n"); + return -EINVAL; + } + + buf_hdr = (struct bin_buffer_hdr *)data; + + offset = buf_hdr[BIN_BUF_INIT_CMD].offset; + fw->init_ops = (union init_op *)(data + offset); + + offset = buf_hdr[BIN_BUF_INIT_VAL].offset; + fw->arr_data = (u32 *)(data + offset); + + offset = buf_hdr[BIN_BUF_INIT_MODE_TREE].offset; + fw->modes_tree_buf = (u8 *)(data + offset); + len = buf_hdr[BIN_BUF_INIT_CMD].length; + fw->init_ops_size = len / sizeof(struct init_raw_op); + + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.h 
b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h new file mode 100644 index 000000000000..1e832049983d --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h @@ -0,0 +1,110 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_INIT_OPS_H +#define _QED_INIT_OPS_H + +#include +#include +#include "qed.h" + +/** + * @brief qed_init_iro_array - init iro_arr. + * + * + * @param cdev + */ +void qed_init_iro_array(struct qed_dev *cdev); + +/** + * @brief qed_init_run - Run the init-sequence. + * + * + * @param p_hwfn + * @param p_ptt + * @param phase + * @param phase_id + * @param modes + * @return _qed_status_t + */ +int qed_init_run(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + int phase, + int phase_id, + int modes); + +/** + * @brief qed_init_hwfn_allocate - Allocate RT array, Store 'values' ptrs. + * + * + * @param p_hwfn + * + * @return _qed_status_t + */ +int qed_init_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_init_hwfn_deallocate + * + * + * @param p_hwfn + */ +void qed_init_free(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_init_clear_rt_data - Clears the runtime init array. + * + * + * @param p_hwfn + */ +void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_init_store_rt_reg - Store a configuration value in the RT array. + * + * + * @param p_hwfn + * @param rt_offset + * @param val + */ +void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, + u32 rt_offset, + u32 val); + +#define STORE_RT_REG(hwfn, offset, val) \ + qed_init_store_rt_reg(hwfn, offset, val) + +#define OVERWRITE_RT_REG(hwfn, offset, val) \ + qed_init_store_rt_reg(hwfn, offset, val) + +/** + * @brief + * + * + * @param p_hwfn + * @param rt_offset + * @param val + * @param size + */ +void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn, + u32 rt_offset, + u32 *val, + size_t size); + +#define STORE_RT_REG_AGG(hwfn, offset, val) \ + qed_init_store_rt_agg(hwfn, offset, (u32 *)&val, sizeof(val)) + +/** + * @brief + * Initialize GTT global windows and set admin window + * related params of GTT/PTT to default values. + * + * @param p_hwfn + */ +void qed_gtt_init(struct qed_hwfn *p_hwfn); +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c new file mode 100644 index 000000000000..37d926a5fae5 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -0,0 +1,802 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
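+ *
+ * This file implements the slowpath interrupt path of a hw-function:
+ * status block allocation and setup, CAU/IGU configuration, and
+ * dispatch of the registered protocol-index callbacks from the
+ * slowpath DPC.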
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_int.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +struct qed_pi_info { + qed_int_comp_cb_t comp_cb; + void *cookie; +}; + +struct qed_sb_sp_info { + struct qed_sb_info sb_info; + + /* per protocol index data */ + struct qed_pi_info pi_info_arr[PIS_PER_SB]; +}; + +void qed_int_sp_dpc(unsigned long hwfn_cookie) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)hwfn_cookie; + struct qed_pi_info *pi_info = NULL; + struct qed_sb_info *sb_info; + int arr_size; + u16 rc = 0; + + if (!p_hwfn) { + DP_ERR(p_hwfn->cdev, "DPC called - no hwfn!\n"); + return; + } + + if (!p_hwfn->p_sp_sb) { + DP_ERR(p_hwfn->cdev, "DPC called - no p_sp_sb\n"); + return; + } + + sb_info = &p_hwfn->p_sp_sb->sb_info; + arr_size = ARRAY_SIZE(p_hwfn->p_sp_sb->pi_info_arr); + if (!sb_info) { + DP_ERR(p_hwfn->cdev, + "Status block is NULL - cannot ack interrupts\n"); + return; + } + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "DPC Called! (hwfn %p %d)\n", + p_hwfn, p_hwfn->my_id); + + /* Disable ack for def status block. Required both for msix + + * inta in non-mask mode, in inta does no harm. + */ + qed_sb_ack(sb_info, IGU_INT_DISABLE, 0); + + /* Gather Interrupts/Attentions information */ + if (!sb_info->sb_virt) { + DP_ERR( + p_hwfn->cdev, + "Interrupt Status block is NULL - cannot check for new interrupts!\n"); + } else { + u32 tmp_index = sb_info->sb_ack; + + rc = qed_sb_update_sb_idx(sb_info); + DP_VERBOSE(p_hwfn->cdev, NETIF_MSG_INTR, + "Interrupt indices: 0x%08x --> 0x%08x\n", + tmp_index, sb_info->sb_ack); + } + + /* Check if we expect interrupts at this time. if not just ack them */ + if (!(rc & QED_SB_EVENT_MASK)) { + qed_sb_ack(sb_info, IGU_INT_ENABLE, 1); + return; + } + + /* Check the validity of the DPC ptt. 
If it is not valid, ack the interrupts and fail */
+	if (!p_hwfn->p_dpc_ptt) {
+		DP_NOTICE(p_hwfn->cdev, "Failed to allocate PTT\n");
+		qed_sb_ack(sb_info, IGU_INT_ENABLE, 1);
+		return;
+	}
+
+	if (rc & QED_SB_IDX) {
+		int pi;
+
+		/* Invoke every registered protocol-index callback */
+		for (pi = 0; pi < arr_size; pi++) {
+			pi_info = &p_hwfn->p_sp_sb->pi_info_arr[pi];
+			if (pi_info->comp_cb)
+				pi_info->comp_cb(p_hwfn, pi_info->cookie);
+		}
+	}
+
+	qed_sb_ack(sb_info, IGU_INT_ENABLE, 1);
+}
+
+/* coalescing timeout = timeset << (timer_res + 1) */
+#define QED_CAU_DEF_RX_USECS 24
+#define QED_CAU_DEF_TX_USECS 48
+
+void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
+			   struct cau_sb_entry *p_sb_entry,
+			   u8 pf_id,
+			   u16 vf_number,
+			   u8 vf_valid)
+{
+	u32 cau_state;
+
+	memset(p_sb_entry, 0, sizeof(*p_sb_entry));
+
+	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_PF_NUMBER, pf_id);
+	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_VF_NUMBER, vf_number);
+	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_VF_VALID, vf_valid);
+	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET0, 0x7F);
+	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET1, 0x7F);
+
+	/* set the timer resolution to the fixed default */
+	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES0,
+		  QED_CAU_DEF_RX_TIMER_RES);
+	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES1,
+		  QED_CAU_DEF_TX_TIMER_RES);
+
+	cau_state = CAU_HC_DISABLE_STATE;
+
+	if (p_hwfn->cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) {
+		cau_state = CAU_HC_ENABLE_STATE;
+		if (!p_hwfn->cdev->rx_coalesce_usecs)
+			p_hwfn->cdev->rx_coalesce_usecs =
+				QED_CAU_DEF_RX_USECS;
+		if (!p_hwfn->cdev->tx_coalesce_usecs)
+			p_hwfn->cdev->tx_coalesce_usecs =
+				QED_CAU_DEF_TX_USECS;
+	}
+
+	SET_FIELD(p_sb_entry->data, CAU_SB_ENTRY_STATE0, cau_state);
+	SET_FIELD(p_sb_entry->data, CAU_SB_ENTRY_STATE1, cau_state);
+}
+
+void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 dma_addr_t sb_phys,
+			 u16 igu_sb_id,
+			 u16 vf_number,
+			 u8 vf_valid)
+{
+	struct cau_sb_entry sb_entry;
+	u32 val;
+
+	qed_init_cau_sb_entry(p_hwfn, &sb_entry, p_hwfn->rel_pf_id,
+			      vf_number, vf_valid);
+
+	if (p_hwfn->hw_init_done) {
+		val = CAU_REG_SB_ADDR_MEMORY + igu_sb_id * sizeof(u64);
+		qed_wr(p_hwfn, p_ptt, val, lower_32_bits(sb_phys));
+		qed_wr(p_hwfn, p_ptt, val + sizeof(u32),
+		       upper_32_bits(sb_phys));
+
+		val = CAU_REG_SB_VAR_MEMORY + igu_sb_id * sizeof(u64);
+		qed_wr(p_hwfn, p_ptt, val, sb_entry.data);
+		qed_wr(p_hwfn, p_ptt, val + sizeof(u32), sb_entry.params);
+	} else {
+		/* Initialize Status Block Address */
+		STORE_RT_REG_AGG(p_hwfn,
+				 CAU_REG_SB_ADDR_MEMORY_RT_OFFSET +
+				 igu_sb_id * 2,
+				 sb_phys);
+
+		STORE_RT_REG_AGG(p_hwfn,
+				 CAU_REG_SB_VAR_MEMORY_RT_OFFSET +
+				 igu_sb_id * 2,
+				 sb_entry);
+	}
+
+	/* Configure pi coalescing if set */
+	if (p_hwfn->cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) {
+		u8 timeset = p_hwfn->cdev->rx_coalesce_usecs >>
+			     (QED_CAU_DEF_RX_TIMER_RES + 1);
+		u8 num_tc = 1, i;
+
+		qed_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI,
+				    QED_COAL_RX_STATE_MACHINE,
+				    timeset);
+
+		timeset = p_hwfn->cdev->tx_coalesce_usecs >>
+			  (QED_CAU_DEF_TX_TIMER_RES + 1);
+
+		for (i = 0; i < num_tc; i++) {
+			qed_int_cau_conf_pi(p_hwfn, p_ptt,
+					    igu_sb_id, TX_PI(i),
+					    QED_COAL_TX_STATE_MACHINE,
+					    timeset);
+		}
+	}
+}
+
+void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 u16 igu_sb_id,
+			 u32 pi_index,
+			 enum qed_coalescing_fsm coalescing_fsm,
+			 u8 timeset)
+{
+	struct cau_pi_entry pi_entry;
+	u32 sb_offset;
+	u32 pi_offset;
+
+	sb_offset = igu_sb_id * PIS_PER_SB;
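+	/* Each SB owns PIS_PER_SB consecutive one-u32 entries in
+	 * CAU_REG_PI_MEMORY, so the entry for (igu_sb_id, pi_index)
+	 * sits at word sb_offset + pi_index. Taking PIS_PER_SB = 12
+	 * (the per-SB PI count the IGU cleanup code below also uses),
+	 * igu_sb_id = 5 and pi_index = 1 select word 61, i.e. byte
+	 * offset CAU_REG_PI_MEMORY + 244. The timeset argument is the
+	 * desired timeout right-shifted by (timer_res + 1), matching
+	 * the "timeout = timeset << (timer_res + 1)" formula above:
+	 * e.g. 24 us with timer_res = 0 gives timeset = 12.
+	 */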
memset(&pi_entry, 0, sizeof(struct cau_pi_entry)); + + SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_PI_TIMESET, timeset); + if (coalescing_fsm == QED_COAL_RX_STATE_MACHINE) + SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_FSM_SEL, 0); + else + SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_FSM_SEL, 1); + + pi_offset = sb_offset + pi_index; + if (p_hwfn->hw_init_done) { + qed_wr(p_hwfn, p_ptt, + CAU_REG_PI_MEMORY + pi_offset * sizeof(u32), + *((u32 *)&(pi_entry))); + } else { + STORE_RT_REG(p_hwfn, + CAU_REG_PI_MEMORY_RT_OFFSET + pi_offset, + *((u32 *)&(pi_entry))); + } +} + +void qed_int_sb_setup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_sb_info *sb_info) +{ + /* zero status block and ack counter */ + sb_info->sb_ack = 0; + memset(sb_info->sb_virt, 0, sizeof(*sb_info->sb_virt)); + + qed_int_cau_conf_sb(p_hwfn, p_ptt, sb_info->sb_phys, + sb_info->igu_sb_id, 0, 0); +} + +/** + * @brief qed_get_igu_sb_id - given a sw sb_id return the + * igu_sb_id + * + * @param p_hwfn + * @param sb_id + * + * @return u16 + */ +static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, + u16 sb_id) +{ + u16 igu_sb_id; + + /* Assuming continuous set of IGU SBs dedicated for given PF */ + if (sb_id == QED_SP_SB_ID) + igu_sb_id = p_hwfn->hw_info.p_igu_info->igu_dsb_id; + else + igu_sb_id = sb_id + p_hwfn->hw_info.p_igu_info->igu_base_sb; + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "SB [%s] index is 0x%04x\n", + (sb_id == QED_SP_SB_ID) ? "DSB" : "non-DSB", igu_sb_id); + + return igu_sb_id; +} + +int qed_int_sb_init(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_sb_info *sb_info, + void *sb_virt_addr, + dma_addr_t sb_phy_addr, + u16 sb_id) +{ + sb_info->sb_virt = sb_virt_addr; + sb_info->sb_phys = sb_phy_addr; + + sb_info->igu_sb_id = qed_get_igu_sb_id(p_hwfn, sb_id); + + if (sb_id != QED_SP_SB_ID) { + p_hwfn->sbs_info[sb_id] = sb_info; + p_hwfn->num_sbs++; + } + + sb_info->cdev = p_hwfn->cdev; + + /* The igu address will hold the absolute address that needs to be + * written to for a specific status block + */ + sb_info->igu_addr = (u8 __iomem *)p_hwfn->regview + + GTT_BAR0_MAP_REG_IGU_CMD + + (sb_info->igu_sb_id << 3); + + sb_info->flags |= QED_SB_INFO_INIT; + + qed_int_sb_setup(p_hwfn, p_ptt, sb_info); + + return 0; +} + +int qed_int_sb_release(struct qed_hwfn *p_hwfn, + struct qed_sb_info *sb_info, + u16 sb_id) +{ + if (sb_id == QED_SP_SB_ID) { + DP_ERR(p_hwfn, "Do Not free sp sb using this function"); + return -EINVAL; + } + + /* zero status block and ack counter */ + sb_info->sb_ack = 0; + memset(sb_info->sb_virt, 0, sizeof(*sb_info->sb_virt)); + + p_hwfn->sbs_info[sb_id] = NULL; + p_hwfn->num_sbs--; + + return 0; +} + +static void qed_int_sp_sb_free(struct qed_hwfn *p_hwfn) +{ + struct qed_sb_sp_info *p_sb = p_hwfn->p_sp_sb; + + if (p_sb) { + if (p_sb->sb_info.sb_virt) + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + SB_ALIGNED_SIZE(p_hwfn), + p_sb->sb_info.sb_virt, + p_sb->sb_info.sb_phys); + kfree(p_sb); + } +} + +static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_sb_sp_info *p_sb; + dma_addr_t p_phys = 0; + void *p_virt; + + /* SB struct */ + p_sb = kmalloc(sizeof(*p_sb), GFP_ATOMIC); + if (!p_sb) { + DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sb_info'\n"); + return -ENOMEM; + } + + /* SB ring */ + p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + SB_ALIGNED_SIZE(p_hwfn), + &p_phys, GFP_KERNEL); + if (!p_virt) { + DP_NOTICE(p_hwfn, "Failed to allocate status block\n"); + kfree(p_sb); + return -ENOMEM; + } + + /* Status Block setup */ + 
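+	/* From here on both allocations belong to p_hwfn;
+	 * qed_int_sp_sb_free() above releases the DMA ring with the
+	 * same SB_ALIGNED_SIZE() length and then kfree()s the wrapper
+	 * struct, so no further unwinding is needed in callers.
+	 */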
p_hwfn->p_sp_sb = p_sb; + qed_int_sb_init(p_hwfn, p_ptt, &p_sb->sb_info, p_virt, + p_phys, QED_SP_SB_ID); + + memset(p_sb->pi_info_arr, 0, sizeof(p_sb->pi_info_arr)); + + return 0; +} + +static void qed_int_sp_sb_setup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + if (!p_hwfn) + return; + + if (p_hwfn->p_sp_sb) + qed_int_sb_setup(p_hwfn, p_ptt, &p_hwfn->p_sp_sb->sb_info); + else + DP_NOTICE(p_hwfn->cdev, + "Failed to setup Slow path status block - NULL pointer\n"); +} + +int qed_int_register_cb(struct qed_hwfn *p_hwfn, + qed_int_comp_cb_t comp_cb, + void *cookie, + u8 *sb_idx, + __le16 **p_fw_cons) +{ + struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb; + int qed_status = -ENOMEM; + u8 pi; + + /* Look for a free index */ + for (pi = 0; pi < ARRAY_SIZE(p_sp_sb->pi_info_arr); pi++) { + if (!p_sp_sb->pi_info_arr[pi].comp_cb) { + p_sp_sb->pi_info_arr[pi].comp_cb = comp_cb; + p_sp_sb->pi_info_arr[pi].cookie = cookie; + *sb_idx = pi; + *p_fw_cons = &p_sp_sb->sb_info.sb_virt->pi_array[pi]; + qed_status = 0; + break; + } + } + + return qed_status; +} + +int qed_int_unregister_cb(struct qed_hwfn *p_hwfn, u8 pi) +{ + struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb; + int qed_status = -ENOMEM; + + if (p_sp_sb->pi_info_arr[pi].comp_cb) { + p_sp_sb->pi_info_arr[pi].comp_cb = NULL; + p_sp_sb->pi_info_arr[pi].cookie = NULL; + qed_status = 0; + } + + return qed_status; +} + +u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn) +{ + return p_hwfn->p_sp_sb->sb_info.igu_sb_id; +} + +void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_int_mode int_mode) +{ + u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN; + + p_hwfn->cdev->int_mode = int_mode; + switch (p_hwfn->cdev->int_mode) { + case QED_INT_MODE_INTA: + igu_pf_conf |= IGU_PF_CONF_INT_LINE_EN; + igu_pf_conf |= IGU_PF_CONF_SINGLE_ISR_EN; + break; + + case QED_INT_MODE_MSI: + igu_pf_conf |= IGU_PF_CONF_MSI_MSIX_EN; + igu_pf_conf |= IGU_PF_CONF_SINGLE_ISR_EN; + break; + + case QED_INT_MODE_MSIX: + igu_pf_conf |= IGU_PF_CONF_MSI_MSIX_EN; + break; + case QED_INT_MODE_POLL: + break; + } + + qed_wr(p_hwfn, p_ptt, IGU_REG_PF_CONFIGURATION, igu_pf_conf); +} + +void qed_int_igu_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_int_mode int_mode) +{ + int i; + + p_hwfn->b_int_enabled = 1; + + /* Mask non-link attentions */ + for (i = 0; i < 9; i++) + qed_wr(p_hwfn, p_ptt, + MISC_REG_AEU_ENABLE1_IGU_OUT_0 + (i << 2), 0); + + /* Enable interrupt Generation */ + qed_int_igu_enable_int(p_hwfn, p_ptt, int_mode); + + /* Flush the writes to IGU */ + mmiowb(); +} + +void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + p_hwfn->b_int_enabled = 0; + + qed_wr(p_hwfn, p_ptt, IGU_REG_PF_CONFIGURATION, 0); +} + +#define IGU_CLEANUP_SLEEP_LENGTH (1000) +void qed_int_igu_cleanup_sb(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 sb_id, + bool cleanup_set, + u16 opaque_fid + ) +{ + u32 pxp_addr = IGU_CMD_INT_ACK_BASE + sb_id; + u32 sleep_cnt = IGU_CLEANUP_SLEEP_LENGTH; + u32 data = 0; + u32 cmd_ctrl = 0; + u32 val = 0; + u32 sb_bit = 0; + u32 sb_bit_addr = 0; + + /* Set the data field */ + SET_FIELD(data, IGU_CLEANUP_CLEANUP_SET, cleanup_set ? 
1 : 0); + SET_FIELD(data, IGU_CLEANUP_CLEANUP_TYPE, 0); + SET_FIELD(data, IGU_CLEANUP_COMMAND_TYPE, IGU_COMMAND_TYPE_SET); + + /* Set the control register */ + SET_FIELD(cmd_ctrl, IGU_CTRL_REG_PXP_ADDR, pxp_addr); + SET_FIELD(cmd_ctrl, IGU_CTRL_REG_FID, opaque_fid); + SET_FIELD(cmd_ctrl, IGU_CTRL_REG_TYPE, IGU_CTRL_CMD_TYPE_WR); + + qed_wr(p_hwfn, p_ptt, IGU_REG_COMMAND_REG_32LSB_DATA, data); + + barrier(); + + qed_wr(p_hwfn, p_ptt, IGU_REG_COMMAND_REG_CTRL, cmd_ctrl); + + /* Flush the write to IGU */ + mmiowb(); + + /* calculate where to read the status bit from */ + sb_bit = 1 << (sb_id % 32); + sb_bit_addr = sb_id / 32 * sizeof(u32); + + sb_bit_addr += IGU_REG_CLEANUP_STATUS_0; + + /* Now wait for the command to complete */ + do { + val = qed_rd(p_hwfn, p_ptt, sb_bit_addr); + + if ((val & sb_bit) == (cleanup_set ? sb_bit : 0)) + break; + + usleep_range(5000, 10000); + } while (--sleep_cnt); + + if (!sleep_cnt) + DP_NOTICE(p_hwfn, + "Timeout waiting for clear status 0x%08x [for sb %d]\n", + val, sb_id); +} + +void qed_int_igu_init_pure_rt_single(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 sb_id, + u16 opaque, + bool b_set) +{ + int pi; + + /* Set */ + if (b_set) + qed_int_igu_cleanup_sb(p_hwfn, p_ptt, sb_id, 1, opaque); + + /* Clear */ + qed_int_igu_cleanup_sb(p_hwfn, p_ptt, sb_id, 0, opaque); + + /* Clear the CAU for the SB */ + for (pi = 0; pi < 12; pi++) + qed_wr(p_hwfn, p_ptt, + CAU_REG_PI_MEMORY + (sb_id * 12 + pi) * 4, 0); +} + +void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool b_set, + bool b_slowpath) +{ + u32 igu_base_sb = p_hwfn->hw_info.p_igu_info->igu_base_sb; + u32 igu_sb_cnt = p_hwfn->hw_info.p_igu_info->igu_sb_cnt; + u32 sb_id = 0; + u32 val = 0; + + val = qed_rd(p_hwfn, p_ptt, IGU_REG_BLOCK_CONFIGURATION); + val |= IGU_REG_BLOCK_CONFIGURATION_VF_CLEANUP_EN; + val &= ~IGU_REG_BLOCK_CONFIGURATION_PXP_TPH_INTERFACE_EN; + qed_wr(p_hwfn, p_ptt, IGU_REG_BLOCK_CONFIGURATION, val); + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "IGU cleaning SBs [%d,...,%d]\n", + igu_base_sb, igu_base_sb + igu_sb_cnt - 1); + + for (sb_id = igu_base_sb; sb_id < igu_base_sb + igu_sb_cnt; sb_id++) + qed_int_igu_init_pure_rt_single(p_hwfn, p_ptt, sb_id, + p_hwfn->hw_info.opaque_fid, + b_set); + + if (b_slowpath) { + sb_id = p_hwfn->hw_info.p_igu_info->igu_dsb_id; + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "IGU cleaning slowpath SB [%d]\n", sb_id); + qed_int_igu_init_pure_rt_single(p_hwfn, p_ptt, sb_id, + p_hwfn->hw_info.opaque_fid, + b_set); + } +} + +int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_igu_info *p_igu_info; + struct qed_igu_block *blk; + u32 val; + u16 sb_id; + u16 prev_sb_id = 0xFF; + + p_hwfn->hw_info.p_igu_info = kzalloc(sizeof(*p_igu_info), GFP_ATOMIC); + + if (!p_hwfn->hw_info.p_igu_info) + return -ENOMEM; + + p_igu_info = p_hwfn->hw_info.p_igu_info; + + /* Initialize base sb / sb cnt for PFs */ + p_igu_info->igu_base_sb = 0xffff; + p_igu_info->igu_sb_cnt = 0; + p_igu_info->igu_dsb_id = 0xffff; + p_igu_info->igu_base_sb_iov = 0xffff; + + for (sb_id = 0; sb_id < QED_MAPPING_MEMORY_SIZE(p_hwfn->cdev); + sb_id++) { + blk = &p_igu_info->igu_map.igu_blocks[sb_id]; + + val = qed_rd(p_hwfn, p_ptt, + IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id); + + /* stop scanning when hit first invalid PF entry */ + if (!GET_FIELD(val, IGU_MAPPING_LINE_VALID) && + GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID)) + break; + + blk->status = QED_IGU_STATUS_VALID; + blk->function_id = GET_FIELD(val, + 
IGU_MAPPING_LINE_FUNCTION_NUMBER); + blk->is_pf = GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID); + blk->vector_number = GET_FIELD(val, + IGU_MAPPING_LINE_VECTOR_NUMBER); + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "IGU_BLOCK[sb_id]:%x:func_id = %d is_pf = %d vector_num = 0x%x\n", + val, blk->function_id, blk->is_pf, + blk->vector_number); + + if (blk->is_pf) { + if (blk->function_id == p_hwfn->rel_pf_id) { + blk->status |= QED_IGU_STATUS_PF; + + if (blk->vector_number == 0) { + if (p_igu_info->igu_dsb_id == 0xffff) + p_igu_info->igu_dsb_id = sb_id; + } else { + if (p_igu_info->igu_base_sb == + 0xffff) { + p_igu_info->igu_base_sb = sb_id; + } else if (prev_sb_id != sb_id - 1) { + DP_NOTICE(p_hwfn->cdev, + "consecutive igu vectors for HWFN %x broken", + p_hwfn->rel_pf_id); + break; + } + prev_sb_id = sb_id; + /* we don't count the default */ + (p_igu_info->igu_sb_cnt)++; + } + } + } + } + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "IGU igu_base_sb=0x%x igu_sb_cnt=%d igu_dsb_id=0x%x\n", + p_igu_info->igu_base_sb, + p_igu_info->igu_sb_cnt, + p_igu_info->igu_dsb_id); + + if (p_igu_info->igu_base_sb == 0xffff || + p_igu_info->igu_dsb_id == 0xffff || + p_igu_info->igu_sb_cnt == 0) { + DP_NOTICE(p_hwfn, + "IGU CAM returned invalid values igu_base_sb=0x%x igu_sb_cnt=%d igu_dsb_id=0x%x\n", + p_igu_info->igu_base_sb, + p_igu_info->igu_sb_cnt, + p_igu_info->igu_dsb_id); + return -EINVAL; + } + + return 0; +} + +/** + * @brief Initialize igu runtime registers + * + * @param p_hwfn + */ +void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn) +{ + u32 igu_pf_conf = 0; + + igu_pf_conf |= IGU_PF_CONF_FUNC_EN; + + STORE_RT_REG(p_hwfn, IGU_REG_PF_CONFIGURATION_RT_OFFSET, igu_pf_conf); +} + +u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn) +{ + u64 intr_status = 0; + u32 intr_status_lo = 0; + u32 intr_status_hi = 0; + u32 lsb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_LSB_UPPER - + IGU_CMD_INT_ACK_BASE; + u32 msb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_MSB_UPPER - + IGU_CMD_INT_ACK_BASE; + + intr_status_lo = REG_RD(p_hwfn, + GTT_BAR0_MAP_REG_IGU_CMD + + lsb_igu_cmd_addr * 8); + intr_status_hi = REG_RD(p_hwfn, + GTT_BAR0_MAP_REG_IGU_CMD + + msb_igu_cmd_addr * 8); + intr_status = ((u64)intr_status_hi << 32) + (u64)intr_status_lo; + + return intr_status; +} + +static void qed_int_sp_dpc_setup(struct qed_hwfn *p_hwfn) +{ + tasklet_init(p_hwfn->sp_dpc, + qed_int_sp_dpc, (unsigned long)p_hwfn); + p_hwfn->b_sp_dpc_enabled = true; +} + +static int qed_int_sp_dpc_alloc(struct qed_hwfn *p_hwfn) +{ + p_hwfn->sp_dpc = kmalloc(sizeof(*p_hwfn->sp_dpc), GFP_ATOMIC); + if (!p_hwfn->sp_dpc) + return -ENOMEM; + + return 0; +} + +static void qed_int_sp_dpc_free(struct qed_hwfn *p_hwfn) +{ + kfree(p_hwfn->sp_dpc); +} + +int qed_int_alloc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + int rc = 0; + + rc = qed_int_sp_dpc_alloc(p_hwfn); + if (rc) { + DP_ERR(p_hwfn->cdev, "Failed to allocate sp dpc mem\n"); + return rc; + } + rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt); + if (rc) { + DP_ERR(p_hwfn->cdev, "Failed to allocate sp sb mem\n"); + return rc; + } + + return rc; +} + +void qed_int_free(struct qed_hwfn *p_hwfn) +{ + qed_int_sp_sb_free(p_hwfn); + qed_int_sp_dpc_free(p_hwfn); +} + +void qed_int_setup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + qed_int_sp_sb_setup(p_hwfn, p_ptt); + qed_int_sp_dpc_setup(p_hwfn); +} + +int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn, + int *p_iov_blks) +{ + struct qed_igu_info *info = p_hwfn->hw_info.p_igu_info; + + if (!info) + return 0; + + if (p_iov_blks) + *p_iov_blks = info->free_blks; + + 
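+	/* igu_sb_cnt is built by qed_int_igu_read_cam() and
+	 * deliberately excludes the default (slowpath) SB, so the
+	 * value returned here counts only SBs usable for fastpath.
+	 */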
return info->igu_sb_cnt;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
new file mode 100644
index 000000000000..16b57518e706
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -0,0 +1,391 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_INT_H
+#define _QED_INT_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include "qed.h"
+
+/* Fields of IGU PF CONFIGURATION REGISTER */
+#define IGU_PF_CONF_FUNC_EN	(0x1 << 0)	/* function enable */
+#define IGU_PF_CONF_MSI_MSIX_EN	(0x1 << 1)	/* MSI/MSIX enable */
+#define IGU_PF_CONF_INT_LINE_EN	(0x1 << 2)	/* INT enable */
+#define IGU_PF_CONF_ATTN_BIT_EN	(0x1 << 3)	/* attention enable */
+#define IGU_PF_CONF_SINGLE_ISR_EN	(0x1 << 4)	/* single ISR mode enable */
+#define IGU_PF_CONF_SIMD_MODE	(0x1 << 5)	/* simd all ones mode */
+
+/* IGU control commands
+ */
+enum igu_ctrl_cmd {
+	IGU_CTRL_CMD_TYPE_RD,
+	IGU_CTRL_CMD_TYPE_WR,
+	MAX_IGU_CTRL_CMD
+};
+
+/* Control register for the IGU command register
+ */
+struct igu_ctrl_reg {
+	u32 ctrl_data;
+#define IGU_CTRL_REG_FID_MASK		0xFFFF	/* Opaque_FID */
+#define IGU_CTRL_REG_FID_SHIFT		0
+#define IGU_CTRL_REG_PXP_ADDR_MASK	0xFFF	/* Command address */
+#define IGU_CTRL_REG_PXP_ADDR_SHIFT	16
+#define IGU_CTRL_REG_RESERVED_MASK	0x1
+#define IGU_CTRL_REG_RESERVED_SHIFT	28
+#define IGU_CTRL_REG_TYPE_MASK		0x1	/* use enum igu_ctrl_cmd */
+#define IGU_CTRL_REG_TYPE_SHIFT		31
+};
+
+enum qed_coalescing_fsm {
+	QED_COAL_RX_STATE_MACHINE,
+	QED_COAL_TX_STATE_MACHINE
+};
+
+/**
+ * @brief qed_int_cau_conf_pi - configure cau for a given
+ *        status block
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param igu_sb_id
+ * @param pi_index
+ * @param coalescing_fsm
+ * @param timeset
+ */
+void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 u16 igu_sb_id,
+			 u32 pi_index,
+			 enum qed_coalescing_fsm coalescing_fsm,
+			 u8 timeset);
+
+/**
+ * @brief qed_int_igu_enable_int - enable device interrupts
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param int_mode - interrupt mode to use
+ */
+void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt,
+			    enum qed_int_mode int_mode);
+
+/**
+ * @brief qed_int_igu_disable_int - disable device interrupts
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt);
+
+/**
+ * @brief qed_int_igu_read_sisr_reg - Reads the single isr multiple dpc
+ *        register from igu.
+ *
+ * @param p_hwfn
+ *
+ * @return u64
+ */
+u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn);
+
+#define QED_SP_SB_ID 0xffff
+/**
+ * @brief qed_int_sb_init - Initializes the sb_info structure.
+ *
+ * Once the structure is initialized it can be passed to sb related functions.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param sb_info	points to an uninitialized (but
+ *			allocated) sb_info structure
+ * @param sb_virt_addr
+ * @param sb_phy_addr
+ * @param sb_id		the sb_id to be used (zero based in driver)
+ *			should use QED_SP_SB_ID for SP Status block
+ *
+ * @return int
+ */
+int qed_int_sb_init(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt,
+		    struct qed_sb_info *sb_info,
+		    void *sb_virt_addr,
+		    dma_addr_t sb_phy_addr,
+		    u16 sb_id);
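+
+/* A minimal usage sketch (illustrative only; "dev", "virt" and "phys" are
+ * hypothetical locals, not part of the qed API). The caller provides
+ * coherent DMA memory for the status block and hands it to qed:
+ *
+ *	virt = dma_alloc_coherent(dev, sizeof(struct status_block),
+ *				  &phys, GFP_KERNEL);
+ *	if (virt)
+ *		rc = qed_int_sb_init(p_hwfn, p_hwfn->p_main_ptt, &sb_info,
+ *				     virt, phys, QED_SP_SB_ID);
+ */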
+/**
+ * @brief qed_int_sb_setup - Setup the sb.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param sb_info	initialized sb_info structure
+ */
+void qed_int_sb_setup(struct qed_hwfn *p_hwfn,
+		      struct qed_ptt *p_ptt,
+		      struct qed_sb_info *sb_info);
+
+/**
+ * @brief qed_int_sb_release - releases the sb_info structure.
+ *
+ * Once the structure is released, its memory can be freed.
+ *
+ * @param p_hwfn
+ * @param sb_info	points to an allocated sb_info structure
+ * @param sb_id		the sb_id to be used (zero based in driver)
+ *			should never be equal to QED_SP_SB_ID
+ *			(SP Status block)
+ *
+ * @return int
+ */
+int qed_int_sb_release(struct qed_hwfn *p_hwfn,
+		       struct qed_sb_info *sb_info,
+		       u16 sb_id);
+
+/**
+ * @brief qed_int_sp_dpc - To be called when an interrupt is received on the
+ *        default status block.
+ *
+ * @param p_hwfn - pointer to hwfn
+ *
+ */
+void qed_int_sp_dpc(unsigned long hwfn_cookie);
+
+/**
+ * @brief qed_int_get_num_sbs - get the number of status
+ *        blocks configured for this function in the igu.
+ *
+ * @param p_hwfn
+ * @param p_iov_blks - configured free blks for vfs
+ *
+ * @return int - number of status blocks configured
+ */
+int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
+			int *p_iov_blks);
+
+/**
+ * @file
+ *
+ * @brief Interrupt handler
+ */
+
+#define QED_CAU_DEF_RX_TIMER_RES 0
+#define QED_CAU_DEF_TX_TIMER_RES 0
+
+#define QED_SB_ATT_IDX	0x0001
+#define QED_SB_EVENT_MASK	0x0003
+
+#define SB_ALIGNED_SIZE(p_hwfn)	\
+	ALIGNED_TYPE_SIZE(struct status_block, p_hwfn)
+
+struct qed_igu_block {
+	u8	status;
+#define QED_IGU_STATUS_FREE	0x01
+#define QED_IGU_STATUS_VALID	0x02
+#define QED_IGU_STATUS_PF	0x04
+
+	u8	vector_number;
+	u8	function_id;
+	u8	is_pf;
+};
+
+struct qed_igu_map {
+	struct qed_igu_block igu_blocks[MAX_TOT_SB_PER_PATH];
+};
+
+struct qed_igu_info {
+	struct qed_igu_map	igu_map;
+	u16			igu_dsb_id;
+	u16			igu_base_sb;
+	u16			igu_base_sb_iov;
+	u16			igu_sb_cnt;
+	u16			igu_sb_cnt_iov;
+	u16			free_blks;
+};
+
+/* TODO Names of functions may change... */
+void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt,
+			      bool b_set,
+			      bool b_slowpath);
+
+void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_int_igu_read_cam - Reads the IGU CAM.
+ *	This function needs to be called during hardware
+ *	prepare. It reads the info from igu cam to know which
+ *	status block is the default / base status block etc.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return int
+ */
+int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt);
+
+typedef int (*qed_int_comp_cb_t)(struct qed_hwfn *p_hwfn,
+				 void *cookie);
+/**
+ * @brief qed_int_register_cb - Register callback func for
+ *        slowhwfn status block.
+ *
+ *	Every protocol that uses the slowhwfn status block
+ *	should register a callback function that will be called
+ *	once there is an update of the sp status block.
+ *
+ * @param p_hwfn
+ * @param comp_cb - function to be called when there is an
+ *                  interrupt on the sp sb
+ *
+ * @param cookie  - passed to the callback function
+ * @param sb_idx  - OUT parameter which gives the chosen index
+ *                  for this protocol.
+ * @param p_fw_cons - pointer to the actual address of the
+ *                  consumer for this protocol.
+ *
+ * @return int
+ */
+int qed_int_register_cb(struct qed_hwfn *p_hwfn,
+			qed_int_comp_cb_t comp_cb,
+			void *cookie,
+			u8 *sb_idx,
+			__le16 **p_fw_cons);
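+
+/* Pairing sketch (illustrative; "my_cb" and "my_dev" are made-up names,
+ * not part of the API):
+ *
+ *	u8 sb_idx;
+ *	__le16 *p_fw_cons;
+ *
+ *	rc = qed_int_register_cb(p_hwfn, my_cb, my_dev, &sb_idx, &p_fw_cons);
+ *	...
+ *	rc = qed_int_unregister_cb(p_hwfn, sb_idx);
+ */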
+/**
+ * @brief qed_int_unregister_cb - Unregisters callback
+ *        function from sp sb.
+ *        Partner of qed_int_register_cb -> should be called
+ *        when no longer required.
+ *
+ * @param p_hwfn
+ * @param pi
+ *
+ * @return int
+ */
+int qed_int_unregister_cb(struct qed_hwfn *p_hwfn,
+			  u8 pi);
+
+/**
+ * @brief qed_int_get_sp_sb_id - Get the slowhwfn sb id.
+ *
+ * @param p_hwfn
+ *
+ * @return u16
+ */
+u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief Status block cleanup. Should be called for each status
+ *        block that will be used -> both PF / VF
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param sb_id		- igu status block id
+ * @param cleanup_set	- set(1) / clear(0)
+ * @param opaque_fid	- the function for which to perform
+ *			  cleanup, for example a PF on behalf of
+ *			  its VFs.
+ */
+void qed_int_igu_cleanup_sb(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt,
+			    u32 sb_id,
+			    bool cleanup_set,
+			    u16 opaque_fid);
+
+/**
+ * @brief Status block cleanup. Should be called for each status
+ *        block that will be used -> both PF / VF
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param sb_id		- igu status block id
+ * @param opaque	- opaque fid of the sb owner.
+ * @param b_set		- set(1) / clear(0)
+ */
+void qed_int_igu_init_pure_rt_single(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     u32 sb_id,
+				     u16 opaque,
+				     bool b_set);
+
+/**
+ * @brief qed_int_cau_conf_sb - configure cau for a given status
+ *        block
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param sb_phys
+ * @param igu_sb_id
+ * @param vf_number
+ * @param vf_valid
+ */
+void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 dma_addr_t sb_phys,
+			 u16 igu_sb_id,
+			 u16 vf_number,
+			 u8 vf_valid);
+
+/**
+ * @brief qed_int_alloc
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return int
+ */
+int qed_int_alloc(struct qed_hwfn *p_hwfn,
+		  struct qed_ptt *p_ptt);
+
+/**
+ * @brief qed_int_free
+ *
+ * @param p_hwfn
+ */
+void qed_int_free(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_int_setup
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+void qed_int_setup(struct qed_hwfn *p_hwfn,
+		   struct qed_ptt *p_ptt);
+
+/**
+ * @brief - Enable Interrupt & Attention for hw function
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param int_mode
+ */
+void qed_int_igu_enable(struct qed_hwfn *p_hwfn,
+			struct qed_ptt *p_ptt,
+			enum qed_int_mode int_mode);
+
+/**
+ * @brief - Initialize CAU status block entry
+ *
+ * @param p_hwfn
+ * @param p_sb_entry
+ * @param pf_id
+ * @param vf_number
+ * @param vf_valid
+ */
+void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
+			   struct cau_sb_entry *p_sb_entry,
+			   u8 pf_id,
+			   u16 vf_number,
+			   u8 vf_valid);
+
+#define QED_MAPPING_MEMORY_SIZE(dev)	(NUM_OF_SBS(dev))
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
new file mode 100644
index 000000000000..d0b1ff0ca3c8
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -0,0 +1,948 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/stddef.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/delay.h>
+#include <asm/byteorder.h>
+#include <linux/dma-mapping.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/qed/qed_if.h>
+
+#include "qed.h"
+#include "qed_sp.h"
+#include "qed_dev_api.h"
+#include "qed_mcp.h"
+#include "qed_hw.h"
+
+static const char version[] =
+	"QLogic QL4xxx 40G/100G Ethernet Driver qed " DRV_MODULE_VERSION "\n";
+
+MODULE_DESCRIPTION("QLogic 25G/40G/50G/100G Core Module");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+#define FW_FILE_VERSION				\
+	__stringify(FW_MAJOR_VERSION) "."	\
+	__stringify(FW_MINOR_VERSION) "."	\
+	__stringify(FW_REVISION_VERSION) "."	\
+	__stringify(FW_ENGINEERING_VERSION)
+
+#define QED_FW_FILE_NAME	\
+	"qed/qed_init_values_zipped-" FW_FILE_VERSION ".bin"
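+
+/* For example, a firmware version of 8.4.2.0 (digits illustrative) makes
+ * this expand to "qed/qed_init_values_zipped-8.4.2.0.bin", which
+ * qed_slowpath_start() fetches from /lib/firmware via request_firmware().
+ */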
+static int __init qed_init(void)
+{
+	pr_notice("qed_init called\n");
+
+	pr_info("%s", version);
+
+	return 0;
+}
+
+static void __exit qed_cleanup(void)
+{
+	pr_notice("qed_cleanup called\n");
+}
+
+module_init(qed_init);
+module_exit(qed_cleanup);
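+
+/* DMA mask policy below: try 64-bit streaming + coherent masks first, then
+ * fall back to a 32-bit streaming mask; if even that fails, probing aborts
+ * with -EIO since the device cannot address this platform's memory.
+ */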
+/* Check if the DMA controller on the machine can properly handle the DMA
+ * addressing required by the device.
+ */
+static int qed_set_coherency_mask(struct qed_dev *cdev)
+{
+	struct device *dev = &cdev->pdev->dev;
+
+	if (dma_set_mask(dev, DMA_BIT_MASK(64)) == 0) {
+		if (dma_set_coherent_mask(dev, DMA_BIT_MASK(64)) != 0) {
+			DP_NOTICE(cdev,
+				  "Can't request 64-bit consistent allocations\n");
+			return -EIO;
+		}
+	} else if (dma_set_mask(dev, DMA_BIT_MASK(32)) != 0) {
+		DP_NOTICE(cdev, "Can't request 64b/32b DMA addresses\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void qed_free_pci(struct qed_dev *cdev)
+{
+	struct pci_dev *pdev = cdev->pdev;
+
+	if (cdev->doorbells)
+		iounmap(cdev->doorbells);
+	if (cdev->regview)
+		iounmap(cdev->regview);
+	if (atomic_read(&pdev->enable_cnt) == 1)
+		pci_release_regions(pdev);
+
+	pci_disable_device(pdev);
+}
+
+/* Performs PCI initializations as well as initializing PCI-related parameters
+ * in the device structure. Returns 0 in case of success.
+ */
+static int qed_init_pci(struct qed_dev *cdev,
+			struct pci_dev *pdev)
+{
+	int rc;
+
+	cdev->pdev = pdev;
+
+	rc = pci_enable_device(pdev);
+	if (rc) {
+		DP_NOTICE(cdev, "Cannot enable PCI device\n");
+		goto err0;
+	}
+
+	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+		DP_NOTICE(cdev, "No memory region found in bar #0\n");
+		rc = -EIO;
+		goto err1;
+	}
+
+	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
+		DP_NOTICE(cdev, "No memory region found in bar #2\n");
+		rc = -EIO;
+		goto err1;
+	}
+
+	if (atomic_read(&pdev->enable_cnt) == 1) {
+		rc = pci_request_regions(pdev, "qed");
+		if (rc) {
+			DP_NOTICE(cdev,
+				  "Failed to request PCI memory resources\n");
+			goto err1;
+		}
+		pci_set_master(pdev);
+		pci_save_state(pdev);
+	}
+
+	if (!pci_is_pcie(pdev)) {
+		DP_NOTICE(cdev, "The bus is not PCI Express\n");
+		rc = -EIO;
+		goto err2;
+	}
+
+	cdev->pci_params.pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM);
+	if (cdev->pci_params.pm_cap == 0)
+		DP_NOTICE(cdev, "Cannot find power management capability\n");
+
+	rc = qed_set_coherency_mask(cdev);
+	if (rc)
+		goto err2;
+
+	cdev->pci_params.mem_start = pci_resource_start(pdev, 0);
+	cdev->pci_params.mem_end = pci_resource_end(pdev, 0);
+	cdev->pci_params.irq = pdev->irq;
+
+	cdev->regview = pci_ioremap_bar(pdev, 0);
+	if (!cdev->regview) {
+		DP_NOTICE(cdev, "Cannot map register space, aborting\n");
+		rc = -ENOMEM;
+		goto err2;
+	}
+
+	cdev->db_phys_addr = pci_resource_start(cdev->pdev, 2);
+	cdev->db_size = pci_resource_len(cdev->pdev, 2);
+	cdev->doorbells = ioremap_wc(cdev->db_phys_addr, cdev->db_size);
+	if (!cdev->doorbells) {
+		DP_NOTICE(cdev, "Cannot map doorbell space\n");
+		iounmap(cdev->regview);
+		rc = -ENOMEM;
+		goto err2;
+	}
+
+	return 0;
+
+err2:
+	pci_release_regions(pdev);
+err1:
+	pci_disable_device(pdev);
+err0:
+	return rc;
+}
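+
+/* BAR usage established above: BAR0 is the register window (regview),
+ * BAR2 the doorbell aperture, mapped write-combined via ioremap_wc().
+ */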
+int qed_fill_dev_info(struct qed_dev *cdev,
+		      struct qed_dev_info *dev_info)
+{
+	memset(dev_info, 0, sizeof(struct qed_dev_info));
+
+	dev_info->num_hwfns = cdev->num_hwfns;
+	dev_info->pci_mem_start = cdev->pci_params.mem_start;
+	dev_info->pci_mem_end = cdev->pci_params.mem_end;
+	dev_info->pci_irq = cdev->pci_params.irq;
+	dev_info->is_mf = IS_MF(&cdev->hwfns[0]);
+	ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
+
+	dev_info->fw_major = FW_MAJOR_VERSION;
+	dev_info->fw_minor = FW_MINOR_VERSION;
+	dev_info->fw_rev = FW_REVISION_VERSION;
+	dev_info->fw_eng = FW_ENGINEERING_VERSION;
+	dev_info->mf_mode = cdev->mf_mode;
+
+	qed_mcp_get_mfw_ver(cdev, &dev_info->mfw_rev);
+
+	return 0;
+}
+
+static void qed_free_cdev(struct qed_dev *cdev)
+{
+	kfree((void *)cdev);
+}
+
+static struct qed_dev *qed_alloc_cdev(struct pci_dev *pdev)
+{
+	struct qed_dev *cdev;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return cdev;
+
+	qed_init_struct(cdev);
+
+	return cdev;
+}
+
+/* Sets the requested power state */
+static int qed_set_power_state(struct qed_dev *cdev,
+			       pci_power_t state)
+{
+	if (!cdev)
+		return -ENODEV;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV, "Omitting Power state change\n");
+	return 0;
+}
+
+/* probing */
+static struct qed_dev *qed_probe(struct pci_dev *pdev,
+				 enum qed_protocol protocol,
+				 u32 dp_module,
+				 u8 dp_level)
+{
+	struct qed_dev *cdev;
+	int rc;
+
+	cdev = qed_alloc_cdev(pdev);
+	if (!cdev)
+		goto err0;
+
+	cdev->protocol = protocol;
+
+	qed_init_dp(cdev, dp_module, dp_level);
+
+	rc = qed_init_pci(cdev, pdev);
+	if (rc) {
+		DP_ERR(cdev, "init pci failed\n");
+		goto err1;
+	}
+	DP_INFO(cdev, "PCI init completed successfully\n");
+
+	rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT);
+	if (rc) {
+		DP_ERR(cdev, "hw prepare failed\n");
+		goto err2;
+	}
+
+	DP_INFO(cdev, "qed_probe completed successfully\n");
+
+	return cdev;
+
+err2:
+	qed_free_pci(cdev);
+err1:
+	qed_free_cdev(cdev);
+err0:
+	return NULL;
+}
+
+static void qed_remove(struct qed_dev *cdev)
+{
+	if (!cdev)
+		return;
+
+	qed_hw_remove(cdev);
+
+	qed_free_pci(cdev);
+
+	qed_set_power_state(cdev, PCI_D3hot);
+
+	qed_free_cdev(cdev);
+}
+
+static void qed_disable_msix(struct qed_dev *cdev)
+{
+	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
+		pci_disable_msix(cdev->pdev);
+		kfree(cdev->int_params.msix_table);
+	} else if (cdev->int_params.out.int_mode == QED_INT_MODE_MSI) {
+		pci_disable_msi(cdev->pdev);
+	}
+
+	memset(&cdev->int_params.out, 0, sizeof(struct qed_int_param));
+}
+
+static int qed_enable_msix(struct qed_dev *cdev,
+			   struct qed_int_params *int_params)
+{
+	int i, rc, cnt;
+
+	cnt = int_params->in.num_vectors;
+
+	for (i = 0; i < cnt; i++)
+		int_params->msix_table[i].entry = i;
+
+	rc = pci_enable_msix_range(cdev->pdev, int_params->msix_table,
+				   int_params->in.min_msix_cnt, cnt);
+	if (rc < cnt && rc >= int_params->in.min_msix_cnt &&
+	    (rc % cdev->num_hwfns)) {
+		pci_disable_msix(cdev->pdev);
+
+		/* If fastpath is initialized, we need at least one interrupt
+		 * per hwfn [and the slow path interrupts]. New requested number
+		 * should be a multiple of the number of hwfns.
+		 */
+		cnt = (rc / cdev->num_hwfns) * cdev->num_hwfns;
+		DP_NOTICE(cdev,
+			  "Trying to enable MSI-X with fewer vectors (%d out of %d)\n",
+			  cnt, int_params->in.num_vectors);
+		rc = pci_enable_msix_exact(cdev->pdev,
+					   int_params->msix_table, cnt);
+		if (!rc)
+			rc = cnt;
+	}
+
+	if (rc > 0) {
+		/* MSI-X configuration was achieved */
+		int_params->out.int_mode = QED_INT_MODE_MSIX;
+		int_params->out.num_vectors = rc;
+		rc = 0;
+	} else {
+		DP_NOTICE(cdev,
+			  "Failed to enable MSI-X [Requested %d vectors][rc %d]\n",
+			  cnt, rc);
+	}
+
+	return rc;
+}
+
+/* This function outputs the int mode and the number of enabled MSI-X vectors */
+static int qed_set_int_mode(struct qed_dev *cdev, bool force_mode)
+{
+	struct qed_int_params *int_params = &cdev->int_params;
+	struct msix_entry *tbl;
+	int rc = 0, cnt;
+
+	switch (int_params->in.int_mode) {
+	case QED_INT_MODE_MSIX:
+		/* Allocate MSIX table */
+		cnt = int_params->in.num_vectors;
+		int_params->msix_table = kcalloc(cnt, sizeof(*tbl), GFP_KERNEL);
+		if (!int_params->msix_table) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		/* Enable MSIX */
+		rc = qed_enable_msix(cdev, int_params);
+		if (!rc)
+			goto out;
+
+		DP_NOTICE(cdev, "Failed to enable MSI-X\n");
+		kfree(int_params->msix_table);
+		if (force_mode)
+			goto out;
+		/* Fallthrough */
+
+	case QED_INT_MODE_MSI:
+		rc = pci_enable_msi(cdev->pdev);
+		if (!rc) {
+			int_params->out.int_mode = QED_INT_MODE_MSI;
+			goto out;
+		}
+
+		DP_NOTICE(cdev, "Failed to enable MSI\n");
+		if (force_mode)
+			goto out;
+		/* Fallthrough */
+
+	case QED_INT_MODE_INTA:
+		int_params->out.int_mode = QED_INT_MODE_INTA;
+		rc = 0;
+		goto out;
+	default:
+		DP_NOTICE(cdev, "Unknown int_mode value %d\n",
+			  int_params->in.int_mode);
+		rc = -EINVAL;
+	}
+
+out:
+	cdev->int_coalescing_mode = QED_COAL_MODE_ENABLE;
+
+	return rc;
+}
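+
+/* Interrupt mode selection above degrades gracefully: MSI-X is tried
+ * first, then MSI, then legacy INTA - unless force_mode pins the
+ * requested mode.
+ */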
+static void qed_simd_handler_config(struct qed_dev *cdev, void *token,
+				    int index, void (*handler)(void *))
+{
+	struct qed_hwfn *hwfn = &cdev->hwfns[index % cdev->num_hwfns];
+	int relative_idx = index / cdev->num_hwfns;
+
+	hwfn->simd_proto_handler[relative_idx].func = handler;
+	hwfn->simd_proto_handler[relative_idx].token = token;
+}
+
+static void qed_simd_handler_clean(struct qed_dev *cdev, int index)
+{
+	struct qed_hwfn *hwfn = &cdev->hwfns[index % cdev->num_hwfns];
+	int relative_idx = index / cdev->num_hwfns;
+
+	memset(&hwfn->simd_proto_handler[relative_idx], 0,
+	       sizeof(struct qed_simd_fp_handler));
+}
+
+static irqreturn_t qed_msix_sp_int(int irq, void *tasklet)
+{
+	tasklet_schedule((struct tasklet_struct *)tasklet);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t qed_single_int(int irq, void *dev_instance)
+{
+	struct qed_dev *cdev = (struct qed_dev *)dev_instance;
+	struct qed_hwfn *hwfn;
+	irqreturn_t rc = IRQ_NONE;
+	u64 status;
+	int i, j;
+
+	for (i = 0; i < cdev->num_hwfns; i++) {
+		status = qed_int_igu_read_sisr_reg(&cdev->hwfns[i]);
+
+		if (!status)
+			continue;
+
+		hwfn = &cdev->hwfns[i];
+
+		/* Slowpath interrupt */
+		if (unlikely(status & 0x1)) {
+			tasklet_schedule(hwfn->sp_dpc);
+			status &= ~0x1;
+			rc = IRQ_HANDLED;
+		}
+
+		/* Fastpath interrupts */
+		for (j = 0; j < 64; j++) {
+			if ((0x2ULL << j) & status) {
+				hwfn->simd_proto_handler[j].func(
+					hwfn->simd_proto_handler[j].token);
+				status &= ~(0x2ULL << j);
+				rc = IRQ_HANDLED;
+			}
+		}
+
+		if (unlikely(status))
+			DP_VERBOSE(hwfn, NETIF_MSG_INTR,
+				   "got an unknown interrupt status 0x%llx\n",
+				   status);
+	}
+
+	return rc;
+}
+
+static int qed_slowpath_irq_req(struct qed_dev *cdev)
+{
+	int i = 0, rc = 0;
+
+	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
+		/* Request all the slowpath MSI-X vectors */
+		for (i = 0; i < cdev->num_hwfns; i++) {
+			snprintf(cdev->hwfns[i].name, NAME_SIZE,
+				 "sp-%d-%02x:%02x.%02x",
+				 i, cdev->pdev->bus->number,
+				 PCI_SLOT(cdev->pdev->devfn),
+				 cdev->hwfns[i].abs_pf_id);
+
+			rc = request_irq(cdev->int_params.msix_table[i].vector,
+					 qed_msix_sp_int, 0,
+					 cdev->hwfns[i].name,
+					 cdev->hwfns[i].sp_dpc);
+			if (rc)
+				break;
+
+			DP_VERBOSE(&cdev->hwfns[i],
+				   (NETIF_MSG_INTR | QED_MSG_SP),
+				   "Requested slowpath MSI-X\n");
+		}
+
+		if (i != cdev->num_hwfns) {
+			/* Free already requested MSI-X vectors */
+			for (i--; i >= 0; i--) {
+				unsigned int vec =
+					cdev->int_params.msix_table[i].vector;
+				synchronize_irq(vec);
+				free_irq(cdev->int_params.msix_table[i].vector,
+					 cdev->hwfns[i].sp_dpc);
+			}
+		}
+	} else {
+		unsigned long flags = 0;
+
+		snprintf(cdev->name, NAME_SIZE, "%02x:%02x.%02x",
+			 cdev->pdev->bus->number, PCI_SLOT(cdev->pdev->devfn),
+			 PCI_FUNC(cdev->pdev->devfn));
+
+		if (cdev->int_params.out.int_mode == QED_INT_MODE_INTA)
+			flags |= IRQF_SHARED;
+
+		rc = request_irq(cdev->pdev->irq, qed_single_int,
+				 flags, cdev->name, cdev);
+	}
+
+	return rc;
+}
+
+static void qed_slowpath_irq_free(struct qed_dev *cdev)
+{
+	int i;
+
+	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
+		for_each_hwfn(cdev, i) {
+			synchronize_irq(cdev->int_params.msix_table[i].vector);
+			free_irq(cdev->int_params.msix_table[i].vector,
+				 cdev->hwfns[i].sp_dpc);
+		}
+	} else {
+		free_irq(cdev->pdev->irq, cdev);
+	}
+}
+
+static int qed_nic_stop(struct qed_dev *cdev)
+{
+	int i, rc;
+
+	rc = qed_hw_stop(cdev);
+
+	for (i = 0; i < cdev->num_hwfns; i++) {
+		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+
+		if (p_hwfn->b_sp_dpc_enabled) {
+			tasklet_disable(p_hwfn->sp_dpc);
+			p_hwfn->b_sp_dpc_enabled = false;
+			DP_VERBOSE(cdev, NETIF_MSG_IFDOWN,
+				   "Disabled sp tasklet [hwfn %d] at %p\n",
+				   i, p_hwfn->sp_dpc);
+		}
+	}
+
+	return rc;
+}
+
+static int qed_nic_reset(struct qed_dev *cdev)
+{
+	int rc;
+
+	rc = qed_hw_reset(cdev);
+	if (rc)
+		return rc;
+
+	qed_resc_free(cdev);
+
+	return 0;
+}
+
+static int qed_nic_setup(struct qed_dev *cdev)
+{ + int rc; + + rc = qed_resc_alloc(cdev); + if (rc) + return rc; + + DP_INFO(cdev, "Allocated qed resources\n"); + + qed_resc_setup(cdev); + + return rc; +} + +static int qed_set_int_fp(struct qed_dev *cdev, u16 cnt) +{ + int limit = 0; + + /* Mark the fastpath as free/used */ + cdev->int_params.fp_initialized = cnt ? true : false; + + if (cdev->int_params.out.int_mode != QED_INT_MODE_MSIX) + limit = cdev->num_hwfns * 63; + else if (cdev->int_params.fp_msix_cnt) + limit = cdev->int_params.fp_msix_cnt; + + if (!limit) + return -ENOMEM; + + return min_t(int, cnt, limit); +} + +static int qed_get_int_fp(struct qed_dev *cdev, struct qed_int_info *info) +{ + memset(info, 0, sizeof(struct qed_int_info)); + + if (!cdev->int_params.fp_initialized) { + DP_INFO(cdev, + "Protocol driver requested interrupt information, but its support is not yet configured\n"); + return -EINVAL; + } + + /* Need to expose only MSI-X information; Single IRQ is handled solely + * by qed. + */ + if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { + int msix_base = cdev->int_params.fp_msix_base; + + info->msix_cnt = cdev->int_params.fp_msix_cnt; + info->msix = &cdev->int_params.msix_table[msix_base]; + } + + return 0; +} + +static int qed_slowpath_setup_int(struct qed_dev *cdev, + enum qed_int_mode int_mode) +{ + int rc, i; + u8 num_vectors = 0; + + memset(&cdev->int_params, 0, sizeof(struct qed_int_params)); + + cdev->int_params.in.int_mode = int_mode; + for_each_hwfn(cdev, i) + num_vectors += qed_int_get_num_sbs(&cdev->hwfns[i], NULL) + 1; + cdev->int_params.in.num_vectors = num_vectors; + + /* We want a minimum of one slowpath and one fastpath vector per hwfn */ + cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2; + + rc = qed_set_int_mode(cdev, false); + if (rc) { + DP_ERR(cdev, "qed_slowpath_setup_int ERR\n"); + return rc; + } + + cdev->int_params.fp_msix_base = cdev->num_hwfns; + cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors - + cdev->num_hwfns; + + return 0; +} + +u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, + u8 *input_buf, u32 max_size, u8 *unzip_buf) +{ + int rc; + + p_hwfn->stream->next_in = input_buf; + p_hwfn->stream->avail_in = input_len; + p_hwfn->stream->next_out = unzip_buf; + p_hwfn->stream->avail_out = max_size; + + rc = zlib_inflateInit2(p_hwfn->stream, MAX_WBITS); + + if (rc != Z_OK) { + DP_VERBOSE(p_hwfn, NETIF_MSG_DRV, "zlib init failed, rc = %d\n", + rc); + return 0; + } + + rc = zlib_inflate(p_hwfn->stream, Z_FINISH); + zlib_inflateEnd(p_hwfn->stream); + + if (rc != Z_OK && rc != Z_STREAM_END) { + DP_VERBOSE(p_hwfn, NETIF_MSG_DRV, "FW unzip error: %s, rc=%d\n", + p_hwfn->stream->msg, rc); + return 0; + } + + return p_hwfn->stream->total_out / 4; +} + +static int qed_alloc_stream_mem(struct qed_dev *cdev) +{ + int i; + void *workspace; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + p_hwfn->stream = kzalloc(sizeof(*p_hwfn->stream), GFP_KERNEL); + if (!p_hwfn->stream) + return -ENOMEM; + + workspace = vzalloc(zlib_inflate_workspacesize()); + if (!workspace) + return -ENOMEM; + p_hwfn->stream->workspace = workspace; + } + + return 0; +} + +static void qed_free_stream_mem(struct qed_dev *cdev) +{ + int i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + if (!p_hwfn->stream) + return; + + vfree(p_hwfn->stream->workspace); + kfree(p_hwfn->stream); + } +} + +static void qed_update_pf_params(struct qed_dev *cdev, + struct qed_pf_params *params) +{ + int i; + + for (i = 0; i < cdev->num_hwfns; i++) { + 
struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + p_hwfn->pf_params = *params; + } +} + +static int qed_slowpath_start(struct qed_dev *cdev, + struct qed_slowpath_params *params) +{ + struct qed_mcp_drv_version drv_version; + const u8 *data = NULL; + struct qed_hwfn *hwfn; + int rc; + + rc = request_firmware(&cdev->firmware, QED_FW_FILE_NAME, + &cdev->pdev->dev); + if (rc) { + DP_NOTICE(cdev, + "Failed to find fw file - /lib/firmware/%s\n", + QED_FW_FILE_NAME); + goto err; + } + + rc = qed_nic_setup(cdev); + if (rc) + goto err; + + rc = qed_slowpath_setup_int(cdev, params->int_mode); + if (rc) + goto err1; + + /* Request the slowpath IRQ */ + rc = qed_slowpath_irq_req(cdev); + if (rc) + goto err2; + + /* Allocate stream for unzipping */ + rc = qed_alloc_stream_mem(cdev); + if (rc) { + DP_NOTICE(cdev, "Failed to allocate stream memory\n"); + goto err3; + } + + /* Start the slowpath */ + data = cdev->firmware->data; + + rc = qed_hw_init(cdev, true, cdev->int_params.out.int_mode, + true, data); + if (rc) + goto err3; + + DP_INFO(cdev, + "HW initialization and function start completed successfully\n"); + + hwfn = QED_LEADING_HWFN(cdev); + drv_version.version = (params->drv_major << 24) | + (params->drv_minor << 16) | + (params->drv_rev << 8) | + (params->drv_eng); + strlcpy(drv_version.name, params->name, + MCP_DRV_VER_STR_SIZE - 4); + rc = qed_mcp_send_drv_version(hwfn, hwfn->p_main_ptt, + &drv_version); + if (rc) { + DP_NOTICE(cdev, "Failed sending drv version command\n"); + return rc; + } + + return 0; + +err3: + qed_free_stream_mem(cdev); + qed_slowpath_irq_free(cdev); +err2: + qed_disable_msix(cdev); +err1: + qed_resc_free(cdev); +err: + release_firmware(cdev->firmware); + + return rc; +} + +static int qed_slowpath_stop(struct qed_dev *cdev) +{ + if (!cdev) + return -ENODEV; + + qed_free_stream_mem(cdev); + + qed_nic_stop(cdev); + qed_slowpath_irq_free(cdev); + + qed_disable_msix(cdev); + qed_nic_reset(cdev); + + release_firmware(cdev->firmware); + + return 0; +} + +static void qed_set_id(struct qed_dev *cdev, char name[NAME_SIZE], + char ver_str[VER_SIZE]) +{ + int i; + + memcpy(cdev->name, name, NAME_SIZE); + for_each_hwfn(cdev, i) + snprintf(cdev->hwfns[i].name, NAME_SIZE, "%s-%d", name, i); + + memcpy(cdev->ver_str, ver_str, VER_SIZE); + cdev->drv_type = DRV_ID_DRV_TYPE_LINUX; +} + +static u32 qed_sb_init(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + void *sb_virt_addr, + dma_addr_t sb_phy_addr, u16 sb_id, + enum qed_sb_type type) +{ + struct qed_hwfn *p_hwfn; + int hwfn_index; + u16 rel_sb_id; + u8 n_hwfns; + u32 rc; + + /* RoCE uses single engine and CMT uses two engines. When using both + * we force only a single engine. Storage uses only engine 0 too. 
+ */ + if (type == QED_SB_TYPE_L2_QUEUE) + n_hwfns = cdev->num_hwfns; + else + n_hwfns = 1; + + hwfn_index = sb_id % n_hwfns; + p_hwfn = &cdev->hwfns[hwfn_index]; + rel_sb_id = sb_id / n_hwfns; + + DP_VERBOSE(cdev, NETIF_MSG_INTR, + "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", + hwfn_index, rel_sb_id, sb_id); + + rc = qed_int_sb_init(p_hwfn, p_hwfn->p_main_ptt, sb_info, + sb_virt_addr, sb_phy_addr, rel_sb_id); + + return rc; +} + +static u32 qed_sb_release(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + u16 sb_id) +{ + struct qed_hwfn *p_hwfn; + int hwfn_index; + u16 rel_sb_id; + u32 rc; + + hwfn_index = sb_id % cdev->num_hwfns; + p_hwfn = &cdev->hwfns[hwfn_index]; + rel_sb_id = sb_id / cdev->num_hwfns; + + DP_VERBOSE(cdev, NETIF_MSG_INTR, + "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", + hwfn_index, rel_sb_id, sb_id); + + rc = qed_int_sb_release(p_hwfn, sb_info, rel_sb_id); + + return rc; +} + +static int qed_drain(struct qed_dev *cdev) +{ + struct qed_hwfn *hwfn; + struct qed_ptt *ptt; + int i, rc; + + for_each_hwfn(cdev, i) { + hwfn = &cdev->hwfns[i]; + ptt = qed_ptt_acquire(hwfn); + if (!ptt) { + DP_NOTICE(hwfn, "Failed to drain NIG; No PTT\n"); + return -EBUSY; + } + rc = qed_mcp_drain(hwfn, ptt); + if (rc) + return rc; + qed_ptt_release(hwfn, ptt); + } + + return 0; +} + +const struct qed_common_ops qed_common_ops_pass = { + .probe = &qed_probe, + .remove = &qed_remove, + .set_power_state = &qed_set_power_state, + .set_id = &qed_set_id, + .update_pf_params = &qed_update_pf_params, + .slowpath_start = &qed_slowpath_start, + .slowpath_stop = &qed_slowpath_stop, + .set_fp_int = &qed_set_int_fp, + .get_fp_int = &qed_get_int_fp, + .sb_init = &qed_sb_init, + .sb_release = &qed_sb_release, + .simd_handler_config = &qed_simd_handler_config, + .simd_handler_clean = &qed_simd_handler_clean, + .drain = &qed_drain, + .update_msglvl = &qed_init_dp, + .chain_alloc = &qed_chain_alloc, + .chain_free = &qed_chain_free, +}; + +u32 qed_get_protocol_version(enum qed_protocol protocol) +{ + switch (protocol) { + case QED_PROTOCOL_ETH: + return QED_ETH_INTERFACE_VERSION; + default: + return 0; + } +} +EXPORT_SYMBOL(qed_get_protocol_version); diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c new file mode 100644 index 000000000000..601d3f5daf13 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -0,0 +1,549 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_mcp.h"
+#include "qed_reg_addr.h"
+#define CHIP_MCP_RESP_ITER_US 10
+
+#define QED_DRV_MB_MAX_RETRIES	(500 * 1000)	/* Account for 5 sec */
+#define QED_MCP_RESET_RETRIES	(50 * 1000)	/* Account for 500 msec */
+
+#define DRV_INNER_WR(_p_hwfn, _p_ptt, _ptr, _offset, _val)	     \
+	qed_wr(_p_hwfn, _p_ptt, (_p_hwfn->mcp_info->_ptr + _offset), \
+	       _val)
+
+#define DRV_INNER_RD(_p_hwfn, _p_ptt, _ptr, _offset) \
+	qed_rd(_p_hwfn, _p_ptt, (_p_hwfn->mcp_info->_ptr + _offset))
+
+#define DRV_MB_WR(_p_hwfn, _p_ptt, _field, _val)	  \
+	DRV_INNER_WR(_p_hwfn, _p_ptt, drv_mb_addr,	  \
+		     offsetof(struct public_drv_mb, _field), _val)
+
+#define DRV_MB_RD(_p_hwfn, _p_ptt, _field)	   \
+	DRV_INNER_RD(_p_hwfn, _p_ptt, drv_mb_addr, \
+		     offsetof(struct public_drv_mb, _field))
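+
+/* Expansion sketch: DRV_MB_WR(p_hwfn, p_ptt, drv_mb_param, 0) becomes
+ *
+ *	qed_wr(p_hwfn, p_ptt,
+ *	       p_hwfn->mcp_info->drv_mb_addr +
+ *	       offsetof(struct public_drv_mb, drv_mb_param), 0);
+ *
+ * i.e. every mailbox field access is a register read/write relative to
+ * the driver mailbox base discovered in qed_load_mcp_offsets().
+ */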
+
+#define PDA_COMP (((FW_MAJOR_VERSION) + (FW_MINOR_VERSION << 8)) << \
+		  DRV_ID_PDA_COMP_VER_SHIFT)
+
+#define MCP_BYTES_PER_MBIT_SHIFT	17
+
+bool qed_mcp_is_init(struct qed_hwfn *p_hwfn)
+{
+	if (!p_hwfn->mcp_info || !p_hwfn->mcp_info->public_base)
+		return false;
+	return true;
+}
+
+void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
+			   struct qed_ptt *p_ptt)
+{
+	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+					PUBLIC_PORT);
+	u32 mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, addr);
+
+	p_hwfn->mcp_info->port_addr = SECTION_ADDR(mfw_mb_offsize,
+						   MFW_PORT(p_hwfn));
+	DP_VERBOSE(p_hwfn, QED_MSG_SP,
+		   "port_addr = 0x%x, port_id 0x%02x\n",
+		   p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn));
+}
+
+void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
+		     struct qed_ptt *p_ptt)
+{
+	u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length);
+	u32 tmp, i;
+
+	if (!p_hwfn->mcp_info->public_base)
+		return;
+
+	for (i = 0; i < length; i++) {
+		tmp = qed_rd(p_hwfn, p_ptt,
+			     p_hwfn->mcp_info->mfw_mb_addr +
+			     (i << 2) + sizeof(u32));
+
+		/* The MB data is actually BE; Need to force it to cpu */
+		((u32 *)p_hwfn->mcp_info->mfw_mb_cur)[i] =
+			be32_to_cpu((__force __be32)tmp);
+	}
+}
+
+int qed_mcp_free(struct qed_hwfn *p_hwfn)
+{
+	if (p_hwfn->mcp_info) {
+		kfree(p_hwfn->mcp_info->mfw_mb_cur);
+		kfree(p_hwfn->mcp_info->mfw_mb_shadow);
+	}
+	kfree(p_hwfn->mcp_info);
+
+	return 0;
+}
+
+static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_info *p_info = p_hwfn->mcp_info;
+	u32 drv_mb_offsize, mfw_mb_offsize;
+	u32 mcp_pf_id = MCP_PF_ID(p_hwfn);
+
+	p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR);
+	if (!p_info->public_base)
+		return 0;
+
+	p_info->public_base |= GRCBASE_MCP;
+
+	/* Calculate the driver and MFW mailbox address */
+	drv_mb_offsize = qed_rd(p_hwfn, p_ptt,
+				SECTION_OFFSIZE_ADDR(p_info->public_base,
+						     PUBLIC_DRV_MB));
+	p_info->drv_mb_addr = SECTION_ADDR(drv_mb_offsize, mcp_pf_id);
+	DP_VERBOSE(p_hwfn, QED_MSG_SP,
+		   "drv_mb_offsize = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 0x%x\n",
+		   drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id);
+
+	/* Set the MFW MB address */
+	mfw_mb_offsize = qed_rd(p_hwfn, p_ptt,
+				SECTION_OFFSIZE_ADDR(p_info->public_base,
+						     PUBLIC_MFW_MB));
+	p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
+	p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr);
+
+	/* Get the current driver mailbox sequence before sending
+	 * the first command
+	 */
+	p_info->drv_mb_seq = DRV_MB_RD(p_hwfn, p_ptt, drv_mb_header) &
+			     DRV_MSG_SEQ_NUMBER_MASK;
+
+	/* Get current FW pulse sequence */
+	p_info->drv_pulse_seq = DRV_MB_RD(p_hwfn, p_ptt, drv_pulse_mb) &
+				DRV_PULSE_SEQ_MASK;
+
+	p_info->mcp_hist = (u16)qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
+
+	return 0;
+}
+
+int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
+		     struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_info *p_info;
+	u32 size;
+
+	/* Allocate mcp_info structure */
+	p_hwfn->mcp_info = kzalloc(sizeof(*p_hwfn->mcp_info), GFP_ATOMIC);
+	if (!p_hwfn->mcp_info)
+		goto err;
+	p_info = p_hwfn->mcp_info;
+
+	if (qed_load_mcp_offsets(p_hwfn, p_ptt) != 0) {
+		DP_NOTICE(p_hwfn, "MCP is not initialized\n");
+		/* Do not free mcp_info here, since public_base indicates that
+		 * the MCP is not initialized
+		 */
+		return 0;
+	}
+
+	size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
+	p_info->mfw_mb_cur = kzalloc(size, GFP_ATOMIC);
+	p_info->mfw_mb_shadow =
+		kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS(
+				p_info->mfw_mb_length), GFP_ATOMIC);
+	if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
+		goto err;
+
+	/* Initialize the MFW mutex */
+	mutex_init(&p_info->mutex);
+
+	return 0;
+
+err:
+	DP_NOTICE(p_hwfn, "Failed to allocate mcp memory\n");
+	qed_mcp_free(p_hwfn);
+	return -ENOMEM;
+}
+
+int qed_mcp_reset(struct qed_hwfn *p_hwfn,
+		  struct qed_ptt *p_ptt)
+{
+	u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
+	u8 delay = CHIP_MCP_RESP_ITER_US;
+	u32 org_mcp_reset_seq, cnt = 0;
+	int rc = 0;
+
+	/* Set drv command along with the updated sequence */
+	org_mcp_reset_seq = qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
+	DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header,
+		  (DRV_MSG_CODE_MCP_RESET | seq));
+
+	do {
+		/* Wait for MFW response */
+		udelay(delay);
+		/* Give the FW up to 500 msec (50*1000*10 usec) */
+	} while ((org_mcp_reset_seq == qed_rd(p_hwfn, p_ptt,
+					      MISCS_REG_GENERIC_POR_0)) &&
+		 (cnt++ < QED_MCP_RESET_RETRIES));
+
+	if (org_mcp_reset_seq !=
+	    qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0)) {
+		DP_VERBOSE(p_hwfn, QED_MSG_SP,
+			   "MCP was reset after %d usec\n", cnt * delay);
+	} else {
+		DP_ERR(p_hwfn, "Failed to reset MCP\n");
+		rc = -EAGAIN;
+	}
+
+	return rc;
+}
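+
+/* Command/response matching: the driver increments drv_mb_seq for every
+ * request and writes it alongside the command; the MFW echoes the same
+ * sequence number in fw_mb_header, which is how qed_do_mcp_cmd() below
+ * recognizes its reply.
+ */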
+static int qed_do_mcp_cmd(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  u32 cmd,
+			  u32 param,
+			  u32 *o_mcp_resp,
+			  u32 *o_mcp_param)
+{
+	u8 delay = CHIP_MCP_RESP_ITER_US;
+	u32 seq, cnt = 1, actual_mb_seq;
+	int rc = 0;
+
+	/* Get actual driver mailbox sequence */
+	actual_mb_seq = DRV_MB_RD(p_hwfn, p_ptt, drv_mb_header) &
+			DRV_MSG_SEQ_NUMBER_MASK;
+
+	/* Use MCP history register to check if MCP reset occurred between
+	 * init time and now.
+	 */
+	if (p_hwfn->mcp_info->mcp_hist !=
+	    qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0)) {
+		DP_VERBOSE(p_hwfn, QED_MSG_SP, "Rereading MCP offsets\n");
+		qed_load_mcp_offsets(p_hwfn, p_ptt);
+		qed_mcp_cmd_port_init(p_hwfn, p_ptt);
+	}
+	seq = ++p_hwfn->mcp_info->drv_mb_seq;
+
+	/* Set drv param */
+	DRV_MB_WR(p_hwfn, p_ptt, drv_mb_param, param);
+
+	/* Set drv command along with the updated sequence */
+	DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header, (cmd | seq));
+
+	DP_VERBOSE(p_hwfn, QED_MSG_SP,
+		   "wrote command (%x) to MFW MB param 0x%08x\n",
+		   (cmd | seq), param);
+
+	do {
+		/* Wait for MFW response */
+		udelay(delay);
+		*o_mcp_resp = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_header);
+
+		/* Give the FW up to 5 seconds (500*1000*10 usec) */
+	} while ((seq != (*o_mcp_resp & FW_MSG_SEQ_NUMBER_MASK)) &&
+		 (cnt++ < QED_DRV_MB_MAX_RETRIES));
+
+	DP_VERBOSE(p_hwfn, QED_MSG_SP,
+		   "[after %d usec] read (%x) seq is (%x) from FW MB\n",
+		   cnt * delay, *o_mcp_resp, seq);
+
+	/* Is this a reply to our command? */
+	if (seq == (*o_mcp_resp & FW_MSG_SEQ_NUMBER_MASK)) {
+		*o_mcp_resp &= FW_MSG_CODE_MASK;
+		/* Get the MCP param */
+		*o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
+	} else {
+		/* FW BUG! */
+		DP_ERR(p_hwfn, "MFW failed to respond!\n");
+		*o_mcp_resp = 0;
+		rc = -EAGAIN;
+	}
+	return rc;
+}
+
+int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+		struct qed_ptt *p_ptt,
+		u32 cmd,
+		u32 param,
+		u32 *o_mcp_resp,
+		u32 *o_mcp_param)
+{
+	int rc = 0;
+
+	/* MCP not initialized */
+	if (!qed_mcp_is_init(p_hwfn)) {
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
+		return -EBUSY;
+	}
+
+	/* Take the mutex to ensure that only a single thread is
+	 * accessing the MCP at one time
+	 */
+	mutex_lock(&p_hwfn->mcp_info->mutex);
+	rc = qed_do_mcp_cmd(p_hwfn, p_ptt, cmd, param,
+			    o_mcp_resp, o_mcp_param);
+	/* Release the mutex */
+	mutex_unlock(&p_hwfn->mcp_info->mutex);
+
+	return rc;
+}
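+
+/* Usage sketch (mirrors the NIG-drain request issued by qed_mcp_drain()
+ * below; the command and parameter values are taken from that caller):
+ *
+ *	u32 resp = 0, param = 0;
+ *
+ *	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NIG_DRAIN, 100,
+ *			 &resp, &param);
+ */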
+static void qed_mcp_set_drv_ver(struct qed_dev *cdev,
+				struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt)
+{
+	u32 i;
+
+	/* Copy version string to MCP */
+	for (i = 0; i < MCP_DRV_VER_STR_SIZE_DWORD; i++)
+		DRV_MB_WR(p_hwfn, p_ptt, union_data.ver_str[i],
+			  *(u32 *)&cdev->ver_str[i * sizeof(u32)]);
+}
+
+int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
+		     struct qed_ptt *p_ptt,
+		     u32 *p_load_code)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u32 param;
+	int rc;
+
+	if (!qed_mcp_is_init(p_hwfn)) {
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
+		return -EBUSY;
+	}
+
+	/* Save driver's version to shmem */
+	qed_mcp_set_drv_ver(cdev, p_hwfn, p_ptt);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_SP, "fw_seq 0x%08x, drv_pulse 0x%x\n",
+		   p_hwfn->mcp_info->drv_mb_seq,
+		   p_hwfn->mcp_info->drv_pulse_seq);
+
+	/* Load Request */
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_LOAD_REQ,
+			 (PDA_COMP | DRV_ID_MCP_HSI_VER_CURRENT |
+			  cdev->drv_type),
+			 p_load_code, &param);
+
+	/* if mcp fails to respond we must abort */
+	if (rc) {
+		DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+		return rc;
+	}
+
+	/* If MFW refused (e.g. other port is in diagnostic mode) we
+	 * must abort. This can happen in the following cases:
+	 * - Other port is in diagnostic mode
+	 * - Previously loaded function on the engine is not compliant with
+	 *   the requester.
+	 * - MFW cannot cope with the requester's DRV_MFW_HSI_VERSION.
+	 */
+	if (!(*p_load_code) ||
+	    ((*p_load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED_HSI) ||
+	    ((*p_load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED_PDA) ||
+	    ((*p_load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED_DIAG)) {
+		DP_ERR(p_hwfn, "MCP refused load request, aborting\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+int qed_mcp_get_mfw_ver(struct qed_dev *cdev,
+			u32 *p_mfw_ver)
+{
+	struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
+	struct qed_ptt *p_ptt;
+	u32 global_offsize;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EBUSY;
+
+	global_offsize = qed_rd(p_hwfn, p_ptt,
+				SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->
+						     public_base,
+						     PUBLIC_GLOBAL));
+	*p_mfw_ver = qed_rd(p_hwfn, p_ptt,
+			    SECTION_ADDR(global_offsize, 0) +
+			    offsetof(struct public_global, mfw_ver));
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return 0;
+}
+
+static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct public_func *p_data,
+				  int pfid)
+{
+	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+					PUBLIC_FUNC);
+	u32 mfw_path_offsize = qed_rd(p_hwfn, p_ptt, addr);
+	u32 func_addr = SECTION_ADDR(mfw_path_offsize, pfid);
+	u32 i, size;
+
+	memset(p_data, 0, sizeof(*p_data));
+
+	size = min_t(u32, sizeof(*p_data),
+		     QED_SECTION_SIZE(mfw_path_offsize));
+	for (i = 0; i < size / sizeof(u32); i++)
+		((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
+					    func_addr + (i << 2));
+
+	return size;
+}
+
+static int
+qed_mcp_get_shmem_proto(struct qed_hwfn *p_hwfn,
+			struct public_func *p_info,
+			enum qed_pci_personality *p_proto)
+{
+	int rc = 0;
+
+	switch (p_info->config & FUNC_MF_CFG_PROTOCOL_MASK) {
+	case FUNC_MF_CFG_PROTOCOL_ETHERNET:
+		*p_proto = QED_PCI_ETH;
+		break;
+	default:
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_function_info *info;
+	struct public_func shmem_info;
+
+	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
+			       MCP_PF_ID(p_hwfn));
+	info = &p_hwfn->mcp_info->func_info;
+
+	info->pause_on_host = (shmem_info.config &
+			       FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
+
+	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info,
+				    &info->protocol)) {
+		DP_ERR(p_hwfn, "Unknown personality %08x\n",
+		       (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
+		return -EINVAL;
+	}
+
+	if (p_hwfn->cdev->mf_mode != SF) {
+		info->bandwidth_min = (shmem_info.config &
+				       FUNC_MF_CFG_MIN_BW_MASK) >>
+				      FUNC_MF_CFG_MIN_BW_SHIFT;
+		if (info->bandwidth_min < 1 || info->bandwidth_min > 100) {
+			DP_INFO(p_hwfn,
+				"bandwidth minimum out of bounds [%02x]. Set to 1\n",
+				info->bandwidth_min);
+			info->bandwidth_min = 1;
+		}
+
+		info->bandwidth_max = (shmem_info.config &
+				       FUNC_MF_CFG_MAX_BW_MASK) >>
+				      FUNC_MF_CFG_MAX_BW_SHIFT;
+		if (info->bandwidth_max < 1 || info->bandwidth_max > 100) {
+			DP_INFO(p_hwfn,
+				"bandwidth maximum out of bounds [%02x]. Set to 100\n",
+				info->bandwidth_max);
+			info->bandwidth_max = 100;
+		}
+	}
+
+	if (shmem_info.mac_upper || shmem_info.mac_lower) {
+		info->mac[0] = (u8)(shmem_info.mac_upper >> 8);
+		info->mac[1] = (u8)(shmem_info.mac_upper);
+		info->mac[2] = (u8)(shmem_info.mac_lower >> 24);
+		info->mac[3] = (u8)(shmem_info.mac_lower >> 16);
+		info->mac[4] = (u8)(shmem_info.mac_lower >> 8);
+		info->mac[5] = (u8)(shmem_info.mac_lower);
+	} else {
+		DP_NOTICE(p_hwfn, "MAC is 0 in shmem\n");
+	}
+
+	info->wwn_port = (u64)shmem_info.fcoe_wwn_port_name_upper |
+			 (((u64)shmem_info.fcoe_wwn_port_name_lower) << 32);
+	info->wwn_node = (u64)shmem_info.fcoe_wwn_node_name_upper |
+			 (((u64)shmem_info.fcoe_wwn_node_name_lower) << 32);
+
+	info->ovlan = (u16)(shmem_info.ovlan_stag & FUNC_MF_CFG_OV_STAG_MASK);
+
+	DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP),
+		   "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n",
+		   info->pause_on_host, info->protocol,
+		   info->bandwidth_min, info->bandwidth_max,
+		   info->mac[0], info->mac[1], info->mac[2],
+		   info->mac[3], info->mac[4], info->mac[5],
+		   info->wwn_port, info->wwn_node, info->ovlan);
+
+	return 0;
+}
+
+int qed_mcp_drain(struct qed_hwfn *p_hwfn,
+		  struct qed_ptt *p_ptt)
+{
+	u32 resp = 0, param = 0;
+	int rc;
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt,
+			 DRV_MSG_CODE_NIG_DRAIN, 100,
+			 &resp, &param);
+
+	/* Wait for the drain to complete before returning */
+	msleep(120);
+
+	return rc;
+}
+
+int
+qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct qed_mcp_drv_version *p_ver)
+{
+	int rc = 0;
+	u32 param = 0, reply = 0, i;
+
+	if (!qed_mcp_is_init(p_hwfn)) {
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
+		return -EBUSY;
+	}
+
+	DRV_MB_WR(p_hwfn, p_ptt, union_data.drv_version.version,
+		  p_ver->version);
+	/* Copy version string to shmem */
+	for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / 4; i++) {
+		DRV_MB_WR(p_hwfn, p_ptt,
+			  union_data.drv_version.name[i * sizeof(u32)],
+			  *(u32 *)&p_ver->name[i * sizeof(u32)]);
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_VERSION, 0, &reply,
+			 &param);
+	if (rc) {
+		DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+		return rc;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
new file mode 100644
index 000000000000..230c2550dc89
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -0,0 +1,232 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_MCP_H
+#define _QED_MCP_H
+
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include "qed_hsi.h"
+
+struct qed_mcp_function_info {
+	u8				pause_on_host;
+
+	enum qed_pci_personality	protocol;
+
+	u8				bandwidth_min;
+	u8				bandwidth_max;
+
+	u8				mac[ETH_ALEN];
+
+	u64				wwn_port;
+	u64				wwn_node;
+
+#define QED_MCP_VLAN_UNSET	(0xffff)
+	u16				ovlan;
+};
+
+struct qed_mcp_nvm_common {
+	u32	offset;
+	u32	param;
+	u32	resp;
+	u32	cmd;
+};
+
+struct qed_mcp_drv_version {
+	u32	version;
+	u8	name[MCP_DRV_VER_STR_SIZE - 4];
+};
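+
+/* The 32-bit version field packs the driver version as
+ * (major << 24) | (minor << 16) | (rev << 8) | eng - this is exactly how
+ * qed_slowpath_start() fills it in before calling
+ * qed_mcp_send_drv_version().
+ */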
+/**
+ * @brief Get the management firmware version value
+ *
+ * @param cdev    - qed dev pointer
+ * @param mfw_ver - mfw version value
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_get_mfw_ver(struct qed_dev *cdev,
+			u32 *mfw_ver);
+
+/**
+ * @brief General function for sending commands to the MCP
+ *        mailbox. It acquires a mutex lock for the entire
+ *        operation, from sending the request until the MCP
+ *        response. The response is polled every 10 usec for
+ *        up to 5 seconds.
+ *
+ * @param p_hwfn      - hw function
+ * @param p_ptt       - PTT required for register access
+ * @param cmd         - command to be sent to the MCP.
+ * @param param       - Optional param
+ * @param o_mcp_resp  - The MCP response code (exclude sequence).
+ * @param o_mcp_param - Optional parameter provided by the MCP
+ *                      response
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+		struct qed_ptt *p_ptt,
+		u32 cmd,
+		u32 param,
+		u32 *o_mcp_resp,
+		u32 *o_mcp_param);
+
+/**
+ * @brief - drains the NIG, allowing completions to pass in case of pauses.
+ *          (Should only be called from a sleepable context.)
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_drain(struct qed_hwfn *p_hwfn,
+		  struct qed_ptt *p_ptt);
+
+/**
+ * @brief Send driver version to MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param version - Version value
+ * @param name    - Protocol driver name
+ *
+ * @return int - 0 - operation was successful.
+ */
+int
+qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct qed_mcp_drv_version *p_ver);
+
+/* Using hwfn number (and not pf_num) is required since in CMT mode,
+ * the same pf_num may be used by two different hwfns
+ * TODO - this shouldn't really be in .h file, but until all fields
+ * required during hw-init will be placed in their correct place in shmem
+ * we need it in qed_dev.c [for reading the nvram reflection in shmem].
+ */
+#define MCP_PF_ID_BY_REL(p_hwfn, rel_pfid) (QED_IS_BB((p_hwfn)->cdev) ?	       \
+					    ((rel_pfid) |		       \
+					     ((p_hwfn)->abs_pf_id & 1) << 3) : \
+					    rel_pfid)
+#define MCP_PF_ID(p_hwfn) MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id)
+
+/* TODO - this is only correct as long as only BB is supported, and
+ * no port-swapping is implemented; Afterwards we'll need to fix it.
+ */
+#define MFW_PORT(_p_hwfn)	((_p_hwfn)->abs_pf_id %	\
+				 ((_p_hwfn)->cdev->num_ports_in_engines * 2))
+struct qed_mcp_info {
+	struct mutex				mutex; /* MCP access lock */
+	u32					public_base;
+	u32					drv_mb_addr;
+	u32					mfw_mb_addr;
+	u32					port_addr;
+	u16					drv_mb_seq;
+	u16					drv_pulse_seq;
+	struct qed_mcp_function_info		func_info;
+
+	u8					*mfw_mb_cur;
+	u8					*mfw_mb_shadow;
+	u16					mfw_mb_length;
+	u16					mcp_hist;
+};
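+
+/* All driver<->MCP mailbox traffic is serialized by the mutex above;
+ * see qed_mcp_cmd(), which takes it around every request/response pair.
+ */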
+/**
+ * @brief Initialize the interface with the MCP
+ *
+ * @param p_hwfn - HW func
+ * @param p_ptt - PTT required for register access
+ *
+ * @return int
+ */
+int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
+		     struct qed_ptt *p_ptt);
+
+/**
+ * @brief Initialize the port interface with the MCP
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * Can only be called after `num_ports_in_engines' is set
+ */
+void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
+			   struct qed_ptt *p_ptt);
+/**
+ * @brief Releases resources allocated during the init process.
+ *
+ * @param p_hwfn - HW func
+ * @param p_ptt - PTT required for register access
+ *
+ * @return int
+ */
+
+int qed_mcp_free(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief Sends a LOAD_REQ to the MFW, and in case the operation
+ *        succeeds, returns whether this PF is the first on the
+ *        chip/engine/port or function. This function should be
+ *        called when the driver is ready to accept MFW events after
+ *        Storms initializations are done.
+ *
+ * @param p_hwfn      - hw function
+ * @param p_ptt       - PTT required for register access
+ * @param p_load_code - The MCP response param containing one
+ *                      of the following:
+ *                      FW_MSG_CODE_DRV_LOAD_ENGINE
+ *                      FW_MSG_CODE_DRV_LOAD_PORT
+ *                      FW_MSG_CODE_DRV_LOAD_FUNCTION
+ * @return int -
+ *         0 - Operation was successful.
+ *         -EBUSY - Operation failed
+ */
+int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
+		     struct qed_ptt *p_ptt,
+		     u32 *p_load_code);
+
+/**
+ * @brief Read the MFW mailbox into Current buffer.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
+		     struct qed_ptt *p_ptt);
+
+/**
+ * @brief - Called during init to read shmem of all function-related info.
+ *
+ * @param p_hwfn
+ *
+ * @return 0 upon success.
+ */
+int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt);
+
+/**
+ * @brief - Reset the MCP using mailbox command.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return 0 upon success.
+ */
+int qed_mcp_reset(struct qed_hwfn *p_hwfn,
+		  struct qed_ptt *p_ptt);
+
+/**
+ * @brief indicates whether the MFW objects [under mcp_info] are accessible
+ *
+ * @param p_hwfn
+ *
+ * @return true iff MFW is running and mcp_info is initialized
+ */
+bool qed_mcp_is_init(struct qed_hwfn *p_hwfn);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
new file mode 100644
index 000000000000..7a5ce5914ace
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -0,0 +1,366 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */ + +#ifndef REG_ADDR_H +#define REG_ADDR_H + +#define CDU_REG_CID_ADDR_PARAMS_CONTEXT_SIZE_SHIFT \ + 0 + +#define CDU_REG_CID_ADDR_PARAMS_CONTEXT_SIZE ( \ + 0xfff << 0) + +#define CDU_REG_CID_ADDR_PARAMS_BLOCK_WASTE_SHIFT \ + 12 + +#define CDU_REG_CID_ADDR_PARAMS_BLOCK_WASTE ( \ + 0xfff << 12) + +#define CDU_REG_CID_ADDR_PARAMS_NCIB_SHIFT \ + 24 + +#define CDU_REG_CID_ADDR_PARAMS_NCIB ( \ + 0xff << 24) + +#define XSDM_REG_OPERATION_GEN \ + 0xf80408UL +#define NIG_REG_RX_BRB_OUT_EN \ + 0x500e18UL +#define NIG_REG_STORM_OUT_EN \ + 0x500e08UL +#define PSWRQ2_REG_L2P_VALIDATE_VFID \ + 0x240c50UL +#define PGLUE_B_REG_USE_CLIENTID_IN_TAG \ + 0x2aae04UL +#define PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER \ + 0x2aa16cUL +#define BAR0_MAP_REG_MSDM_RAM \ + 0x1d00000UL +#define BAR0_MAP_REG_USDM_RAM \ + 0x1d80000UL +#define BAR0_MAP_REG_PSDM_RAM \ + 0x1f00000UL +#define BAR0_MAP_REG_TSDM_RAM \ + 0x1c80000UL +#define NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF \ + 0x5011f4UL +#define PRS_REG_SEARCH_TCP \ + 0x1f0400UL +#define PRS_REG_SEARCH_UDP \ + 0x1f0404UL +#define PRS_REG_SEARCH_FCOE \ + 0x1f0408UL +#define PRS_REG_SEARCH_ROCE \ + 0x1f040cUL +#define PRS_REG_SEARCH_OPENFLOW \ + 0x1f0434UL +#define TM_REG_PF_ENABLE_CONN \ + 0x2c043cUL +#define TM_REG_PF_ENABLE_TASK \ + 0x2c0444UL +#define TM_REG_PF_SCAN_ACTIVE_CONN \ + 0x2c04fcUL +#define TM_REG_PF_SCAN_ACTIVE_TASK \ + 0x2c0500UL +#define IGU_REG_LEADING_EDGE_LATCH \ + 0x18082cUL +#define IGU_REG_TRAILING_EDGE_LATCH \ + 0x180830UL +#define QM_REG_USG_CNT_PF_TX \ + 0x2f2eacUL +#define QM_REG_USG_CNT_PF_OTHER \ + 0x2f2eb0UL +#define DORQ_REG_PF_DB_ENABLE \ + 0x100508UL +#define QM_REG_PF_EN \ + 0x2f2ea4UL +#define TCFC_REG_STRONG_ENABLE_PF \ + 0x2d0708UL +#define CCFC_REG_STRONG_ENABLE_PF \ + 0x2e0708UL +#define PGLUE_B_REG_PGL_ADDR_88_F0 \ + 0x2aa404UL +#define PGLUE_B_REG_PGL_ADDR_8C_F0 \ + 0x2aa408UL +#define PGLUE_B_REG_PGL_ADDR_90_F0 \ + 0x2aa40cUL +#define PGLUE_B_REG_PGL_ADDR_94_F0 \ + 0x2aa410UL +#define PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR \ + 0x2aa138UL +#define PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ \ + 0x2aa174UL +#define MISC_REG_GEN_PURP_CR0 \ + 0x008c80UL +#define MCP_REG_SCRATCH \ + 0xe20000UL +#define CNIG_REG_NW_PORT_MODE_BB_B0 \ + 0x218200UL +#define MISCS_REG_CHIP_NUM \ + 0x00976cUL +#define MISCS_REG_CHIP_REV \ + 0x009770UL +#define MISCS_REG_CMT_ENABLED_FOR_PAIR \ + 0x00971cUL +#define MISCS_REG_CHIP_TEST_REG \ + 0x009778UL +#define MISCS_REG_CHIP_METAL \ + 0x009774UL +#define BRB_REG_HEADER_SIZE \ + 0x340804UL +#define BTB_REG_HEADER_SIZE \ + 0xdb0804UL +#define CAU_REG_LONG_TIMEOUT_THRESHOLD \ + 0x1c0708UL +#define CCFC_REG_ACTIVITY_COUNTER \ + 0x2e8800UL +#define CDU_REG_CID_ADDR_PARAMS \ + 0x580900UL +#define DBG_REG_CLIENT_ENABLE \ + 0x010004UL +#define DMAE_REG_INIT \ + 0x00c000UL +#define DORQ_REG_IFEN \ + 0x100040UL +#define GRC_REG_TIMEOUT_EN \ + 0x050404UL +#define IGU_REG_BLOCK_CONFIGURATION \ + 0x180040UL +#define MCM_REG_INIT \ + 0x1200000UL +#define MCP2_REG_DBG_DWORD_ENABLE \ + 0x052404UL +#define MISC_REG_PORT_MODE \ + 0x008c00UL +#define MISCS_REG_CLK_100G_MODE \ + 0x009070UL +#define MSDM_REG_ENABLE_IN1 \ + 0xfc0004UL +#define MSEM_REG_ENABLE_IN \ + 0x1800004UL +#define NIG_REG_CM_HDR \ + 0x500840UL +#define NCSI_REG_CONFIG \ + 0x040200UL +#define PBF_REG_INIT \ + 0xd80000UL +#define PTU_REG_ATC_INIT_ARRAY \ + 0x560000UL +#define PCM_REG_INIT \ + 0x1100000UL +#define PGLUE_B_REG_ADMIN_PER_PF_REGION \ + 0x2a9000UL +#define PRM_REG_DISABLE_PRM \ + 0x230000UL +#define PRS_REG_SOFT_RST \ + 0x1f0000UL +#define 
PSDM_REG_ENABLE_IN1 \ + 0xfa0004UL +#define PSEM_REG_ENABLE_IN \ + 0x1600004UL +#define PSWRQ_REG_DBG_SELECT \ + 0x280020UL +#define PSWRQ2_REG_CDUT_P_SIZE \ + 0x24000cUL +#define PSWHST_REG_DISCARD_INTERNAL_WRITES \ + 0x2a0040UL +#define PSWHST2_REG_DBGSYN_ALMOST_FULL_THR \ + 0x29e050UL +#define PSWRD_REG_DBG_SELECT \ + 0x29c040UL +#define PSWRD2_REG_CONF11 \ + 0x29d064UL +#define PSWWR_REG_USDM_FULL_TH \ + 0x29a040UL +#define PSWWR2_REG_CDU_FULL_TH2 \ + 0x29b040UL +#define QM_REG_MAXPQSIZE_0 \ + 0x2f0434UL +#define RSS_REG_RSS_INIT_EN \ + 0x238804UL +#define RDIF_REG_STOP_ON_ERROR \ + 0x300040UL +#define SRC_REG_SOFT_RST \ + 0x23874cUL +#define TCFC_REG_ACTIVITY_COUNTER \ + 0x2d8800UL +#define TCM_REG_INIT \ + 0x1180000UL +#define TM_REG_PXP_READ_DATA_FIFO_INIT \ + 0x2c0014UL +#define TSDM_REG_ENABLE_IN1 \ + 0xfb0004UL +#define TSEM_REG_ENABLE_IN \ + 0x1700004UL +#define TDIF_REG_STOP_ON_ERROR \ + 0x310040UL +#define UCM_REG_INIT \ + 0x1280000UL +#define UMAC_REG_IPG_HD_BKP_CNTL_BB_B0 \ + 0x051004UL +#define USDM_REG_ENABLE_IN1 \ + 0xfd0004UL +#define USEM_REG_ENABLE_IN \ + 0x1900004UL +#define XCM_REG_INIT \ + 0x1000000UL +#define XSDM_REG_ENABLE_IN1 \ + 0xf80004UL +#define XSEM_REG_ENABLE_IN \ + 0x1400004UL +#define YCM_REG_INIT \ + 0x1080000UL +#define YSDM_REG_ENABLE_IN1 \ + 0xf90004UL +#define YSEM_REG_ENABLE_IN \ + 0x1500004UL +#define XYLD_REG_SCBD_STRICT_PRIO \ + 0x4c0000UL +#define TMLD_REG_SCBD_STRICT_PRIO \ + 0x4d0000UL +#define MULD_REG_SCBD_STRICT_PRIO \ + 0x4e0000UL +#define YULD_REG_SCBD_STRICT_PRIO \ + 0x4c8000UL +#define MISC_REG_SHARED_MEM_ADDR \ + 0x008c20UL +#define DMAE_REG_GO_C0 \ + 0x00c048UL +#define DMAE_REG_GO_C1 \ + 0x00c04cUL +#define DMAE_REG_GO_C2 \ + 0x00c050UL +#define DMAE_REG_GO_C3 \ + 0x00c054UL +#define DMAE_REG_GO_C4 \ + 0x00c058UL +#define DMAE_REG_GO_C5 \ + 0x00c05cUL +#define DMAE_REG_GO_C6 \ + 0x00c060UL +#define DMAE_REG_GO_C7 \ + 0x00c064UL +#define DMAE_REG_GO_C8 \ + 0x00c068UL +#define DMAE_REG_GO_C9 \ + 0x00c06cUL +#define DMAE_REG_GO_C10 \ + 0x00c070UL +#define DMAE_REG_GO_C11 \ + 0x00c074UL +#define DMAE_REG_GO_C12 \ + 0x00c078UL +#define DMAE_REG_GO_C13 \ + 0x00c07cUL +#define DMAE_REG_GO_C14 \ + 0x00c080UL +#define DMAE_REG_GO_C15 \ + 0x00c084UL +#define DMAE_REG_GO_C16 \ + 0x00c088UL +#define DMAE_REG_GO_C17 \ + 0x00c08cUL +#define DMAE_REG_GO_C18 \ + 0x00c090UL +#define DMAE_REG_GO_C19 \ + 0x00c094UL +#define DMAE_REG_GO_C20 \ + 0x00c098UL +#define DMAE_REG_GO_C21 \ + 0x00c09cUL +#define DMAE_REG_GO_C22 \ + 0x00c0a0UL +#define DMAE_REG_GO_C23 \ + 0x00c0a4UL +#define DMAE_REG_GO_C24 \ + 0x00c0a8UL +#define DMAE_REG_GO_C25 \ + 0x00c0acUL +#define DMAE_REG_GO_C26 \ + 0x00c0b0UL +#define DMAE_REG_GO_C27 \ + 0x00c0b4UL +#define DMAE_REG_GO_C28 \ + 0x00c0b8UL +#define DMAE_REG_GO_C29 \ + 0x00c0bcUL +#define DMAE_REG_GO_C30 \ + 0x00c0c0UL +#define DMAE_REG_GO_C31 \ + 0x00c0c4UL +#define DMAE_REG_CMD_MEM \ + 0x00c800UL +#define QM_REG_MAXPQSIZETXSEL_0 \ + 0x2f0440UL +#define QM_REG_SDMCMDREADY \ + 0x2f1e10UL +#define QM_REG_SDMCMDADDR \ + 0x2f1e04UL +#define QM_REG_SDMCMDDATALSB \ + 0x2f1e08UL +#define QM_REG_SDMCMDDATAMSB \ + 0x2f1e0cUL +#define QM_REG_SDMCMDGO \ + 0x2f1e14UL +#define QM_REG_RLPFCRD \ + 0x2f4d80UL +#define QM_REG_RLPFINCVAL \ + 0x2f4c80UL +#define QM_REG_RLGLBLCRD \ + 0x2f4400UL +#define QM_REG_RLGLBLINCVAL \ + 0x2f3400UL +#define IGU_REG_ATTENTION_ENABLE \ + 0x18083cUL +#define IGU_REG_ATTN_MSG_ADDR_L \ + 0x180820UL +#define IGU_REG_ATTN_MSG_ADDR_H \ + 0x180824UL +#define MISC_REG_AEU_GENERAL_ATTN_0 \ + 0x008400UL +#define 
CAU_REG_SB_ADDR_MEMORY \ + 0x1c8000UL +#define CAU_REG_SB_VAR_MEMORY \ + 0x1c6000UL +#define CAU_REG_PI_MEMORY \ + 0x1d0000UL +#define IGU_REG_PF_CONFIGURATION \ + 0x180800UL +#define MISC_REG_AEU_ENABLE1_IGU_OUT_0 \ + 0x00849cUL +#define MISC_REG_AEU_MASK_ATTN_IGU \ + 0x008494UL +#define IGU_REG_CLEANUP_STATUS_0 \ + 0x180980UL +#define IGU_REG_CLEANUP_STATUS_1 \ + 0x180a00UL +#define IGU_REG_CLEANUP_STATUS_2 \ + 0x180a80UL +#define IGU_REG_CLEANUP_STATUS_3 \ + 0x180b00UL +#define IGU_REG_CLEANUP_STATUS_4 \ + 0x180b80UL +#define IGU_REG_COMMAND_REG_32LSB_DATA \ + 0x180840UL +#define IGU_REG_COMMAND_REG_CTRL \ + 0x180848UL +#define IGU_REG_BLOCK_CONFIGURATION_VF_CLEANUP_EN ( \ + 0x1 << 1) +#define IGU_REG_BLOCK_CONFIGURATION_PXP_TPH_INTERFACE_EN ( \ + 0x1 << 0) +#define IGU_REG_MAPPING_MEMORY \ + 0x184000UL +#define MISCS_REG_GENERIC_POR_0 \ + 0x0096d4UL +#define MCP_REG_NVM_CFG4 \ + 0xe0642cUL +#define MCP_REG_NVM_CFG4_FLASH_SIZE ( \ + 0x7 << 0) +#define MCP_REG_NVM_CFG4_FLASH_SIZE_SHIFT \ + 0 +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h new file mode 100644 index 000000000000..74657d227583 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -0,0 +1,333 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_SP_H +#define _QED_SP_H + +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" + +enum spq_mode { + QED_SPQ_MODE_BLOCK, /* Client will poll a designated mem. address */ + QED_SPQ_MODE_CB, /* Client supplies a callback */ + QED_SPQ_MODE_EBLOCK, /* QED should block until completion */ +}; + +struct qed_spq_comp_cb { + void (*function)(struct qed_hwfn *, + void *, + union event_ring_data *, + u8 fw_return_code); + void *cookie; +}; + +union ramrod_data { + struct pf_start_ramrod_data pf_start; +}; + +#define EQ_MAX_CREDIT 0xffffffff + +enum spq_priority { + QED_SPQ_PRIORITY_NORMAL, + QED_SPQ_PRIORITY_HIGH, +}; + +union qed_spq_req_comp { + struct qed_spq_comp_cb cb; + u64 *done_addr; +}; + +struct qed_spq_comp_done { + u64 done; + u8 fw_return_code; +}; + +struct qed_spq_entry { + struct list_head list; + + u8 flags; + + /* HSI slow path element */ + struct slow_path_element elem; + + union ramrod_data ramrod; + + enum spq_priority priority; + + /* pending queue for this entry */ + struct list_head *queue; + + enum spq_mode comp_mode; + struct qed_spq_comp_cb comp_cb; + struct qed_spq_comp_done comp_done; /* SPQ_MODE_EBLOCK */ +}; + +struct qed_eq { + struct qed_chain chain; + u8 eq_sb_index; /* index within the SB */ + __le16 *p_fw_cons; /* ptr to index value */ +}; + +struct qed_consq { + struct qed_chain chain; +}; + +struct qed_spq { + spinlock_t lock; /* SPQ lock */ + + struct list_head unlimited_pending; + struct list_head pending; + struct list_head completion_pending; + struct list_head free_pool; + + struct qed_chain chain; + + /* allocated dma-able memory for spq entries (+ramrod data) */ + dma_addr_t p_phys; + struct qed_spq_entry *p_virt; + + /* Used as index for completions (returns on EQ by FW) */ + u16 echo_idx; + + /* Statistics */ + u32 unlimited_pending_count; + u32 normal_count; + u32 high_count; + u32 comp_sent_count; + u32 comp_count; + + u32 cid; +}; + +/** + * @brief qed_spq_post - Posts a Slow hwfn request to FW, or lacking that + 
* pends it for later posting.
+ *
+ * @param p_hwfn
+ * @param p_ent
+ * @param fw_return_code
+ *
+ * @return int
+ */
+int qed_spq_post(struct qed_hwfn *p_hwfn,
+                 struct qed_spq_entry *p_ent,
+                 u8 *fw_return_code);
+
+/**
+ * @brief qed_spq_alloc - Allocates & initializes the SPQ and EQ.
+ *
+ * @param p_hwfn
+ *
+ * @return int
+ */
+int qed_spq_alloc(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_spq_setup - Reset the SPQ to its start state.
+ *
+ * @param p_hwfn
+ */
+void qed_spq_setup(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_spq_free - Deallocates the given SPQ struct.
+ *
+ * @param p_hwfn
+ */
+void qed_spq_free(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_spq_get_entry - Obtain an entry from the SPQ
+ *        free pool list.
+ *
+ * @param p_hwfn
+ * @param pp_ent
+ *
+ * @return int
+ */
+int
+qed_spq_get_entry(struct qed_hwfn *p_hwfn,
+                  struct qed_spq_entry **pp_ent);
+
+/**
+ * @brief qed_spq_return_entry - Return an entry to the SPQ
+ *        free pool list.
+ *
+ * @param p_hwfn
+ * @param p_ent
+ */
+void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
+                          struct qed_spq_entry *p_ent);
+
+/**
+ * @brief qed_eq_alloc - Allocates & initializes an EQ struct
+ *
+ * @param p_hwfn
+ * @param num_elem number of elements in the eq
+ *
+ * @return struct qed_eq* - a newly allocated structure; NULL upon error.
+ */
+struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn,
+                            u16 num_elem);
+
+/**
+ * @brief qed_eq_setup - Reset the EQ to its start state.
+ *
+ * @param p_hwfn
+ * @param p_eq
+ */
+void qed_eq_setup(struct qed_hwfn *p_hwfn,
+                  struct qed_eq *p_eq);
+
+/**
+ * @brief qed_eq_free - Deallocates the given EQ struct.
+ *
+ * @param p_hwfn
+ * @param p_eq
+ */
+void qed_eq_free(struct qed_hwfn *p_hwfn,
+                 struct qed_eq *p_eq);
+
+/**
+ * @brief qed_eq_prod_update - update the FW with default EQ producer
+ *
+ * @param p_hwfn
+ * @param prod
+ */
+void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
+                        u16 prod);
+
+/**
+ * @brief qed_eq_completion - Completes currently pending EQ elements
+ *
+ * @param p_hwfn
+ * @param cookie
+ *
+ * @return int
+ */
+int qed_eq_completion(struct qed_hwfn *p_hwfn,
+                      void *cookie);
+
+/**
+ * @brief qed_spq_completion - Completes a single event
+ *
+ * @param p_hwfn
+ * @param echo - echo value from cookie (used for determining completion)
+ * @param p_data - data from cookie (used in callback function if applicable)
+ *
+ * @return int
+ */
+int qed_spq_completion(struct qed_hwfn *p_hwfn,
+                       __le16 echo,
+                       u8 fw_return_code,
+                       union event_ring_data *p_data);
+
+/**
+ * @brief qed_spq_get_cid - Given p_hwfn, return cid for the hwfn's SPQ
+ *
+ * @param p_hwfn
+ *
+ * @return u32 - SPQ CID
+ */
+u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_consq_alloc - Allocates & initializes a ConsQ
+ *        struct
+ *
+ * @param p_hwfn
+ *
+ * @return struct qed_consq* - a newly allocated structure; NULL upon error.
+ */
+struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_consq_setup - Reset the ConsQ to its start
+ *        state.
+ *
+ * @param p_hwfn
+ * @param p_consq
+ */
+void qed_consq_setup(struct qed_hwfn *p_hwfn,
+                     struct qed_consq *p_consq);
+
+/**
+ * @brief qed_consq_free - Deallocates the given ConsQ struct.
+ *
+ * @param p_hwfn
+ * @param p_consq
+ */
+void qed_consq_free(struct qed_hwfn *p_hwfn,
+                    struct qed_consq *p_consq);
+
+/**
+ * @file
+ *
+ * @brief Slow-hwfn low-level commands (Ramrods) function definitions.
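+ *
+ * A typical flow, as an illustrative sketch only (the calls are the
+ * APIs declared above; the PF-start usage mirrors qed_sp_pf_start()
+ * in qed_sp_commands.c):
+ *
+ *   struct qed_sp_init_request_params params = {
+ *           .ramrod_data_size = sizeof(struct pf_start_ramrod_data),
+ *           .comp_mode = QED_SPQ_MODE_EBLOCK,
+ *   };
+ *   struct qed_spq_entry *p_ent;
+ *   int rc;
+ *
+ *   rc = qed_sp_init_request(p_hwfn, &p_ent, qed_spq_get_cid(p_hwfn),
+ *                            p_hwfn->hw_info.opaque_fid,
+ *                            COMMON_RAMROD_PF_START, PROTOCOLID_COMMON,
+ *                            &params);
+ *   if (!rc)
+ *           rc = qed_spq_post(p_hwfn, p_ent, NULL);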
+ */
+
+#define QED_SP_EQ_COMPLETION  0x01
+#define QED_SP_CQE_COMPLETION 0x02
+
+struct qed_sp_init_request_params {
+        size_t                  ramrod_data_size;
+        enum spq_mode           comp_mode;
+        struct qed_spq_comp_cb *p_comp_data;
+};
+
+int qed_sp_init_request(struct qed_hwfn *p_hwfn,
+                        struct qed_spq_entry **pp_ent,
+                        u32 cid,
+                        u16 opaque_fid,
+                        u8 cmd,
+                        u8 protocol,
+                        struct qed_sp_init_request_params *p_params);
+
+/**
+ * @brief qed_sp_pf_start - PF Function Start Ramrod
+ *
+ * This ramrod is sent to initialize a physical function (PF). It will
+ * configure the function-related parameters and write its completion to the
+ * event ring specified in the parameters.
+ *
+ * Ramrods complete on the common event ring for the PF. This ring is
+ * allocated by the driver on host memory and its parameters are written
+ * to the internal RAM of the UStorm by the Function Start Ramrod.
+ *
+ * @param p_hwfn
+ * @param mode
+ *
+ * @return int
+ */
+
+int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
+                    enum mf_mode mode);
+
+/**
+ * @brief qed_sp_pf_stop - PF Function Stop Ramrod
+ *
+ * This ramrod is sent to close a Physical Function (PF). It is the last ramrod
+ * sent and the last completion written to the PF's event ring. This ramrod
+ * also deletes the context for the slow hwfn connection on this PF.
+ *
+ * @note Not required for first packet.
+ *
+ * @param p_hwfn
+ *
+ * @return int
+ */
+
+int qed_sp_pf_stop(struct qed_hwfn *p_hwfn);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
new file mode 100644
index 000000000000..6f7879136633
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -0,0 +1,170 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */ + +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include +#include "qed_cxt.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_int.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +int qed_sp_init_request(struct qed_hwfn *p_hwfn, + struct qed_spq_entry **pp_ent, + u32 cid, + u16 opaque_fid, + u8 cmd, + u8 protocol, + struct qed_sp_init_request_params *p_params) +{ + int rc = -EINVAL; + struct qed_spq_entry *p_ent = NULL; + u32 opaque_cid = opaque_fid << 16 | cid; + + if (!pp_ent) + return -ENOMEM; + + rc = qed_spq_get_entry(p_hwfn, pp_ent); + + if (rc != 0) + return rc; + + p_ent = *pp_ent; + + p_ent->elem.hdr.cid = cpu_to_le32(opaque_cid); + p_ent->elem.hdr.cmd_id = cmd; + p_ent->elem.hdr.protocol_id = protocol; + + p_ent->priority = QED_SPQ_PRIORITY_NORMAL; + p_ent->comp_mode = p_params->comp_mode; + p_ent->comp_done.done = 0; + + switch (p_ent->comp_mode) { + case QED_SPQ_MODE_EBLOCK: + p_ent->comp_cb.cookie = &p_ent->comp_done; + break; + + case QED_SPQ_MODE_BLOCK: + if (!p_params->p_comp_data) + return -EINVAL; + + p_ent->comp_cb.cookie = p_params->p_comp_data->cookie; + break; + + case QED_SPQ_MODE_CB: + if (!p_params->p_comp_data) + p_ent->comp_cb.function = NULL; + else + p_ent->comp_cb = *p_params->p_comp_data; + break; + + default: + DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n", + p_ent->comp_mode); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "Initialized: CID %08x cmd %02x protocol %02x data_addr %lu comp_mode [%s]\n", + opaque_cid, cmd, protocol, + (unsigned long)&p_ent->ramrod, + D_TRINE(p_ent->comp_mode, QED_SPQ_MODE_EBLOCK, + QED_SPQ_MODE_BLOCK, "MODE_EBLOCK", "MODE_BLOCK", + "MODE_CB")); + if (p_params->ramrod_data_size) + memset(&p_ent->ramrod, 0, p_params->ramrod_data_size); + + return 0; +} + +int qed_sp_pf_start(struct qed_hwfn *p_hwfn, + enum mf_mode mode) +{ + struct qed_sp_init_request_params params; + struct pf_start_ramrod_data *p_ramrod = NULL; + u16 sb = qed_int_get_sp_sb_id(p_hwfn); + u8 sb_index = p_hwfn->p_eq->eq_sb_index; + struct qed_spq_entry *p_ent = NULL; + int rc = -EINVAL; + + /* update initial eq producer */ + qed_eq_prod_update(p_hwfn, + qed_chain_get_prod_idx(&p_hwfn->p_eq->chain)); + + memset(¶ms, 0, sizeof(params)); + params.ramrod_data_size = sizeof(*p_ramrod); + params.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, + &p_ent, + qed_spq_get_cid(p_hwfn), + p_hwfn->hw_info.opaque_fid, + COMMON_RAMROD_PF_START, + PROTOCOLID_COMMON, + ¶ms); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.pf_start; + + p_ramrod->event_ring_sb_id = cpu_to_le16(sb); + p_ramrod->event_ring_sb_index = sb_index; + p_ramrod->path_id = QED_PATH_ID(p_hwfn); + p_ramrod->dont_log_ramrods = 0; + p_ramrod->log_type_mask = cpu_to_le16(0xf); + p_ramrod->mf_mode = mode; + p_ramrod->outer_tag = p_hwfn->hw_info.ovlan; + + /* Place EQ address in RAMROD */ + p_ramrod->event_ring_pbl_addr.hi = + DMA_HI_LE(p_hwfn->p_eq->chain.pbl.p_phys_table); + p_ramrod->event_ring_pbl_addr.lo = + DMA_LO_LE(p_hwfn->p_eq->chain.pbl.p_phys_table); + p_ramrod->event_ring_num_pages = (u8)p_hwfn->p_eq->chain.page_cnt; + + p_ramrod->consolid_q_pbl_addr.hi = + DMA_HI_LE(p_hwfn->p_consq->chain.pbl.p_phys_table); + p_ramrod->consolid_q_pbl_addr.lo = + DMA_LO_LE(p_hwfn->p_consq->chain.pbl.p_phys_table); + + p_hwfn->hw_info.personality = PERSONALITY_ETH; + + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "Setting event_ring_sb [id %04x index %02x], mf [%s] outer_tag [%d]\n", + sb, sb_index, + (p_ramrod->mf_mode == SF) ? 
"SF" : "Multi-Pf", + p_ramrod->outer_tag); + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) +{ + struct qed_sp_init_request_params params; + struct qed_spq_entry *p_ent = NULL; + int rc = -EINVAL; + + memset(¶ms, 0, sizeof(params)); + params.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, qed_spq_get_cid(p_hwfn), + p_hwfn->hw_info.opaque_fid, + COMMON_RAMROD_PF_STOP, PROTOCOLID_COMMON, + ¶ms); + if (rc) + return rc; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c new file mode 100644 index 000000000000..f28ecb197309 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -0,0 +1,831 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_int.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +/*************************************************************************** +* Structures & Definitions +***************************************************************************/ + +#define SPQ_HIGH_PRI_RESERVE_DEFAULT (1) +#define SPQ_BLOCK_SLEEP_LENGTH (1000) + +/*************************************************************************** +* Blocking Imp. (BLOCK/EBLOCK mode) +***************************************************************************/ +static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn, + void *cookie, + union event_ring_data *data, + u8 fw_return_code) +{ + struct qed_spq_comp_done *comp_done; + + comp_done = (struct qed_spq_comp_done *)cookie; + + comp_done->done = 0x1; + comp_done->fw_return_code = fw_return_code; + + /* make update visible to waiting thread */ + smp_wmb(); +} + +static int qed_spq_block(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent, + u8 *p_fw_ret) +{ + int sleep_count = SPQ_BLOCK_SLEEP_LENGTH; + struct qed_spq_comp_done *comp_done; + int rc; + + comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie; + while (sleep_count) { + /* validate we receive completion update */ + smp_rmb(); + if (comp_done->done == 1) { + if (p_fw_ret) + *p_fw_ret = comp_done->fw_return_code; + return 0; + } + usleep_range(5000, 10000); + sleep_count--; + } + + DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n"); + rc = qed_mcp_drain(p_hwfn, p_hwfn->p_main_ptt); + if (rc != 0) + DP_NOTICE(p_hwfn, "MCP drain failed\n"); + + /* Retry after drain */ + sleep_count = SPQ_BLOCK_SLEEP_LENGTH; + while (sleep_count) { + /* validate we receive completion update */ + smp_rmb(); + if (comp_done->done == 1) { + if (p_fw_ret) + *p_fw_ret = comp_done->fw_return_code; + return 0; + } + usleep_range(5000, 10000); + sleep_count--; + } + + if (comp_done->done == 1) { + if (p_fw_ret) + *p_fw_ret = comp_done->fw_return_code; + return 0; + } + + DP_NOTICE(p_hwfn, "Ramrod is stuck, MCP drain failed\n"); + + return -EBUSY; +} + +/*************************************************************************** +* SPQ entries inner API +***************************************************************************/ +static int 
+qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
+                   struct qed_spq_entry *p_ent)
+{
+        p_ent->elem.hdr.echo = 0;
+        p_hwfn->p_spq->echo_idx++;
+        p_ent->flags = 0;
+
+        switch (p_ent->comp_mode) {
+        case QED_SPQ_MODE_EBLOCK:
+        case QED_SPQ_MODE_BLOCK:
+                p_ent->comp_cb.function = qed_spq_blocking_cb;
+                break;
+        case QED_SPQ_MODE_CB:
+                break;
+        default:
+                DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n",
+                          p_ent->comp_mode);
+                return -EINVAL;
+        }
+
+        DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+                   "Ramrod header: [CID 0x%08x CMD 0x%02x protocol 0x%02x] Data pointer: [%08x:%08x] Completion Mode: %s\n",
+                   p_ent->elem.hdr.cid,
+                   p_ent->elem.hdr.cmd_id,
+                   p_ent->elem.hdr.protocol_id,
+                   p_ent->elem.data_ptr.hi,
+                   p_ent->elem.data_ptr.lo,
+                   D_TRINE(p_ent->comp_mode, QED_SPQ_MODE_EBLOCK,
+                           QED_SPQ_MODE_BLOCK, "MODE_EBLOCK", "MODE_BLOCK",
+                           "MODE_CB"));
+
+        return 0;
+}
+
+/***************************************************************************
+* HSI access
+***************************************************************************/
+static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
+                                  struct qed_spq *p_spq)
+{
+        u16 pq;
+        struct qed_cxt_info cxt_info;
+        struct core_conn_context *p_cxt;
+        union qed_qm_pq_params pq_params;
+        int rc;
+
+        cxt_info.iid = p_spq->cid;
+
+        rc = qed_cxt_get_cid_info(p_hwfn, &cxt_info);
+
+        if (rc < 0) {
+                DP_NOTICE(p_hwfn, "Cannot find context info for cid=%d\n",
+                          p_spq->cid);
+                return;
+        }
+
+        p_cxt = cxt_info.p_cxt;
+
+        SET_FIELD(p_cxt->xstorm_ag_context.flags10,
+                  XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN, 1);
+        SET_FIELD(p_cxt->xstorm_ag_context.flags1,
+                  XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE, 1);
+        SET_FIELD(p_cxt->xstorm_ag_context.flags9,
+                  XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN, 1);
+
+        /* QM physical queue */
+        memset(&pq_params, 0, sizeof(pq_params));
+        pq_params.core.tc = LB_TC;
+        pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params);
+        p_cxt->xstorm_ag_context.physical_q0 = cpu_to_le16(pq);
+
+        p_cxt->xstorm_st_context.spq_base_lo =
+                DMA_LO_LE(p_spq->chain.p_phys_addr);
+        p_cxt->xstorm_st_context.spq_base_hi =
+                DMA_HI_LE(p_spq->chain.p_phys_addr);
+
+        p_cxt->xstorm_st_context.consolid_base_addr.lo =
+                DMA_LO_LE(p_hwfn->p_consq->chain.p_phys_addr);
+        p_cxt->xstorm_st_context.consolid_base_addr.hi =
+                DMA_HI_LE(p_hwfn->p_consq->chain.p_phys_addr);
+}
+
+static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
+                           struct qed_spq *p_spq,
+                           struct qed_spq_entry *p_ent)
+{
+        struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
+        struct slow_path_element *elem;
+        struct core_db_data db;
+
+        elem = qed_chain_produce(p_chain);
+        if (!elem) {
+                DP_NOTICE(p_hwfn, "Failed to produce from SPQ chain\n");
+                return -EINVAL;
+        }
+
+        *elem = p_ent->elem; /* struct assignment */
+
+        /* send a doorbell on the slow hwfn session */
+        memset(&db, 0, sizeof(db));
+        SET_FIELD(db.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
+        SET_FIELD(db.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
+        SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL,
+                  DQ_XCM_CORE_SPQ_PROD_CMD);
+        db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
+
+        /* validate the producer is up to date */
+        rmb();
+
+        db.spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain));
+
+        /* do not reorder */
+        barrier();
+
+        DOORBELL(p_hwfn, qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db);
+
+        /* make sure the doorbell is rung */
+        mmiowb();
+
+        DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+                   "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x agg_params: %02x, prod: %04x\n",
+                   qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY),
+                   p_spq->cid, db.params, db.agg_flags,
+                   qed_chain_get_prod_idx(p_chain));
+
+        return 0;
+}
+
+/***************************************************************************
+* Asynchronous events
+***************************************************************************/
+static int
+qed_async_event_completion(struct qed_hwfn *p_hwfn,
+                           struct event_ring_entry *p_eqe)
+{
+        DP_NOTICE(p_hwfn,
+                  "Unknown Async completion for protocol: %d\n",
+                  p_eqe->protocol_id);
+        return -EINVAL;
+}
+
+/***************************************************************************
+* EQ API
+***************************************************************************/
+void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
+                        u16 prod)
+{
+        u32 addr = GTT_BAR0_MAP_REG_USDM_RAM +
+                   USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id);
+
+        REG_WR16(p_hwfn, addr, prod);
+
+        /* keep prod updates ordered */
+        mmiowb();
+}
+
+int qed_eq_completion(struct qed_hwfn *p_hwfn,
+                      void *cookie)
+{
+        struct qed_eq *p_eq = cookie;
+        struct qed_chain *p_chain = &p_eq->chain;
+        int rc = 0;
+
+        /* take a snapshot of the FW consumer */
+        u16 fw_cons_idx = le16_to_cpu(*p_eq->p_fw_cons);
+
+        DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "fw_cons_idx %x\n", fw_cons_idx);
+
+        /* Need to guarantee the fw_cons index we use points to a usable
+         * element (to comply with our chain), so our macros would comply
+         */
+        if ((fw_cons_idx & qed_chain_get_usable_per_page(p_chain)) ==
+            qed_chain_get_usable_per_page(p_chain))
+                fw_cons_idx += qed_chain_get_unusable_per_page(p_chain);
+
+        /* Complete current segment of eq entries */
+        while (fw_cons_idx != qed_chain_get_cons_idx(p_chain)) {
+                struct event_ring_entry *p_eqe = qed_chain_consume(p_chain);
+
+                if (!p_eqe) {
+                        rc = -EINVAL;
+                        break;
+                }
+
+                DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+                           "op %x prot %x res0 %x echo %x fwret %x flags %x\n",
+                           p_eqe->opcode,
+                           p_eqe->protocol_id,
+                           p_eqe->reserved0,
+                           le16_to_cpu(p_eqe->echo),
+                           p_eqe->fw_return_code,
+                           p_eqe->flags);
+
+                if (GET_FIELD(p_eqe->flags, EVENT_RING_ENTRY_ASYNC)) {
+                        if (qed_async_event_completion(p_hwfn, p_eqe))
+                                rc = -EINVAL;
+                } else if (qed_spq_completion(p_hwfn,
+                                              p_eqe->echo,
+                                              p_eqe->fw_return_code,
+                                              &p_eqe->data)) {
+                        rc = -EINVAL;
+                }
+
+                qed_chain_recycle_consumed(p_chain);
+        }
+
+        qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain));
+
+        return rc;
+}
+
+struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn,
+                            u16 num_elem)
+{
+        struct qed_eq *p_eq;
+
+        /* Allocate EQ struct */
+        p_eq = kzalloc(sizeof(*p_eq), GFP_ATOMIC);
+        if (!p_eq) {
+                DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_eq'\n");
+                return NULL;
+        }
+
+        /* Allocate and initialize EQ chain */
+        if (qed_chain_alloc(p_hwfn->cdev,
+                            QED_CHAIN_USE_TO_PRODUCE,
+                            QED_CHAIN_MODE_PBL,
+                            num_elem,
+                            sizeof(union event_ring_element),
+                            &p_eq->chain)) {
+                DP_NOTICE(p_hwfn, "Failed to allocate eq chain\n");
+                goto eq_allocate_fail;
+        }
+
+        /* register EQ completion on the SP SB */
+        qed_int_register_cb(p_hwfn,
+                            qed_eq_completion,
+                            p_eq,
+                            &p_eq->eq_sb_index,
+                            &p_eq->p_fw_cons);
+
+        return p_eq;
+
+eq_allocate_fail:
+        qed_eq_free(p_hwfn, p_eq);
+        return NULL;
+}
+
+void qed_eq_setup(struct qed_hwfn *p_hwfn,
+                  struct qed_eq *p_eq)
+{
+        qed_chain_reset(&p_eq->chain);
+}
+
+void qed_eq_free(struct qed_hwfn *p_hwfn,
+                 struct qed_eq *p_eq)
+{
+        if (!p_eq)
+                return;
+        qed_chain_free(p_hwfn->cdev, &p_eq->chain);
+        kfree(p_eq);
+}
+
+/***************************************************************************
+* Slow hwfn Queue (spq)
+***************************************************************************/
+void qed_spq_setup(struct qed_hwfn *p_hwfn)
+{
+        struct qed_spq *p_spq = p_hwfn->p_spq;
+        struct qed_spq_entry *p_virt = NULL;
+        dma_addr_t p_phys = 0;
+        unsigned int i = 0;
+
+        INIT_LIST_HEAD(&p_spq->pending);
+        INIT_LIST_HEAD(&p_spq->completion_pending);
+        INIT_LIST_HEAD(&p_spq->free_pool);
+        INIT_LIST_HEAD(&p_spq->unlimited_pending);
+        spin_lock_init(&p_spq->lock);
+
+        /* SPQ empty pool */
+        p_phys = p_spq->p_phys + offsetof(struct qed_spq_entry, ramrod);
+        p_virt = p_spq->p_virt;
+
+        for (i = 0; i < p_spq->chain.capacity; i++) {
+                p_virt->elem.data_ptr.hi = DMA_HI_LE(p_phys);
+                p_virt->elem.data_ptr.lo = DMA_LO_LE(p_phys);
+
+                list_add_tail(&p_virt->list, &p_spq->free_pool);
+
+                p_virt++;
+                p_phys += sizeof(struct qed_spq_entry);
+        }
+
+        /* Statistics */
+        p_spq->normal_count = 0;
+        p_spq->comp_count = 0;
+        p_spq->comp_sent_count = 0;
+        p_spq->unlimited_pending_count = 0;
+        p_spq->echo_idx = 0;
+
+        /* SPQ cid, cannot fail */
+        qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_spq->cid);
+        qed_spq_hw_initialize(p_hwfn, p_spq);
+
+        /* reset the chain itself */
+        qed_chain_reset(&p_spq->chain);
+}
+
+int qed_spq_alloc(struct qed_hwfn *p_hwfn)
+{
+        struct qed_spq *p_spq = NULL;
+        dma_addr_t p_phys = 0;
+        struct qed_spq_entry *p_virt = NULL;
+
+        /* SPQ struct */
+        p_spq = kzalloc(sizeof(struct qed_spq), GFP_ATOMIC);
+        if (!p_spq) {
+                DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n");
+                return -ENOMEM;
+        }
+
+        /* SPQ ring */
+        if (qed_chain_alloc(p_hwfn->cdev,
+                            QED_CHAIN_USE_TO_PRODUCE,
+                            QED_CHAIN_MODE_SINGLE,
+                            0, /* N/A when the mode is SINGLE */
+                            sizeof(struct slow_path_element),
+                            &p_spq->chain)) {
+                DP_NOTICE(p_hwfn, "Failed to allocate spq chain\n");
+                goto spq_allocate_fail;
+        }
+
+        /* allocate and fill the SPQ elements (incl. ramrod data list) */
+        p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+                                    p_spq->chain.capacity *
+                                    sizeof(struct qed_spq_entry),
+                                    &p_phys,
+                                    GFP_KERNEL);
+
+        if (!p_virt)
+                goto spq_allocate_fail;
+
+        p_spq->p_virt = p_virt;
+        p_spq->p_phys = p_phys;
+        p_hwfn->p_spq = p_spq;
+
+        return 0;
+
+spq_allocate_fail:
+        qed_chain_free(p_hwfn->cdev, &p_spq->chain);
+        kfree(p_spq);
+        return -ENOMEM;
+}
+
+void qed_spq_free(struct qed_hwfn *p_hwfn)
+{
+        struct qed_spq *p_spq = p_hwfn->p_spq;
+
+        if (!p_spq)
+                return;
+
+        if (p_spq->p_virt)
+                dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                                  p_spq->chain.capacity *
+                                  sizeof(struct qed_spq_entry),
+                                  p_spq->p_virt,
+                                  p_spq->p_phys);
+
+        qed_chain_free(p_hwfn->cdev, &p_spq->chain);
+        kfree(p_spq);
+}
+
+int
+qed_spq_get_entry(struct qed_hwfn *p_hwfn,
+                  struct qed_spq_entry **pp_ent)
+{
+        struct qed_spq *p_spq = p_hwfn->p_spq;
+        struct qed_spq_entry *p_ent = NULL;
+        int rc = 0;
+
+        spin_lock_bh(&p_spq->lock);
+
+        if (list_empty(&p_spq->free_pool)) {
+                p_ent = kzalloc(sizeof(*p_ent), GFP_ATOMIC);
+                if (!p_ent) {
+                        rc = -ENOMEM;
+                        goto out_unlock;
+                }
+                p_ent->queue = &p_spq->unlimited_pending;
+        } else {
+                p_ent = list_first_entry(&p_spq->free_pool,
+                                         struct qed_spq_entry,
+                                         list);
+                list_del(&p_ent->list);
+                p_ent->queue = &p_spq->pending;
+        }
+
+        *pp_ent = p_ent;
+
+out_unlock:
+        spin_unlock_bh(&p_spq->lock);
+        return rc;
+}
+
+/* Locked variant; Should be called while the SPQ lock is taken */
+static void __qed_spq_return_entry(struct qed_hwfn *p_hwfn,
+                                   struct qed_spq_entry *p_ent)
+{
+        list_add_tail(&p_ent->list, &p_hwfn->p_spq->free_pool);
+}
+
+void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
+                          struct qed_spq_entry *p_ent)
+{
+        spin_lock_bh(&p_hwfn->p_spq->lock);
+        __qed_spq_return_entry(p_hwfn, p_ent);
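+        /* The entry is back on the free_pool now and may be handed out
+         * again by the next qed_spq_get_entry() call.
+         */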
+        spin_unlock_bh(&p_hwfn->p_spq->lock);
+}
+
+/**
+ * @brief qed_spq_add_entry - adds a new entry to the pending
+ *        list. Should be called while the SPQ lock is held.
+ *
+ * Adds an entry to the pending list if there is room (an empty
+ * element is available in the free_pool); otherwise places the
+ * entry in the unlimited_pending pool.
+ *
+ * @param p_hwfn
+ * @param p_ent
+ * @param priority
+ *
+ * @return int
+ */
+static int
+qed_spq_add_entry(struct qed_hwfn *p_hwfn,
+                  struct qed_spq_entry *p_ent,
+                  enum spq_priority priority)
+{
+        struct qed_spq *p_spq = p_hwfn->p_spq;
+
+        if (p_ent->queue == &p_spq->unlimited_pending) {
+                struct qed_spq_entry *p_en2;
+
+                if (list_empty(&p_spq->free_pool)) {
+                        list_add_tail(&p_ent->list, &p_spq->unlimited_pending);
+                        p_spq->unlimited_pending_count++;
+
+                        return 0;
+                }
+
+                p_en2 = list_first_entry(&p_spq->free_pool,
+                                         struct qed_spq_entry,
+                                         list);
+                list_del(&p_en2->list);
+
+                /* Struct assignment */
+                *p_en2 = *p_ent;
+
+                kfree(p_ent);
+
+                p_ent = p_en2;
+        }
+
+        /* entry is to be placed in 'pending' queue */
+        switch (priority) {
+        case QED_SPQ_PRIORITY_NORMAL:
+                list_add_tail(&p_ent->list, &p_spq->pending);
+                p_spq->normal_count++;
+                break;
+        case QED_SPQ_PRIORITY_HIGH:
+                list_add(&p_ent->list, &p_spq->pending);
+                p_spq->high_count++;
+                break;
+        default:
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/***************************************************************************
+* Accessor
+***************************************************************************/
+u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn)
+{
+        if (!p_hwfn->p_spq)
+                return 0xffffffff;      /* illegal */
+        return p_hwfn->p_spq->cid;
+}
+
+/***************************************************************************
+* Posting new Ramrods
+***************************************************************************/
+static int qed_spq_post_list(struct qed_hwfn *p_hwfn,
+                             struct list_head *head,
+                             u32 keep_reserve)
+{
+        struct qed_spq *p_spq = p_hwfn->p_spq;
+        int rc;
+
+        while (qed_chain_get_elem_left(&p_spq->chain) > keep_reserve &&
+               !list_empty(head)) {
+                struct qed_spq_entry *p_ent =
+                        list_first_entry(head, struct qed_spq_entry, list);
+                list_del(&p_ent->list);
+                list_add_tail(&p_ent->list, &p_spq->completion_pending);
+                p_spq->comp_sent_count++;
+
+                rc = qed_spq_hw_post(p_hwfn, p_spq, p_ent);
+                if (rc) {
+                        list_del(&p_ent->list);
+                        __qed_spq_return_entry(p_hwfn, p_ent);
+                        return rc;
+                }
+        }
+
+        return 0;
+}
+
+static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
+{
+        struct qed_spq *p_spq = p_hwfn->p_spq;
+        struct qed_spq_entry *p_ent = NULL;
+
+        while (!list_empty(&p_spq->free_pool)) {
+                if (list_empty(&p_spq->unlimited_pending))
+                        break;
+
+                p_ent = list_first_entry(&p_spq->unlimited_pending,
+                                         struct qed_spq_entry,
+                                         list);
+                if (!p_ent)
+                        return -EINVAL;
+
+                list_del(&p_ent->list);
+
+                qed_spq_add_entry(p_hwfn, p_ent, p_ent->priority);
+        }
+
+        return qed_spq_post_list(p_hwfn, &p_spq->pending,
+                                 SPQ_HIGH_PRI_RESERVE_DEFAULT);
+}
+
+int qed_spq_post(struct qed_hwfn *p_hwfn,
+                 struct qed_spq_entry *p_ent,
+                 u8 *fw_return_code)
+{
+        int rc = 0;
+        struct qed_spq *p_spq = p_hwfn ?
p_hwfn->p_spq : NULL; + bool b_ret_ent = true; + + if (!p_hwfn) + return -EINVAL; + + if (!p_ent) { + DP_NOTICE(p_hwfn, "Got a NULL pointer\n"); + return -EINVAL; + } + + /* Complete the entry */ + rc = qed_spq_fill_entry(p_hwfn, p_ent); + + spin_lock_bh(&p_spq->lock); + + /* Check return value after LOCK is taken for cleaner error flow */ + if (rc) + goto spq_post_fail; + + /* Add the request to the pending queue */ + rc = qed_spq_add_entry(p_hwfn, p_ent, p_ent->priority); + if (rc) + goto spq_post_fail; + + rc = qed_spq_pend_post(p_hwfn); + if (rc) { + /* Since it's possible that pending failed for a different + * entry [although unlikely], the failed entry was already + * dealt with; No need to return it here. + */ + b_ret_ent = false; + goto spq_post_fail; + } + + spin_unlock_bh(&p_spq->lock); + + if (p_ent->comp_mode == QED_SPQ_MODE_EBLOCK) { + /* For entries in QED BLOCK mode, the completion code cannot + * perform the necessary cleanup - if it did, we couldn't + * access p_ent here to see whether it's successful or not. + * Thus, after gaining the answer perform the cleanup here. + */ + rc = qed_spq_block(p_hwfn, p_ent, fw_return_code); + if (rc) + goto spq_post_fail2; + + /* return to pool */ + qed_spq_return_entry(p_hwfn, p_ent); + } + return rc; + +spq_post_fail2: + spin_lock_bh(&p_spq->lock); + list_del(&p_ent->list); + qed_chain_return_produced(&p_spq->chain); + +spq_post_fail: + /* return to the free pool */ + if (b_ret_ent) + __qed_spq_return_entry(p_hwfn, p_ent); + spin_unlock_bh(&p_spq->lock); + + return rc; +} + +int qed_spq_completion(struct qed_hwfn *p_hwfn, + __le16 echo, + u8 fw_return_code, + union event_ring_data *p_data) +{ + struct qed_spq *p_spq; + struct qed_spq_entry *p_ent = NULL; + struct qed_spq_entry *tmp; + struct qed_spq_entry *found = NULL; + int rc; + + if (!p_hwfn) + return -EINVAL; + + p_spq = p_hwfn->p_spq; + if (!p_spq) + return -EINVAL; + + spin_lock_bh(&p_spq->lock); + list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, + list) { + if (p_ent->elem.hdr.echo == echo) { + list_del(&p_ent->list); + + qed_chain_return_produced(&p_spq->chain); + p_spq->comp_count++; + found = p_ent; + break; + } + } + + /* Release lock before callback, as callback may post + * an additional ramrod. 
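+         * qed_spq_post() takes this same SPQ lock, so running the
+         * callback with the lock held could deadlock.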
+         */
+        spin_unlock_bh(&p_spq->lock);
+
+        if (!found) {
+                DP_NOTICE(p_hwfn,
+                          "Failed to find an entry this EQE completes\n");
+                return -EEXIST;
+        }
+
+        DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Complete: func %p cookie %p\n",
+                   p_ent->comp_cb.function, p_ent->comp_cb.cookie);
+        if (found->comp_cb.function)
+                found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data,
+                                        fw_return_code);
+
+        if (found->comp_mode != QED_SPQ_MODE_EBLOCK)
+                /* EBLOCK is responsible for freeing its own entry */
+                qed_spq_return_entry(p_hwfn, found);
+
+        /* Attempt to post pending requests */
+        spin_lock_bh(&p_spq->lock);
+        rc = qed_spq_pend_post(p_hwfn);
+        spin_unlock_bh(&p_spq->lock);
+
+        return rc;
+}
+
+struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn)
+{
+        struct qed_consq *p_consq;
+
+        /* Allocate ConsQ struct */
+        p_consq = kzalloc(sizeof(*p_consq), GFP_ATOMIC);
+        if (!p_consq) {
+                DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_consq'\n");
+                return NULL;
+        }
+
+        /* Allocate and initialize ConsQ chain */
+        if (qed_chain_alloc(p_hwfn->cdev,
+                            QED_CHAIN_USE_TO_PRODUCE,
+                            QED_CHAIN_MODE_PBL,
+                            QED_CHAIN_PAGE_SIZE / 0x80,
+                            0x80,
+                            &p_consq->chain)) {
+                DP_NOTICE(p_hwfn, "Failed to allocate consq chain\n");
+                goto consq_allocate_fail;
+        }
+
+        return p_consq;
+
+consq_allocate_fail:
+        qed_consq_free(p_hwfn, p_consq);
+        return NULL;
+}
+
+void qed_consq_setup(struct qed_hwfn *p_hwfn,
+                     struct qed_consq *p_consq)
+{
+        qed_chain_reset(&p_consq->chain);
+}
+
+void qed_consq_free(struct qed_hwfn *p_hwfn,
+                    struct qed_consq *p_consq)
+{
+        if (!p_consq)
+                return;
+        qed_chain_free(p_hwfn->cdev, &p_consq->chain);
+        kfree(p_consq);
+}
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
new file mode 100644
index 000000000000..6a4347639c03
--- /dev/null
+++ b/include/linux/qed/common_hsi.h
@@ -0,0 +1,607 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */ + +#ifndef __COMMON_HSI__ +#define __COMMON_HSI__ + +#define FW_MAJOR_VERSION 8 +#define FW_MINOR_VERSION 4 +#define FW_REVISION_VERSION 2 +#define FW_ENGINEERING_VERSION 0 + +/***********************/ +/* COMMON HW CONSTANTS */ +/***********************/ + +/* PCI functions */ +#define MAX_NUM_PORTS_K2 (4) +#define MAX_NUM_PORTS_BB (2) +#define MAX_NUM_PORTS (MAX_NUM_PORTS_K2) + +#define MAX_NUM_PFS_K2 (16) +#define MAX_NUM_PFS_BB (8) +#define MAX_NUM_PFS (MAX_NUM_PFS_K2) +#define MAX_NUM_OF_PFS_IN_CHIP (16) /* On both engines */ + +#define MAX_NUM_VFS_K2 (192) +#define MAX_NUM_VFS_BB (120) +#define MAX_NUM_VFS (MAX_NUM_VFS_K2) + +#define MAX_NUM_FUNCTIONS_BB (MAX_NUM_PFS_BB + MAX_NUM_VFS_BB) +#define MAX_NUM_FUNCTIONS (MAX_NUM_PFS + MAX_NUM_VFS) + +#define MAX_FUNCTION_NUMBER_BB (MAX_NUM_PFS + MAX_NUM_VFS_BB) +#define MAX_FUNCTION_NUMBER (MAX_NUM_PFS + MAX_NUM_VFS) + +#define MAX_NUM_VPORTS_K2 (208) +#define MAX_NUM_VPORTS_BB (160) +#define MAX_NUM_VPORTS (MAX_NUM_VPORTS_K2) + +#define MAX_NUM_L2_QUEUES_K2 (320) +#define MAX_NUM_L2_QUEUES_BB (256) +#define MAX_NUM_L2_QUEUES (MAX_NUM_L2_QUEUES_K2) + +/* Traffic classes in network-facing blocks (PBF, BTB, NIG, BRB, PRS and QM) */ +#define NUM_PHYS_TCS_4PORT_K2 (4) +#define NUM_OF_PHYS_TCS (8) + +#define NUM_TCS_4PORT_K2 (NUM_PHYS_TCS_4PORT_K2 + 1) +#define NUM_OF_TCS (NUM_OF_PHYS_TCS + 1) + +#define LB_TC (NUM_OF_PHYS_TCS) + +/* Num of possible traffic priority values */ +#define NUM_OF_PRIO (8) + +#define MAX_NUM_VOQS_K2 (NUM_TCS_4PORT_K2 * MAX_NUM_PORTS_K2) +#define MAX_NUM_VOQS_BB (NUM_OF_TCS * MAX_NUM_PORTS_BB) +#define MAX_NUM_VOQS (MAX_NUM_VOQS_K2) +#define MAX_PHYS_VOQS (NUM_OF_PHYS_TCS * MAX_NUM_PORTS_BB) + +/* CIDs */ +#define NUM_OF_CONNECTION_TYPES (8) +#define NUM_OF_LCIDS (320) +#define NUM_OF_LTIDS (320) + +/*****************/ +/* CDU CONSTANTS */ +/*****************/ + +#define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (17) +#define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0x1ffff) + +/*****************/ +/* DQ CONSTANTS */ +/*****************/ + +/* DEMS */ +#define DQ_DEMS_LEGACY 0 + +/* XCM agg val selection */ +#define DQ_XCM_AGG_VAL_SEL_WORD2 0 +#define DQ_XCM_AGG_VAL_SEL_WORD3 1 +#define DQ_XCM_AGG_VAL_SEL_WORD4 2 +#define DQ_XCM_AGG_VAL_SEL_WORD5 3 +#define DQ_XCM_AGG_VAL_SEL_REG3 4 +#define DQ_XCM_AGG_VAL_SEL_REG4 5 +#define DQ_XCM_AGG_VAL_SEL_REG5 6 +#define DQ_XCM_AGG_VAL_SEL_REG6 7 + +/* XCM agg val selection */ +#define DQ_XCM_ETH_EDPM_NUM_BDS_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD2 +#define DQ_XCM_ETH_TX_BD_CONS_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_CORE_TX_BD_CONS_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_ETH_TX_BD_PROD_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_CORE_TX_BD_PROD_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_CORE_SPQ_PROD_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 + +/* XCM agg counter flag selection */ +#define DQ_XCM_AGG_FLG_SHIFT_BIT14 0 +#define DQ_XCM_AGG_FLG_SHIFT_BIT15 1 +#define DQ_XCM_AGG_FLG_SHIFT_CF12 2 +#define DQ_XCM_AGG_FLG_SHIFT_CF13 3 +#define DQ_XCM_AGG_FLG_SHIFT_CF18 4 +#define DQ_XCM_AGG_FLG_SHIFT_CF19 5 +#define DQ_XCM_AGG_FLG_SHIFT_CF22 6 +#define DQ_XCM_AGG_FLG_SHIFT_CF23 7 + +/* XCM agg counter flag selection */ +#define DQ_XCM_ETH_DQ_CF_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_CORE_DQ_CF_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_ETH_TERMINATE_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_CORE_TERMINATE_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF19) +#define 
DQ_XCM_ETH_SLOW_PATH_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_TPH_EN_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF23) + +/*****************/ +/* QM CONSTANTS */ +/*****************/ + +/* number of TX queues in the QM */ +#define MAX_QM_TX_QUEUES_K2 512 +#define MAX_QM_TX_QUEUES_BB 448 +#define MAX_QM_TX_QUEUES MAX_QM_TX_QUEUES_K2 + +/* number of Other queues in the QM */ +#define MAX_QM_OTHER_QUEUES_BB 64 +#define MAX_QM_OTHER_QUEUES_K2 128 +#define MAX_QM_OTHER_QUEUES MAX_QM_OTHER_QUEUES_K2 + +/* number of queues in a PF queue group */ +#define QM_PF_QUEUE_GROUP_SIZE 8 + +/* base number of Tx PQs in the CM PQ representation. + * should be used when storing PQ IDs in CM PQ registers and context + */ +#define CM_TX_PQ_BASE 0x200 + +/* QM registers data */ +#define QM_LINE_CRD_REG_WIDTH 16 +#define QM_LINE_CRD_REG_SIGN_BIT (1 << (QM_LINE_CRD_REG_WIDTH - 1)) +#define QM_BYTE_CRD_REG_WIDTH 24 +#define QM_BYTE_CRD_REG_SIGN_BIT (1 << (QM_BYTE_CRD_REG_WIDTH - 1)) +#define QM_WFQ_CRD_REG_WIDTH 32 +#define QM_WFQ_CRD_REG_SIGN_BIT (1 << (QM_WFQ_CRD_REG_WIDTH - 1)) +#define QM_RL_CRD_REG_WIDTH 32 +#define QM_RL_CRD_REG_SIGN_BIT (1 << (QM_RL_CRD_REG_WIDTH - 1)) + +/*****************/ +/* CAU CONSTANTS */ +/*****************/ + +#define CAU_FSM_ETH_RX 0 +#define CAU_FSM_ETH_TX 1 + +/* Number of Protocol Indices per Status Block */ +#define PIS_PER_SB 12 + +#define CAU_HC_STOPPED_STATE 3 +#define CAU_HC_DISABLE_STATE 4 +#define CAU_HC_ENABLE_STATE 0 + +/*****************/ +/* IGU CONSTANTS */ +/*****************/ + +#define MAX_SB_PER_PATH_K2 (368) +#define MAX_SB_PER_PATH_BB (288) +#define MAX_TOT_SB_PER_PATH \ + MAX_SB_PER_PATH_K2 + +#define MAX_SB_PER_PF_MIMD 129 +#define MAX_SB_PER_PF_SIMD 64 +#define MAX_SB_PER_VF 64 + +/* Memory addresses on the BAR for the IGU Sub Block */ +#define IGU_MEM_BASE 0x0000 + +#define IGU_MEM_MSIX_BASE 0x0000 +#define IGU_MEM_MSIX_UPPER 0x0101 +#define IGU_MEM_MSIX_RESERVED_UPPER 0x01ff + +#define IGU_MEM_PBA_MSIX_BASE 0x0200 +#define IGU_MEM_PBA_MSIX_UPPER 0x0202 +#define IGU_MEM_PBA_MSIX_RESERVED_UPPER 0x03ff + +#define IGU_CMD_INT_ACK_BASE 0x0400 +#define IGU_CMD_INT_ACK_UPPER (IGU_CMD_INT_ACK_BASE + \ + MAX_TOT_SB_PER_PATH - \ + 1) +#define IGU_CMD_INT_ACK_RESERVED_UPPER 0x05ff + +#define IGU_CMD_ATTN_BIT_UPD_UPPER 0x05f0 +#define IGU_CMD_ATTN_BIT_SET_UPPER 0x05f1 +#define IGU_CMD_ATTN_BIT_CLR_UPPER 0x05f2 + +#define IGU_REG_SISR_MDPC_WMASK_UPPER 0x05f3 +#define IGU_REG_SISR_MDPC_WMASK_LSB_UPPER 0x05f4 +#define IGU_REG_SISR_MDPC_WMASK_MSB_UPPER 0x05f5 +#define IGU_REG_SISR_MDPC_WOMASK_UPPER 0x05f6 + +#define IGU_CMD_PROD_UPD_BASE 0x0600 +#define IGU_CMD_PROD_UPD_UPPER (IGU_CMD_PROD_UPD_BASE +\ + MAX_TOT_SB_PER_PATH - \ + 1) +#define IGU_CMD_PROD_UPD_RESERVED_UPPER 0x07ff + +/*****************/ +/* PXP CONSTANTS */ +/*****************/ + +/* PTT and GTT */ +#define PXP_NUM_PF_WINDOWS 12 +#define PXP_PER_PF_ENTRY_SIZE 8 +#define PXP_NUM_GLOBAL_WINDOWS 243 +#define PXP_GLOBAL_ENTRY_SIZE 4 +#define PXP_ADMIN_WINDOW_ALLOWED_LENGTH 4 +#define PXP_PF_WINDOW_ADMIN_START 0 +#define PXP_PF_WINDOW_ADMIN_LENGTH 0x1000 +#define PXP_PF_WINDOW_ADMIN_END (PXP_PF_WINDOW_ADMIN_START + \ + PXP_PF_WINDOW_ADMIN_LENGTH - 1) +#define PXP_PF_WINDOW_ADMIN_PER_PF_START 0 +#define PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH (PXP_NUM_PF_WINDOWS * \ + PXP_PER_PF_ENTRY_SIZE) +#define PXP_PF_WINDOW_ADMIN_PER_PF_END (PXP_PF_WINDOW_ADMIN_PER_PF_START + \ + PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH - 1) +#define 
PXP_PF_WINDOW_ADMIN_GLOBAL_START 0x200 +#define PXP_PF_WINDOW_ADMIN_GLOBAL_LENGTH (PXP_NUM_GLOBAL_WINDOWS * \ + PXP_GLOBAL_ENTRY_SIZE) +#define PXP_PF_WINDOW_ADMIN_GLOBAL_END \ + (PXP_PF_WINDOW_ADMIN_GLOBAL_START + \ + PXP_PF_WINDOW_ADMIN_GLOBAL_LENGTH - 1) +#define PXP_PF_GLOBAL_PRETEND_ADDR 0x1f0 +#define PXP_PF_ME_OPAQUE_MASK_ADDR 0xf4 +#define PXP_PF_ME_OPAQUE_ADDR 0x1f8 +#define PXP_PF_ME_CONCRETE_ADDR 0x1fc + +#define PXP_EXTERNAL_BAR_PF_WINDOW_START 0x1000 +#define PXP_EXTERNAL_BAR_PF_WINDOW_NUM PXP_NUM_PF_WINDOWS +#define PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE 0x1000 +#define PXP_EXTERNAL_BAR_PF_WINDOW_LENGTH \ + (PXP_EXTERNAL_BAR_PF_WINDOW_NUM * \ + PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE) +#define PXP_EXTERNAL_BAR_PF_WINDOW_END \ + (PXP_EXTERNAL_BAR_PF_WINDOW_START + \ + PXP_EXTERNAL_BAR_PF_WINDOW_LENGTH - 1) + +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START \ + (PXP_EXTERNAL_BAR_PF_WINDOW_END + 1) +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_NUM PXP_NUM_GLOBAL_WINDOWS +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_SINGLE_SIZE 0x1000 +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH \ + (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_NUM * \ + PXP_EXTERNAL_BAR_GLOBAL_WINDOW_SINGLE_SIZE) +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_END \ + (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \ + PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) + +#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12 +#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024 + +/* ILT Records */ +#define PXP_NUM_ILT_RECORDS_BB 7600 +#define PXP_NUM_ILT_RECORDS_K2 11000 +#define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2) + +/******************/ +/* PBF CONSTANTS */ +/******************/ + +/* Number of PBF command queue lines. Each line is 32B. */ +#define PBF_MAX_CMD_LINES 3328 + +/* Number of BTB blocks. Each block is 256B. 
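+ * (1440 blocks at 256B each comes to 360KB of BTB; our arithmetic,
+ * for orientation only.)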
*/ +#define BTB_MAX_BLOCKS 1440 + +/*****************/ +/* PRS CONSTANTS */ +/*****************/ + +/* Async data KCQ CQE */ +struct async_data { + __le32 cid; + __le16 itid; + u8 error_code; + u8 fw_debug_param; +}; + +struct regpair { + __le32 lo; + __le32 hi; +}; + +/* Event Data Union */ +union event_ring_data { + u8 bytes[8]; + struct async_data async_info; +}; + +/* Event Ring Entry */ +struct event_ring_entry { + u8 protocol_id; + u8 opcode; + __le16 reserved0; + __le16 echo; + u8 fw_return_code; + u8 flags; +#define EVENT_RING_ENTRY_ASYNC_MASK 0x1 +#define EVENT_RING_ENTRY_ASYNC_SHIFT 0 +#define EVENT_RING_ENTRY_RESERVED1_MASK 0x7F +#define EVENT_RING_ENTRY_RESERVED1_SHIFT 1 + union event_ring_data data; +}; + +/* Multi function mode */ +enum mf_mode { + SF, + MF_OVLAN, + MF_NPAR, + MAX_MF_MODE +}; + +/* Per-protocol connection types */ +enum protocol_type { + PROTOCOLID_RESERVED1, + PROTOCOLID_RESERVED2, + PROTOCOLID_RESERVED3, + PROTOCOLID_CORE, + PROTOCOLID_ETH, + PROTOCOLID_RESERVED4, + PROTOCOLID_RESERVED5, + PROTOCOLID_PREROCE, + PROTOCOLID_COMMON, + PROTOCOLID_RESERVED6, + MAX_PROTOCOL_TYPE +}; + +/* status block structure */ +struct cau_pi_entry { + u32 prod; +#define CAU_PI_ENTRY_PROD_VAL_MASK 0xFFFF +#define CAU_PI_ENTRY_PROD_VAL_SHIFT 0 +#define CAU_PI_ENTRY_PI_TIMESET_MASK 0x7F +#define CAU_PI_ENTRY_PI_TIMESET_SHIFT 16 +#define CAU_PI_ENTRY_FSM_SEL_MASK 0x1 +#define CAU_PI_ENTRY_FSM_SEL_SHIFT 23 +#define CAU_PI_ENTRY_RESERVED_MASK 0xFF +#define CAU_PI_ENTRY_RESERVED_SHIFT 24 +}; + +/* status block structure */ +struct cau_sb_entry { + u32 data; +#define CAU_SB_ENTRY_SB_PROD_MASK 0xFFFFFF +#define CAU_SB_ENTRY_SB_PROD_SHIFT 0 +#define CAU_SB_ENTRY_STATE0_MASK 0xF +#define CAU_SB_ENTRY_STATE0_SHIFT 24 +#define CAU_SB_ENTRY_STATE1_MASK 0xF +#define CAU_SB_ENTRY_STATE1_SHIFT 28 + u32 params; +#define CAU_SB_ENTRY_SB_TIMESET0_MASK 0x7F +#define CAU_SB_ENTRY_SB_TIMESET0_SHIFT 0 +#define CAU_SB_ENTRY_SB_TIMESET1_MASK 0x7F +#define CAU_SB_ENTRY_SB_TIMESET1_SHIFT 7 +#define CAU_SB_ENTRY_TIMER_RES0_MASK 0x3 +#define CAU_SB_ENTRY_TIMER_RES0_SHIFT 14 +#define CAU_SB_ENTRY_TIMER_RES1_MASK 0x3 +#define CAU_SB_ENTRY_TIMER_RES1_SHIFT 16 +#define CAU_SB_ENTRY_VF_NUMBER_MASK 0xFF +#define CAU_SB_ENTRY_VF_NUMBER_SHIFT 18 +#define CAU_SB_ENTRY_VF_VALID_MASK 0x1 +#define CAU_SB_ENTRY_VF_VALID_SHIFT 26 +#define CAU_SB_ENTRY_PF_NUMBER_MASK 0xF +#define CAU_SB_ENTRY_PF_NUMBER_SHIFT 27 +#define CAU_SB_ENTRY_TPH_MASK 0x1 +#define CAU_SB_ENTRY_TPH_SHIFT 31 +}; + +/* core doorbell data */ +struct core_db_data { + u8 params; +#define CORE_DB_DATA_DEST_MASK 0x3 +#define CORE_DB_DATA_DEST_SHIFT 0 +#define CORE_DB_DATA_AGG_CMD_MASK 0x3 +#define CORE_DB_DATA_AGG_CMD_SHIFT 2 +#define CORE_DB_DATA_BYPASS_EN_MASK 0x1 +#define CORE_DB_DATA_BYPASS_EN_SHIFT 4 +#define CORE_DB_DATA_RESERVED_MASK 0x1 +#define CORE_DB_DATA_RESERVED_SHIFT 5 +#define CORE_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define CORE_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 spq_prod; +}; + +/* Enum of doorbell aggregative command selection */ +enum db_agg_cmd_sel { + DB_AGG_CMD_NOP, + DB_AGG_CMD_SET, + DB_AGG_CMD_ADD, + DB_AGG_CMD_MAX, + MAX_DB_AGG_CMD_SEL +}; + +/* Enum of doorbell destination */ +enum db_dest { + DB_DEST_XCM, + DB_DEST_UCM, + DB_DEST_TCM, + DB_NUM_DESTINATIONS, + MAX_DB_DEST +}; + +/* Structure for doorbell address, in legacy mode */ +struct db_legacy_addr { + __le32 addr; +#define DB_LEGACY_ADDR_RESERVED0_MASK 0x3 +#define DB_LEGACY_ADDR_RESERVED0_SHIFT 0 +#define DB_LEGACY_ADDR_DEMS_MASK 0x7 +#define 
DB_LEGACY_ADDR_DEMS_SHIFT 2 +#define DB_LEGACY_ADDR_ICID_MASK 0x7FFFFFF +#define DB_LEGACY_ADDR_ICID_SHIFT 5 +}; + +/* Igu interrupt command */ +enum igu_int_cmd { + IGU_INT_ENABLE = 0, + IGU_INT_DISABLE = 1, + IGU_INT_NOP = 2, + IGU_INT_NOP2 = 3, + MAX_IGU_INT_CMD +}; + +/* IGU producer or consumer update command */ +struct igu_prod_cons_update { + u32 sb_id_and_flags; +#define IGU_PROD_CONS_UPDATE_SB_INDEX_MASK 0xFFFFFF +#define IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT 0 +#define IGU_PROD_CONS_UPDATE_UPDATE_FLAG_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT 24 +#define IGU_PROD_CONS_UPDATE_ENABLE_INT_MASK 0x3 +#define IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT 25 +#define IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT 27 +#define IGU_PROD_CONS_UPDATE_TIMER_MASK_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_TIMER_MASK_SHIFT 28 +#define IGU_PROD_CONS_UPDATE_RESERVED0_MASK 0x3 +#define IGU_PROD_CONS_UPDATE_RESERVED0_SHIFT 29 +#define IGU_PROD_CONS_UPDATE_COMMAND_TYPE_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_COMMAND_TYPE_SHIFT 31 + u32 reserved1; +}; + +/* Igu segments access for default status block only */ +enum igu_seg_access { + IGU_SEG_ACCESS_REG = 0, + IGU_SEG_ACCESS_ATTN = 1, + MAX_IGU_SEG_ACCESS +}; + +struct parsing_and_err_flags { + __le16 flags; +#define PARSING_AND_ERR_FLAGS_L3TYPE_MASK 0x3 +#define PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT 0 +#define PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK 0x3 +#define PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT 2 +#define PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT 4 +#define PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT 5 +#define PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT 6 +#define PARSING_AND_ERR_FLAGS_TIMESYNCPKT_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TIMESYNCPKT_SHIFT 7 +#define PARSING_AND_ERR_FLAGS_TIMESTAMPRECORDED_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TIMESTAMPRECORDED_SHIFT 8 +#define PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT 9 +#define PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT 10 +#define PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT 11 +#define PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_SHIFT 12 +#define PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT 13 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT 14 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT 15 +}; + +/* Concrete Function ID. 
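+ *
+ * Built from the mask/shift pairs defined below; e.g., a VF's concrete
+ * FID would be composed roughly as (our illustration, not part of the
+ * original comment):
+ *
+ *   fid = (vfid << PXP_CONCRETE_FID_VFID_SHIFT) |
+ *         (1 << PXP_CONCRETE_FID_VFVALID_SHIFT) |
+ *         (pfid << PXP_CONCRETE_FID_PFID_SHIFT);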
*/ +struct pxp_concrete_fid { + __le16 fid; +#define PXP_CONCRETE_FID_PFID_MASK 0xF +#define PXP_CONCRETE_FID_PFID_SHIFT 0 +#define PXP_CONCRETE_FID_PORT_MASK 0x3 +#define PXP_CONCRETE_FID_PORT_SHIFT 4 +#define PXP_CONCRETE_FID_PATH_MASK 0x1 +#define PXP_CONCRETE_FID_PATH_SHIFT 6 +#define PXP_CONCRETE_FID_VFVALID_MASK 0x1 +#define PXP_CONCRETE_FID_VFVALID_SHIFT 7 +#define PXP_CONCRETE_FID_VFID_MASK 0xFF +#define PXP_CONCRETE_FID_VFID_SHIFT 8 +}; + +struct pxp_pretend_concrete_fid { + __le16 fid; +#define PXP_PRETEND_CONCRETE_FID_PFID_MASK 0xF +#define PXP_PRETEND_CONCRETE_FID_PFID_SHIFT 0 +#define PXP_PRETEND_CONCRETE_FID_RESERVED_MASK 0x7 +#define PXP_PRETEND_CONCRETE_FID_RESERVED_SHIFT 4 +#define PXP_PRETEND_CONCRETE_FID_VFVALID_MASK 0x1 +#define PXP_PRETEND_CONCRETE_FID_VFVALID_SHIFT 7 +#define PXP_PRETEND_CONCRETE_FID_VFID_MASK 0xFF +#define PXP_PRETEND_CONCRETE_FID_VFID_SHIFT 8 +}; + +union pxp_pretend_fid { + struct pxp_pretend_concrete_fid concrete_fid; + __le16 opaque_fid; +}; + +/* Pxp Pretend Command Register. */ +struct pxp_pretend_cmd { + union pxp_pretend_fid fid; + __le16 control; +#define PXP_PRETEND_CMD_PATH_MASK 0x1 +#define PXP_PRETEND_CMD_PATH_SHIFT 0 +#define PXP_PRETEND_CMD_USE_PORT_MASK 0x1 +#define PXP_PRETEND_CMD_USE_PORT_SHIFT 1 +#define PXP_PRETEND_CMD_PORT_MASK 0x3 +#define PXP_PRETEND_CMD_PORT_SHIFT 2 +#define PXP_PRETEND_CMD_RESERVED0_MASK 0xF +#define PXP_PRETEND_CMD_RESERVED0_SHIFT 4 +#define PXP_PRETEND_CMD_RESERVED1_MASK 0xF +#define PXP_PRETEND_CMD_RESERVED1_SHIFT 8 +#define PXP_PRETEND_CMD_PRETEND_PATH_MASK 0x1 +#define PXP_PRETEND_CMD_PRETEND_PATH_SHIFT 12 +#define PXP_PRETEND_CMD_PRETEND_PORT_MASK 0x1 +#define PXP_PRETEND_CMD_PRETEND_PORT_SHIFT 13 +#define PXP_PRETEND_CMD_PRETEND_FUNCTION_MASK 0x1 +#define PXP_PRETEND_CMD_PRETEND_FUNCTION_SHIFT 14 +#define PXP_PRETEND_CMD_IS_CONCRETE_MASK 0x1 +#define PXP_PRETEND_CMD_IS_CONCRETE_SHIFT 15 +}; + +/* PTT Record in PXP Admin Window. */ +struct pxp_ptt_entry { + __le32 offset; +#define PXP_PTT_ENTRY_OFFSET_MASK 0x7FFFFF +#define PXP_PTT_ENTRY_OFFSET_SHIFT 0 +#define PXP_PTT_ENTRY_RESERVED0_MASK 0x1FF +#define PXP_PTT_ENTRY_RESERVED0_SHIFT 23 + struct pxp_pretend_cmd pretend; +}; + +/* RSS hash type */ +enum rss_hash_type { + RSS_HASH_TYPE_DEFAULT = 0, + RSS_HASH_TYPE_IPV4 = 1, + RSS_HASH_TYPE_TCP_IPV4 = 2, + RSS_HASH_TYPE_IPV6 = 3, + RSS_HASH_TYPE_TCP_IPV6 = 4, + RSS_HASH_TYPE_UDP_IPV4 = 5, + RSS_HASH_TYPE_UDP_IPV6 = 6, + MAX_RSS_HASH_TYPE +}; + +/* status block structure */ +struct status_block { + __le16 pi_array[PIS_PER_SB]; + __le32 sb_num; +#define STATUS_BLOCK_SB_NUM_MASK 0x1FF +#define STATUS_BLOCK_SB_NUM_SHIFT 0 +#define STATUS_BLOCK_ZERO_PAD_MASK 0x7F +#define STATUS_BLOCK_ZERO_PAD_SHIFT 9 +#define STATUS_BLOCK_ZERO_PAD2_MASK 0xFFFF +#define STATUS_BLOCK_ZERO_PAD2_SHIFT 16 + __le32 prod_index; +#define STATUS_BLOCK_PROD_INDEX_MASK 0xFFFFFF +#define STATUS_BLOCK_PROD_INDEX_SHIFT 0 +#define STATUS_BLOCK_ZERO_PAD3_MASK 0xFF +#define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 +}; + +#endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h new file mode 100644 index 000000000000..b920c3605c46 --- /dev/null +++ b/include/linux/qed/qed_chain.h @@ -0,0 +1,539 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef _QED_CHAIN_H +#define _QED_CHAIN_H + +#include +#include +#include +#include +#include +#include + +/* dma_addr_t manip */ +#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) +#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) + +#define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) +#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) +#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) +#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) +#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) + +enum qed_chain_mode { + /* Each Page contains a next pointer at its end */ + QED_CHAIN_MODE_NEXT_PTR, + + /* Chain is a single page (next ptr) is unrequired */ + QED_CHAIN_MODE_SINGLE, + + /* Page pointers are located in a side list */ + QED_CHAIN_MODE_PBL, +}; + +enum qed_chain_use_mode { + QED_CHAIN_USE_TO_PRODUCE, /* Chain starts empty */ + QED_CHAIN_USE_TO_CONSUME, /* Chain starts full */ + QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */ +}; + +struct qed_chain_next { + struct regpair next_phys; + void *next_virt; +}; + +struct qed_chain_pbl { + dma_addr_t p_phys_table; + void *p_virt_table; + u16 prod_page_idx; + u16 cons_page_idx; +}; + +struct qed_chain { + void *p_virt_addr; + dma_addr_t p_phys_addr; + void *p_prod_elem; + void *p_cons_elem; + u16 page_cnt; + enum qed_chain_mode mode; + enum qed_chain_use_mode intended_use; /* used to produce/consume */ + u16 capacity; /*< number of _usable_ elements */ + u16 size; /* number of elements */ + u16 prod_idx; + u16 cons_idx; + u16 elem_per_page; + u16 elem_per_page_mask; + u16 elem_unusable; + u16 usable_per_page; + u16 elem_size; + u16 next_page_mask; + struct qed_chain_pbl pbl; +}; + +#define QED_CHAIN_PBL_ENTRY_SIZE (8) +#define QED_CHAIN_PAGE_SIZE (0x1000) +#define ELEMS_PER_PAGE(elem_size) (QED_CHAIN_PAGE_SIZE / (elem_size)) + +#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ + ((mode == QED_CHAIN_MODE_NEXT_PTR) ? 
\
+         (1 + ((sizeof(struct qed_chain_next) - 1) / \
+               (elem_size))) : 0)
+
+#define USABLE_ELEMS_PER_PAGE(elem_size, mode) \
+        ((u32)(ELEMS_PER_PAGE(elem_size) - \
+               UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)))
+
+#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \
+        DIV_ROUND_UP(elem_cnt, USABLE_ELEMS_PER_PAGE(elem_size, mode))
+
+/* Accessors */
+static inline u16 qed_chain_get_prod_idx(struct qed_chain *p_chain)
+{
+        return p_chain->prod_idx;
+}
+
+static inline u16 qed_chain_get_cons_idx(struct qed_chain *p_chain)
+{
+        return p_chain->cons_idx;
+}
+
+static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain)
+{
+        u16 used;
+
+        /* we don't need to truncate upon assignment, as we assign u32->u16 */
+        used = ((u32)0x10000u + (u32)(p_chain->prod_idx)) -
+               (u32)p_chain->cons_idx;
+        if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR)
+                used -= (used / p_chain->elem_per_page);
+
+        return p_chain->capacity - used;
+}
+
+static inline u8 qed_chain_is_full(struct qed_chain *p_chain)
+{
+        return qed_chain_get_elem_left(p_chain) == p_chain->capacity;
+}
+
+static inline u8 qed_chain_is_empty(struct qed_chain *p_chain)
+{
+        return qed_chain_get_elem_left(p_chain) == 0;
+}
+
+static inline u16 qed_chain_get_elem_per_page(
+        struct qed_chain *p_chain)
+{
+        return p_chain->elem_per_page;
+}
+
+static inline u16 qed_chain_get_usable_per_page(
+        struct qed_chain *p_chain)
+{
+        return p_chain->usable_per_page;
+}
+
+static inline u16 qed_chain_get_unusable_per_page(
+        struct qed_chain *p_chain)
+{
+        return p_chain->elem_unusable;
+}
+
+static inline u16 qed_chain_get_size(struct qed_chain *p_chain)
+{
+        return p_chain->size;
+}
+
+static inline dma_addr_t
+qed_chain_get_pbl_phys(struct qed_chain *p_chain)
+{
+        return p_chain->pbl.p_phys_table;
+}
+
+/**
+ * @brief qed_chain_advance_page -
+ *
+ * Advance the next element across pages for a linked chain
+ *
+ * @param p_chain
+ * @param p_next_elem
+ * @param idx_to_inc
+ * @param page_to_inc
+ */
+static inline void
+qed_chain_advance_page(struct qed_chain *p_chain,
+                       void **p_next_elem,
+                       u16 *idx_to_inc,
+                       u16 *page_to_inc)
+{
+        switch (p_chain->mode) {
+        case QED_CHAIN_MODE_NEXT_PTR:
+        {
+                struct qed_chain_next *p_next = *p_next_elem;
+                *p_next_elem = p_next->next_virt;
+                *idx_to_inc += p_chain->elem_unusable;
+                break;
+        }
+        case QED_CHAIN_MODE_SINGLE:
+                *p_next_elem = p_chain->p_virt_addr;
+                break;
+
+        case QED_CHAIN_MODE_PBL:
+                /* It is assumed pages are sequential; the next element only
+                 * needs to change when wrapping back to the first page from
+                 * the last one.
+                 */
+                if (++(*page_to_inc) == p_chain->page_cnt) {
+                        *page_to_inc = 0;
+                        *p_next_elem = p_chain->p_virt_addr;
+                }
+        }
+}
+
+#define is_unusable_idx(p, idx) \
+        (((p)->idx & (p)->elem_per_page_mask) == (p)->usable_per_page)
+
+#define is_unusable_next_idx(p, idx) \
+        ((((p)->idx + 1) & (p)->elem_per_page_mask) == (p)->usable_per_page)
+
+#define test_ans_skip(p, idx) \
+        do { \
+                if (is_unusable_idx(p, idx)) { \
+                        (p)->idx += (p)->elem_unusable; \
+                } \
+        } while (0)
+
+/**
+ * @brief qed_chain_return_multi_produced -
+ *
+ * A chain in which the driver "Produces" elements should use this API
+ * to indicate previous produced elements are now consumed.
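+ *
+ * An illustrative walk-through (ours): with 8-byte elements in
+ * NEXT_PTR mode, a 4KB page holds 512 slots of which 510 are usable
+ * (two are reserved for the 16-byte struct qed_chain_next on a 64-bit
+ * build); once the consumer index hits the 510 mark, test_ans_skip()
+ * bumps it past the reserved slots onto the next page.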
+ * + * @param p_chain + * @param num + */ +static inline void +qed_chain_return_multi_produced(struct qed_chain *p_chain, + u16 num) +{ + p_chain->cons_idx += num; + test_ans_skip(p_chain, cons_idx); +} + +/** + * @brief qed_chain_return_produced - + * + * A chain in which the driver "Produces" elements should use this API + * to indicate previously produced elements are now consumed. + * + * @param p_chain + */ +static inline void qed_chain_return_produced(struct qed_chain *p_chain) +{ + p_chain->cons_idx++; + test_ans_skip(p_chain, cons_idx); +} + +/** + * @brief qed_chain_produce - + * + * A chain in which the driver "Produces" elements should use this to get + * a pointer to the next element which can be "Produced". It is the + * driver's responsibility to validate that the chain has room for a new + * element. + * + * @param p_chain + * + * @return void*, a pointer to next element + */ +static inline void *qed_chain_produce(struct qed_chain *p_chain) +{ + void *ret = NULL; + + if ((p_chain->prod_idx & p_chain->elem_per_page_mask) == + p_chain->next_page_mask) { + qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, + &p_chain->prod_idx, + &p_chain->pbl.prod_page_idx); + } + + ret = p_chain->p_prod_elem; + p_chain->prod_idx++; + p_chain->p_prod_elem = (void *)(((u8 *)p_chain->p_prod_elem) + + p_chain->elem_size); + + return ret; +} + +/** + * @brief qed_chain_get_capacity - + * + * Get the maximum number of usable BDs in the chain + * + * @param p_chain + * + * @return u16, number of usable BDs + */ +static inline u16 qed_chain_get_capacity(struct qed_chain *p_chain) +{ + return p_chain->capacity; +} + +/** + * @brief qed_chain_recycle_consumed - + * + * Returns an element which was previously consumed; + * Increments the producer so it can be written to FW. + * + * @param p_chain + */ +static inline void +qed_chain_recycle_consumed(struct qed_chain *p_chain) +{ + test_ans_skip(p_chain, prod_idx); + p_chain->prod_idx++; +} + +/** + * @brief qed_chain_consume - + * + * A chain in which the driver utilizes data written by a different source + * (i.e., FW) should use this to access passed buffers.
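+ *
+ * For example (handle_cqe() here is just an illustrative helper, not
+ * part of this API), a driver could consume each FW-written element and
+ * then recycle it back to the producer side:
+ *
+ *	cqe = qed_chain_consume(p_chain);
+ *	handle_cqe(cqe);
+ *	qed_chain_recycle_consumed(p_chain);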
+ * + * @param p_chain + * + * @return void*, a pointer to the next buffer written + */ +static inline void *qed_chain_consume(struct qed_chain *p_chain) +{ + void *ret = NULL; + + if ((p_chain->cons_idx & p_chain->elem_per_page_mask) == + p_chain->next_page_mask) { + qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, + &p_chain->cons_idx, + &p_chain->pbl.cons_page_idx); + } + + ret = p_chain->p_cons_elem; + p_chain->cons_idx++; + p_chain->p_cons_elem = (void *)(((u8 *)p_chain->p_cons_elem) + + p_chain->elem_size); + + return ret; +} + +/** + * @brief qed_chain_reset - Resets the chain to its start state + * + * @param p_chain pointer to a previously allocated chain + */ +static inline void qed_chain_reset(struct qed_chain *p_chain) +{ + int i; + + p_chain->prod_idx = 0; + p_chain->cons_idx = 0; + p_chain->p_cons_elem = p_chain->p_virt_addr; + p_chain->p_prod_elem = p_chain->p_virt_addr; + + if (p_chain->mode == QED_CHAIN_MODE_PBL) { + p_chain->pbl.prod_page_idx = p_chain->page_cnt - 1; + p_chain->pbl.cons_page_idx = p_chain->page_cnt - 1; + } + + switch (p_chain->intended_use) { + case QED_CHAIN_USE_TO_CONSUME_PRODUCE: + case QED_CHAIN_USE_TO_PRODUCE: + /* Do nothing */ + break; + + case QED_CHAIN_USE_TO_CONSUME: + /* produce empty elements */ + for (i = 0; i < p_chain->capacity; i++) + qed_chain_recycle_consumed(p_chain); + break; + } +} + +/** + * @brief qed_chain_init - Initializes a basic chain struct + * + * @param p_chain + * @param p_virt_addr + * @param p_phys_addr physical address of allocated buffer's beginning + * @param page_cnt number of pages in the allocated buffer + * @param elem_size size of each element in the chain + * @param intended_use + * @param mode + */ +static inline void qed_chain_init(struct qed_chain *p_chain, + void *p_virt_addr, + dma_addr_t p_phys_addr, + u16 page_cnt, + u8 elem_size, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode) +{ + /* chain fixed parameters */ + p_chain->p_virt_addr = p_virt_addr; + p_chain->p_phys_addr = p_phys_addr; + p_chain->elem_size = elem_size; + p_chain->page_cnt = page_cnt; + p_chain->mode = mode; + + p_chain->intended_use = intended_use; + p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); + p_chain->usable_per_page = + USABLE_ELEMS_PER_PAGE(elem_size, mode); + p_chain->capacity = p_chain->usable_per_page * page_cnt; + p_chain->size = p_chain->elem_per_page * page_cnt; + p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; + + p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode); + + p_chain->next_page_mask = (p_chain->usable_per_page & + p_chain->elem_per_page_mask); + + if (mode == QED_CHAIN_MODE_NEXT_PTR) { + struct qed_chain_next *p_next; + u16 i; + + for (i = 0; i < page_cnt - 1; i++) { + /* Increment p_phys_addr to the next page. */ + p_phys_addr += QED_CHAIN_PAGE_SIZE; + + /* Initialize the physical address of the next page. */ + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + + elem_size * + p_chain-> + usable_per_page); + + p_next->next_phys.lo = DMA_LO_LE(p_phys_addr); + p_next->next_phys.hi = DMA_HI_LE(p_phys_addr); + + /* Initialize the virtual address of the next page. */ + p_next->next_virt = (void *)((u8 *)p_virt_addr + + QED_CHAIN_PAGE_SIZE); + + /* Move to the next page.
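+			 * Advancing p_virt_addr below means the next
+			 * iteration fills in the next-pointer of the page
+			 * we just linked to.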
*/ + p_virt_addr = p_next->next_virt; + } + + /* Last page's next should point to beginning of the chain */ + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + + elem_size * + p_chain->usable_per_page); + + p_next->next_phys.lo = DMA_LO_LE(p_chain->p_phys_addr); + p_next->next_phys.hi = DMA_HI_LE(p_chain->p_phys_addr); + p_next->next_virt = p_chain->p_virt_addr; + } + qed_chain_reset(p_chain); +} + +/** + * @brief qed_chain_pbl_init - Initializes a basic PBL chain + * struct + * @param p_chain + * @param p_virt_addr virtual address of allocated buffer's beginning + * @param p_phys_addr physical address of allocated buffer's beginning + * @param page_cnt number of pages in the allocated buffer + * @param elem_size size of each element in the chain + * @param use_mode + * @param p_phys_pbl pointer to a pre-allocated side table + * which will hold physical page addresses. + * @param p_virt_pbl pointer to a pre-allocated side table + * which will hold virtual page addresses. + */ +static inline void +qed_chain_pbl_init(struct qed_chain *p_chain, + void *p_virt_addr, + dma_addr_t p_phys_addr, + u16 page_cnt, + u8 elem_size, + enum qed_chain_use_mode use_mode, + dma_addr_t p_phys_pbl, + dma_addr_t *p_virt_pbl) +{ + dma_addr_t *p_pbl_dma = p_virt_pbl; + int i; + + qed_chain_init(p_chain, p_virt_addr, p_phys_addr, page_cnt, + elem_size, use_mode, QED_CHAIN_MODE_PBL); + + p_chain->pbl.p_phys_table = p_phys_pbl; + p_chain->pbl.p_virt_table = p_virt_pbl; + + /* Fill the PBL with physical addresses */ + for (i = 0; i < page_cnt; i++) { + *p_pbl_dma = p_phys_addr; + p_phys_addr += QED_CHAIN_PAGE_SIZE; + p_pbl_dma++; + } +} + +/** + * @brief qed_chain_set_prod - sets the prod to the given + * value + * + * @param p_chain + * @param prod_idx + * @param p_prod_elem + */ +static inline void qed_chain_set_prod(struct qed_chain *p_chain, + u16 prod_idx, + void *p_prod_elem) +{ + p_chain->prod_idx = prod_idx; + p_chain->p_prod_elem = p_prod_elem; +} + +/** + * @brief qed_chain_sge_get_elem - + * + * Get a pointer to an element represented by absolute idx + * + * @param p_chain + * @param idx + * @assumption p_chain->size is a power of 2 + * + * @return void*, a pointer to next element + */ +static inline void *qed_chain_sge_get_elem(struct qed_chain *p_chain, + u16 idx) +{ + void *ret = NULL; + + if (idx >= p_chain->size) + return NULL; + + ret = (u8 *)p_chain->p_virt_addr + p_chain->elem_size * idx; + + return ret; +} + +/** + * @brief qed_chain_sge_inc_cons_prod + * + * For SGE chains the producer isn't incremented serially; the ring + * is expected to be full at all times. Once elements are + * consumed, they are immediately produced. + * + * @param p_chain + * @param cnt + * + * @return inline void + */ +static inline void +qed_chain_sge_inc_cons_prod(struct qed_chain *p_chain, + u16 cnt) +{ + p_chain->prod_idx += cnt; + p_chain->cons_idx += cnt; +} + +#endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h new file mode 100644 index 000000000000..dc9a1353f971 --- /dev/null +++ b/include/linux/qed/qed_if.h @@ -0,0 +1,498 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree.
+ */ + +#ifndef _QED_IF_H +#define _QED_IF_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DIRECT_REG_WR(reg_addr, val) writel((u32)val, \ + (void __iomem *)(reg_addr)) + +#define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) + +#define QED_COALESCE_MAX 0xFF + +/* forward */ +struct qed_dev; + +struct qed_eth_pf_params { + /* The following parameters are used during HW-init + * and these parameters need to be passed as arguments + * to update_pf_params routine invoked before slowpath start + */ + u16 num_cons; +}; + +struct qed_pf_params { + struct qed_eth_pf_params eth_pf_params; +}; + +enum qed_int_mode { + QED_INT_MODE_INTA, + QED_INT_MODE_MSIX, + QED_INT_MODE_MSI, + QED_INT_MODE_POLL, +}; + +struct qed_sb_info { + struct status_block *sb_virt; + dma_addr_t sb_phys; + u32 sb_ack; /* Last given ack */ + u16 igu_sb_id; + void __iomem *igu_addr; + u8 flags; +#define QED_SB_INFO_INIT 0x1 +#define QED_SB_INFO_SETUP 0x2 + + struct qed_dev *cdev; +}; + +struct qed_dev_info { + unsigned long pci_mem_start; + unsigned long pci_mem_end; + unsigned int pci_irq; + u8 num_hwfns; + + u8 hw_mac[ETH_ALEN]; + bool is_mf; + + /* FW version */ + u16 fw_major; + u16 fw_minor; + u16 fw_rev; + u16 fw_eng; + + /* MFW version */ + u32 mfw_rev; + + u32 flash_size; + u8 mf_mode; +}; + +enum qed_sb_type { + QED_SB_TYPE_L2_QUEUE, +}; + +enum qed_protocol { + QED_PROTOCOL_ETH, +}; + +struct qed_link_params { + bool link_up; + +#define QED_LINK_OVERRIDE_SPEED_AUTONEG BIT(0) +#define QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS BIT(1) +#define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) +#define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) + u32 override_flags; + bool autoneg; + u32 adv_speeds; + u32 forced_speed; +#define QED_LINK_PAUSE_AUTONEG_ENABLE BIT(0) +#define QED_LINK_PAUSE_RX_ENABLE BIT(1) +#define QED_LINK_PAUSE_TX_ENABLE BIT(2) + u32 pause_config; +}; + +struct qed_link_output { + bool link_up; + + u32 supported_caps; /* In SUPPORTED defs */ + u32 advertised_caps; /* In ADVERTISED defs */ + u32 lp_caps; /* In ADVERTISED defs */ + u32 speed; /* In Mb/s */ + u8 duplex; /* In DUPLEX defs */ + u8 port; /* In PORT defs */ + bool autoneg; + u32 pause_config; +}; + +#define QED_DRV_VER_STR_SIZE 12 +struct qed_slowpath_params { + u32 int_mode; + u8 drv_major; + u8 drv_minor; + u8 drv_rev; + u8 drv_eng; + u8 name[QED_DRV_VER_STR_SIZE]; +}; + +#define ILT_PAGE_SIZE_TCFC 0x8000 /* 32KB */ + +struct qed_int_info { + struct msix_entry *msix; + u8 msix_cnt; + + /* This should be updated by the protocol driver */ + u8 used_cnt; +}; + +struct qed_common_cb_ops { + void (*link_update)(void *dev, + struct qed_link_output *link); +}; + +struct qed_common_ops { + struct qed_dev* (*probe)(struct pci_dev *dev, + enum qed_protocol protocol, + u32 dp_module, u8 dp_level); + + void (*remove)(struct qed_dev *cdev); + + int (*set_power_state)(struct qed_dev *cdev, + pci_power_t state); + + void (*set_id)(struct qed_dev *cdev, + char name[], + char ver_str[]); + + /* Client drivers need to make this call before slowpath_start. + * PF params required for the call before slowpath_start is + * documented within the qed_pf_params structure definition. + */ + void (*update_pf_params)(struct qed_dev *cdev, + struct qed_pf_params *params); + int (*slowpath_start)(struct qed_dev *cdev, + struct qed_slowpath_params *params); + + int (*slowpath_stop)(struct qed_dev *cdev); + + /* Requests to use `cnt' interrupts for fastpath. 
+ * Upon success, returns number of interrupts allocated for fastpath. + */ + int (*set_fp_int)(struct qed_dev *cdev, + u16 cnt); + + /* Fills `info' with pointers required for utilizing interrupts */ + int (*get_fp_int)(struct qed_dev *cdev, + struct qed_int_info *info); + + u32 (*sb_init)(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + void *sb_virt_addr, + dma_addr_t sb_phy_addr, + u16 sb_id, + enum qed_sb_type type); + + u32 (*sb_release)(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + u16 sb_id); + + void (*simd_handler_config)(struct qed_dev *cdev, + void *token, + int index, + void (*handler)(void *)); + + void (*simd_handler_clean)(struct qed_dev *cdev, + int index); +/** + * @brief set_link - set the link according to params + * + * @param cdev + * @param params - values used to override the default link configuration + * + * @return 0 on success, error otherwise. + */ + int (*set_link)(struct qed_dev *cdev, + struct qed_link_params *params); + +/** + * @brief get_link - returns the current link state. + * + * @param cdev + * @param if_link - structure to be filled with current link configuration. + */ + void (*get_link)(struct qed_dev *cdev, + struct qed_link_output *if_link); + +/** + * @brief - drains chip in case Tx completions fail to arrive due to pause. + * + * @param cdev + */ + int (*drain)(struct qed_dev *cdev); + +/** + * @brief update_msglvl - update module debug level + * + * @param cdev + * @param dp_module + * @param dp_level + */ + void (*update_msglvl)(struct qed_dev *cdev, + u32 dp_module, + u8 dp_level); + + int (*chain_alloc)(struct qed_dev *cdev, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode, + u16 num_elems, + size_t elem_size, + struct qed_chain *p_chain); + + void (*chain_free)(struct qed_dev *cdev, + struct qed_chain *p_chain); +}; + +/** + * @brief qed_get_protocol_version + * + * @param protocol + * + * @return version supported by qed for given protocol driver + */ +u32 qed_get_protocol_version(enum qed_protocol protocol); + +#define MASK_FIELD(_name, _value) \ + ((_value) &= (_name ## _MASK)) + +#define FIELD_VALUE(_name, _value) \ + ((_value & _name ## _MASK) << _name ## _SHIFT) + +#define SET_FIELD(value, name, flag) \ + do { \ + (value) &= ~(name ## _MASK << name ## _SHIFT); \ + (value) |= (((u64)flag) << (name ## _SHIFT)); \ + } while (0) + +#define GET_FIELD(value, name) \ + (((value) >> (name ## _SHIFT)) & name ## _MASK) + +/* Debug print definitions */ +#define DP_ERR(cdev, fmt, ...) \ + pr_err("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__) \ + +#define DP_NOTICE(cdev, fmt, ...) \ + do { \ + if (unlikely((cdev)->dp_level <= QED_LEVEL_NOTICE)) { \ + pr_notice("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__); \ + \ + } \ + } while (0) + +#define DP_INFO(cdev, fmt, ...) \ + do { \ + if (unlikely((cdev)->dp_level <= QED_LEVEL_INFO)) { \ + pr_notice("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__); \ + } \ + } while (0) + +#define DP_VERBOSE(cdev, module, fmt, ...) \ + do { \ + if (unlikely(((cdev)->dp_level <= QED_LEVEL_VERBOSE) && \ + ((cdev)->dp_module & module))) { \ + pr_notice("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ?
DP_NAME(cdev) : "", \ + ## __VA_ARGS__); \ + } \ + } while (0) + +enum DP_LEVEL { + QED_LEVEL_VERBOSE = 0x0, + QED_LEVEL_INFO = 0x1, + QED_LEVEL_NOTICE = 0x2, + QED_LEVEL_ERR = 0x3, +}; + +#define QED_LOG_LEVEL_SHIFT (30) +#define QED_LOG_VERBOSE_MASK (0x3fffffff) +#define QED_LOG_INFO_MASK (0x40000000) +#define QED_LOG_NOTICE_MASK (0x80000000) + +enum DP_MODULE { + QED_MSG_SPQ = 0x10000, + QED_MSG_STATS = 0x20000, + QED_MSG_DCB = 0x40000, + QED_MSG_IOV = 0x80000, + QED_MSG_SP = 0x100000, + QED_MSG_STORAGE = 0x200000, + QED_MSG_CXT = 0x800000, + QED_MSG_ILT = 0x2000000, + QED_MSG_ROCE = 0x4000000, + QED_MSG_DEBUG = 0x8000000, + /* to be added...up to 0x8000000 */ +}; + +struct qed_eth_stats { + u64 no_buff_discards; + u64 packet_too_big_discard; + u64 ttl0_discard; + u64 rx_ucast_bytes; + u64 rx_mcast_bytes; + u64 rx_bcast_bytes; + u64 rx_ucast_pkts; + u64 rx_mcast_pkts; + u64 rx_bcast_pkts; + u64 mftag_filter_discards; + u64 mac_filter_discards; + u64 tx_ucast_bytes; + u64 tx_mcast_bytes; + u64 tx_bcast_bytes; + u64 tx_ucast_pkts; + u64 tx_mcast_pkts; + u64 tx_bcast_pkts; + u64 tx_err_drop_pkts; + u64 tpa_coalesced_pkts; + u64 tpa_coalesced_events; + u64 tpa_aborts_num; + u64 tpa_not_coalesced_pkts; + u64 tpa_coalesced_bytes; + + /* port */ + u64 rx_64_byte_packets; + u64 rx_127_byte_packets; + u64 rx_255_byte_packets; + u64 rx_511_byte_packets; + u64 rx_1023_byte_packets; + u64 rx_1518_byte_packets; + u64 rx_1522_byte_packets; + u64 rx_2047_byte_packets; + u64 rx_4095_byte_packets; + u64 rx_9216_byte_packets; + u64 rx_16383_byte_packets; + u64 rx_crc_errors; + u64 rx_mac_crtl_frames; + u64 rx_pause_frames; + u64 rx_pfc_frames; + u64 rx_align_errors; + u64 rx_carrier_errors; + u64 rx_oversize_packets; + u64 rx_jabbers; + u64 rx_undersize_packets; + u64 rx_fragments; + u64 tx_64_byte_packets; + u64 tx_65_to_127_byte_packets; + u64 tx_128_to_255_byte_packets; + u64 tx_256_to_511_byte_packets; + u64 tx_512_to_1023_byte_packets; + u64 tx_1024_to_1518_byte_packets; + u64 tx_1519_to_2047_byte_packets; + u64 tx_2048_to_4095_byte_packets; + u64 tx_4096_to_9216_byte_packets; + u64 tx_9217_to_16383_byte_packets; + u64 tx_pause_frames; + u64 tx_pfc_frames; + u64 tx_lpi_entry_count; + u64 tx_total_collisions; + u64 brb_truncates; + u64 brb_discards; + u64 rx_mac_bytes; + u64 rx_mac_uc_packets; + u64 rx_mac_mc_packets; + u64 rx_mac_bc_packets; + u64 rx_mac_frames_ok; + u64 tx_mac_bytes; + u64 tx_mac_uc_packets; + u64 tx_mac_mc_packets; + u64 tx_mac_bc_packets; + u64 tx_mac_ctrl_frames; +}; + +#define QED_SB_IDX 0x0002 + +#define RX_PI 0 +#define TX_PI(tc) (RX_PI + 1 + tc) + +static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) +{ + u32 prod = 0; + u16 rc = 0; + + prod = le32_to_cpu(sb_info->sb_virt->prod_index) & + STATUS_BLOCK_PROD_INDEX_MASK; + if (sb_info->sb_ack != prod) { + sb_info->sb_ack = prod; + rc |= QED_SB_IDX; + } + + /* Let SB update */ + mmiowb(); + return rc; +} + +/** + * + * @brief This function creates an update command for interrupts that is + * written to the IGU. + * + * @param sb_info - This is the structure allocated and + * initialized per status block. Assumption is + * that it was initialized using qed_sb_init + * @param int_cmd - Enable/Disable/Nop + * @param upd_flg - whether igu consumer should be + * updated. 
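+ *
+ * For example (an illustrative flow, not a mandated one), a fastpath
+ * ISR might ack with interrupts disabled and re-enable them once it is
+ * done polling:
+ *
+ *	qed_sb_ack(sb_info, IGU_INT_DISABLE, 1);
+ *	...poll and process completions...
+ *	qed_sb_ack(sb_info, IGU_INT_ENABLE, 1);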
+ * + * @return inline void + */ +static inline void qed_sb_ack(struct qed_sb_info *sb_info, + enum igu_int_cmd int_cmd, + u8 upd_flg) +{ + struct igu_prod_cons_update igu_ack = { 0 }; + + igu_ack.sb_id_and_flags = + ((sb_info->sb_ack << IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT) | + (upd_flg << IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT) | + (int_cmd << IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT) | + (IGU_SEG_ACCESS_REG << + IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT)); + + DIRECT_REG_WR(sb_info->igu_addr, igu_ack.sb_id_and_flags); + + /* Both segments (interrupts & acks) are written to same place address; + * Need to guarantee all commands will be received (in-order) by HW. + */ + mmiowb(); + barrier(); +} + +static inline void __internal_ram_wr(void *p_hwfn, + void __iomem *addr, + int size, + u32 *data) + +{ + unsigned int i; + + for (i = 0; i < size / sizeof(*data); i++) + DIRECT_REG_WR(&((u32 __iomem *)addr)[i], data[i]); +} + +static inline void internal_ram_wr(void __iomem *addr, + int size, + u32 *data) +{ + __internal_ram_wr(NULL, addr, size, data); +} + +#endif -- cgit v1.2.3 From 25c089d78f3833edf614fc377e75e9cf848562f5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Oct 2015 11:02:26 +0200 Subject: qed: Add basic L2 interface This patch adds a public API for a network driver to work on top of QED. The interface itself is very minimal - it's mostly infrastructure, as the only content it has after this patch is a query for HW-based information required for the creation of a network interface [I.e., no actual protocol-specific configurations are supported]. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/Makefile | 2 +- drivers/net/ethernet/qlogic/qed/qed.h | 14 ++ drivers/net/ethernet/qlogic/qed/qed_dev.c | 62 +++++++ drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + drivers/net/ethernet/qlogic/qed/qed_l2.c | 87 ++++++++++ include/linux/qed/eth_common.h | 279 ++++++++++++++++++++++++++++++ include/linux/qed/qed_eth_if.h | 38 ++++ 7 files changed, 482 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/qlogic/qed/qed_l2.c create mode 100644 include/linux/qed/eth_common.h create mode 100644 include/linux/qed/qed_eth_if.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 6969b5c66929..5c2fd57236fe 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_QED) := qed.o qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ - qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o + qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index a63ef3120d78..e03371d3e622 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -25,6 +25,7 @@ #include #include "qed_hsi.h" +extern const struct qed_common_ops qed_common_ops_pass; #define DRV_MODULE_VERSION "8.4.0.0" #define MAX_HWFNS_PER_DEVICE (4) @@ -91,13 +92,22 @@ struct qed_qm_iids { enum QED_RESOURCES { QED_SB, + QED_L2_QUEUE, QED_VPORT, + QED_RSS_ENG, QED_PQ, QED_RL, + QED_MAC, + QED_VLAN, QED_ILT, QED_MAX_RESC, }; +enum QED_FEATURE { + QED_PF_L2_QUE, + QED_MAX_FEATURES, +}; + struct qed_hw_info { /* PCI personality */ enum qed_pci_personality personality; @@ -105,6 +115,7 @@ struct qed_hw_info { /* Resource 
Allocation scheme results */ u32 resc_start[QED_MAX_RESC]; u32 resc_num[QED_MAX_RESC]; + u32 feat_num[QED_MAX_FEATURES]; #define RESC_START(_p_hwfn, resc) ((_p_hwfn)->hw_info.resc_start[resc]) #define RESC_NUM(_p_hwfn, resc) ((_p_hwfn)->hw_info.resc_num[resc]) @@ -266,6 +277,9 @@ struct qed_hwfn { struct qed_mcp_info *mcp_info; + struct qed_hw_cid_data *p_tx_cids; + struct qed_hw_cid_data *p_rx_cids; + struct qed_dmae_info dmae_info; /* QM init */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 5b845220ae8c..3243cb4160c3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -89,6 +89,15 @@ void qed_resc_free(struct qed_dev *cdev) kfree(cdev->reset_stats); + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + kfree(p_hwfn->p_tx_cids); + p_hwfn->p_tx_cids = NULL; + kfree(p_hwfn->p_rx_cids); + p_hwfn->p_rx_cids = NULL; + } + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -202,6 +211,29 @@ int qed_resc_alloc(struct qed_dev *cdev) if (!cdev->fw_data) return -ENOMEM; + /* Allocate Memory for the Queue->CID mapping */ + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + int tx_size = sizeof(struct qed_hw_cid_data) * + RESC_NUM(p_hwfn, QED_L2_QUEUE); + int rx_size = sizeof(struct qed_hw_cid_data) * + RESC_NUM(p_hwfn, QED_L2_QUEUE); + + p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL); + if (!p_hwfn->p_tx_cids) { + DP_NOTICE(p_hwfn, + "Failed to allocate memory for Tx Cids\n"); + goto alloc_err; + } + + p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL); + if (!p_hwfn->p_rx_cids) { + DP_NOTICE(p_hwfn, + "Failed to allocate memory for Rx Cids\n"); + goto alloc_err; + } + } + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -881,6 +913,20 @@ static void get_function_id(struct qed_hwfn *p_hwfn) PXP_CONCRETE_FID_PORT); } +static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) +{ + u32 *feat_num = p_hwfn->hw_info.feat_num; + int num_features = 1; + + feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / + num_features, + RESC_NUM(p_hwfn, QED_L2_QUEUE)); + DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, + "#PF_L2_QUEUES=%d #SBS=%d num_features=%d\n", + feat_num[QED_PF_L2_QUE], RESC_NUM(p_hwfn, QED_SB), + num_features); +} + static void qed_hw_get_resc(struct qed_hwfn *p_hwfn) { u32 *resc_start = p_hwfn->hw_info.resc_start; @@ -893,29 +939,45 @@ static void qed_hw_get_resc(struct qed_hwfn *p_hwfn) resc_num[QED_SB] = min_t(u32, (MAX_SB_PER_PATH_BB / num_funcs), qed_int_get_num_sbs(p_hwfn, NULL)); + resc_num[QED_L2_QUEUE] = MAX_NUM_L2_QUEUES_BB / num_funcs; resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs; + resc_num[QED_RSS_ENG] = ETH_RSS_ENGINE_NUM_BB / num_funcs; resc_num[QED_PQ] = MAX_QM_TX_QUEUES_BB / num_funcs; resc_num[QED_RL] = 8; + resc_num[QED_MAC] = ETH_NUM_MAC_FILTERS / num_funcs; + resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) / + num_funcs; resc_num[QED_ILT] = 950; for (i = 0; i < QED_MAX_RESC; i++) resc_start[i] = resc_num[i] * p_hwfn->rel_pf_id; + qed_hw_set_feat(p_hwfn); + DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, "The numbers for each resource are:\n" "SB = %d start = %d\n" + "L2_QUEUE = %d start = %d\n" "VPORT = %d start = %d\n" "PQ = %d start = %d\n" "RL = %d start = %d\n" + "MAC = %d start = %d\n" + "VLAN = %d start = %d\n" "ILT = %d start = %d\n", p_hwfn->hw_info.resc_num[QED_SB], p_hwfn->hw_info.resc_start[QED_SB], + p_hwfn->hw_info.resc_num[QED_L2_QUEUE], + 
p_hwfn->hw_info.resc_start[QED_L2_QUEUE], p_hwfn->hw_info.resc_num[QED_VPORT], p_hwfn->hw_info.resc_start[QED_VPORT], p_hwfn->hw_info.resc_num[QED_PQ], p_hwfn->hw_info.resc_start[QED_PQ], p_hwfn->hw_info.resc_num[QED_RL], p_hwfn->hw_info.resc_start[QED_RL], + p_hwfn->hw_info.resc_num[QED_MAC], + p_hwfn->hw_info.resc_start[QED_MAC], + p_hwfn->hw_info.resc_num[QED_VLAN], + p_hwfn->hw_info.resc_start[QED_VLAN], p_hwfn->hw_info.resc_num[QED_ILT], p_hwfn->hw_info.resc_start[QED_ILT]); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 61c15a57c267..27f2c005e2b0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -17,6 +17,7 @@ #include #include #include +#include struct qed_hwfn; struct qed_ptt; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c new file mode 100644 index 000000000000..f2e76024409a --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -0,0 +1,87 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include +#include "qed_cxt.h" +#include "qed_dev_api.h" +#include +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_int.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +static int qed_fill_eth_dev_info(struct qed_dev *cdev, + struct qed_dev_eth_info *info) +{ + int i; + + memset(info, 0, sizeof(*info)); + + info->num_tc = 1; + + if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { + for_each_hwfn(cdev, i) + info->num_queues += FEAT_NUM(&cdev->hwfns[i], + QED_PF_L2_QUE); + if (cdev->int_params.fp_msix_cnt) + info->num_queues = min_t(u8, info->num_queues, + cdev->int_params.fp_msix_cnt); + } else { + info->num_queues = cdev->num_hwfns; + } + + info->num_vlan_filters = RESC_NUM(&cdev->hwfns[0], QED_VLAN); + ether_addr_copy(info->port_mac, + cdev->hwfns[0].hw_info.hw_mac_addr); + + qed_fill_dev_info(cdev, &info->common); + + return 0; +} + +static const struct qed_eth_ops qed_eth_ops_pass = { + .common = &qed_common_ops_pass, + .fill_dev_info = &qed_fill_eth_dev_info, +}; + +const struct qed_eth_ops *qed_get_eth_ops(u32 version) +{ + if (version != QED_ETH_INTERFACE_VERSION) { + pr_notice("Cannot supply ethtool operations [%08x != %08x]\n", + version, QED_ETH_INTERFACE_VERSION); + return NULL; + } + + return &qed_eth_ops_pass; +} +EXPORT_SYMBOL(qed_get_eth_ops); + +void qed_put_eth_ops(void) +{ + /* TODO - reference count for module? */ +} +EXPORT_SYMBOL(qed_put_eth_ops); diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h new file mode 100644 index 000000000000..320b3373ac1d --- /dev/null +++ b/include/linux/qed/eth_common.h @@ -0,0 +1,279 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef __ETH_COMMON__ +#define __ETH_COMMON__ + +/********************/ +/* ETH FW CONSTANTS */ +/********************/ +#define ETH_CACHE_LINE_SIZE 64 + +#define ETH_MAX_RAMROD_PER_CON 8 +#define ETH_TX_BD_PAGE_SIZE_BYTES 4096 +#define ETH_RX_BD_PAGE_SIZE_BYTES 4096 +#define ETH_RX_SGE_PAGE_SIZE_BYTES 4096 +#define ETH_RX_CQE_PAGE_SIZE_BYTES 4096 +#define ETH_RX_NUM_NEXT_PAGE_BDS 2 +#define ETH_RX_NUM_NEXT_PAGE_SGES 2 + +#define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 +#define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 +#define ETH_TX_MAX_LSO_HDR_NBD 4 +#define ETH_TX_MIN_BDS_PER_LSO_PKT 3 +#define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT 3 +#define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT 2 +#define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE 2 +#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 12 + 8)) +#define ETH_TX_MAX_LSO_HDR_BYTES 510 + +#define ETH_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS + +#define ETH_REG_CQE_PBL_SIZE 3 + +/* num of MAC/VLAN filters */ +#define ETH_NUM_MAC_FILTERS 512 +#define ETH_NUM_VLAN_FILTERS 512 + +/* approx. multicast constants */ +#define ETH_MULTICAST_BIN_FROM_MAC_SEED 0 +#define ETH_MULTICAST_MAC_BINS 256 +#define ETH_MULTICAST_MAC_BINS_IN_REGS (ETH_MULTICAST_MAC_BINS / 32) + +/* ethernet vport update constants */ +#define ETH_FILTER_RULES_COUNT 10 +#define ETH_RSS_IND_TABLE_ENTRIES_NUM 128 +#define ETH_RSS_KEY_SIZE_REGS 10 +#define ETH_RSS_ENGINE_NUM_K2 207 +#define ETH_RSS_ENGINE_NUM_BB 127 + +/* TPA constants */ +#define ETH_TPA_MAX_AGGS_NUM 64 +#define ETH_TPA_CQE_START_SGL_SIZE 3 +#define ETH_TPA_CQE_CONT_SGL_SIZE 6 +#define ETH_TPA_CQE_END_SGL_SIZE 4 + +/* Queue Zone sizes */ +#define TSTORM_QZONE_SIZE 0 +#define MSTORM_QZONE_SIZE sizeof(struct mstorm_eth_queue_zone) +#define USTORM_QZONE_SIZE sizeof(struct ustorm_eth_queue_zone) +#define XSTORM_QZONE_SIZE 0 +#define YSTORM_QZONE_SIZE sizeof(struct ystorm_eth_queue_zone) +#define PSTORM_QZONE_SIZE 0 + +/* Interrupt coalescing TimeSet */ +struct coalescing_timeset { + u8 timeset; + u8 valid; +}; + +struct eth_tx_1st_bd_flags { + u8 bitfields; +#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0 +#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT 1 +#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT 2 +#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT 3 +#define ETH_TX_1ST_BD_FLAGS_LSO_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_LSO_SHIFT 4 +#define ETH_TX_1ST_BD_FLAGS_START_BD_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT 5 +#define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT 6 +#define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT 7 +}; + +/* The parsing information data for the first tx bd of a given packet. */ +struct eth_tx_data_1st_bd { + __le16 vlan; + u8 nbds; + struct eth_tx_1st_bd_flags bd_flags; + __le16 fw_use_only; +}; + +/* The parsing information data for the second tx bd of a given packet.
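+ *
+ * The MASK/SHIFT pairs below are meant for the SET_FIELD() helper from
+ * qed_if.h, applied to a host-order scratch variable before the result
+ * is stored little-endian; e.g. (bf2 and tunn_type are illustrative
+ * names, not part of this file):
+ *
+ *	u16 bf2 = 0;
+ *
+ *	SET_FIELD(bf2, ETH_TX_DATA_2ND_BD_TUNN_TYPE, tunn_type);
+ *	bd2->data.bitfields2 = cpu_to_le16(bf2);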
*/ +struct eth_tx_data_2nd_bd { + __le16 tunn_ip_size; + __le16 bitfields; +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK 0x1FFF +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT 0 +#define ETH_TX_DATA_2ND_BD_RESERVED0_MASK 0x7 +#define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 + __le16 bitfields2; +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK 0xF +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK 0x3 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT 4 +#define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_MASK 0x3 +#define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_SHIFT 6 +#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_MASK 0x3 +#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_SHIFT 8 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT 10 +#define ETH_TX_DATA_2ND_BD_IPV6_EXT_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT 11 +#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT 12 +#define ETH_TX_DATA_2ND_BD_L4_UDP_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT 13 +#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT 14 +#define ETH_TX_DATA_2ND_BD_RESERVED1_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_RESERVED1_SHIFT 15 +}; + +/* Regular ETH Rx FP CQE. */ +struct eth_fast_path_rx_reg_cqe { + u8 type; + u8 bitfields; +#define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK 0x7 +#define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT 0 +#define ETH_FAST_PATH_RX_REG_CQE_TC_MASK 0xF +#define ETH_FAST_PATH_RX_REG_CQE_TC_SHIFT 3 +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_MASK 0x1 +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_SHIFT 7 + __le16 pkt_len; + struct parsing_and_err_flags pars_flags; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_bd; + u8 placement_offset; + u8 reserved; + __le16 pbl[ETH_REG_CQE_PBL_SIZE]; + u8 reserved1[10]; +}; + +/* The L4 pseudo checksum mode for Ethernet */ +enum eth_l4_pseudo_checksum_mode { + ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH, + ETH_L4_PSEUDO_CSUM_ZERO_LENGTH, + MAX_ETH_L4_PSEUDO_CHECKSUM_MODE +}; + +struct eth_rx_bd { + struct regpair addr; +}; + +/* regular ETH Rx SP CQE */ +struct eth_slow_path_rx_cqe { + u8 type; + u8 ramrod_cmd_id; + u8 error_flag; + u8 reserved[27]; + __le16 echo; +}; + +/* union for all ETH Rx CQE types */ +union eth_rx_cqe { + struct eth_fast_path_rx_reg_cqe fast_path_regular; + struct eth_slow_path_rx_cqe slow_path; +}; + +/* ETH Rx CQE type */ +enum eth_rx_cqe_type { + ETH_RX_CQE_TYPE_UNUSED, + ETH_RX_CQE_TYPE_REGULAR, + ETH_RX_CQE_TYPE_SLOW_PATH, + MAX_ETH_RX_CQE_TYPE +}; + +/* ETH Rx producers data */ +struct eth_rx_prod_data { + __le16 bd_prod; + __le16 sge_prod; + __le16 cqe_prod; + __le16 reserved; +}; + +/* The first tx bd of a given packet */ +struct eth_tx_1st_bd { + struct regpair addr; + __le16 nbytes; + struct eth_tx_data_1st_bd data; +}; + +/* The second tx bd of a given packet */ +struct eth_tx_2nd_bd { + struct regpair addr; + __le16 nbytes; + struct eth_tx_data_2nd_bd data; +}; + +/* The parsing information data for the third tx bd of a given packet. 
+ */ +struct eth_tx_data_3rd_bd { + __le16 lso_mss; + u8 bitfields; +#define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK 0xF +#define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0 +#define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK 0xF +#define ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT 4 + u8 resereved0[3]; +}; + +/* The third tx bd of a given packet */ +struct eth_tx_3rd_bd { + struct regpair addr; + __le16 nbytes; + struct eth_tx_data_3rd_bd data; +}; + +/* The common non-special TX BD ring element */ +struct eth_tx_bd { + struct regpair addr; + __le16 nbytes; + __le16 reserved0; + __le32 reserved1; +}; + +union eth_tx_bd_types { + struct eth_tx_1st_bd first_bd; + struct eth_tx_2nd_bd second_bd; + struct eth_tx_3rd_bd third_bd; + struct eth_tx_bd reg_bd; +}; + +/* Mstorm Queue Zone */ +struct mstorm_eth_queue_zone { + struct eth_rx_prod_data rx_producers; + __le32 reserved[2]; +}; + +/* Ustorm Queue Zone */ +struct ustorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + __le16 reserved[3]; +}; + +/* Ystorm Queue Zone */ +struct ystorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + __le16 reserved[3]; +}; + +/* ETH doorbell data */ +struct eth_db_data { + u8 params; +#define ETH_DB_DATA_DEST_MASK 0x3 +#define ETH_DB_DATA_DEST_SHIFT 0 +#define ETH_DB_DATA_AGG_CMD_MASK 0x3 +#define ETH_DB_DATA_AGG_CMD_SHIFT 2 +#define ETH_DB_DATA_BYPASS_EN_MASK 0x1 +#define ETH_DB_DATA_BYPASS_EN_SHIFT 4 +#define ETH_DB_DATA_RESERVED_MASK 0x1 +#define ETH_DB_DATA_RESERVED_SHIFT 5 +#define ETH_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define ETH_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 bd_prod; +}; + +#endif /* __ETH_COMMON__ */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h new file mode 100644 index 000000000000..fbd8700f0b31 --- /dev/null +++ b/include/linux/qed/qed_eth_if.h @@ -0,0 +1,38 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_ETH_IF_H +#define _QED_ETH_IF_H + +#include +#include +#include +#include + +struct qed_dev_eth_info { + struct qed_dev_info common; + + u8 num_queues; + u8 num_tc; + + u8 port_mac[ETH_ALEN]; + u8 num_vlan_filters; +}; + +struct qed_eth_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_eth_info *info); + +}; + +const struct qed_eth_ops *qed_get_eth_ops(u32 version); +void qed_put_eth_ops(void); + +#endif -- cgit v1.2.3 From cee4d26448c1000ccc1711eb5e6ed4c15f18fa83 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 26 Oct 2015 11:02:28 +0200 Subject: qed: Add slowpath L2 support This patch adds support to qed for configuring various L2 elements, such as channels and basic filtering conditions. It also enhances its public API to allow qede to later utilize this functionality. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S.
Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 114 ++ drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 58 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 294 +++++ drivers/net/ethernet/qlogic/qed/qed_l2.c | 1605 +++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_main.c | 10 + drivers/net/ethernet/qlogic/qed/qed_mcp.c | 16 + drivers/net/ethernet/qlogic/qed/qed_mcp.h | 13 + drivers/net/ethernet/qlogic/qed/qed_sp.h | 27 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 29 + include/linux/qed/qed_eth_if.h | 120 ++ 10 files changed, 2286 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 3243cb4160c3..3d1bdbf9ade1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -799,6 +799,60 @@ int qed_hw_stop(struct qed_dev *cdev) return rc; } +void qed_hw_stop_fastpath(struct qed_dev *cdev) +{ + int i, j; + + for_each_hwfn(cdev, j) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[j]; + struct qed_ptt *p_ptt = p_hwfn->p_main_ptt; + + DP_VERBOSE(p_hwfn, + NETIF_MSG_IFDOWN, + "Shutting down the fastpath\n"); + + qed_wr(p_hwfn, p_ptt, + NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1); + + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_TCP, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_UDP, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_FCOE, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_ROCE, 0x0); + qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_OPENFLOW, 0x0); + + qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0); + qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0); + for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) { + if ((!qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_CONN)) && + (!qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_TASK))) + break; + + usleep_range(1000, 2000); + } + if (i == QED_HW_STOP_RETRY_LIMIT) + DP_NOTICE(p_hwfn, + "Timers linear scans are not over [Connection %02x Tasks %02x]\n", + (u8)qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_CONN), + (u8)qed_rd(p_hwfn, p_ptt, + TM_REG_PF_SCAN_ACTIVE_TASK)); + + qed_int_igu_init_pure_rt(p_hwfn, p_ptt, false, false); + + /* Need to wait 1ms to guarantee SBs are cleared */ + usleep_range(1000, 2000); + } +} + +void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn) +{ + /* Re-open incoming traffic */ + qed_wr(p_hwfn, p_hwfn->p_main_ptt, + NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0); +} + static int qed_reg_assert(struct qed_hwfn *hwfn, struct qed_ptt *ptt, u32 reg, bool expected) @@ -1337,3 +1391,63 @@ void qed_chain_free(struct qed_dev *cdev, p_chain->p_virt_addr, p_chain->p_phys_addr); } + +int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, + u16 src_id, u16 *dst_id) +{ + if (src_id >= RESC_NUM(p_hwfn, QED_L2_QUEUE)) { + u16 min, max; + + min = (u16)RESC_START(p_hwfn, QED_L2_QUEUE); + max = min + RESC_NUM(p_hwfn, QED_L2_QUEUE); + DP_NOTICE(p_hwfn, + "l2_queue id [%d] is not valid, available indices [%d - %d]\n", + src_id, min, max); + + return -EINVAL; + } + + *dst_id = RESC_START(p_hwfn, QED_L2_QUEUE) + src_id; + + return 0; +} + +int qed_fw_vport(struct qed_hwfn *p_hwfn, + u8 src_id, u8 *dst_id) +{ + if (src_id >= RESC_NUM(p_hwfn, QED_VPORT)) { + u8 min, max; + + min = (u8)RESC_START(p_hwfn, QED_VPORT); + max = min + RESC_NUM(p_hwfn, QED_VPORT); + DP_NOTICE(p_hwfn, + "vport id [%d] is not valid, available indices [%d - %d]\n", + src_id, min, max); + + return -EINVAL; + } + + *dst_id = RESC_START(p_hwfn, QED_VPORT) + src_id; + + return 0; +} + +int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, + u8 src_id, u8 *dst_id) +{ + if (src_id >= 
RESC_NUM(p_hwfn, QED_RSS_ENG)) { + u8 min, max; + + min = (u8)RESC_START(p_hwfn, QED_RSS_ENG); + max = min + RESC_NUM(p_hwfn, QED_RSS_ENG); + DP_NOTICE(p_hwfn, + "rss_eng id [%d] is not valid, available indices [%d - %d]\n", + src_id, min, max); + + return -EINVAL; + } + + *dst_id = RESC_START(p_hwfn, QED_RSS_ENG) + src_id; + + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 5051af5f378e..773070d04ab8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -86,6 +86,25 @@ int qed_hw_init(struct qed_dev *cdev, */ int qed_hw_stop(struct qed_dev *cdev); +/** + * @brief qed_hw_stop_fastpath - should be called in case + * slowpath is still required for the device, + * but fastpath is not. + * + * @param cdev + * + */ +void qed_hw_stop_fastpath(struct qed_dev *cdev); + +/** + * @brief qed_hw_start_fastpath - restart fastpath traffic, + * only if hw_stop_fastpath was called + * + * @param cdev + * + */ +void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn); + /** * @brief qed_hw_reset - * @@ -206,6 +225,45 @@ qed_chain_alloc(struct qed_dev *cdev, void qed_chain_free(struct qed_dev *cdev, struct qed_chain *p_chain); + +/** + * @brief qed_fw_l2_queue - Get absolute L2 queue ID + * + * @param p_hwfn + * @param src_id - relative to p_hwfn + * @param dst_id - absolute per engine + * + * @return int + */ +int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, + u16 src_id, + u16 *dst_id); + +/** + * @brief qed_fw_vport - Get absolute vport ID + * + * @param p_hwfn + * @param src_id - relative to p_hwfn + * @param dst_id - absolute per engine + * + * @return int + */ +int qed_fw_vport(struct qed_hwfn *p_hwfn, + u8 src_id, + u8 *dst_id); + +/** + * @brief qed_fw_rss_eng - Get absolute RSS engine ID + * + * @param p_hwfn + * @param src_id - relative to p_hwfn + * @param dst_id - absolute per engine + * + * @return int + */ +int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, + u8 src_id, + u8 *dst_id); + /** * @brief Cleanup of previous driver remains prior to load * diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 27f2c005e2b0..5909823463ab 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -2561,6 +2561,300 @@ struct eth_conn_context { struct ustorm_eth_conn_st_ctx ustorm_st_context; }; +enum eth_filter_action { + ETH_FILTER_ACTION_REMOVE, + ETH_FILTER_ACTION_ADD, + ETH_FILTER_ACTION_REPLACE, + MAX_ETH_FILTER_ACTION +}; + +struct eth_filter_cmd { + u8 type /* Filter Type (MAC/VLAN/Pair/VNI) */; + u8 vport_id /* the vport id */; + u8 action /* filter command action: add/remove/replace */; + u8 reserved0; + __le32 vni; + __le16 mac_lsb; + __le16 mac_mid; + __le16 mac_msb; + __le16 vlan_id; +}; + +struct eth_filter_cmd_header { + u8 rx; + u8 tx; + u8 cmd_cnt; + u8 assert_on_error; + u8 reserved1[4]; +}; + +enum eth_filter_type { + ETH_FILTER_TYPE_MAC, + ETH_FILTER_TYPE_VLAN, + ETH_FILTER_TYPE_PAIR, + ETH_FILTER_TYPE_INNER_MAC, + ETH_FILTER_TYPE_INNER_VLAN, + ETH_FILTER_TYPE_INNER_PAIR, + ETH_FILTER_TYPE_INNER_MAC_VNI_PAIR, + ETH_FILTER_TYPE_MAC_VNI_PAIR, + ETH_FILTER_TYPE_VNI, + MAX_ETH_FILTER_TYPE +}; + +enum eth_ramrod_cmd_id { + ETH_RAMROD_UNUSED, + ETH_RAMROD_VPORT_START /* VPort Start Ramrod */, + ETH_RAMROD_VPORT_UPDATE /* VPort Update Ramrod */, + ETH_RAMROD_VPORT_STOP /* VPort Stop Ramrod */, + ETH_RAMROD_RX_QUEUE_START /* RX Queue Start Ramrod */, + ETH_RAMROD_RX_QUEUE_STOP
/* RX Queue Stop Ramrod */, + ETH_RAMROD_TX_QUEUE_START /* TX Queue Start Ramrod */, + ETH_RAMROD_TX_QUEUE_STOP /* TX Queue Stop Ramrod */, + ETH_RAMROD_FILTERS_UPDATE /* Add or Remove Mac/Vlan/Pair filters */, + ETH_RAMROD_RX_QUEUE_UPDATE /* RX Queue Update Ramrod */, + ETH_RAMROD_RESERVED, + ETH_RAMROD_RESERVED2, + ETH_RAMROD_RESERVED3, + ETH_RAMROD_RESERVED4, + ETH_RAMROD_RESERVED5, + ETH_RAMROD_RESERVED6, + ETH_RAMROD_RESERVED7, + ETH_RAMROD_RESERVED8, + MAX_ETH_RAMROD_CMD_ID +}; + +struct eth_vport_rss_config { + __le16 capabilities; +#define ETH_VPORT_RSS_CONFIG_IPV4_CAPABILITY_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_IPV4_CAPABILITY_SHIFT 0 +#define ETH_VPORT_RSS_CONFIG_IPV6_CAPABILITY_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_IPV6_CAPABILITY_SHIFT 1 +#define ETH_VPORT_RSS_CONFIG_IPV4_TCP_CAPABILITY_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_IPV4_TCP_CAPABILITY_SHIFT 2 +#define ETH_VPORT_RSS_CONFIG_IPV6_TCP_CAPABILITY_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_IPV6_TCP_CAPABILITY_SHIFT 3 +#define ETH_VPORT_RSS_CONFIG_IPV4_UDP_CAPABILITY_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_IPV4_UDP_CAPABILITY_SHIFT 4 +#define ETH_VPORT_RSS_CONFIG_IPV6_UDP_CAPABILITY_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_IPV6_UDP_CAPABILITY_SHIFT 5 +#define ETH_VPORT_RSS_CONFIG_EN_5_TUPLE_CAPABILITY_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_EN_5_TUPLE_CAPABILITY_SHIFT 6 +#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_TCP_FRAG_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_TCP_FRAG_SHIFT 7 +#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_UDP_FRAG_MASK 0x1 +#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_UDP_FRAG_SHIFT 8 +#define ETH_VPORT_RSS_CONFIG_RESERVED0_MASK 0x7F +#define ETH_VPORT_RSS_CONFIG_RESERVED0_SHIFT 9 + u8 rss_id; + u8 rss_mode; + u8 update_rss_key; + u8 update_rss_ind_table; + u8 update_rss_capabilities; + u8 tbl_size; + __le32 reserved2[2]; + __le16 indirection_table[ETH_RSS_IND_TABLE_ENTRIES_NUM]; + __le32 rss_key[ETH_RSS_KEY_SIZE_REGS]; + __le32 reserved3[2]; +}; + +enum eth_vport_rss_mode { + ETH_VPORT_RSS_MODE_DISABLED, + ETH_VPORT_RSS_MODE_REGULAR, + MAX_ETH_VPORT_RSS_MODE +}; + +struct eth_vport_rx_mode { + __le16 state; +#define ETH_VPORT_RX_MODE_UCAST_DROP_ALL_MASK 0x1 +#define ETH_VPORT_RX_MODE_UCAST_DROP_ALL_SHIFT 0 +#define ETH_VPORT_RX_MODE_UCAST_ACCEPT_ALL_MASK 0x1 +#define ETH_VPORT_RX_MODE_UCAST_ACCEPT_ALL_SHIFT 1 +#define ETH_VPORT_RX_MODE_UCAST_ACCEPT_UNMATCHED_MASK 0x1 +#define ETH_VPORT_RX_MODE_UCAST_ACCEPT_UNMATCHED_SHIFT 2 +#define ETH_VPORT_RX_MODE_MCAST_DROP_ALL_MASK 0x1 +#define ETH_VPORT_RX_MODE_MCAST_DROP_ALL_SHIFT 3 +#define ETH_VPORT_RX_MODE_MCAST_ACCEPT_ALL_MASK 0x1 +#define ETH_VPORT_RX_MODE_MCAST_ACCEPT_ALL_SHIFT 4 +#define ETH_VPORT_RX_MODE_BCAST_ACCEPT_ALL_MASK 0x1 +#define ETH_VPORT_RX_MODE_BCAST_ACCEPT_ALL_SHIFT 5 +#define ETH_VPORT_RX_MODE_RESERVED1_MASK 0x3FF +#define ETH_VPORT_RX_MODE_RESERVED1_SHIFT 6 + __le16 reserved2[3]; +}; + +struct eth_vport_tpa_param { + u64 reserved[2]; +}; + +struct eth_vport_tx_mode { + __le16 state; +#define ETH_VPORT_TX_MODE_UCAST_DROP_ALL_MASK 0x1 +#define ETH_VPORT_TX_MODE_UCAST_DROP_ALL_SHIFT 0 +#define ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL_MASK 0x1 +#define ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL_SHIFT 1 +#define ETH_VPORT_TX_MODE_MCAST_DROP_ALL_MASK 0x1 +#define ETH_VPORT_TX_MODE_MCAST_DROP_ALL_SHIFT 2 +#define ETH_VPORT_TX_MODE_MCAST_ACCEPT_ALL_MASK 0x1 +#define ETH_VPORT_TX_MODE_MCAST_ACCEPT_ALL_SHIFT 3 +#define ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL_MASK 0x1 +#define ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL_SHIFT 4 +#define ETH_VPORT_TX_MODE_RESERVED1_MASK 0x7FF +#define 
ETH_VPORT_TX_MODE_RESERVED1_SHIFT 5 + __le16 reserved2[3]; +}; + +struct rx_queue_start_ramrod_data { + __le16 rx_queue_id; + __le16 num_of_pbl_pages; + __le16 bd_max_bytes; + __le16 sb_id; + u8 sb_index; + u8 vport_id; + u8 default_rss_queue_flg; + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 stats_counter_id; + u8 pin_context; + u8 pxp_tph_valid_bd; + u8 pxp_tph_valid_pkt; + u8 pxp_st_hint; + __le16 pxp_st_index; + u8 reserved[4]; + struct regpair cqe_pbl_addr; + struct regpair bd_base; + struct regpair sge_base; +}; + +struct rx_queue_stop_ramrod_data { + __le16 rx_queue_id; + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 vport_id; + u8 reserved[3]; +}; + +struct rx_queue_update_ramrod_data { + __le16 rx_queue_id; + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 init_sge_ring_flg; + u8 vport_id; + u8 pxp_tph_valid_sge; + u8 pxp_st_hint; + __le16 pxp_st_index; + u8 reserved[6]; + struct regpair sge_base; +}; + +struct tx_queue_start_ramrod_data { + __le16 sb_id; + u8 sb_index; + u8 vport_id; + u8 tc; + u8 stats_counter_id; + __le16 qm_pq_id; + u8 flags; +#define TX_QUEUE_START_RAMROD_DATA_DISABLE_OPPORTUNISTIC_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_DISABLE_OPPORTUNISTIC_SHIFT 0 +#define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_PKT_DUP_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_PKT_DUP_SHIFT 1 +#define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_SHIFT 2 +#define TX_QUEUE_START_RAMROD_DATA_RESERVED0_MASK 0x1F +#define TX_QUEUE_START_RAMROD_DATA_RESERVED0_SHIFT 3 + u8 pin_context; + u8 pxp_tph_valid_bd; + u8 pxp_tph_valid_pkt; + __le16 pxp_st_index; + u8 pxp_st_hint; + u8 reserved1[3]; + __le16 queue_zone_id; + __le16 test_dup_count; + __le16 pbl_size; + struct regpair pbl_base_addr; +}; + +struct tx_queue_stop_ramrod_data { + __le16 reserved[4]; +}; + +struct vport_filter_update_ramrod_data { + struct eth_filter_cmd_header filter_cmd_hdr; + struct eth_filter_cmd filter_cmds[ETH_FILTER_RULES_COUNT]; +}; + +struct vport_start_ramrod_data { + u8 vport_id; + u8 sw_fid; + __le16 mtu; + u8 drop_ttl0_en; + u8 inner_vlan_removal_en; + struct eth_vport_rx_mode rx_mode; + struct eth_vport_tx_mode tx_mode; + struct eth_vport_tpa_param tpa_param; + __le16 sge_buff_size; + u8 max_sges_num; + u8 tx_switching_en; + u8 anti_spoofing_en; + u8 default_vlan_en; + u8 handle_ptp_pkts; + u8 silent_vlan_removal_en; + __le16 default_vlan; + u8 untagged; + u8 reserved[7]; +}; + +struct vport_stop_ramrod_data { + u8 vport_id; + u8 reserved[7]; +}; + +struct vport_update_ramrod_data_cmn { + u8 vport_id; + u8 update_rx_active_flg; + u8 rx_active_flg; + u8 update_tx_active_flg; + u8 tx_active_flg; + u8 update_rx_mode_flg; + u8 update_tx_mode_flg; + u8 update_approx_mcast_flg; + u8 update_rss_flg; + u8 update_inner_vlan_removal_en_flg; + u8 inner_vlan_removal_en; + u8 update_tpa_param_flg; + u8 update_tpa_en_flg; + u8 update_sge_param_flg; + __le16 sge_buff_size; + u8 max_sges_num; + u8 update_tx_switching_en_flg; + u8 tx_switching_en; + u8 update_anti_spoofing_en_flg; + u8 anti_spoofing_en; + u8 update_handle_ptp_pkts; + u8 handle_ptp_pkts; + u8 update_default_vlan_en_flg; + u8 default_vlan_en; + u8 update_default_vlan_flg; + __le16 default_vlan; + u8 update_accept_any_vlan_flg; + u8 accept_any_vlan; + u8 silent_vlan_removal_en; + u8 reserved; +}; + +struct vport_update_ramrod_mcast { + __le32 bins[ETH_MULTICAST_MAC_BINS_IN_REGS]; +}; + +struct vport_update_ramrod_data { + struct vport_update_ramrod_data_cmn common; + struct 
eth_vport_rx_mode rx_mode; + struct eth_vport_tx_mode tx_mode; + struct eth_vport_tpa_param tpa_param; + struct vport_update_ramrod_mcast approx_mcast; + struct eth_vport_rss_config rss_config; +}; + struct mstorm_eth_conn_ag_ctx { u8 byte0 /* cdu_validation */; u8 byte1 /* state */; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index f2e76024409a..2772573593a4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -34,6 +34,1202 @@ #include "qed_reg_addr.h" #include "qed_sp.h" +enum qed_rss_caps { + QED_RSS_IPV4 = 0x1, + QED_RSS_IPV6 = 0x2, + QED_RSS_IPV4_TCP = 0x4, + QED_RSS_IPV6_TCP = 0x8, + QED_RSS_IPV4_UDP = 0x10, + QED_RSS_IPV6_UDP = 0x20, +}; + +/* Should be the same as ETH_RSS_IND_TABLE_ENTRIES_NUM */ +#define QED_RSS_IND_TABLE_SIZE 128 +#define QED_RSS_KEY_SIZE 10 /* size in 32b chunks */ + +struct qed_rss_params { + u8 update_rss_config; + u8 rss_enable; + u8 rss_eng_id; + u8 update_rss_capabilities; + u8 update_rss_ind_table; + u8 update_rss_key; + u8 rss_caps; + u8 rss_table_size_log; + u16 rss_ind_table[QED_RSS_IND_TABLE_SIZE]; + u32 rss_key[QED_RSS_KEY_SIZE]; +}; + +enum qed_filter_opcode { + QED_FILTER_ADD, + QED_FILTER_REMOVE, + QED_FILTER_MOVE, + QED_FILTER_REPLACE, /* Delete all MACs and add new one instead */ + QED_FILTER_FLUSH, /* Removes all filters */ +}; + +enum qed_filter_ucast_type { + QED_FILTER_MAC, + QED_FILTER_VLAN, + QED_FILTER_MAC_VLAN, + QED_FILTER_INNER_MAC, + QED_FILTER_INNER_VLAN, + QED_FILTER_INNER_PAIR, + QED_FILTER_INNER_MAC_VNI_PAIR, + QED_FILTER_MAC_VNI_PAIR, + QED_FILTER_VNI, +}; + +struct qed_filter_ucast { + enum qed_filter_opcode opcode; + enum qed_filter_ucast_type type; + u8 is_rx_filter; + u8 is_tx_filter; + u8 vport_to_add_to; + u8 vport_to_remove_from; + unsigned char mac[ETH_ALEN]; + u8 assert_on_error; + u16 vlan; + u32 vni; +}; + +struct qed_filter_mcast { + /* MOVE is not supported for multicast */ + enum qed_filter_opcode opcode; + u8 vport_to_add_to; + u8 vport_to_remove_from; + u8 num_mc_addrs; +#define QED_MAX_MC_ADDRS 64 + unsigned char mac[QED_MAX_MC_ADDRS][ETH_ALEN]; +}; + +struct qed_filter_accept_flags { + u8 update_rx_mode_config; + u8 update_tx_mode_config; + u8 rx_accept_filter; + u8 tx_accept_filter; +#define QED_ACCEPT_NONE 0x01 +#define QED_ACCEPT_UCAST_MATCHED 0x02 +#define QED_ACCEPT_UCAST_UNMATCHED 0x04 +#define QED_ACCEPT_MCAST_MATCHED 0x08 +#define QED_ACCEPT_MCAST_UNMATCHED 0x10 +#define QED_ACCEPT_BCAST 0x20 +}; + +struct qed_sp_vport_update_params { + u16 opaque_fid; + u8 vport_id; + u8 update_vport_active_rx_flg; + u8 vport_active_rx_flg; + u8 update_vport_active_tx_flg; + u8 vport_active_tx_flg; + u8 update_approx_mcast_flg; + unsigned long bins[8]; + struct qed_rss_params *rss_params; + struct qed_filter_accept_flags accept_flags; +}; + +#define QED_MAX_SGES_NUM 16 +#define CRC32_POLY 0x1edc6f41 + +static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, + u32 concrete_fid, + u16 opaque_fid, + u8 vport_id, + u16 mtu, + u8 drop_ttl0_flg, + u8 inner_vlan_removal_en_flg) +{ + struct qed_sp_init_request_params params; + struct vport_start_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + int rc = -EINVAL; + u16 rx_mode = 0; + u8 abs_vport_id = 0; + + rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id); + if (rc != 0) + return rc; + + memset(¶ms, 0, sizeof(params)); + params.ramrod_data_size = sizeof(*p_ramrod); + params.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + 
qed_spq_get_cid(p_hwfn), + opaque_fid, + ETH_RAMROD_VPORT_START, + PROTOCOLID_ETH, + ¶ms); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.vport_start; + p_ramrod->vport_id = abs_vport_id; + + p_ramrod->mtu = cpu_to_le16(mtu); + p_ramrod->inner_vlan_removal_en = inner_vlan_removal_en_flg; + p_ramrod->drop_ttl0_en = drop_ttl0_flg; + + SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1); + SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1); + + p_ramrod->rx_mode.state = cpu_to_le16(rx_mode); + + /* TPA related fields */ + memset(&p_ramrod->tpa_param, 0, + sizeof(struct eth_vport_tpa_param)); + + /* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */ + p_ramrod->sw_fid = qed_concrete_to_sw_fid(p_hwfn->cdev, + concrete_fid); + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int +qed_sp_vport_update_rss(struct qed_hwfn *p_hwfn, + struct vport_update_ramrod_data *p_ramrod, + struct qed_rss_params *p_params) +{ + struct eth_vport_rss_config *rss = &p_ramrod->rss_config; + u16 abs_l2_queue = 0, capabilities = 0; + int rc = 0, i; + + if (!p_params) { + p_ramrod->common.update_rss_flg = 0; + return rc; + } + + BUILD_BUG_ON(QED_RSS_IND_TABLE_SIZE != + ETH_RSS_IND_TABLE_ENTRIES_NUM); + + rc = qed_fw_rss_eng(p_hwfn, p_params->rss_eng_id, &rss->rss_id); + if (rc) + return rc; + + p_ramrod->common.update_rss_flg = p_params->update_rss_config; + rss->update_rss_capabilities = p_params->update_rss_capabilities; + rss->update_rss_ind_table = p_params->update_rss_ind_table; + rss->update_rss_key = p_params->update_rss_key; + + rss->rss_mode = p_params->rss_enable ? + ETH_VPORT_RSS_MODE_REGULAR : + ETH_VPORT_RSS_MODE_DISABLED; + + SET_FIELD(capabilities, + ETH_VPORT_RSS_CONFIG_IPV4_CAPABILITY, + !!(p_params->rss_caps & QED_RSS_IPV4)); + SET_FIELD(capabilities, + ETH_VPORT_RSS_CONFIG_IPV6_CAPABILITY, + !!(p_params->rss_caps & QED_RSS_IPV6)); + SET_FIELD(capabilities, + ETH_VPORT_RSS_CONFIG_IPV4_TCP_CAPABILITY, + !!(p_params->rss_caps & QED_RSS_IPV4_TCP)); + SET_FIELD(capabilities, + ETH_VPORT_RSS_CONFIG_IPV6_TCP_CAPABILITY, + !!(p_params->rss_caps & QED_RSS_IPV6_TCP)); + SET_FIELD(capabilities, + ETH_VPORT_RSS_CONFIG_IPV4_UDP_CAPABILITY, + !!(p_params->rss_caps & QED_RSS_IPV4_UDP)); + SET_FIELD(capabilities, + ETH_VPORT_RSS_CONFIG_IPV6_UDP_CAPABILITY, + !!(p_params->rss_caps & QED_RSS_IPV6_UDP)); + rss->tbl_size = p_params->rss_table_size_log; + + rss->capabilities = cpu_to_le16(capabilities); + + DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, + "update rss flag %d, rss_mode = %d, update_caps = %d, capabilities = %d, update_ind = %d, update_rss_key = %d\n", + p_ramrod->common.update_rss_flg, + rss->rss_mode, rss->update_rss_capabilities, + capabilities, rss->update_rss_ind_table, + rss->update_rss_key); + + for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) { + rc = qed_fw_l2_queue(p_hwfn, + (u8)p_params->rss_ind_table[i], + &abs_l2_queue); + if (rc) + return rc; + + rss->indirection_table[i] = cpu_to_le16(abs_l2_queue); + DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, "i= %d, queue = %d\n", + i, rss->indirection_table[i]); + } + + for (i = 0; i < 10; i++) + rss->rss_key[i] = cpu_to_le32(p_params->rss_key[i]); + + return rc; +} + +static void +qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn, + struct vport_update_ramrod_data *p_ramrod, + struct qed_filter_accept_flags accept_flags) +{ + p_ramrod->common.update_rx_mode_flg = + accept_flags.update_rx_mode_config; + + p_ramrod->common.update_tx_mode_flg = + accept_flags.update_tx_mode_config; + + /* Set Rx mode accept flags */ + if 
(p_ramrod->common.update_rx_mode_flg) { + u8 accept_filter = accept_flags.rx_accept_filter; + u16 state = 0; + + SET_FIELD(state, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, + !(!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) || + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); + + SET_FIELD(state, ETH_VPORT_RX_MODE_UCAST_ACCEPT_UNMATCHED, + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED)); + + SET_FIELD(state, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, + !(!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) || + !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); + + SET_FIELD(state, ETH_VPORT_RX_MODE_MCAST_ACCEPT_ALL, + (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && + !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); + + SET_FIELD(state, ETH_VPORT_RX_MODE_BCAST_ACCEPT_ALL, + !!(accept_filter & QED_ACCEPT_BCAST)); + + p_ramrod->rx_mode.state = cpu_to_le16(state); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "p_ramrod->rx_mode.state = 0x%x\n", state); + } + + /* Set Tx mode accept flags */ + if (p_ramrod->common.update_tx_mode_flg) { + u8 accept_filter = accept_flags.tx_accept_filter; + u16 state = 0; + + SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_DROP_ALL, + !!(accept_filter & QED_ACCEPT_NONE)); + + SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, + (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); + + SET_FIELD(state, ETH_VPORT_TX_MODE_MCAST_DROP_ALL, + !!(accept_filter & QED_ACCEPT_NONE)); + + SET_FIELD(state, ETH_VPORT_TX_MODE_MCAST_ACCEPT_ALL, + (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && + !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); + + SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL, + !!(accept_filter & QED_ACCEPT_BCAST)); + + p_ramrod->tx_mode.state = cpu_to_le16(state); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "p_ramrod->tx_mode.state = 0x%x\n", state); + } +} + +static void +qed_sp_update_mcast_bin(struct qed_hwfn *p_hwfn, + struct vport_update_ramrod_data *p_ramrod, + struct qed_sp_vport_update_params *p_params) +{ + int i; + + memset(&p_ramrod->approx_mcast.bins, 0, + sizeof(p_ramrod->approx_mcast.bins)); + + if (p_params->update_approx_mcast_flg) { + p_ramrod->common.update_approx_mcast_flg = 1; + for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { + u32 *p_bins = (u32 *)p_params->bins; + __le32 val = cpu_to_le32(p_bins[i]); + + p_ramrod->approx_mcast.bins[i] = val; + } + } +} + +static int +qed_sp_vport_update(struct qed_hwfn *p_hwfn, + struct qed_sp_vport_update_params *p_params, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + struct qed_rss_params *p_rss_params = p_params->rss_params; + struct vport_update_ramrod_data_cmn *p_cmn; + struct qed_sp_init_request_params sp_params; + struct vport_update_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + u8 abs_vport_id = 0; + int rc = -EINVAL; + + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); + if (rc != 0) + return rc; + + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.ramrod_data_size = sizeof(*p_ramrod); + sp_params.comp_mode = comp_mode; + sp_params.p_comp_data = p_comp_data; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + qed_spq_get_cid(p_hwfn), + p_params->opaque_fid, + ETH_RAMROD_VPORT_UPDATE, + PROTOCOLID_ETH, + &sp_params); + if (rc) + return rc; + + /* Copy input params to ramrod according to FW struct */ + p_ramrod = &p_ent->ramrod.vport_update; + p_cmn = &p_ramrod->common; + + p_cmn->vport_id = abs_vport_id; + p_cmn->rx_active_flg = p_params->vport_active_rx_flg; + p_cmn->update_rx_active_flg = 
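
The Rx translation in qed_sp_update_accept_mode() above reduces to a few boolean identities: a drop-all bit is the negation of "any accept flag of that class set", and an accept-all bit requires both the matched and unmatched flags. A self-contained restatement of the same predicates (userspace C, hardware field offsets left out):

#include <stdio.h>

#define QED_ACCEPT_UCAST_MATCHED        0x02
#define QED_ACCEPT_UCAST_UNMATCHED      0x04
#define QED_ACCEPT_MCAST_MATCHED        0x08
#define QED_ACCEPT_MCAST_UNMATCHED      0x10
#define QED_ACCEPT_BCAST                0x20

int main(void)
{
        unsigned accept = QED_ACCEPT_UCAST_MATCHED | QED_ACCEPT_BCAST;

        /* Same predicates qed_sp_update_accept_mode() feeds to SET_FIELD() */
        int ucast_drop_all   = !(accept & (QED_ACCEPT_UCAST_MATCHED |
                                           QED_ACCEPT_UCAST_UNMATCHED));
        int ucast_accept_un  = !!(accept & QED_ACCEPT_UCAST_UNMATCHED);
        int mcast_drop_all   = !(accept & (QED_ACCEPT_MCAST_MATCHED |
                                           QED_ACCEPT_MCAST_UNMATCHED));
        int mcast_accept_all = (accept & QED_ACCEPT_MCAST_MATCHED) &&
                               (accept & QED_ACCEPT_MCAST_UNMATCHED);
        int bcast_accept_all = !!(accept & QED_ACCEPT_BCAST);

        printf("ucast: drop_all=%d accept_unmatched=%d\n",
               ucast_drop_all, ucast_accept_un);
        printf("mcast: drop_all=%d accept_all=%d, bcast: accept_all=%d\n",
               mcast_drop_all, mcast_accept_all, bcast_accept_all);
        return 0;
}
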
p_params->update_vport_active_rx_flg; + p_cmn->tx_active_flg = p_params->vport_active_tx_flg; + p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg; + + rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params); + if (rc) { + /* Return spq entry which is taken in qed_sp_init_request()*/ + qed_spq_return_entry(p_hwfn, p_ent); + return rc; + } + + /* Update mcast bins for VFs, PF doesn't use this functionality */ + qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params); + + qed_sp_update_accept_mode(p_hwfn, p_ramrod, p_params->accept_flags); + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + u8 vport_id) +{ + struct qed_sp_init_request_params sp_params; + struct vport_stop_ramrod_data *p_ramrod; + struct qed_spq_entry *p_ent; + u8 abs_vport_id = 0; + int rc; + + rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id); + if (rc != 0) + return rc; + + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.ramrod_data_size = sizeof(*p_ramrod); + sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + qed_spq_get_cid(p_hwfn), + opaque_fid, + ETH_RAMROD_VPORT_STOP, + PROTOCOLID_ETH, + &sp_params); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.vport_stop; + p_ramrod->vport_id = abs_vport_id; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_filter_accept_cmd(struct qed_dev *cdev, + u8 vport, + struct qed_filter_accept_flags accept_flags, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + struct qed_sp_vport_update_params vport_update_params; + int i, rc; + + /* Prepare and send the vport rx_mode change */ + memset(&vport_update_params, 0, sizeof(vport_update_params)); + vport_update_params.vport_id = vport; + vport_update_params.accept_flags = accept_flags; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + vport_update_params.opaque_fid = p_hwfn->hw_info.opaque_fid; + + rc = qed_sp_vport_update(p_hwfn, &vport_update_params, + comp_mode, p_comp_data); + if (rc != 0) { + DP_ERR(cdev, "Update rx_mode failed %d\n", rc); + return rc; + } + + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "Accept filter configured, flags = [Rx]%x [Tx]%x\n", + accept_flags.rx_accept_filter, + accept_flags.tx_accept_filter); + } + + return 0; +} + +static int qed_sp_release_queue_cid( + struct qed_hwfn *p_hwfn, + struct qed_hw_cid_data *p_cid_data) +{ + if (!p_cid_data->b_cid_allocated) + return 0; + + qed_cxt_release_cid(p_hwfn, p_cid_data->cid); + + p_cid_data->b_cid_allocated = false; + + return 0; +} + +static int +qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + u32 cid, + struct qed_queue_start_common_params *params, + u8 stats_id, + u16 bd_max_bytes, + dma_addr_t bd_chain_phys_addr, + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size) +{ + struct rx_queue_start_ramrod_data *p_ramrod = NULL; + struct qed_sp_init_request_params sp_params; + struct qed_spq_entry *p_ent = NULL; + struct qed_hw_cid_data *p_rx_cid; + u16 abs_rx_q_id = 0; + u8 abs_vport_id = 0; + int rc = -EINVAL; + + /* Store information for the stop */ + p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id]; + p_rx_cid->cid = cid; + p_rx_cid->opaque_fid = opaque_fid; + p_rx_cid->vport_id = params->vport_id; + + rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_vport_id); + if (rc != 0) + return rc; + + rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_rx_q_id); + if (rc != 0) + return rc; + + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, 
vport_id=0x%x, sb_id=0x%x\n", + opaque_fid, cid, params->queue_id, params->vport_id, + params->sb); + + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + sp_params.ramrod_data_size = sizeof(*p_ramrod); + + rc = qed_sp_init_request(p_hwfn, &p_ent, + cid, opaque_fid, + ETH_RAMROD_RX_QUEUE_START, + PROTOCOLID_ETH, + &sp_params); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.rx_queue_start; + + p_ramrod->sb_id = cpu_to_le16(params->sb); + p_ramrod->sb_index = params->sb_idx; + p_ramrod->vport_id = abs_vport_id; + p_ramrod->stats_counter_id = stats_id; + p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id); + p_ramrod->complete_cqe_flg = 0; + p_ramrod->complete_event_flg = 1; + + p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes); + p_ramrod->bd_base.hi = DMA_HI_LE(bd_chain_phys_addr); + p_ramrod->bd_base.lo = DMA_LO_LE(bd_chain_phys_addr); + + p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); + p_ramrod->cqe_pbl_addr.hi = DMA_HI_LE(cqe_pbl_addr); + p_ramrod->cqe_pbl_addr.lo = DMA_LO_LE(cqe_pbl_addr); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + return rc; +} + +static int +qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + struct qed_queue_start_common_params *params, + u16 bd_max_bytes, + dma_addr_t bd_chain_phys_addr, + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, + void __iomem **pp_prod) +{ + struct qed_hw_cid_data *p_rx_cid; + u64 init_prod_val = 0; + u16 abs_l2_queue = 0; + u8 abs_stats_id = 0; + int rc; + + rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_l2_queue); + if (rc != 0) + return rc; + + rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_stats_id); + if (rc != 0) + return rc; + + *pp_prod = (u8 __iomem *)p_hwfn->regview + + GTT_BAR0_MAP_REG_MSDM_RAM + + MSTORM_PRODS_OFFSET(abs_l2_queue); + + /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */ + __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u64), + (u32 *)(&init_prod_val)); + + /* Allocate a CID for the queue */ + p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id]; + rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, + &p_rx_cid->cid); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to acquire cid\n"); + return rc; + } + p_rx_cid->b_cid_allocated = true; + + rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, + opaque_fid, + p_rx_cid->cid, + params, + abs_stats_id, + bd_max_bytes, + bd_chain_phys_addr, + cqe_pbl_addr, + cqe_pbl_size); + + if (rc != 0) + qed_sp_release_queue_cid(p_hwfn, p_rx_cid); + + return rc; +} + +static int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn, + u16 rx_queue_id, + bool eq_completion_only, + bool cqe_completion) +{ + struct qed_hw_cid_data *p_rx_cid = &p_hwfn->p_rx_cids[rx_queue_id]; + struct rx_queue_stop_ramrod_data *p_ramrod = NULL; + struct qed_sp_init_request_params sp_params; + struct qed_spq_entry *p_ent = NULL; + u16 abs_rx_q_id = 0; + int rc = -EINVAL; + + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.ramrod_data_size = sizeof(*p_ramrod); + sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + p_rx_cid->cid, + p_rx_cid->opaque_fid, + ETH_RAMROD_RX_QUEUE_STOP, + PROTOCOLID_ETH, + &sp_params); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.rx_queue_stop; + + qed_fw_vport(p_hwfn, p_rx_cid->vport_id, &p_ramrod->vport_id); + qed_fw_l2_queue(p_hwfn, rx_queue_id, &abs_rx_q_id); + p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id); + + /* Cleaning the queue requires the completion to arrive there. + * In addition, VFs require the answer to come as eqe to PF.
+ */ + p_ramrod->complete_cqe_flg = + (!!(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) && + !eq_completion_only) || cqe_completion; + p_ramrod->complete_event_flg = + !(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) || + eq_completion_only; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + return rc; + + return qed_sp_release_queue_cid(p_hwfn, p_rx_cid); +} + +static int +qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + u32 cid, + struct qed_queue_start_common_params *p_params, + u8 stats_id, + dma_addr_t pbl_addr, + u16 pbl_size, + union qed_qm_pq_params *p_pq_params) +{ + struct tx_queue_start_ramrod_data *p_ramrod = NULL; + struct qed_sp_init_request_params sp_params; + struct qed_spq_entry *p_ent = NULL; + struct qed_hw_cid_data *p_tx_cid; + u8 abs_vport_id; + int rc = -EINVAL; + u16 pq_id; + + /* Store information for the stop */ + p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id]; + p_tx_cid->cid = cid; + p_tx_cid->opaque_fid = opaque_fid; + + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); + if (rc) + return rc; + + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.ramrod_data_size = sizeof(*p_ramrod); + sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, cid, + opaque_fid, + ETH_RAMROD_TX_QUEUE_START, + PROTOCOLID_ETH, + &sp_params); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.tx_queue_start; + p_ramrod->vport_id = abs_vport_id; + + p_ramrod->sb_id = cpu_to_le16(p_params->sb); + p_ramrod->sb_index = p_params->sb_idx; + p_ramrod->stats_counter_id = stats_id; + p_ramrod->tc = p_pq_params->eth.tc; + + p_ramrod->pbl_size = cpu_to_le16(pbl_size); + p_ramrod->pbl_base_addr.hi = DMA_HI_LE(pbl_addr); + p_ramrod->pbl_base_addr.lo = DMA_LO_LE(pbl_addr); + + pq_id = qed_get_qm_pq(p_hwfn, + PROTOCOLID_ETH, + p_pq_params); + p_ramrod->qm_pq_id = cpu_to_le16(pq_id); + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int +qed_sp_eth_tx_queue_start(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + struct qed_queue_start_common_params *p_params, + dma_addr_t pbl_addr, + u16 pbl_size, + void __iomem **pp_doorbell) +{ + struct qed_hw_cid_data *p_tx_cid; + union qed_qm_pq_params pq_params; + u8 abs_stats_id = 0; + int rc; + + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id); + if (rc) + return rc; + + p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id]; + memset(p_tx_cid, 0, sizeof(*p_tx_cid)); + memset(&pq_params, 0, sizeof(pq_params)); + + /* Allocate a CID for the queue */ + rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, + &p_tx_cid->cid); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to acquire cid\n"); + return rc; + } + p_tx_cid->b_cid_allocated = true; + + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "opaque_fid=0x%x, cid=0x%x, tx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n", + opaque_fid, p_tx_cid->cid, + p_params->queue_id, p_params->vport_id, p_params->sb); + + rc = qed_sp_eth_txq_start_ramrod(p_hwfn, + opaque_fid, + p_tx_cid->cid, + p_params, + abs_stats_id, + pbl_addr, + pbl_size, + &pq_params); + + *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + + qed_db_addr(p_tx_cid->cid, DQ_DEMS_LEGACY); + + if (rc) + qed_sp_release_queue_cid(p_hwfn, p_tx_cid); + + return rc; +} + +static int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, + u16 tx_queue_id) +{ + struct qed_hw_cid_data *p_tx_cid = &p_hwfn->p_tx_cids[tx_queue_id]; + struct qed_sp_init_request_params sp_params; + struct qed_spq_entry *p_ent = NULL; + int rc = -EINVAL; + + memset(&sp_params, 0, sizeof(sp_params)); + 
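
The pair of completion flags computed in qed_sp_eth_rx_queue_stop() above decides where the stop completion is reported. Enumerating the expressions over "queue owned by this function" (own), eq_completion_only (eq_only) and cqe_completion (cqe) makes the intent visible; a small sketch that prints the full truth table:

#include <stdio.h>

int main(void)
{
        int own, eq_only, cqe;

        puts("own eq_only cqe -> complete_cqe complete_event");
        for (own = 0; own <= 1; own++)
                for (eq_only = 0; eq_only <= 1; eq_only++)
                        for (cqe = 0; cqe <= 1; cqe++) {
                                /* the expressions from qed_sp_eth_rx_queue_stop() */
                                int cqe_flg = (own && !eq_only) || cqe;
                                int event_flg = !own || eq_only;

                                printf(" %d     %d      %d  ->      %d            %d\n",
                                       own, eq_only, cqe, cqe_flg, event_flg);
                        }
        return 0;
}

In short: a PF-owned queue without eq_completion_only completes on the CQE ring, everything else completes as an EQ event, and cqe_completion can force a CQE completion on top.
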
sp_params.ramrod_data_size = sizeof(struct tx_queue_stop_ramrod_data); + sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + p_tx_cid->cid, + p_tx_cid->opaque_fid, + ETH_RAMROD_TX_QUEUE_STOP, + PROTOCOLID_ETH, + &sp_params); + if (rc) + return rc; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + return rc; + + return qed_sp_release_queue_cid(p_hwfn, p_tx_cid); +} + +static enum eth_filter_action +qed_filter_action(enum qed_filter_opcode opcode) +{ + enum eth_filter_action action = MAX_ETH_FILTER_ACTION; + + switch (opcode) { + case QED_FILTER_ADD: + action = ETH_FILTER_ACTION_ADD; + break; + case QED_FILTER_REMOVE: + action = ETH_FILTER_ACTION_REMOVE; + break; + case QED_FILTER_REPLACE: + case QED_FILTER_FLUSH: + action = ETH_FILTER_ACTION_REPLACE; + break; + default: + action = MAX_ETH_FILTER_ACTION; + } + + return action; +} + +static void qed_set_fw_mac_addr(__le16 *fw_msb, + __le16 *fw_mid, + __le16 *fw_lsb, + u8 *mac) +{ + ((u8 *)fw_msb)[0] = mac[1]; + ((u8 *)fw_msb)[1] = mac[0]; + ((u8 *)fw_mid)[0] = mac[3]; + ((u8 *)fw_mid)[1] = mac[2]; + ((u8 *)fw_lsb)[0] = mac[5]; + ((u8 *)fw_lsb)[1] = mac[4]; +} + +static int +qed_filter_ucast_common(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + struct qed_filter_ucast *p_filter_cmd, + struct vport_filter_update_ramrod_data **pp_ramrod, + struct qed_spq_entry **pp_ent, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + u8 vport_to_add_to = 0, vport_to_remove_from = 0; + struct vport_filter_update_ramrod_data *p_ramrod; + struct qed_sp_init_request_params sp_params; + struct eth_filter_cmd *p_first_filter; + struct eth_filter_cmd *p_second_filter; + enum eth_filter_action action; + int rc; + + rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_remove_from, + &vport_to_remove_from); + if (rc) + return rc; + + rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_add_to, + &vport_to_add_to); + if (rc) + return rc; + + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.ramrod_data_size = sizeof(**pp_ramrod); + sp_params.comp_mode = comp_mode; + sp_params.p_comp_data = p_comp_data; + + rc = qed_sp_init_request(p_hwfn, pp_ent, + qed_spq_get_cid(p_hwfn), + opaque_fid, + ETH_RAMROD_FILTERS_UPDATE, + PROTOCOLID_ETH, + &sp_params); + if (rc) + return rc; + + *pp_ramrod = &(*pp_ent)->ramrod.vport_filter_update; + p_ramrod = *pp_ramrod; + p_ramrod->filter_cmd_hdr.rx = p_filter_cmd->is_rx_filter ? 1 : 0; + p_ramrod->filter_cmd_hdr.tx = p_filter_cmd->is_tx_filter ? 
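
qed_set_fw_mac_addr() above hands the MAC to the firmware as three 16-bit words with the two bytes of each word swapped, i.e. each __le16 field ends up holding its word in network byte order. A userspace demonstration of the packing:

#include <stdint.h>
#include <stdio.h>

static void set_fw_mac(uint16_t *msb, uint16_t *mid, uint16_t *lsb,
                       const uint8_t mac[6])
{
        ((uint8_t *)msb)[0] = mac[1];
        ((uint8_t *)msb)[1] = mac[0];
        ((uint8_t *)mid)[0] = mac[3];
        ((uint8_t *)mid)[1] = mac[2];
        ((uint8_t *)lsb)[0] = mac[5];
        ((uint8_t *)lsb)[1] = mac[4];
}

int main(void)
{
        const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        uint16_t msb, mid, lsb;

        set_fw_mac(&msb, &mid, &lsb, mac);
        /* On a little-endian host this prints 0011 2233 4455: each word
         * sits big-endian in memory, as the __le16 FW fields expect.
         */
        printf("%04x %04x %04x\n", msb, mid, lsb);
        return 0;
}
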
1 : 0; + + switch (p_filter_cmd->opcode) { + case QED_FILTER_FLUSH: + p_ramrod->filter_cmd_hdr.cmd_cnt = 0; break; + case QED_FILTER_MOVE: + p_ramrod->filter_cmd_hdr.cmd_cnt = 2; break; + default: + p_ramrod->filter_cmd_hdr.cmd_cnt = 1; break; + } + + p_first_filter = &p_ramrod->filter_cmds[0]; + p_second_filter = &p_ramrod->filter_cmds[1]; + + switch (p_filter_cmd->type) { + case QED_FILTER_MAC: + p_first_filter->type = ETH_FILTER_TYPE_MAC; break; + case QED_FILTER_VLAN: + p_first_filter->type = ETH_FILTER_TYPE_VLAN; break; + case QED_FILTER_MAC_VLAN: + p_first_filter->type = ETH_FILTER_TYPE_PAIR; break; + case QED_FILTER_INNER_MAC: + p_first_filter->type = ETH_FILTER_TYPE_INNER_MAC; break; + case QED_FILTER_INNER_VLAN: + p_first_filter->type = ETH_FILTER_TYPE_INNER_VLAN; break; + case QED_FILTER_INNER_PAIR: + p_first_filter->type = ETH_FILTER_TYPE_INNER_PAIR; break; + case QED_FILTER_INNER_MAC_VNI_PAIR: + p_first_filter->type = ETH_FILTER_TYPE_INNER_MAC_VNI_PAIR; + break; + case QED_FILTER_MAC_VNI_PAIR: + p_first_filter->type = ETH_FILTER_TYPE_MAC_VNI_PAIR; break; + case QED_FILTER_VNI: + p_first_filter->type = ETH_FILTER_TYPE_VNI; break; + } + + if ((p_first_filter->type == ETH_FILTER_TYPE_MAC) || + (p_first_filter->type == ETH_FILTER_TYPE_PAIR) || + (p_first_filter->type == ETH_FILTER_TYPE_INNER_MAC) || + (p_first_filter->type == ETH_FILTER_TYPE_INNER_PAIR) || + (p_first_filter->type == ETH_FILTER_TYPE_INNER_MAC_VNI_PAIR) || + (p_first_filter->type == ETH_FILTER_TYPE_MAC_VNI_PAIR)) { + qed_set_fw_mac_addr(&p_first_filter->mac_msb, + &p_first_filter->mac_mid, + &p_first_filter->mac_lsb, + (u8 *)p_filter_cmd->mac); + } + + if ((p_first_filter->type == ETH_FILTER_TYPE_VLAN) || + (p_first_filter->type == ETH_FILTER_TYPE_PAIR) || + (p_first_filter->type == ETH_FILTER_TYPE_INNER_VLAN) || + (p_first_filter->type == ETH_FILTER_TYPE_INNER_PAIR)) + p_first_filter->vlan_id = cpu_to_le16(p_filter_cmd->vlan); + + if ((p_first_filter->type == ETH_FILTER_TYPE_INNER_MAC_VNI_PAIR) || + (p_first_filter->type == ETH_FILTER_TYPE_MAC_VNI_PAIR) || + (p_first_filter->type == ETH_FILTER_TYPE_VNI)) + p_first_filter->vni = cpu_to_le32(p_filter_cmd->vni); + + if (p_filter_cmd->opcode == QED_FILTER_MOVE) { + p_second_filter->type = p_first_filter->type; + p_second_filter->mac_msb = p_first_filter->mac_msb; + p_second_filter->mac_mid = p_first_filter->mac_mid; + p_second_filter->mac_lsb = p_first_filter->mac_lsb; + p_second_filter->vlan_id = p_first_filter->vlan_id; + p_second_filter->vni = p_first_filter->vni; + + p_first_filter->action = ETH_FILTER_ACTION_REMOVE; + + p_first_filter->vport_id = vport_to_remove_from; + + p_second_filter->action = ETH_FILTER_ACTION_ADD; + p_second_filter->vport_id = vport_to_add_to; + } else { + action = qed_filter_action(p_filter_cmd->opcode); + + if (action == MAX_ETH_FILTER_ACTION) { + DP_NOTICE(p_hwfn, + "%d is not supported yet\n", + p_filter_cmd->opcode); + return -EINVAL; + } + + p_first_filter->action = action; + p_first_filter->vport_id = (p_filter_cmd->opcode == + QED_FILTER_REMOVE) ? 
+ vport_to_remove_from : + vport_to_add_to; + } + + return 0; +} + +static int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + struct qed_filter_ucast *p_filter_cmd, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + struct vport_filter_update_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct eth_filter_cmd_header *p_header; + int rc; + + rc = qed_filter_ucast_common(p_hwfn, opaque_fid, p_filter_cmd, + &p_ramrod, &p_ent, + comp_mode, p_comp_data); + if (rc != 0) { + DP_ERR(p_hwfn, "Uni. filter command failed %d\n", rc); + return rc; + } + p_header = &p_ramrod->filter_cmd_hdr; + p_header->assert_on_error = p_filter_cmd->assert_on_error; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc != 0) { + DP_ERR(p_hwfn, + "Unicast filter ADD command failed %d\n", + rc); + return rc; + } + + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "Unicast filter configured, opcode = %s, type = %s, cmd_cnt = %d, is_rx_filter = %d, is_tx_filter = %d\n", + (p_filter_cmd->opcode == QED_FILTER_ADD) ? "ADD" : + ((p_filter_cmd->opcode == QED_FILTER_REMOVE) ? + "REMOVE" : + ((p_filter_cmd->opcode == QED_FILTER_MOVE) ? + "MOVE" : "REPLACE")), + (p_filter_cmd->type == QED_FILTER_MAC) ? "MAC" : + ((p_filter_cmd->type == QED_FILTER_VLAN) ? + "VLAN" : "MAC & VLAN"), + p_ramrod->filter_cmd_hdr.cmd_cnt, + p_filter_cmd->is_rx_filter, + p_filter_cmd->is_tx_filter); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "vport_to_add_to = %d, vport_to_remove_from = %d, mac = %2x:%2x:%2x:%2x:%2x:%2x, vlan = %d\n", + p_filter_cmd->vport_to_add_to, + p_filter_cmd->vport_to_remove_from, + p_filter_cmd->mac[0], + p_filter_cmd->mac[1], + p_filter_cmd->mac[2], + p_filter_cmd->mac[3], + p_filter_cmd->mac[4], + p_filter_cmd->mac[5], + p_filter_cmd->vlan); + + return 0; +} + +/******************************************************************************* + * Description: + * Calculates crc 32 on a buffer + * Note: crc32_length MUST be aligned to 8 + * Return: + ******************************************************************************/ +static u32 qed_calc_crc32c(u8 *crc32_packet, + u32 crc32_length, + u32 crc32_seed, + u8 complement) +{ + u32 byte = 0; + u32 bit = 0; + u8 msb = 0; + u8 current_byte = 0; + u32 crc32_result = crc32_seed; + + if ((!crc32_packet) || + (crc32_length == 0) || + ((crc32_length % 8) != 0)) + return crc32_result; + for (byte = 0; byte < crc32_length; byte++) { + current_byte = crc32_packet[byte]; + for (bit = 0; bit < 8; bit++) { + msb = (u8)(crc32_result >> 31); + crc32_result = crc32_result << 1; + if (msb != (0x1 & (current_byte >> bit))) { + crc32_result = crc32_result ^ CRC32_POLY; + crc32_result |= 1; /*crc32_result[0] = 1;*/ + } + } + } + return crc32_result; +} + +static inline u32 qed_crc32c_le(u32 seed, + u8 *mac, + u32 len) +{ + u32 packet_buf[2] = { 0 }; + + memcpy((u8 *)(&packet_buf[0]), &mac[0], 6); + return qed_calc_crc32c((u8 *)packet_buf, 8, seed, 0); +} + +static u8 qed_mcast_bin_from_mac(u8 *mac) +{ + u32 crc = qed_crc32c_le(ETH_MULTICAST_BIN_FROM_MAC_SEED, + mac, ETH_ALEN); + + return crc & 0xff; +} + +static int +qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, + u16 opaque_fid, + struct qed_filter_mcast *p_filter_cmd, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + unsigned long bins[ETH_MULTICAST_MAC_BINS_IN_REGS]; + struct vport_update_ramrod_data *p_ramrod = NULL; + struct qed_sp_init_request_params sp_params; + struct qed_spq_entry *p_ent = NULL; + u8 abs_vport_id = 0; + int rc, i; + + if (p_filter_cmd->opcode == 
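
qed_mcast_bin_from_mac() above condenses each multicast MAC into one of 256 approximate-filter bins: the MAC is zero-padded to 8 bytes, run through the bit-serial CRC-32C shown above, and the low byte of the result selects the bin. A standalone version for experimentation; the real seed, ETH_MULTICAST_BIN_FROM_MAC_SEED, comes from the firmware headers and is not part of this patch, so a placeholder is used:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CRC32_POLY      0x1edc6f41      /* CRC-32C (Castagnoli) polynomial */
#define DEMO_SEED       0xffffffff      /* placeholder for the FW seed */

static uint32_t calc_crc32c(const uint8_t *buf, uint32_t len, uint32_t seed)
{
        uint32_t crc = seed;
        uint32_t byte, bit;

        if (!buf || !len || (len % 8))  /* length must be 8-aligned, as above */
                return crc;

        for (byte = 0; byte < len; byte++) {
                uint8_t cur = buf[byte];

                for (bit = 0; bit < 8; bit++) {
                        uint8_t msb = (uint8_t)(crc >> 31);

                        crc <<= 1;
                        if (msb != ((cur >> bit) & 1))
                                crc = (crc ^ CRC32_POLY) | 1;
                }
        }
        return crc;
}

static uint8_t mcast_bin_from_mac(const uint8_t mac[6])
{
        uint8_t buf[8] = { 0 };         /* MAC padded to 8 bytes, as in qed_crc32c_le() */

        memcpy(buf, mac, 6);
        return calc_crc32c(buf, 8, DEMO_SEED) & 0xff;   /* 256 bins */
}

int main(void)
{
        const uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x01, 0x02, 0x03 };

        printf("bin = %u\n", mcast_bin_from_mac(mac));
        return 0;
}

Since several MACs can hash to the same bin, the filter is approximate by design: the bins only bound which multicast frames reach the host, and software filtering does the rest.
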
QED_FILTER_ADD) { + rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_add_to, + &abs_vport_id); + if (rc) + return rc; + } else { + rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_remove_from, + &abs_vport_id); + if (rc) + return rc; + } + + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.ramrod_data_size = sizeof(*p_ramrod); + sp_params.comp_mode = comp_mode; + sp_params.p_comp_data = p_comp_data; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + qed_spq_get_cid(p_hwfn), + p_hwfn->hw_info.opaque_fid, + ETH_RAMROD_VPORT_UPDATE, + PROTOCOLID_ETH, + &sp_params); + + if (rc) { + DP_ERR(p_hwfn, "Multi-cast command failed %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.vport_update; + p_ramrod->common.update_approx_mcast_flg = 1; + + /* explicitly clear out the entire vector */ + memset(&p_ramrod->approx_mcast.bins, 0, + sizeof(p_ramrod->approx_mcast.bins)); + memset(bins, 0, sizeof(unsigned long) * + ETH_MULTICAST_MAC_BINS_IN_REGS); + /* filter ADD op is explicit set op and it removes + * any existing filters for the vport + */ + if (p_filter_cmd->opcode == QED_FILTER_ADD) { + for (i = 0; i < p_filter_cmd->num_mc_addrs; i++) { + u32 bit; + + bit = qed_mcast_bin_from_mac(p_filter_cmd->mac[i]); + __set_bit(bit, bins); + } + + /* Convert to correct endianity */ + for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { + u32 *p_bins = (u32 *)bins; + struct vport_update_ramrod_mcast *approx_mcast; + + approx_mcast = &p_ramrod->approx_mcast; + approx_mcast->bins[i] = cpu_to_le32(p_bins[i]); + } + } + + p_ramrod->common.vport_id = abs_vport_id; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int +qed_filter_mcast_cmd(struct qed_dev *cdev, + struct qed_filter_mcast *p_filter_cmd, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + int rc = 0; + int i; + + /* only ADD and REMOVE operations are supported for multi-cast */ + if ((p_filter_cmd->opcode != QED_FILTER_ADD && + (p_filter_cmd->opcode != QED_FILTER_REMOVE)) || + (p_filter_cmd->num_mc_addrs > QED_MAX_MC_ADDRS)) + return -EINVAL; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + u16 opaque_fid; + + if (rc != 0) + break; + + opaque_fid = p_hwfn->hw_info.opaque_fid; + + rc = qed_sp_eth_filter_mcast(p_hwfn, + opaque_fid, + p_filter_cmd, + comp_mode, + p_comp_data); + } + return rc; +} + +static int qed_filter_ucast_cmd(struct qed_dev *cdev, + struct qed_filter_ucast *p_filter_cmd, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) +{ + int rc = 0; + int i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + u16 opaque_fid; + + if (rc != 0) + break; + + opaque_fid = p_hwfn->hw_info.opaque_fid; + + rc = qed_sp_eth_filter_ucast(p_hwfn, + opaque_fid, + p_filter_cmd, + comp_mode, + p_comp_data); + } + + return rc; +} + static int qed_fill_eth_dev_info(struct qed_dev *cdev, struct qed_dev_eth_info *info) { @@ -63,9 +1259,418 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, return 0; } +static int qed_start_vport(struct qed_dev *cdev, + u8 vport_id, + u16 mtu, + u8 drop_ttl0_flg, + u8 inner_vlan_removal_en_flg) +{ + int rc, i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + rc = qed_sp_vport_start(p_hwfn, + p_hwfn->hw_info.concrete_fid, + p_hwfn->hw_info.opaque_fid, + vport_id, + mtu, + drop_ttl0_flg, + inner_vlan_removal_en_flg); + + if (rc) { + DP_ERR(cdev, "Failed to start VPORT\n"); + return rc; + } + + qed_hw_start_fastpath(p_hwfn); + + DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP), + "Started V-PORT %d 
with MTU %d\n", + vport_id, mtu); + } + + return 0; +} + +static int qed_stop_vport(struct qed_dev *cdev, + u8 vport_id) +{ + int rc, i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + rc = qed_sp_vport_stop(p_hwfn, + p_hwfn->hw_info.opaque_fid, + vport_id); + + if (rc) { + DP_ERR(cdev, "Failed to stop VPORT\n"); + return rc; + } + } + return 0; +} + +static int qed_update_vport(struct qed_dev *cdev, + struct qed_update_vport_params *params) +{ + struct qed_sp_vport_update_params sp_params; + struct qed_rss_params sp_rss_params; + int rc, i; + + if (!cdev) + return -ENODEV; + + memset(&sp_params, 0, sizeof(sp_params)); + memset(&sp_rss_params, 0, sizeof(sp_rss_params)); + + /* Translate protocol params into sp params */ + sp_params.vport_id = params->vport_id; + sp_params.update_vport_active_rx_flg = + params->update_vport_active_flg; + sp_params.update_vport_active_tx_flg = + params->update_vport_active_flg; + sp_params.vport_active_rx_flg = params->vport_active_flg; + sp_params.vport_active_tx_flg = params->vport_active_flg; + + /* RSS - is a bit tricky, since upper-layer isn't familiar with hwfns. + * We need to re-fix the rss values per engine for CMT. + */ + if (cdev->num_hwfns > 1 && params->update_rss_flg) { + struct qed_update_vport_rss_params *rss = + ¶ms->rss_params; + int k, max = 0; + + /* Find largest entry, since it's possible RSS needs to + * be disabled [in case only 1 queue per-hwfn] + */ + for (k = 0; k < QED_RSS_IND_TABLE_SIZE; k++) + max = (max > rss->rss_ind_table[k]) ? + max : rss->rss_ind_table[k]; + + /* Either fix RSS values or disable RSS */ + if (cdev->num_hwfns < max + 1) { + int divisor = (max + cdev->num_hwfns - 1) / + cdev->num_hwfns; + + DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP), + "CMT - fixing RSS values (modulo %02x)\n", + divisor); + + for (k = 0; k < QED_RSS_IND_TABLE_SIZE; k++) + rss->rss_ind_table[k] = + rss->rss_ind_table[k] % divisor; + } else { + DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP), + "CMT - 1 queue per-hwfn; Disabling RSS\n"); + params->update_rss_flg = 0; + } + } + + /* Now, update the RSS configuration for actual configuration */ + if (params->update_rss_flg) { + sp_rss_params.update_rss_config = 1; + sp_rss_params.rss_enable = 1; + sp_rss_params.update_rss_capabilities = 1; + sp_rss_params.update_rss_ind_table = 1; + sp_rss_params.update_rss_key = 1; + sp_rss_params.rss_caps = QED_RSS_IPV4 | + QED_RSS_IPV6 | + QED_RSS_IPV4_TCP | QED_RSS_IPV6_TCP; + sp_rss_params.rss_table_size_log = 7; /* 2^7 = 128 */ + memcpy(sp_rss_params.rss_ind_table, + params->rss_params.rss_ind_table, + QED_RSS_IND_TABLE_SIZE * sizeof(u16)); + memcpy(sp_rss_params.rss_key, params->rss_params.rss_key, + QED_RSS_KEY_SIZE * sizeof(u32)); + } + sp_params.rss_params = &sp_rss_params; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + sp_params.opaque_fid = p_hwfn->hw_info.opaque_fid; + rc = qed_sp_vport_update(p_hwfn, &sp_params, + QED_SPQ_MODE_EBLOCK, + NULL); + if (rc) { + DP_ERR(cdev, "Failed to update VPORT\n"); + return rc; + } + + DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP), + "Updated V-PORT %d: active_flag %d [update %d]\n", + params->vport_id, params->vport_active_flg, + params->update_vport_active_flg); + } + + return 0; +} + +static int qed_start_rxq(struct qed_dev *cdev, + struct qed_queue_start_common_params *params, + u16 bd_max_bytes, + dma_addr_t bd_chain_phys_addr, + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, + void __iomem **pp_prod) +{ + int rc, hwfn_index; + struct qed_hwfn 
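
The CMT fix-up in qed_update_vport() above exists because the caller fills the indirection table with device-global queue IDs, while in 100G (two-engine) mode each hwfn only owns its share of the queues. Entries are therefore reduced modulo a per-engine divisor, or RSS is dropped entirely when every hwfn is left with a single queue. A sketch of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

#define QED_RSS_IND_TABLE_SIZE 128

/* Returns 1 if RSS stays enabled (entries fixed up), 0 if the caller
 * should clear update_rss_flg, mirroring qed_update_vport() above.
 */
static int cmt_fix_rss(uint16_t ind[QED_RSS_IND_TABLE_SIZE], int num_hwfns)
{
        int k, max = 0, divisor;

        /* Find the largest entry; RSS must go if each hwfn has one queue */
        for (k = 0; k < QED_RSS_IND_TABLE_SIZE; k++)
                if (ind[k] > max)
                        max = ind[k];

        if (num_hwfns >= max + 1)
                return 0;

        /* Reduce every entry into the per-hwfn queue range */
        divisor = (max + num_hwfns - 1) / num_hwfns;
        for (k = 0; k < QED_RSS_IND_TABLE_SIZE; k++)
                ind[k] %= divisor;

        return 1;
}

int main(void)
{
        uint16_t ind[QED_RSS_IND_TABLE_SIZE];
        int k;

        for (k = 0; k < QED_RSS_IND_TABLE_SIZE; k++)
                ind[k] = k % 8; /* eight global queues spread over the table */

        printf("rss %s, ind[5] = %u\n",
               cmt_fix_rss(ind, 2) ? "kept" : "disabled", ind[5]);
        return 0;
}
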
*p_hwfn; + + hwfn_index = params->rss_id % cdev->num_hwfns; + p_hwfn = &cdev->hwfns[hwfn_index]; + + /* Fix queue ID in 100g mode */ + params->queue_id /= cdev->num_hwfns; + + rc = qed_sp_eth_rx_queue_start(p_hwfn, + p_hwfn->hw_info.opaque_fid, + params, + bd_max_bytes, + bd_chain_phys_addr, + cqe_pbl_addr, + cqe_pbl_size, + pp_prod); + + if (rc) { + DP_ERR(cdev, "Failed to start RXQ#%d\n", params->queue_id); + return rc; + } + + DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP), + "Started RX-Q %d [rss %d] on V-PORT %d and SB %d\n", + params->queue_id, params->rss_id, params->vport_id, + params->sb); + + return 0; +} + +static int qed_stop_rxq(struct qed_dev *cdev, + struct qed_stop_rxq_params *params) +{ + int rc, hwfn_index; + struct qed_hwfn *p_hwfn; + + hwfn_index = params->rss_id % cdev->num_hwfns; + p_hwfn = &cdev->hwfns[hwfn_index]; + + rc = qed_sp_eth_rx_queue_stop(p_hwfn, + params->rx_queue_id / cdev->num_hwfns, + params->eq_completion_only, + false); + if (rc) { + DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id); + return rc; + } + + return 0; +} + +static int qed_start_txq(struct qed_dev *cdev, + struct qed_queue_start_common_params *p_params, + dma_addr_t pbl_addr, + u16 pbl_size, + void __iomem **pp_doorbell) +{ + struct qed_hwfn *p_hwfn; + int rc, hwfn_index; + + hwfn_index = p_params->rss_id % cdev->num_hwfns; + p_hwfn = &cdev->hwfns[hwfn_index]; + + /* Fix queue ID in 100g mode */ + p_params->queue_id /= cdev->num_hwfns; + + rc = qed_sp_eth_tx_queue_start(p_hwfn, + p_hwfn->hw_info.opaque_fid, + p_params, + pbl_addr, + pbl_size, + pp_doorbell); + + if (rc) { + DP_ERR(cdev, "Failed to start TXQ#%d\n", p_params->queue_id); + return rc; + } + + DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP), + "Started TX-Q %d [rss %d] on V-PORT %d and SB %d\n", + p_params->queue_id, p_params->rss_id, p_params->vport_id, + p_params->sb); + + return 0; +} + +#define QED_HW_STOP_RETRY_LIMIT (10) +static int qed_fastpath_stop(struct qed_dev *cdev) +{ + qed_hw_stop_fastpath(cdev); + + return 0; +} + +static int qed_stop_txq(struct qed_dev *cdev, + struct qed_stop_txq_params *params) +{ + struct qed_hwfn *p_hwfn; + int rc, hwfn_index; + + hwfn_index = params->rss_id % cdev->num_hwfns; + p_hwfn = &cdev->hwfns[hwfn_index]; + + rc = qed_sp_eth_tx_queue_stop(p_hwfn, + params->tx_queue_id / cdev->num_hwfns); + if (rc) { + DP_ERR(cdev, "Failed to stop TXQ#%d\n", params->tx_queue_id); + return rc; + } + + return 0; +} + +static int qed_configure_filter_rx_mode(struct qed_dev *cdev, + enum qed_filter_rx_mode_type type) +{ + struct qed_filter_accept_flags accept_flags; + + memset(&accept_flags, 0, sizeof(accept_flags)); + + accept_flags.update_rx_mode_config = 1; + accept_flags.update_tx_mode_config = 1; + accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED | + QED_ACCEPT_MCAST_MATCHED | + QED_ACCEPT_BCAST; + accept_flags.tx_accept_filter = QED_ACCEPT_UCAST_MATCHED | + QED_ACCEPT_MCAST_MATCHED | + QED_ACCEPT_BCAST; + + if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) + accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | + QED_ACCEPT_MCAST_UNMATCHED; + else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) + accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; + + return qed_filter_accept_cmd(cdev, 0, accept_flags, + QED_SPQ_MODE_CB, NULL); +} + +static int qed_configure_filter_ucast(struct qed_dev *cdev, + struct qed_filter_ucast_params *params) +{ + struct qed_filter_ucast ucast; + + if (!params->vlan_valid && !params->mac_valid) { + DP_NOTICE( + cdev, + "Tried configuring a 
unicast filter, but both MAC and VLAN are not set\n"); + return -EINVAL; + } + + memset(&ucast, 0, sizeof(ucast)); + switch (params->type) { + case QED_FILTER_XCAST_TYPE_ADD: + ucast.opcode = QED_FILTER_ADD; + break; + case QED_FILTER_XCAST_TYPE_DEL: + ucast.opcode = QED_FILTER_REMOVE; + break; + case QED_FILTER_XCAST_TYPE_REPLACE: + ucast.opcode = QED_FILTER_REPLACE; + break; + default: + DP_NOTICE(cdev, "Unknown unicast filter type %d\n", + params->type); + } + + if (params->vlan_valid && params->mac_valid) { + ucast.type = QED_FILTER_MAC_VLAN; + ether_addr_copy(ucast.mac, params->mac); + ucast.vlan = params->vlan; + } else if (params->mac_valid) { + ucast.type = QED_FILTER_MAC; + ether_addr_copy(ucast.mac, params->mac); + } else { + ucast.type = QED_FILTER_VLAN; + ucast.vlan = params->vlan; + } + + ucast.is_rx_filter = true; + ucast.is_tx_filter = true; + + return qed_filter_ucast_cmd(cdev, &ucast, QED_SPQ_MODE_CB, NULL); +} + +static int qed_configure_filter_mcast(struct qed_dev *cdev, + struct qed_filter_mcast_params *params) +{ + struct qed_filter_mcast mcast; + int i; + + memset(&mcast, 0, sizeof(mcast)); + switch (params->type) { + case QED_FILTER_XCAST_TYPE_ADD: + mcast.opcode = QED_FILTER_ADD; + break; + case QED_FILTER_XCAST_TYPE_DEL: + mcast.opcode = QED_FILTER_REMOVE; + break; + default: + DP_NOTICE(cdev, "Unknown multicast filter type %d\n", + params->type); + } + + mcast.num_mc_addrs = params->num; + for (i = 0; i < mcast.num_mc_addrs; i++) + ether_addr_copy(mcast.mac[i], params->mac[i]); + + return qed_filter_mcast_cmd(cdev, &mcast, + QED_SPQ_MODE_CB, NULL); +} + +static int qed_configure_filter(struct qed_dev *cdev, + struct qed_filter_params *params) +{ + enum qed_filter_rx_mode_type accept_flags; + + switch (params->type) { + case QED_FILTER_TYPE_UCAST: + return qed_configure_filter_ucast(cdev, ¶ms->filter.ucast); + case QED_FILTER_TYPE_MCAST: + return qed_configure_filter_mcast(cdev, ¶ms->filter.mcast); + case QED_FILTER_TYPE_RX_MODE: + accept_flags = params->filter.accept_flags; + return qed_configure_filter_rx_mode(cdev, accept_flags); + default: + DP_NOTICE(cdev, "Unknown filter type %d\n", + (int)params->type); + return -EINVAL; + } +} + +static int qed_fp_cqe_completion(struct qed_dev *dev, + u8 rss_id, + struct eth_slow_path_rx_cqe *cqe) +{ + return qed_eth_cqe_completion(&dev->hwfns[rss_id % dev->num_hwfns], + cqe); +} + static const struct qed_eth_ops qed_eth_ops_pass = { .common = &qed_common_ops_pass, .fill_dev_info = &qed_fill_eth_dev_info, + .vport_start = &qed_start_vport, + .vport_stop = &qed_stop_vport, + .vport_update = &qed_update_vport, + .q_rx_start = &qed_start_rxq, + .q_rx_stop = &qed_stop_rxq, + .q_tx_start = &qed_start_txq, + .q_tx_stop = &qed_stop_txq, + .filter_config = &qed_configure_filter, + .fastpath_stop = &qed_fastpath_stop, + .eth_cqe_completion = &qed_fp_cqe_completion, }; const struct qed_eth_ops *qed_get_eth_ops(u32 version) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index d0b1ff0ca3c8..1659418eec88 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -182,6 +182,8 @@ err0: int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info) { + struct qed_ptt *ptt; + memset(dev_info, 0, sizeof(struct qed_dev_info)); dev_info->num_hwfns = cdev->num_hwfns; @@ -199,6 +201,14 @@ int qed_fill_dev_info(struct qed_dev *cdev, qed_mcp_get_mfw_ver(cdev, &dev_info->mfw_rev); + ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if 
(ptt) { + qed_mcp_get_flash_size(QED_LEADING_HWFN(cdev), ptt, + &dev_info->flash_size); + + qed_ptt_release(QED_LEADING_HWFN(cdev), ptt); + } + return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 601d3f5daf13..8a5c3849bfe0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -516,6 +516,22 @@ int qed_mcp_drain(struct qed_hwfn *p_hwfn, return rc; } +int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *p_flash_size) +{ + u32 flash_size; + + flash_size = qed_rd(p_hwfn, p_ptt, MCP_REG_NVM_CFG4); + flash_size = (flash_size & MCP_REG_NVM_CFG4_FLASH_SIZE) >> + MCP_REG_NVM_CFG4_FLASH_SIZE_SHIFT; + flash_size = (1 << (flash_size + MCP_BYTES_PER_MBIT_SHIFT)); + + *p_flash_size = flash_size; + + return 0; +} + int qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 230c2550dc89..106d78a19937 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -89,6 +89,19 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +/** + * @brief Get the flash size value + * + * @param p_hwfn + * @param p_ptt + * @param p_flash_size - flash size in bytes to be filled. + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *p_flash_size); + /** * @brief Send driver version to MFW * diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 74657d227583..31a1f1eb4f56 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -32,8 +32,35 @@ struct qed_spq_comp_cb { void *cookie; }; +/** + * @brief qed_eth_cqe_completion - handles the completion of a + * ramrod on the cqe ring + * + * @param p_hwfn + * @param cqe + * + * @return int + */ +int qed_eth_cqe_completion(struct qed_hwfn *p_hwfn, + struct eth_slow_path_rx_cqe *cqe); + +/** + * @file + * + * QED Slow-hwfn queue interface + */ + union ramrod_data { struct pf_start_ramrod_data pf_start; + struct rx_queue_start_ramrod_data rx_queue_start; + struct rx_queue_update_ramrod_data rx_queue_update; + struct rx_queue_stop_ramrod_data rx_queue_stop; + struct tx_queue_start_ramrod_data tx_queue_start; + struct tx_queue_stop_ramrod_data tx_queue_stop; + struct vport_start_ramrod_data vport_start; + struct vport_stop_ramrod_data vport_stop; + struct vport_update_ramrod_data vport_update; + struct vport_filter_update_ramrod_data vport_filter_update; }; #define EQ_MAX_CREDIT 0xffffffff diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index f28ecb197309..7c0b8459666e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -373,6 +373,35 @@ void qed_eq_free(struct qed_hwfn *p_hwfn, kfree(p_eq); } +/*************************************************************************** +* CQE API - manipulate EQ functionality +***************************************************************************/ +static int qed_cqe_completion( + struct qed_hwfn *p_hwfn, + struct eth_slow_path_rx_cqe *cqe, + enum protocol_type protocol) +{ + /* @@@tmp - it's possible we'll eventually want to handle some + * actual commands that can arrive here, but for now this
is only + * used to complete the ramrod using the echo value on the cqe + */ + return qed_spq_completion(p_hwfn, cqe->echo, 0, NULL); +} + +int qed_eth_cqe_completion(struct qed_hwfn *p_hwfn, + struct eth_slow_path_rx_cqe *cqe) +{ + int rc; + + rc = qed_cqe_completion(p_hwfn, cqe, PROTOCOLID_ETH); + if (rc) + DP_NOTICE(p_hwfn, + "Failed to handle RXQ CQE [cmd 0x%02x]\n", + cqe->ramrod_cmd_id); + + return rc; +} + /*************************************************************************** * Slow hwfn Queue (spq) ***************************************************************************/ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index fbd8700f0b31..67a7b41b70aa 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -24,12 +24,132 @@ struct qed_dev_eth_info { u8 num_vlan_filters; }; +struct qed_update_vport_rss_params { + u16 rss_ind_table[128]; + u32 rss_key[10]; +}; + +struct qed_update_vport_params { + u8 vport_id; + u8 update_vport_active_flg; + u8 vport_active_flg; + u8 update_rss_flg; + struct qed_update_vport_rss_params rss_params; +}; + +struct qed_stop_rxq_params { + u8 rss_id; + u8 rx_queue_id; + u8 vport_id; + bool eq_completion_only; +}; + +struct qed_stop_txq_params { + u8 rss_id; + u8 tx_queue_id; +}; + +enum qed_filter_rx_mode_type { + QED_FILTER_RX_MODE_TYPE_REGULAR, + QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC, + QED_FILTER_RX_MODE_TYPE_PROMISC, +}; + +enum qed_filter_xcast_params_type { + QED_FILTER_XCAST_TYPE_ADD, + QED_FILTER_XCAST_TYPE_DEL, + QED_FILTER_XCAST_TYPE_REPLACE, +}; + +struct qed_filter_ucast_params { + enum qed_filter_xcast_params_type type; + u8 vlan_valid; + u16 vlan; + u8 mac_valid; + unsigned char mac[ETH_ALEN]; +}; + +struct qed_filter_mcast_params { + enum qed_filter_xcast_params_type type; + u8 num; + unsigned char mac[64][ETH_ALEN]; +}; + +union qed_filter_type_params { + enum qed_filter_rx_mode_type accept_flags; + struct qed_filter_ucast_params ucast; + struct qed_filter_mcast_params mcast; +}; + +enum qed_filter_type { + QED_FILTER_TYPE_UCAST, + QED_FILTER_TYPE_MCAST, + QED_FILTER_TYPE_RX_MODE, + QED_MAX_FILTER_TYPES, +}; + +struct qed_filter_params { + enum qed_filter_type type; + union qed_filter_type_params filter; +}; + +struct qed_queue_start_common_params { + u8 rss_id; + u8 queue_id; + u8 vport_id; + u16 sb; + u16 sb_idx; +}; + +struct qed_eth_cb_ops { + struct qed_common_cb_ops common; +}; + struct qed_eth_ops { const struct qed_common_ops *common; int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); + int (*vport_start)(struct qed_dev *cdev, + u8 vport_id, u16 mtu, + u8 drop_ttl0_flg, + u8 inner_vlan_removal_en_flg); + + int (*vport_stop)(struct qed_dev *cdev, + u8 vport_id); + + int (*vport_update)(struct qed_dev *cdev, + struct qed_update_vport_params *params); + + int (*q_rx_start)(struct qed_dev *cdev, + struct qed_queue_start_common_params *params, + u16 bd_max_bytes, + dma_addr_t bd_chain_phys_addr, + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, + void __iomem **pp_prod); + + int (*q_rx_stop)(struct qed_dev *cdev, + struct qed_stop_rxq_params *params); + + int (*q_tx_start)(struct qed_dev *cdev, + struct qed_queue_start_common_params *params, + dma_addr_t pbl_addr, + u16 pbl_size, + void __iomem **pp_doorbell); + + int (*q_tx_stop)(struct qed_dev *cdev, + struct qed_stop_txq_params *params); + + int (*filter_config)(struct qed_dev *cdev, + struct qed_filter_params *params); + + int (*fastpath_stop)(struct qed_dev *cdev); + + int 
(*eth_cqe_completion)(struct qed_dev *cdev, + u8 rss_id, + struct eth_slow_path_rx_cqe *cqe); }; const struct qed_eth_ops *qed_get_eth_ops(u32 version); -- cgit v1.2.3 From cc875c2e4f34e86c2f562f18b6e917cfcc560bcb Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Oct 2015 11:02:31 +0200 Subject: qed: Add link support Physical link is handled by the management Firmware. This patch lays the infrastructure for attention handling in the driver, as link change notifications arrive via asynchronous attentions, as well as the handling of such notifications. This patch also extends the API with the protocol drivers by adding registered callbacks which the protocol driver passes to qed in order to be notified of asynchronous events originating from the FW/HW. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 20 ++ drivers/net/ethernet/qlogic/qed/qed_dev.c | 106 ++++++++- drivers/net/ethernet/qlogic/qed/qed_int.c | 336 ++++++++++++++++++++++++++++- drivers/net/ethernet/qlogic/qed/qed_l2.c | 9 + drivers/net/ethernet/qlogic/qed/qed_main.c | 211 ++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 295 +++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 126 ++++++++++- include/linux/qed/qed_eth_if.h | 4 + 8 files changed, 1102 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index e03371d3e622..ca6cc8a7fc64 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -108,6 +108,18 @@ enum QED_FEATURE { QED_MAX_FEATURES, }; +enum QED_PORT_MODE { + QED_PORT_MODE_DE_2X40G, + QED_PORT_MODE_DE_2X50G, + QED_PORT_MODE_DE_1X100G, + QED_PORT_MODE_DE_4X10G_F, + QED_PORT_MODE_DE_4X10G_E, + QED_PORT_MODE_DE_4X20G, + QED_PORT_MODE_DE_1X40G, + QED_PORT_MODE_DE_2X25G, + QED_PORT_MODE_DE_1X25G +}; + struct qed_hw_info { /* PCI personality */ enum qed_pci_personality personality; @@ -404,6 +416,13 @@ struct qed_dev { u8 protocol; #define IS_QED_ETH_IF(cdev) ((cdev)->protocol == QED_PROTOCOL_ETH) + /* Callbacks to protocol driver */ + union { + struct qed_common_cb_ops *common; + struct qed_eth_cb_ops *eth; + } protocol_ops; + void *ops_cookie; + const struct firmware *firmware; }; @@ -453,6 +472,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, /* Prototypes */ int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info); +void qed_link_update(struct qed_hwfn *hwfn); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 3d1bdbf9ade1..7fd3d78d94f1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1039,8 +1039,9 @@ static void qed_hw_get_resc(struct qed_hwfn *p_hwfn) static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, nvm_cfg_addr; - u32 val; + u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg; + u32 port_cfg_addr, link_temp, val, nvm_cfg_addr; + struct qed_mcp_link_params *link; /* Read global nvm_cfg address */ nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0); @@ -1060,6 +1061,48 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, offsetof(struct nvm_cfg1_glob, pci_id); p_hwfn->hw_info.vendor_id = qed_rd(p_hwfn, p_ptt, addr) &
NVM_CFG1_GLOB_VENDOR_ID_MASK; + + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, glob) + + offsetof(struct nvm_cfg1_glob, core_cfg); + + core_cfg = qed_rd(p_hwfn, p_ptt, addr); + + switch ((core_cfg & NVM_CFG1_GLOB_NETWORK_PORT_MODE_MASK) >> + NVM_CFG1_GLOB_NETWORK_PORT_MODE_OFFSET) { + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_2X40G: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_2X40G; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_2X50G: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_2X50G; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_1X100G: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X100G; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_4X10G_F: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_4X10G_F; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_4X10G_E: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_4X10G_E; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_4X20G: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_4X20G; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_1X40G: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X40G; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_2X25G: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_2X25G; + break; + case NVM_CFG1_GLOB_NETWORK_PORT_MODE_DE_1X25G: + p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X25G; + break; + default: + DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", + core_cfg); + break; + } + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + offsetof(struct nvm_cfg1, func[MCP_PF_ID(p_hwfn)]) + offsetof(struct nvm_cfg1_func, device_id); @@ -1075,6 +1118,65 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, NVM_CFG1_FUNC_VENDOR_DEVICE_ID_OFFSET; } + /* Read default link configuration */ + link = &p_hwfn->mcp_info->link_input; + port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]); + link_temp = qed_rd(p_hwfn, p_ptt, + port_cfg_addr + + offsetof(struct nvm_cfg1_port, speed_cap_mask)); + link->speed.advertised_speeds = + link_temp & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK; + + p_hwfn->mcp_info->link_capabilities.speed_capabilities = + link->speed.advertised_speeds; + + link_temp = qed_rd(p_hwfn, p_ptt, + port_cfg_addr + + offsetof(struct nvm_cfg1_port, link_settings)); + switch ((link_temp & NVM_CFG1_PORT_DRV_LINK_SPEED_MASK) >> + NVM_CFG1_PORT_DRV_LINK_SPEED_OFFSET) { + case NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG: + link->speed.autoneg = true; + break; + case NVM_CFG1_PORT_DRV_LINK_SPEED_1G: + link->speed.forced_speed = 1000; + break; + case NVM_CFG1_PORT_DRV_LINK_SPEED_10G: + link->speed.forced_speed = 10000; + break; + case NVM_CFG1_PORT_DRV_LINK_SPEED_25G: + link->speed.forced_speed = 25000; + break; + case NVM_CFG1_PORT_DRV_LINK_SPEED_40G: + link->speed.forced_speed = 40000; + break; + case NVM_CFG1_PORT_DRV_LINK_SPEED_50G: + link->speed.forced_speed = 50000; + break; + case NVM_CFG1_PORT_DRV_LINK_SPEED_100G: + link->speed.forced_speed = 100000; + break; + default: + DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", + link_temp); + } + + link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK; + link_temp >>= NVM_CFG1_PORT_DRV_FLOW_CONTROL_OFFSET; + link->pause.autoneg = !!(link_temp & + NVM_CFG1_PORT_DRV_FLOW_CONTROL_AUTONEG); + link->pause.forced_rx = !!(link_temp & + NVM_CFG1_PORT_DRV_FLOW_CONTROL_RX); + link->pause.forced_tx = !!(link_temp & + NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX); + link->loopback_mode = 0; + + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Read default link: Speed 0x%08x, Adv. 
Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x\n", + link->speed.forced_speed, link->speed.advertised_speeds, + link->speed.autoneg, link->pause.autoneg); + /* Read Multi-function information from shmem */ addr = MCP_REG_SCRATCH + nvm_cfg1_offset + offsetof(struct nvm_cfg1, glob) + diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 37d926a5fae5..2e399b6137a2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -39,10 +39,214 @@ struct qed_sb_sp_info { struct qed_pi_info pi_info_arr[PIS_PER_SB]; }; +#define SB_ATTN_ALIGNED_SIZE(p_hwfn) \ + ALIGNED_TYPE_SIZE(struct atten_status_block, p_hwfn) + +#define ATTN_STATE_BITS (0xfff) +#define ATTN_BITS_MASKABLE (0x3ff) +struct qed_sb_attn_info { + /* Virtual & Physical address of the SB */ + struct atten_status_block *sb_attn; + dma_addr_t sb_phys; + + /* Last seen running index */ + u16 index; + + /* Previously asserted attentions, which have not yet been deasserted */ + u16 known_attn; + + /* Cleanup address for the link's general hw attention */ + u32 mfw_attn_addr; +}; + +static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn, + struct qed_sb_attn_info *p_sb_desc) +{ + u16 rc = 0; + u16 index; + + /* Make certain the HW write took effect */ + mmiowb(); + + index = le16_to_cpu(p_sb_desc->sb_attn->sb_index); + if (p_sb_desc->index != index) { + p_sb_desc->index = index; + rc = QED_SB_ATT_IDX; + } + + /* Make certain we got a consistent view with HW */ + mmiowb(); + + return rc; +} + +/** + * @brief qed_int_assertion - handles asserted attention bits + * + * @param p_hwfn + * @param asserted_bits newly asserted bits + * @return int + */ +static int qed_int_assertion(struct qed_hwfn *p_hwfn, + u16 asserted_bits) +{ + struct qed_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn; + u32 igu_mask; + + /* Mask the source of the attention in the IGU */ + igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + IGU_REG_ATTENTION_ENABLE); + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "IGU mask: 0x%08x --> 0x%08x\n", + igu_mask, igu_mask & ~(asserted_bits & ATTN_BITS_MASKABLE)); + igu_mask &= ~(asserted_bits & ATTN_BITS_MASKABLE); + qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE, igu_mask); + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "inner known ATTN state: 0x%04x --> 0x%04x\n", + sb_attn_sw->known_attn, + sb_attn_sw->known_attn | asserted_bits); + sb_attn_sw->known_attn |= asserted_bits; + + /* Handle MCP events */ + if (asserted_bits & 0x100) { + qed_mcp_handle_events(p_hwfn, p_hwfn->p_dpc_ptt); + /* Clean the MCP attention */ + qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, + sb_attn_sw->mfw_attn_addr, 0); + } + + DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview + + GTT_BAR0_MAP_REG_IGU_CMD + + ((IGU_CMD_ATTN_BIT_SET_UPPER - + IGU_CMD_INT_ACK_BASE) << 3), + (u32)asserted_bits); + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "set cmd IGU: 0x%04x\n", + asserted_bits); + + return 0; +} + +/** + * @brief - handles deassertion of previously asserted attentions.
+ * + * @param p_hwfn + * @param deasserted_bits - newly deasserted bits + * @return int + * + */ +static int qed_int_deassertion(struct qed_hwfn *p_hwfn, + u16 deasserted_bits) +{ + struct qed_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn; + u32 aeu_mask; + + if (deasserted_bits != 0x100) + DP_ERR(p_hwfn, "Unexpected - non-link deassertion\n"); + + /* Clear IGU indication for the deasserted bits */ + DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview + + GTT_BAR0_MAP_REG_IGU_CMD + + ((IGU_CMD_ATTN_BIT_CLR_UPPER - + IGU_CMD_INT_ACK_BASE) << 3), + ~((u32)deasserted_bits)); + + /* Unmask deasserted attentions in IGU */ + aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + IGU_REG_ATTENTION_ENABLE); + aeu_mask |= (deasserted_bits & ATTN_BITS_MASKABLE); + qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE, aeu_mask); + + /* Clear deassertion from inner state */ + sb_attn_sw->known_attn &= ~deasserted_bits; + + return 0; +} + +static int qed_int_attentions(struct qed_hwfn *p_hwfn) +{ + struct qed_sb_attn_info *p_sb_attn_sw = p_hwfn->p_sb_attn; + struct atten_status_block *p_sb_attn = p_sb_attn_sw->sb_attn; + u32 attn_bits = 0, attn_acks = 0; + u16 asserted_bits, deasserted_bits; + __le16 index; + int rc = 0; + + /* Read current attention bits/acks - safeguard against attentions + * by guaranteeing work on a synchronized timeframe + */ + do { + index = p_sb_attn->sb_index; + attn_bits = le32_to_cpu(p_sb_attn->atten_bits); + attn_acks = le32_to_cpu(p_sb_attn->atten_ack); + } while (index != p_sb_attn->sb_index); + p_sb_attn->sb_index = index; + + /* Attention / Deassertion are meaningful (and in correct state) + * only when they differ and are consistent with the known state - + * deassertion when previous attention & current ack, and assertion + * when current attention with no previous attention + */ + asserted_bits = (attn_bits & ~attn_acks & ATTN_STATE_BITS) & + ~p_sb_attn_sw->known_attn; + deasserted_bits = (~attn_bits & attn_acks & ATTN_STATE_BITS) & + p_sb_attn_sw->known_attn; + + if ((asserted_bits & ~0x100) || (deasserted_bits & ~0x100)) { + DP_INFO(p_hwfn, + "Attention: Index: 0x%04x, Bits: 0x%08x, Acks: 0x%08x, asserted: 0x%04x, De-asserted 0x%04x [Prev. known: 0x%04x]\n", + index, attn_bits, attn_acks, asserted_bits, + deasserted_bits, p_sb_attn_sw->known_attn); + } else if (asserted_bits == 0x100) { + DP_INFO(p_hwfn, + "MFW indication via attention\n"); + } else { + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "MFW indication [deassertion]\n"); + } + + if (asserted_bits) { + rc = qed_int_assertion(p_hwfn, asserted_bits); + if (rc) + return rc; + } + + if (deasserted_bits) { + rc = qed_int_deassertion(p_hwfn, deasserted_bits); + if (rc) + return rc; + } + + return rc; +} + +static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn, + void __iomem *igu_addr, + u32 ack_cons) +{ + struct igu_prod_cons_update igu_ack = { 0 }; + + igu_ack.sb_id_and_flags = + ((ack_cons << IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT) | + (1 << IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT) | + (IGU_INT_NOP << IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT) | + (IGU_SEG_ACCESS_ATTN << + IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT)); + + DIRECT_REG_WR(igu_addr, igu_ack.sb_id_and_flags); + + /* Both segments (interrupts & acks) are written to the same address; + * Need to guarantee all commands will be received (in-order) by HW.
+ */ + mmiowb(); + barrier(); +} + void qed_int_sp_dpc(unsigned long hwfn_cookie) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)hwfn_cookie; struct qed_pi_info *pi_info = NULL; + struct qed_sb_attn_info *sb_attn; struct qed_sb_info *sb_info; int arr_size; u16 rc = 0; @@ -65,6 +269,12 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) return; } + if (!p_hwfn->p_sb_attn) { + DP_ERR(p_hwfn->cdev, "DPC called - no p_sb_attn"); + return; + } + sb_attn = p_hwfn->p_sb_attn; + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "DPC Called! (hwfn %p %d)\n", p_hwfn, p_hwfn->my_id); @@ -87,6 +297,19 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) tmp_index, sb_info->sb_ack); } + if (!sb_attn || !sb_attn->sb_attn) { + DP_ERR( + p_hwfn->cdev, + "Attentions Status block is NULL - cannot check for new attentions!\n"); + } else { + u16 tmp_index = sb_attn->index; + + rc |= qed_attn_update_idx(p_hwfn, sb_attn); + DP_VERBOSE(p_hwfn->cdev, NETIF_MSG_INTR, + "Attention indices: 0x%08x --> 0x%08x\n", + tmp_index, sb_attn->index); + } + /* Check if we expect interrupts at this time. if not just ack them */ if (!(rc & QED_SB_EVENT_MASK)) { qed_sb_ack(sb_info, IGU_INT_ENABLE, 1); @@ -100,6 +323,9 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) return; } + if (rc & QED_SB_ATT_IDX) + qed_int_attentions(p_hwfn); + if (rc & QED_SB_IDX) { int pi; @@ -111,9 +337,97 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) } } + if (sb_attn && (rc & QED_SB_ATT_IDX)) + /* This should be done before the interrupts are enabled, + * since otherwise a new attention will be generated. + */ + qed_sb_ack_attn(p_hwfn, sb_info->igu_addr, sb_attn->index); + qed_sb_ack(sb_info, IGU_INT_ENABLE, 1); } +static void qed_int_sb_attn_free(struct qed_hwfn *p_hwfn) +{ + struct qed_dev *cdev = p_hwfn->cdev; + struct qed_sb_attn_info *p_sb = p_hwfn->p_sb_attn; + + if (p_sb) { + if (p_sb->sb_attn) + dma_free_coherent(&cdev->pdev->dev, + SB_ATTN_ALIGNED_SIZE(p_hwfn), + p_sb->sb_attn, + p_sb->sb_phys); + kfree(p_sb); + } +} + +static void qed_int_sb_attn_setup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_sb_attn_info *sb_info = p_hwfn->p_sb_attn; + + memset(sb_info->sb_attn, 0, sizeof(*sb_info->sb_attn)); + + sb_info->index = 0; + sb_info->known_attn = 0; + + /* Configure Attention Status Block in IGU */ + qed_wr(p_hwfn, p_ptt, IGU_REG_ATTN_MSG_ADDR_L, + lower_32_bits(p_hwfn->p_sb_attn->sb_phys)); + qed_wr(p_hwfn, p_ptt, IGU_REG_ATTN_MSG_ADDR_H, + upper_32_bits(p_hwfn->p_sb_attn->sb_phys)); +} + +static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + void *sb_virt_addr, + dma_addr_t sb_phy_addr) +{ + struct qed_sb_attn_info *sb_info = p_hwfn->p_sb_attn; + + sb_info->sb_attn = sb_virt_addr; + sb_info->sb_phys = sb_phy_addr; + + /* Set the address of cleanup for the mcp attention */ + sb_info->mfw_attn_addr = (p_hwfn->rel_pf_id << 3) + + MISC_REG_AEU_GENERAL_ATTN_0; + + qed_int_sb_attn_setup(p_hwfn, p_ptt); +} + +static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + struct qed_sb_attn_info *p_sb; + void *p_virt; + dma_addr_t p_phys = 0; + + /* SB struct */ + p_sb = kmalloc(sizeof(*p_sb), GFP_ATOMIC); + if (!p_sb) { + DP_NOTICE(cdev, "Failed to allocate `struct qed_sb_attn_info'\n"); + return -ENOMEM; + } + + /* SB ring */ + p_virt = dma_alloc_coherent(&cdev->pdev->dev, + SB_ATTN_ALIGNED_SIZE(p_hwfn), + &p_phys, GFP_KERNEL); + + if (!p_virt) { + DP_NOTICE(cdev, "Failed to allocate status block (attentions)\n"); + kfree(p_sb); + return -ENOMEM; + 
} + + /* Attention setup */ + p_hwfn->p_sb_attn = p_sb; + qed_int_sb_attn_init(p_hwfn, p_ptt, p_virt, p_phys); + + return 0; +} + /* coalescing timeout = timeset << (timer_res + 1) */ #define QED_CAU_DEF_RX_USECS 24 #define QED_CAU_DEF_TX_USECS 48 @@ -394,6 +708,12 @@ static void qed_int_sp_sb_setup(struct qed_hwfn *p_hwfn, else DP_NOTICE(p_hwfn->cdev, "Failed to setup Slow path status block - NULL pointer\n"); + + if (p_hwfn->p_sb_attn) + qed_int_sb_attn_setup(p_hwfn, p_ptt); + else + DP_NOTICE(p_hwfn->cdev, + "Failed to setup attentions status block - NULL pointer\n"); } int qed_int_register_cb(struct qed_hwfn *p_hwfn, @@ -444,7 +764,7 @@ void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_int_mode int_mode) { - u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN; + u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN | IGU_PF_CONF_ATTN_BIT_EN; p_hwfn->cdev->int_mode = int_mode; switch (p_hwfn->cdev->int_mode) { @@ -484,8 +804,15 @@ void qed_int_igu_enable(struct qed_hwfn *p_hwfn, /* Enable interrupt Generation */ qed_int_igu_enable_int(p_hwfn, p_ptt, int_mode); + /* Configure AEU signal change to produce attentions for link */ + qed_wr(p_hwfn, p_ptt, IGU_REG_LEADING_EDGE_LATCH, 0xfff); + qed_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0xfff); + /* Flush the writes to IGU */ mmiowb(); + + /* Unmask AEU signals toward IGU */ + qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff); } void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, @@ -770,13 +1097,18 @@ int qed_int_alloc(struct qed_hwfn *p_hwfn, DP_ERR(p_hwfn->cdev, "Failed to allocate sp sb mem\n"); return rc; } - + rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt); + if (rc) { + DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n"); + return rc; + } return rc; } void qed_int_free(struct qed_hwfn *p_hwfn) { qed_int_sp_sb_free(p_hwfn); + qed_int_sb_attn_free(p_hwfn); qed_int_sp_dpc_free(p_hwfn); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 2772573593a4..7049e4139d3c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1259,6 +1259,14 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, return 0; } +static void qed_register_eth_ops(struct qed_dev *cdev, + struct qed_eth_cb_ops *ops, + void *cookie) +{ + cdev->protocol_ops.eth = ops; + cdev->ops_cookie = cookie; +} + static int qed_start_vport(struct qed_dev *cdev, u8 vport_id, u16 mtu, @@ -1661,6 +1669,7 @@ static int qed_fp_cqe_completion(struct qed_dev *dev, static const struct qed_eth_ops qed_eth_ops_pass = { .common = &qed_common_ops_pass, .fill_dev_info = &qed_fill_eth_dev_info, + .register_ops = &qed_register_eth_ops, .vport_start = &qed_start_vport, .vport_stop = &qed_stop_vport, .vport_update = &qed_update_vport, diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 1659418eec88..947c7af72b25 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -904,6 +904,215 @@ static u32 qed_sb_release(struct qed_dev *cdev, return rc; } +static int qed_set_link(struct qed_dev *cdev, + struct qed_link_params *params) +{ + struct qed_hwfn *hwfn; + struct qed_mcp_link_params *link_params; + struct qed_ptt *ptt; + int rc; + + if (!cdev) + return -ENODEV; + + /* The link should be set only once per PF */ + hwfn = &cdev->hwfns[0]; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EBUSY; + + link_params = qed_mcp_get_link_params(hwfn); + if (params->override_flags & 
QED_LINK_OVERRIDE_SPEED_AUTONEG) + link_params->speed.autoneg = params->autoneg; + if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) { + link_params->speed.advertised_speeds = 0; + if ((params->adv_speeds & SUPPORTED_1000baseT_Half) || + (params->adv_speeds & SUPPORTED_1000baseT_Full)) + link_params->speed.advertised_speeds |= + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + if (params->adv_speeds & SUPPORTED_10000baseKR_Full) + link_params->speed.advertised_speeds |= + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + if (params->adv_speeds & SUPPORTED_40000baseLR4_Full) + link_params->speed.advertised_speeds |= + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; + if (params->adv_speeds & 0) + link_params->speed.advertised_speeds |= + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G; + if (params->adv_speeds & 0) + link_params->speed.advertised_speeds |= + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_100G; + } + if (params->override_flags & QED_LINK_OVERRIDE_SPEED_FORCED_SPEED) + link_params->speed.forced_speed = params->forced_speed; + + rc = qed_mcp_set_link(hwfn, ptt, params->link_up); + + qed_ptt_release(hwfn, ptt); + + return rc; +} + +static int qed_get_port_type(u32 media_type) +{ + int port_type; + + switch (media_type) { + case MEDIA_SFPP_10G_FIBER: + case MEDIA_SFP_1G_FIBER: + case MEDIA_XFP_FIBER: + case MEDIA_KR: + port_type = PORT_FIBRE; + break; + case MEDIA_DA_TWINAX: + port_type = PORT_DA; + break; + case MEDIA_BASE_T: + port_type = PORT_TP; + break; + case MEDIA_NOT_PRESENT: + port_type = PORT_NONE; + break; + case MEDIA_UNSPECIFIED: + default: + port_type = PORT_OTHER; + break; + } + return port_type; +} + +static void qed_fill_link(struct qed_hwfn *hwfn, + struct qed_link_output *if_link) +{ + struct qed_mcp_link_params params; + struct qed_mcp_link_state link; + struct qed_mcp_link_capabilities link_caps; + u32 media_type; + + memset(if_link, 0, sizeof(*if_link)); + + /* Prepare source inputs */ + memcpy(¶ms, qed_mcp_get_link_params(hwfn), sizeof(params)); + memcpy(&link, qed_mcp_get_link_state(hwfn), sizeof(link)); + memcpy(&link_caps, qed_mcp_get_link_capabilities(hwfn), + sizeof(link_caps)); + + /* Set the link parameters to pass to protocol driver */ + if (link.link_up) + if_link->link_up = true; + + /* TODO - at the moment assume supported and advertised speed equal */ + if_link->supported_caps = SUPPORTED_FIBRE; + if (params.speed.autoneg) + if_link->supported_caps |= SUPPORTED_Autoneg; + if (params.pause.autoneg || + (params.pause.forced_rx && params.pause.forced_tx)) + if_link->supported_caps |= SUPPORTED_Asym_Pause; + if (params.pause.autoneg || params.pause.forced_rx || + params.pause.forced_tx) + if_link->supported_caps |= SUPPORTED_Pause; + + if_link->advertised_caps = if_link->supported_caps; + if (params.speed.advertised_speeds & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) + if_link->advertised_caps |= SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full; + if (params.speed.advertised_speeds & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) + if_link->advertised_caps |= SUPPORTED_10000baseKR_Full; + if (params.speed.advertised_speeds & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + if_link->advertised_caps |= SUPPORTED_40000baseLR4_Full; + if (params.speed.advertised_speeds & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + if_link->advertised_caps |= 0; + if (params.speed.advertised_speeds & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_100G) + if_link->advertised_caps |= 0; + + if (link_caps.speed_capabilities & + 
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) + if_link->supported_caps |= SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full; + if (link_caps.speed_capabilities & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) + if_link->supported_caps |= SUPPORTED_10000baseKR_Full; + if (link_caps.speed_capabilities & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + if_link->supported_caps |= SUPPORTED_40000baseLR4_Full; + if (link_caps.speed_capabilities & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + if_link->supported_caps |= 0; + if (link_caps.speed_capabilities & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_100G) + if_link->supported_caps |= 0; + + if (link.link_up) + if_link->speed = link.speed; + + /* TODO - fill duplex properly */ + if_link->duplex = DUPLEX_FULL; + qed_mcp_get_media_type(hwfn->cdev, &media_type); + if_link->port = qed_get_port_type(media_type); + + if_link->autoneg = params.speed.autoneg; + + if (params.pause.autoneg) + if_link->pause_config |= QED_LINK_PAUSE_AUTONEG_ENABLE; + if (params.pause.forced_rx) + if_link->pause_config |= QED_LINK_PAUSE_RX_ENABLE; + if (params.pause.forced_tx) + if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE; + + /* Link partner capabilities */ + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_1G_HD) + if_link->lp_caps |= SUPPORTED_1000baseT_Half; + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_1G_FD) + if_link->lp_caps |= SUPPORTED_1000baseT_Full; + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_10G) + if_link->lp_caps |= SUPPORTED_10000baseKR_Full; + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_40G) + if_link->lp_caps |= SUPPORTED_40000baseLR4_Full; + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_50G) + if_link->lp_caps |= 0; + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_100G) + if_link->lp_caps |= 0; + + if (link.an_complete) + if_link->lp_caps |= SUPPORTED_Autoneg; + + if (link.partner_adv_pause) + if_link->lp_caps |= SUPPORTED_Pause; + if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE || + link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE) + if_link->lp_caps |= SUPPORTED_Asym_Pause; +} + +static void qed_get_current_link(struct qed_dev *cdev, + struct qed_link_output *if_link) +{ + qed_fill_link(&cdev->hwfns[0], if_link); +} + +void qed_link_update(struct qed_hwfn *hwfn) +{ + void *cookie = hwfn->cdev->ops_cookie; + struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common; + struct qed_link_output if_link; + + qed_fill_link(hwfn, &if_link); + + if (IS_LEAD_HWFN(hwfn) && cookie) + op->link_update(cookie, &if_link); +} + static int qed_drain(struct qed_dev *cdev) { struct qed_hwfn *hwfn; @@ -940,6 +1149,8 @@ const struct qed_common_ops qed_common_ops_pass = { .sb_release = &qed_sb_release, .simd_handler_config = &qed_simd_handler_config, .simd_handler_clean = &qed_simd_handler_clean, + .set_link = &qed_set_link, + .get_link = &qed_get_current_link, .drain = &qed_drain, .update_msglvl = &qed_init_dp, .chain_alloc = &qed_chain_alloc, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 8a5c3849bfe0..20d048cdcb88 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -365,6 +365,252 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, return 0; } +static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool b_reset) +{ + struct qed_mcp_link_state *p_link; + u32 status = 0; + + p_link = &p_hwfn->mcp_info->link_output; + memset(p_link, 0, 
sizeof(*p_link)); + if (!b_reset) { + status = qed_rd(p_hwfn, p_ptt, + p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, link_status)); + DP_VERBOSE(p_hwfn, (NETIF_MSG_LINK | QED_MSG_SP), + "Received link update [0x%08x] from mfw [Addr 0x%x]\n", + status, + (u32)(p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, + link_status))); + } else { + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Resetting link indications\n"); + return; + } + + p_link->link_up = !!(status & LINK_STATUS_LINK_UP); + + p_link->full_duplex = true; + switch ((status & LINK_STATUS_SPEED_AND_DUPLEX_MASK)) { + case LINK_STATUS_SPEED_AND_DUPLEX_100G: + p_link->speed = 100000; + break; + case LINK_STATUS_SPEED_AND_DUPLEX_50G: + p_link->speed = 50000; + break; + case LINK_STATUS_SPEED_AND_DUPLEX_40G: + p_link->speed = 40000; + break; + case LINK_STATUS_SPEED_AND_DUPLEX_25G: + p_link->speed = 25000; + break; + case LINK_STATUS_SPEED_AND_DUPLEX_20G: + p_link->speed = 20000; + break; + case LINK_STATUS_SPEED_AND_DUPLEX_10G: + p_link->speed = 10000; + break; + case LINK_STATUS_SPEED_AND_DUPLEX_1000THD: + p_link->full_duplex = false; + /* Fall-through */ + case LINK_STATUS_SPEED_AND_DUPLEX_1000TFD: + p_link->speed = 1000; + break; + default: + p_link->speed = 0; + } + + /* Correct speed according to bandwidth allocation */ + if (p_hwfn->mcp_info->func_info.bandwidth_max && p_link->speed) { + p_link->speed = p_link->speed * + p_hwfn->mcp_info->func_info.bandwidth_max / + 100; + qed_init_pf_rl(p_hwfn, p_ptt, p_hwfn->rel_pf_id, + p_link->speed); + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Configured MAX bandwidth to be %08x Mb/sec\n", + p_link->speed); + } + + p_link->an = !!(status & LINK_STATUS_AUTO_NEGOTIATE_ENABLED); + p_link->an_complete = !!(status & + LINK_STATUS_AUTO_NEGOTIATE_COMPLETE); + p_link->parallel_detection = !!(status & + LINK_STATUS_PARALLEL_DETECTION_USED); + p_link->pfc_enabled = !!(status & LINK_STATUS_PFC_ENABLED); + + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE) ? + QED_LINK_PARTNER_SPEED_1G_FD : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE) ? + QED_LINK_PARTNER_SPEED_1G_HD : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_10G_CAPABLE) ? + QED_LINK_PARTNER_SPEED_10G : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ? + QED_LINK_PARTNER_SPEED_20G : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ? + QED_LINK_PARTNER_SPEED_40G : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_50G_CAPABLE) ? + QED_LINK_PARTNER_SPEED_50G : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_100G_CAPABLE) ? 
+ QED_LINK_PARTNER_SPEED_100G : 0; + + p_link->partner_tx_flow_ctrl_en = + !!(status & LINK_STATUS_TX_FLOW_CONTROL_ENABLED); + p_link->partner_rx_flow_ctrl_en = + !!(status & LINK_STATUS_RX_FLOW_CONTROL_ENABLED); + + switch (status & LINK_STATUS_LINK_PARTNER_FLOW_CONTROL_MASK) { + case LINK_STATUS_LINK_PARTNER_SYMMETRIC_PAUSE: + p_link->partner_adv_pause = QED_LINK_PARTNER_SYMMETRIC_PAUSE; + break; + case LINK_STATUS_LINK_PARTNER_ASYMMETRIC_PAUSE: + p_link->partner_adv_pause = QED_LINK_PARTNER_ASYMMETRIC_PAUSE; + break; + case LINK_STATUS_LINK_PARTNER_BOTH_PAUSE: + p_link->partner_adv_pause = QED_LINK_PARTNER_BOTH_PAUSE; + break; + default: + p_link->partner_adv_pause = 0; + } + + p_link->sfp_tx_fault = !!(status & LINK_STATUS_SFP_TX_FAULT); + + qed_link_update(p_hwfn); +} + +int qed_mcp_set_link(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool b_up) +{ + struct qed_mcp_link_params *params = &p_hwfn->mcp_info->link_input; + u32 param = 0, reply = 0, cmd; + struct pmm_phy_cfg phy_cfg; + int rc = 0; + u32 i; + + if (!qed_mcp_is_init(p_hwfn)) { + DP_NOTICE(p_hwfn, "MFW is not initialized !\n"); + return -EBUSY; + } + + /* Set the shmem configuration according to params */ + memset(&phy_cfg, 0, sizeof(phy_cfg)); + cmd = b_up ? DRV_MSG_CODE_INIT_PHY : DRV_MSG_CODE_LINK_RESET; + if (!params->speed.autoneg) + phy_cfg.speed = params->speed.forced_speed; + phy_cfg.pause |= (params->pause.autoneg) ? PMM_PAUSE_AUTONEG : 0; + phy_cfg.pause |= (params->pause.forced_rx) ? PMM_PAUSE_RX : 0; + phy_cfg.pause |= (params->pause.forced_tx) ? PMM_PAUSE_TX : 0; + phy_cfg.adv_speed = params->speed.advertised_speeds; + phy_cfg.loopback_mode = params->loopback_mode; + + /* Write the requested configuration to shmem */ + for (i = 0; i < sizeof(phy_cfg); i += 4) + qed_wr(p_hwfn, p_ptt, + p_hwfn->mcp_info->drv_mb_addr + + offsetof(struct public_drv_mb, union_data) + i, + ((u32 *)&phy_cfg)[i >> 2]); + + if (b_up) { + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Configuring Link: Speed 0x%08x, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x, features 0x%08x\n", + phy_cfg.speed, + phy_cfg.pause, + phy_cfg.adv_speed, + phy_cfg.loopback_mode, + phy_cfg.feature_config_flags); + } else { + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Resetting link\n"); + } + + DP_VERBOSE(p_hwfn, QED_MSG_SP, "fw_seq 0x%08x, drv_pulse 0x%x\n", + p_hwfn->mcp_info->drv_mb_seq, + p_hwfn->mcp_info->drv_pulse_seq); + + /* Load Request */ + rc = qed_mcp_cmd(p_hwfn, p_ptt, cmd, 0, &reply, ¶m); + + /* if mcp fails to respond we must abort */ + if (rc) { + DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + return rc; + } + + /* Reset the link status if needed */ + if (!b_up) + qed_mcp_handle_link_change(p_hwfn, p_ptt, true); + + return 0; +} + +int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_mcp_info *info = p_hwfn->mcp_info; + int rc = 0; + bool found = false; + u16 i; + + DP_VERBOSE(p_hwfn, QED_MSG_SP, "Received message from MFW\n"); + + /* Read Messages from MFW */ + qed_mcp_read_mb(p_hwfn, p_ptt); + + /* Compare current messages to old ones */ + for (i = 0; i < info->mfw_mb_length; i++) { + if (info->mfw_mb_cur[i] == info->mfw_mb_shadow[i]) + continue; + + found = true; + + DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, + "Msg [%d] - old CMD 0x%02x, new CMD 0x%02x\n", + i, info->mfw_mb_shadow[i], info->mfw_mb_cur[i]); + + switch (i) { + case MFW_DRV_MSG_LINK_CHANGE: + qed_mcp_handle_link_change(p_hwfn, p_ptt, false); + break; + default: + DP_NOTICE(p_hwfn, "Unimplemented MFW message %d\n", i); + rc = -EINVAL; + } + } 
+ + /* ACK everything */ + for (i = 0; i < MFW_DRV_MSG_MAX_DWORDS(info->mfw_mb_length); i++) { + __be32 val = cpu_to_be32(((u32 *)info->mfw_mb_cur)[i]); + + /* MFW expect answer in BE, so we force write in that format */ + qed_wr(p_hwfn, p_ptt, + info->mfw_mb_addr + sizeof(u32) + + MFW_DRV_MSG_MAX_DWORDS(info->mfw_mb_length) * + sizeof(u32) + i * sizeof(u32), + (__force u32)val); + } + + if (!found) { + DP_NOTICE(p_hwfn, + "Received an MFW message indication but no new message!\n"); + rc = -EINVAL; + } + + /* Copy the new mfw messages into the shadow */ + memcpy(info->mfw_mb_shadow, info->mfw_mb_cur, info->mfw_mb_length); + + return rc; +} + int qed_mcp_get_mfw_ver(struct qed_dev *cdev, u32 *p_mfw_ver) { @@ -389,6 +635,31 @@ int qed_mcp_get_mfw_ver(struct qed_dev *cdev, return 0; } +int qed_mcp_get_media_type(struct qed_dev *cdev, + u32 *p_media_type) +{ + struct qed_hwfn *p_hwfn = &cdev->hwfns[0]; + struct qed_ptt *p_ptt; + + if (!qed_mcp_is_init(p_hwfn)) { + DP_NOTICE(p_hwfn, "MFW is not initialized !\n"); + return -EBUSY; + } + + *p_media_type = MEDIA_UNSPECIFIED; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EBUSY; + + *p_media_type = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, media_type)); + + qed_ptt_release(p_hwfn, p_ptt); + + return 0; +} + static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct public_func *p_data, @@ -500,6 +771,30 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, return 0; } +struct qed_mcp_link_params +*qed_mcp_get_link_params(struct qed_hwfn *p_hwfn) +{ + if (!p_hwfn || !p_hwfn->mcp_info) + return NULL; + return &p_hwfn->mcp_info->link_input; +} + +struct qed_mcp_link_state +*qed_mcp_get_link_state(struct qed_hwfn *p_hwfn) +{ + if (!p_hwfn || !p_hwfn->mcp_info) + return NULL; + return &p_hwfn->mcp_info->link_output; +} + +struct qed_mcp_link_capabilities +*qed_mcp_get_link_capabilities(struct qed_hwfn *p_hwfn) +{ + if (!p_hwfn || !p_hwfn->mcp_info) + return NULL; + return &p_hwfn->mcp_info->link_capabilities; +} + int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 106d78a19937..dbaae586b4a7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -15,6 +15,59 @@ #include #include "qed_hsi.h" +struct qed_mcp_link_speed_params { + bool autoneg; + u32 advertised_speeds; /* bitmask of DRV_SPEED_CAPABILITY */ + u32 forced_speed; /* In Mb/s */ +}; + +struct qed_mcp_link_pause_params { + bool autoneg; + bool forced_rx; + bool forced_tx; +}; + +struct qed_mcp_link_params { + struct qed_mcp_link_speed_params speed; + struct qed_mcp_link_pause_params pause; + u32 loopback_mode; +}; + +struct qed_mcp_link_capabilities { + u32 speed_capabilities; +}; + +struct qed_mcp_link_state { + bool link_up; + + u32 speed; /* In Mb/s */ + bool full_duplex; + + bool an; + bool an_complete; + bool parallel_detection; + bool pfc_enabled; + +#define QED_LINK_PARTNER_SPEED_1G_HD BIT(0) +#define QED_LINK_PARTNER_SPEED_1G_FD BIT(1) +#define QED_LINK_PARTNER_SPEED_10G BIT(2) +#define QED_LINK_PARTNER_SPEED_20G BIT(3) +#define QED_LINK_PARTNER_SPEED_40G BIT(4) +#define QED_LINK_PARTNER_SPEED_50G BIT(5) +#define QED_LINK_PARTNER_SPEED_100G BIT(6) + u32 partner_adv_speed; + + bool partner_tx_flow_ctrl_en; + bool partner_rx_flow_ctrl_en; + +#define QED_LINK_PARTNER_SYMMETRIC_PAUSE (1) +#define QED_LINK_PARTNER_ASYMMETRIC_PAUSE (2) 
+#define QED_LINK_PARTNER_BOTH_PAUSE (3) + u8 partner_adv_pause; + + bool sfp_tx_fault; +}; + struct qed_mcp_function_info { u8 pause_on_host; @@ -44,6 +97,47 @@ struct qed_mcp_drv_version { u8 name[MCP_DRV_VER_STR_SIZE - 4]; }; +/** + * @brief - returns the link params of the hw function + * + * @param p_hwfn + * + * @returns pointer to link params + */ +struct qed_mcp_link_params *qed_mcp_get_link_params(struct qed_hwfn *); + +/** + * @brief - return the link state of the hw function + * + * @param p_hwfn + * + * @returns pointer to link state + */ +struct qed_mcp_link_state *qed_mcp_get_link_state(struct qed_hwfn *); + +/** + * @brief - return the link capabilities of the hw function + * + * @param p_hwfn + * + * @returns pointer to link capabilities + */ +struct qed_mcp_link_capabilities + *qed_mcp_get_link_capabilities(struct qed_hwfn *p_hwfn); + +/** + * @brief Request the MFW to set the the link according to 'link_input'. + * + * @param p_hwfn + * @param p_ptt + * @param b_up - raise link if `true'. Reset link if `false'. + * + * @return int + */ +int qed_mcp_set_link(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + bool b_up); + /** * @brief Get the management firmware version value * @@ -55,6 +149,19 @@ struct qed_mcp_drv_version { int qed_mcp_get_mfw_ver(struct qed_dev *cdev, u32 *mfw_ver); +/** + * @brief Get media type value of the port. + * + * @param cdev - qed dev pointer + * @param mfw_ver - media type value + * + * @return int - + * 0 - Operation was successul. + * -EBUSY - Operation failed + */ +int qed_mcp_get_media_type(struct qed_dev *cdev, + u32 *media_type); + /** * @brief General function for sending commands to the MCP * mailbox. It acquire mutex lock for the entire @@ -142,8 +249,10 @@ struct qed_mcp_info { u32 port_addr; u16 drv_mb_seq; u16 drv_pulse_seq; + struct qed_mcp_link_params link_input; + struct qed_mcp_link_state link_output; + struct qed_mcp_link_capabilities link_capabilities; struct qed_mcp_function_info func_info; - u8 *mfw_mb_cur; u8 *mfw_mb_shadow; u16 mfw_mb_length; @@ -181,6 +290,21 @@ void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, int qed_mcp_free(struct qed_hwfn *p_hwfn); +/** + * @brief This function is called from the DPC context. After + * pointing PTT to the mfw mb, check for events sent by the MCP + * to the driver and ack them. In case a critical event + * detected, it will be handled here, otherwise the work will be + * queued to a sleepable work-queue. + * + * @param p_hwfn - HW function + * @param p_ptt - PTT required for register access + * @return int - 0 - operation + * was successul. + */ +int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + /** * @brief Sends a LOAD_REQ to the MFW, and in case operation * succeed, returns whether this PF is the first on the diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 67a7b41b70aa..ab1041424013 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -111,6 +111,10 @@ struct qed_eth_ops { int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); + void (*register_ops)(struct qed_dev *cdev, + struct qed_eth_cb_ops *ops, + void *cookie); + int (*vport_start)(struct qed_dev *cdev, u8 vport_id, u16 mtu, u8 drop_ttl0_flg, -- cgit v1.2.3 From 9df2ed0415b13218f84262c2372323ef028310fc Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 26 Oct 2015 11:02:33 +0200 Subject: qed: Add statistics support Device statistics can be gathered on-demand. 
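As a hedged sketch of the consumer side (not part of this patch; the helper name and the printed fields are illustrative assumptions), a protocol driver holding the qed_eth_ops added at the end of this patch might snapshot the counters like so:

    /* Hypothetical consumer of the new get_vport_stats() callback;
     * example_update_stats() is illustrative, not a name from this patch. */
    #include <linux/qed/qed_eth_if.h>

    static void example_update_stats(struct qed_dev *cdev,
                                     const struct qed_eth_ops *ops)
    {
        struct qed_eth_stats stats;

        /* On-demand snapshot of the accumulated function + port counters */
        ops->get_vport_stats(cdev, &stats);

        pr_info("rx ucast pkts %llu, tx ucast pkts %llu, no-buff discards %llu\n",
                (unsigned long long)stats.rx_ucast_pkts,
                (unsigned long long)stats.tx_ucast_pkts,
                (unsigned long long)stats.no_buff_discards);
    }
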
This adds the qed support for reading the statistics [both function and port] from the device, and adds to the public API a method for requesting the current statistics. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 14 ++ drivers/net/ethernet/qlogic/qed/qed_dev.c | 244 +++++++++++++++++++++++++- drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 3 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 30 ++++ drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 + include/linux/qed/qed_eth_if.h | 3 + 6 files changed, 296 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index ca6cc8a7fc64..ac17d8669b1a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -212,7 +212,20 @@ struct qed_qm_info { u32 pf_rl; }; +struct storm_stats { + u32 address; + u32 len; +}; + +struct qed_storm_stats { + struct storm_stats mstats; + struct storm_stats pstats; + struct storm_stats tstats; + struct storm_stats ustats; +}; + struct qed_fw_data { + struct fw_ver_info *fw_ver_info; const u8 *modes_tree_buf; union init_op *init_ops; const u32 *arr_data; @@ -296,6 +309,7 @@ struct qed_hwfn { /* QM init */ struct qed_qm_info qm_info; + struct qed_storm_stats storm_stats; /* Buffer for unzipping firmware data */ void *unzip_buf; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 7fd3d78d94f1..b9b7b7e6fa53 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -649,8 +649,10 @@ int qed_hw_init(struct qed_dev *cdev, bool allow_npar_tx_switch, const u8 *bin_fw_data) { - u32 load_code, param; + struct qed_storm_stats *p_stat; + u32 load_code, param, *p_address; int rc, mfw_rc, i; + u8 fw_vport = 0; rc = qed_init_fw_data(cdev, bin_fw_data); if (rc != 0) @@ -659,6 +661,10 @@ int qed_hw_init(struct qed_dev *cdev, for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + rc = qed_fw_vport(p_hwfn, 0, &fw_vport); + if (rc != 0) + return rc; + /* Enable DMAE in PXP */ rc = qed_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true); @@ -722,6 +728,25 @@ int qed_hw_init(struct qed_dev *cdev, } p_hwfn->hw_init_done = true; + + /* init PF stats */ + p_stat = &p_hwfn->storm_stats; + p_stat->mstats.address = BAR0_MAP_REG_MSDM_RAM + + MSTORM_QUEUE_STAT_OFFSET(fw_vport); + p_stat->mstats.len = sizeof(struct eth_mstorm_per_queue_stat); + + p_stat->ustats.address = BAR0_MAP_REG_USDM_RAM + + USTORM_QUEUE_STAT_OFFSET(fw_vport); + p_stat->ustats.len = sizeof(struct eth_ustorm_per_queue_stat); + + p_stat->pstats.address = BAR0_MAP_REG_PSDM_RAM + + PSTORM_QUEUE_STAT_OFFSET(fw_vport); + p_stat->pstats.len = sizeof(struct eth_pstorm_per_queue_stat); + + p_address = &p_stat->tstats.address; + *p_address = BAR0_MAP_REG_TSDM_RAM + + TSTORM_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)); + p_stat->tstats.len = sizeof(struct tstorm_per_port_stat); } return 0; @@ -1494,6 +1519,223 @@ void qed_chain_free(struct qed_dev *cdev, p_chain->p_phys_addr); } +static void __qed_get_vport_stats(struct qed_dev *cdev, + struct qed_eth_stats *stats) +{ + int i, j; + + memset(stats, 0, sizeof(*stats)); + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + struct eth_mstorm_per_queue_stat mstats; + struct eth_ustorm_per_queue_stat ustats; + struct eth_pstorm_per_queue_stat pstats; + struct tstorm_per_port_stat 
tstats; + struct port_stats port_stats; + struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); + + if (!p_ptt) { + DP_ERR(p_hwfn, "Failed to acquire ptt\n"); + continue; + } + + memset(&mstats, 0, sizeof(mstats)); + qed_memcpy_from(p_hwfn, p_ptt, &mstats, + p_hwfn->storm_stats.mstats.address, + p_hwfn->storm_stats.mstats.len); + + memset(&ustats, 0, sizeof(ustats)); + qed_memcpy_from(p_hwfn, p_ptt, &ustats, + p_hwfn->storm_stats.ustats.address, + p_hwfn->storm_stats.ustats.len); + + memset(&pstats, 0, sizeof(pstats)); + qed_memcpy_from(p_hwfn, p_ptt, &pstats, + p_hwfn->storm_stats.pstats.address, + p_hwfn->storm_stats.pstats.len); + + memset(&tstats, 0, sizeof(tstats)); + qed_memcpy_from(p_hwfn, p_ptt, &tstats, + p_hwfn->storm_stats.tstats.address, + p_hwfn->storm_stats.tstats.len); + + memset(&port_stats, 0, sizeof(port_stats)); + + if (p_hwfn->mcp_info) + qed_memcpy_from(p_hwfn, p_ptt, &port_stats, + p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, stats), + sizeof(port_stats)); + qed_ptt_release(p_hwfn, p_ptt); + + stats->no_buff_discards += + HILO_64_REGPAIR(mstats.no_buff_discard); + stats->packet_too_big_discard += + HILO_64_REGPAIR(mstats.packet_too_big_discard); + stats->ttl0_discard += + HILO_64_REGPAIR(mstats.ttl0_discard); + stats->tpa_coalesced_pkts += + HILO_64_REGPAIR(mstats.tpa_coalesced_pkts); + stats->tpa_coalesced_events += + HILO_64_REGPAIR(mstats.tpa_coalesced_events); + stats->tpa_aborts_num += + HILO_64_REGPAIR(mstats.tpa_aborts_num); + stats->tpa_coalesced_bytes += + HILO_64_REGPAIR(mstats.tpa_coalesced_bytes); + + stats->rx_ucast_bytes += + HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + stats->rx_mcast_bytes += + HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + stats->rx_bcast_bytes += + HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + stats->rx_ucast_pkts += + HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + stats->rx_mcast_pkts += + HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + stats->rx_bcast_pkts += + HILO_64_REGPAIR(ustats.rcv_bcast_pkts); + + stats->mftag_filter_discards += + HILO_64_REGPAIR(tstats.mftag_filter_discard); + stats->mac_filter_discards += + HILO_64_REGPAIR(tstats.eth_mac_filter_discard); + + stats->tx_ucast_bytes += + HILO_64_REGPAIR(pstats.sent_ucast_bytes); + stats->tx_mcast_bytes += + HILO_64_REGPAIR(pstats.sent_mcast_bytes); + stats->tx_bcast_bytes += + HILO_64_REGPAIR(pstats.sent_bcast_bytes); + stats->tx_ucast_pkts += + HILO_64_REGPAIR(pstats.sent_ucast_pkts); + stats->tx_mcast_pkts += + HILO_64_REGPAIR(pstats.sent_mcast_pkts); + stats->tx_bcast_pkts += + HILO_64_REGPAIR(pstats.sent_bcast_pkts); + stats->tx_err_drop_pkts += + HILO_64_REGPAIR(pstats.error_drop_pkts); + stats->rx_64_byte_packets += port_stats.pmm.r64; + stats->rx_127_byte_packets += port_stats.pmm.r127; + stats->rx_255_byte_packets += port_stats.pmm.r255; + stats->rx_511_byte_packets += port_stats.pmm.r511; + stats->rx_1023_byte_packets += port_stats.pmm.r1023; + stats->rx_1518_byte_packets += port_stats.pmm.r1518; + stats->rx_1522_byte_packets += port_stats.pmm.r1522; + stats->rx_2047_byte_packets += port_stats.pmm.r2047; + stats->rx_4095_byte_packets += port_stats.pmm.r4095; + stats->rx_9216_byte_packets += port_stats.pmm.r9216; + stats->rx_16383_byte_packets += port_stats.pmm.r16383; + stats->rx_crc_errors += port_stats.pmm.rfcs; + stats->rx_mac_crtl_frames += port_stats.pmm.rxcf; + stats->rx_pause_frames += port_stats.pmm.rxpf; + stats->rx_pfc_frames += port_stats.pmm.rxpp; + stats->rx_align_errors += port_stats.pmm.raln; + stats->rx_carrier_errors += port_stats.pmm.rfcr; + 
stats->rx_oversize_packets += port_stats.pmm.rovr; + stats->rx_jabbers += port_stats.pmm.rjbr; + stats->rx_undersize_packets += port_stats.pmm.rund; + stats->rx_fragments += port_stats.pmm.rfrg; + stats->tx_64_byte_packets += port_stats.pmm.t64; + stats->tx_65_to_127_byte_packets += port_stats.pmm.t127; + stats->tx_128_to_255_byte_packets += port_stats.pmm.t255; + stats->tx_256_to_511_byte_packets += port_stats.pmm.t511; + stats->tx_512_to_1023_byte_packets += port_stats.pmm.t1023; + stats->tx_1024_to_1518_byte_packets += port_stats.pmm.t1518; + stats->tx_1519_to_2047_byte_packets += port_stats.pmm.t2047; + stats->tx_2048_to_4095_byte_packets += port_stats.pmm.t4095; + stats->tx_4096_to_9216_byte_packets += port_stats.pmm.t9216; + stats->tx_9217_to_16383_byte_packets += port_stats.pmm.t16383; + stats->tx_pause_frames += port_stats.pmm.txpf; + stats->tx_pfc_frames += port_stats.pmm.txpp; + stats->tx_lpi_entry_count += port_stats.pmm.tlpiec; + stats->tx_total_collisions += port_stats.pmm.tncl; + stats->rx_mac_bytes += port_stats.pmm.rbyte; + stats->rx_mac_uc_packets += port_stats.pmm.rxuca; + stats->rx_mac_mc_packets += port_stats.pmm.rxmca; + stats->rx_mac_bc_packets += port_stats.pmm.rxbca; + stats->rx_mac_frames_ok += port_stats.pmm.rxpok; + stats->tx_mac_bytes += port_stats.pmm.tbyte; + stats->tx_mac_uc_packets += port_stats.pmm.txuca; + stats->tx_mac_mc_packets += port_stats.pmm.txmca; + stats->tx_mac_bc_packets += port_stats.pmm.txbca; + stats->tx_mac_ctrl_frames += port_stats.pmm.txcf; + + for (j = 0; j < 8; j++) { + stats->brb_truncates += port_stats.brb.brb_truncate[j]; + stats->brb_discards += port_stats.brb.brb_discard[j]; + } + } +} + +void qed_get_vport_stats(struct qed_dev *cdev, + struct qed_eth_stats *stats) +{ + u32 i; + + if (!cdev) { + memset(stats, 0, sizeof(*stats)); + return; + } + + __qed_get_vport_stats(cdev, stats); + + if (!cdev->reset_stats) + return; + + /* Reduce the statistics baseline */ + for (i = 0; i < sizeof(struct qed_eth_stats) / sizeof(u64); i++) + ((u64 *)stats)[i] -= ((u64 *)cdev->reset_stats)[i]; +} + +/* zeroes V-PORT specific portion of stats (Port stats remains untouched) */ +void qed_reset_vport_stats(struct qed_dev *cdev) +{ + int i; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + struct eth_mstorm_per_queue_stat mstats; + struct eth_ustorm_per_queue_stat ustats; + struct eth_pstorm_per_queue_stat pstats; + struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); + + if (!p_ptt) { + DP_ERR(p_hwfn, "Failed to acquire ptt\n"); + continue; + } + + memset(&mstats, 0, sizeof(mstats)); + qed_memcpy_to(p_hwfn, p_ptt, + p_hwfn->storm_stats.mstats.address, + &mstats, + p_hwfn->storm_stats.mstats.len); + + memset(&ustats, 0, sizeof(ustats)); + qed_memcpy_to(p_hwfn, p_ptt, + p_hwfn->storm_stats.ustats.address, + &ustats, + p_hwfn->storm_stats.ustats.len); + + memset(&pstats, 0, sizeof(pstats)); + qed_memcpy_to(p_hwfn, p_ptt, + p_hwfn->storm_stats.pstats.address, + &pstats, + p_hwfn->storm_stats.pstats.len); + + qed_ptt_release(p_hwfn, p_ptt); + } + + /* PORT statistics are not necessarily reset, so we need to + * read and create a baseline for future statistics. 
+ */ + if (!cdev->reset_stats) + DP_INFO(cdev, "Reset stats not allocated\n"); + else + __qed_get_vport_stats(cdev, cdev->reset_stats); +} + int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, u16 src_id, u16 *dst_id) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 773070d04ab8..e29a3ba6c8b0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -156,6 +156,9 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn); */ void qed_ptt_release(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +void qed_get_vport_stats(struct qed_dev *cdev, + struct qed_eth_stats *stats); +void qed_reset_vport_stats(struct qed_dev *cdev); enum qed_dmae_address_type_t { QED_DMAE_ADDRESS_HOST_VIRT, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 5909823463ab..b2f8e854dfd1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -355,6 +355,36 @@ struct core_conn_context { struct regpair ustorm_st_padding[2] /* padding */; }; +struct eth_mstorm_per_queue_stat { + struct regpair ttl0_discard; + struct regpair packet_too_big_discard; + struct regpair no_buff_discard; + struct regpair not_active_discard; + struct regpair tpa_coalesced_pkts; + struct regpair tpa_coalesced_events; + struct regpair tpa_aborts_num; + struct regpair tpa_coalesced_bytes; +}; + +struct eth_pstorm_per_queue_stat { + struct regpair sent_ucast_bytes; + struct regpair sent_mcast_bytes; + struct regpair sent_bcast_bytes; + struct regpair sent_ucast_pkts; + struct regpair sent_mcast_pkts; + struct regpair sent_bcast_pkts; + struct regpair error_drop_pkts; +}; + +struct eth_ustorm_per_queue_stat { + struct regpair rcv_ucast_bytes; + struct regpair rcv_mcast_bytes; + struct regpair rcv_bcast_bytes; + struct regpair rcv_ucast_pkts; + struct regpair rcv_mcast_pkts; + struct regpair rcv_bcast_pkts; +}; + /* Event Ring Next Page Address */ struct event_ring_next_addr { struct regpair addr /* Next Page Address */; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7049e4139d3c..f72036a2ef5b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1298,6 +1298,8 @@ static int qed_start_vport(struct qed_dev *cdev, vport_id, mtu); } + qed_reset_vport_stats(cdev); + return 0; } @@ -1680,6 +1682,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .filter_config = &qed_configure_filter, .fastpath_stop = &qed_fastpath_stop, .eth_cqe_completion = &qed_fp_cqe_completion, + .get_vport_stats = &qed_get_vport_stats, }; const struct qed_eth_ops *qed_get_eth_ops(u32 version) diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index ab1041424013..81ab178e31c1 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -154,6 +154,9 @@ struct qed_eth_ops { int (*eth_cqe_completion)(struct qed_dev *cdev, u8 rss_id, struct eth_slow_path_rx_cqe *cqe); + + void (*get_vport_stats)(struct qed_dev *cdev, + struct qed_eth_stats *stats); }; const struct qed_eth_ops *qed_get_eth_ops(u32 version); -- cgit v1.2.3 From f8e529ed941ba2bbcbf310b575d968159ce7e895 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 27 Oct 2015 09:23:59 +0900 Subject: seccomp, ptrace: add support for dumping seccomp filters This patch adds support for dumping a process' (classic BPF) seccomp filters via ptrace. 
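For orientation, a hedged userspace sketch of driving the new request, assuming the semantics spelled out in the remainder of this message (addr selects the ith filter, 0 being the newest; data is a buffer or NULL; the return value is the BPF instruction count). The helper below is hypothetical, and the tracer is assumed to already be attached with the tracee stopped:

    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/ptrace.h>
    #include <linux/filter.h>

    #ifndef PTRACE_SECCOMP_GET_FILTER
    #define PTRACE_SECCOMP_GET_FILTER 0x420c  /* value added by this patch */
    #endif

    /* Hypothetical helper: fetch the tracee's newest classic BPF filter.
     * On success the caller owns the returned buffer and *len holds the
     * number of instructions; on failure NULL is returned. */
    static struct sock_filter *dump_newest_filter(pid_t pid, long *len)
    {
        struct sock_filter *insns;

        /* With data == NULL the kernel only reports the filter length. */
        *len = ptrace(PTRACE_SECCOMP_GET_FILTER, pid, (void *)0L, NULL);
        if (*len < 0)
            return NULL;

        insns = calloc(*len, sizeof(*insns));
        if (insns &&
            ptrace(PTRACE_SECCOMP_GET_FILTER, pid, (void *)0L, insns) < 0) {
            free(insns);
            insns = NULL;
        }
        return insns;
    }
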
PTRACE_SECCOMP_GET_FILTER allows the tracer to dump the user's classic BPF seccomp filters. addr should be an integer which represents the ith seccomp filter (0 is the most recently installed filter). data should be a struct sock_filter * with enough room for the ith filter, or NULL, in which case the filter is not saved. The return value for this command is the number of BPF instructions the program represents, or negative in the case of errors. Command-specific errors are ENOENT, which indicates that there is no ith filter in this seccomp tree, and EMEDIUMTYPE, which indicates that the ith filter was not installed as a classic BPF filter. A caveat with this approach is that there is no way to explicitly get at the hierarchy of seccomp filters, and users need to memcmp() filters to decide which are inherited. This means that a task which installs two of the same filter can potentially confuse users of this interface. v2: * make save_orig const * check that the orig_prog exists (not necessary right now, but when seccomp grows eBPF support it will be) * s/n/filter_off and make it an unsigned long to match ptrace * count "down" the tree instead of "up" when passing a filter offset v3: * don't take the current task's lock for inspecting its seccomp mode * use a 0x42** constant for the ptrace command value v4: * don't copy to userspace while holding spinlocks v5: * add another condition to WARN_ON v6: * rebase on net-next Signed-off-by: Tycho Andersen Acked-by: Kees Cook CC: Will Drewry Reviewed-by: Oleg Nesterov CC: Andy Lutomirski CC: Pavel Emelyanov CC: Serge E. Hallyn CC: Alexei Starovoitov CC: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/seccomp.h | 11 +++++++ include/uapi/linux/ptrace.h | 2 ++ kernel/ptrace.c | 5 +++ kernel/seccomp.c | 76 ++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 93 insertions(+), 1 deletion(-) (limited to 'include/linux')
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index f4265039a94c..2296e6b2f690 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -95,4 +95,15 @@ static inline void get_seccomp_filter(struct task_struct *tsk) return; } #endif /* CONFIG_SECCOMP_FILTER */ + +#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) +extern long seccomp_get_filter(struct task_struct *task, + unsigned long filter_off, void __user *data); +#else +static inline long seccomp_get_filter(struct task_struct *task, + unsigned long n, void __user *data) +{ + return -EINVAL; +} +#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ #endif /* _LINUX_SECCOMP_H */
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index a7a697986614..fb8106509000 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -64,6 +64,8 @@ struct ptrace_peeksiginfo_args { #define PTRACE_GETSIGMASK 0x420a #define PTRACE_SETSIGMASK 0x420b +#define PTRACE_SECCOMP_GET_FILTER 0x420c + /* Read signals from a shared (process wide) queue */ #define PTRACE_PEEKSIGINFO_SHARED (1 << 0)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 787320de68e0..b760bae64cf1 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -1016,6 +1016,11 @@ int ptrace_request(struct task_struct *child, long request, break; } #endif + + case PTRACE_SECCOMP_GET_FILTER: + ret = seccomp_get_filter(child, addr, datavp); + break; + default: break; }
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 06858a74bb9c..580ac2d4024f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c
@@ -347,6 +347,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *sfilter; int ret; + const bool save_orig = config_enabled(CONFIG_CHECKPOINT_RESTORE); if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); @@ -370,7 +371,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) return ERR_PTR(-ENOMEM); ret = bpf_prog_create_from_user(&sfilter->prog, fprog, - seccomp_check_filter, false); + seccomp_check_filter, save_orig); if (ret < 0) { kfree(sfilter); return ERR_PTR(ret); @@ -867,3 +868,76 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) /* prctl interface doesn't have flags, so they are always zero. */ return do_seccomp(op, 0, uargs); } + +#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) +long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, + void __user *data) +{ + struct seccomp_filter *filter; + struct sock_fprog_kern *fprog; + long ret; + unsigned long count = 0; + + if (!capable(CAP_SYS_ADMIN) || + current->seccomp.mode != SECCOMP_MODE_DISABLED) { + return -EACCES; + } + + spin_lock_irq(&task->sighand->siglock); + if (task->seccomp.mode != SECCOMP_MODE_FILTER) { + ret = -EINVAL; + goto out; + } + + filter = task->seccomp.filter; + while (filter) { + filter = filter->prev; + count++; + } + + if (filter_off >= count) { + ret = -ENOENT; + goto out; + } + count -= filter_off; + + filter = task->seccomp.filter; + while (filter && count > 1) { + filter = filter->prev; + count--; + } + + if (WARN_ON(count != 1 || !filter)) { + /* The filter tree shouldn't shrink while we're using it. */ + ret = -ENOENT; + goto out; + } + + fprog = filter->prog->orig_prog; + if (!fprog) { + /* This must be a new non-cBPF filter, since we save every + * cBPF filter's orig_prog above when + * CONFIG_CHECKPOINT_RESTORE is enabled. + */ + ret = -EMEDIUMTYPE; + goto out; + } + + ret = fprog->len; + if (!data) + goto out; + + get_seccomp_filter(task); + spin_unlock_irq(&task->sighand->siglock); + + if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) + ret = -EFAULT; + + put_seccomp_filter(task); + return ret; + +out: + spin_unlock_irq(&task->sighand->siglock); + return ret; +} +#endif -- cgit v1.2.3
From 2b1d88cda32f81685bae45c00bf517f77bcda3cd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 15 Oct 2015 17:02:19 +0200 Subject: PM / Domains: Merge measurements for PM QoS device latencies Measuring latency does by itself contribute to an increased latency, thus we should avoid it when it isn't needed. By merging the latency measurements for the ->save_state() and the ->stop() callbacks, we get one measurement instead of two and we get one value to store instead of two. Let's also apply the same change for the ->start() and ->restore_state() callbacks. Signed-off-by: Ulf Hansson Reviewed-by: Lina Iyer Acked-by: Kevin Hilman Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/domain.c | 99 +++++++++++++++++++----------------- drivers/base/power/domain_governor.c | 6 +-- include/linux/pm_domain.h | 6 +-- 3 files changed, 55 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index a1c3ec4cc4fb..a7dfdf9f15ba 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -34,22 +34,6 @@ __ret; \ }) -#define GENPD_DEV_TIMED_CALLBACK(genpd, type, callback, dev, field, name) \ -({ \ - ktime_t __start = ktime_get(); \ - type __retval = GENPD_DEV_CALLBACK(genpd, type, callback, dev); \ - s64 __elapsed = ktime_to_ns(ktime_sub(ktime_get(), __start)); \ - struct gpd_timing_data *__td = &dev_gpd_data(dev)->td; \ - if (!__retval && __elapsed > __td->field) { \ - __td->field = __elapsed; \ - dev_dbg(dev, name " latency exceeded, new value %lld ns\n", \ - __elapsed); \ - genpd->max_off_time_changed = true; \ - __td->constraint_changed = true; \ - } \ - __retval; \ -}) - static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); @@ -90,24 +74,14 @@ static struct generic_pm_domain *dev_to_genpd(struct device *dev) return pd_to_genpd(dev->pm_domain); } -static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev, - bool timed) +static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev) { - if (!timed) - return GENPD_DEV_CALLBACK(genpd, int, stop, dev); - - return GENPD_DEV_TIMED_CALLBACK(genpd, int, stop, dev, - stop_latency_ns, "stop"); + return GENPD_DEV_CALLBACK(genpd, int, stop, dev); } -static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev, - bool timed) +static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) { - if (!timed) - return GENPD_DEV_CALLBACK(genpd, int, start, dev); - - return GENPD_DEV_TIMED_CALLBACK(genpd, int, start, dev, - start_latency_ns, "start"); + return GENPD_DEV_CALLBACK(genpd, int, start, dev); } static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) @@ -263,19 +237,13 @@ static int genpd_poweron(struct generic_pm_domain *genpd) static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) { - return GENPD_DEV_TIMED_CALLBACK(genpd, int, save_state, dev, - save_state_latency_ns, "state save"); + return GENPD_DEV_CALLBACK(genpd, int, save_state, dev); } static int genpd_restore_dev(struct generic_pm_domain *genpd, - struct device *dev, bool timed) + struct device *dev) { - if (!timed) - return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev); - - return GENPD_DEV_TIMED_CALLBACK(genpd, int, restore_state, dev, - restore_state_latency_ns, - "state restore"); + return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev); } static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, @@ -422,6 +390,9 @@ static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; bool (*stop_ok)(struct device *__dev); + struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + ktime_t time_start; + s64 elapsed_ns; int ret; dev_dbg(dev, "%s()\n", __func__); @@ -434,16 +405,29 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (stop_ok && !stop_ok(dev)) return -EBUSY; + /* Measure suspend latency. 
*/ + time_start = ktime_get(); + ret = genpd_save_dev(genpd, dev); if (ret) return ret; - ret = genpd_stop_dev(genpd, dev, true); + ret = genpd_stop_dev(genpd, dev); if (ret) { - genpd_restore_dev(genpd, dev, true); + genpd_restore_dev(genpd, dev); return ret; } + /* Update suspend latency value if the measured time exceeds it. */ + elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); + if (elapsed_ns > td->suspend_latency_ns) { + td->suspend_latency_ns = elapsed_ns; + dev_dbg(dev, "suspend latency exceeded, %lld ns\n", + elapsed_ns); + genpd->max_off_time_changed = true; + td->constraint_changed = true; + } + /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. @@ -469,6 +453,9 @@ static int pm_genpd_runtime_suspend(struct device *dev) static int pm_genpd_runtime_resume(struct device *dev) { struct generic_pm_domain *genpd; + struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + ktime_t time_start; + s64 elapsed_ns; int ret; bool timed = true; @@ -492,8 +479,24 @@ static int pm_genpd_runtime_resume(struct device *dev) return ret; out: - genpd_start_dev(genpd, dev, timed); - genpd_restore_dev(genpd, dev, timed); + /* Measure resume latency. */ + if (timed) + time_start = ktime_get(); + + genpd_start_dev(genpd, dev); + genpd_restore_dev(genpd, dev); + + /* Update resume latency value if the measured time exceeds it. */ + if (timed) { + elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); + if (elapsed_ns > td->resume_latency_ns) { + td->resume_latency_ns = elapsed_ns; + dev_dbg(dev, "resume latency exceeded, %lld ns\n", + elapsed_ns); + genpd->max_off_time_changed = true; + td->constraint_changed = true; + } + } return 0; } @@ -783,7 +786,7 @@ static int pm_genpd_suspend_noirq(struct device *dev) || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))) return 0; - genpd_stop_dev(genpd, dev, false); + genpd_stop_dev(genpd, dev); /* * Since all of the "noirq" callbacks are executed sequentially, it is @@ -824,7 +827,7 @@ static int pm_genpd_resume_noirq(struct device *dev) pm_genpd_sync_poweron(genpd, true); genpd->suspended_count--; - return genpd_start_dev(genpd, dev, false); + return genpd_start_dev(genpd, dev); } /** @@ -932,7 +935,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_stop_dev(genpd, dev, false); + return genpd->suspend_power_off ? 0 : genpd_stop_dev(genpd, dev); } /** @@ -953,7 +956,7 @@ static int pm_genpd_thaw_noirq(struct device *dev) return -EINVAL; return genpd->suspend_power_off ? 
- 0 : genpd_start_dev(genpd, dev, false); + 0 : genpd_start_dev(genpd, dev); } /** @@ -1047,7 +1050,7 @@ static int pm_genpd_restore_noirq(struct device *dev) pm_genpd_sync_poweron(genpd, true); - return genpd_start_dev(genpd, dev, false); + return genpd_start_dev(genpd, dev); } /** diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 85e17bacc834..e60dd12e23aa 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -77,10 +77,8 @@ static bool default_stop_ok(struct device *dev) dev_update_qos_constraint); if (constraint_ns > 0) { - constraint_ns -= td->save_state_latency_ns + - td->stop_latency_ns + - td->start_latency_ns + - td->restore_state_latency_ns; + constraint_ns -= td->suspend_latency_ns + + td->resume_latency_ns; if (constraint_ns == 0) return false; } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f4dd8105b024..ba4ced38efae 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -81,10 +81,8 @@ struct gpd_link { }; struct gpd_timing_data { - s64 stop_latency_ns; - s64 start_latency_ns; - s64 save_state_latency_ns; - s64 restore_state_latency_ns; + s64 suspend_latency_ns; + s64 resume_latency_ns; s64 effective_constraint_ns; bool constraint_changed; bool cached_stop_ok; -- cgit v1.2.3 From 8eec1020f0c0c03f7219ed50cf1b754be49dd448 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:35:22 +0530 Subject: cpufreq: create cpu/cpufreq at boot time Later patches will need to create policy specific directories in /sys/devices/system/cpu/cpufreq/ directory and so the cpufreq directory wouldn't be ever empty. And so no fun creating/destroying it on need basis anymore. Create it once on system boot. Reviewed-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 32 ++------------------------------ drivers/cpufreq/cpufreq_governor.c | 20 +++++--------------- include/linux/cpufreq.h | 2 -- 3 files changed, 7 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 16b9e811ff01..c1fd57db50bd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -876,43 +876,15 @@ static struct kobj_type ktype_cpufreq = { struct kobject *cpufreq_global_kobject; EXPORT_SYMBOL(cpufreq_global_kobject); -static int cpufreq_global_kobject_usage; - -int cpufreq_get_global_kobject(void) -{ - if (!cpufreq_global_kobject_usage++) - return kobject_add(cpufreq_global_kobject, - &cpu_subsys.dev_root->kobj, "%s", "cpufreq"); - - return 0; -} -EXPORT_SYMBOL(cpufreq_get_global_kobject); - -void cpufreq_put_global_kobject(void) -{ - if (!--cpufreq_global_kobject_usage) - kobject_del(cpufreq_global_kobject); -} -EXPORT_SYMBOL(cpufreq_put_global_kobject); - int cpufreq_sysfs_create_file(const struct attribute *attr) { - int ret = cpufreq_get_global_kobject(); - - if (!ret) { - ret = sysfs_create_file(cpufreq_global_kobject, attr); - if (ret) - cpufreq_put_global_kobject(); - } - - return ret; + return sysfs_create_file(cpufreq_global_kobject, attr); } EXPORT_SYMBOL(cpufreq_sysfs_create_file); void cpufreq_sysfs_remove_file(const struct attribute *attr) { sysfs_remove_file(cpufreq_global_kobject, attr); - cpufreq_put_global_kobject(); } EXPORT_SYMBOL(cpufreq_sysfs_remove_file); @@ -2582,7 +2554,7 @@ static int __init cpufreq_core_init(void) if (cpufreq_disabled()) return -ENODEV; - cpufreq_global_kobject = kobject_create(); + cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); register_syscore_ops(&cpufreq_syscore_ops); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 750626d8fb03..11258c4c1b17 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -348,29 +348,21 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER)); - if (!have_governor_per_policy()) { - if (WARN_ON(cpufreq_get_global_kobject())) { - ret = -EINVAL; - goto cdata_exit; - } + if (!have_governor_per_policy()) cdata->gdbs_data = dbs_data; - } ret = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); if (ret) - goto put_kobj; + goto reset_gdbs_data; policy->governor_data = dbs_data; return 0; -put_kobj: - if (!have_governor_per_policy()) { +reset_gdbs_data: + if (!have_governor_per_policy()) cdata->gdbs_data = NULL; - cpufreq_put_global_kobject(); - } -cdata_exit: cdata->exit(dbs_data, !policy->governor->initialized); free_common_dbs_info: free_common_dbs_info(policy, cdata); @@ -394,10 +386,8 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy, sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); - if (!have_governor_per_policy()) { + if (!have_governor_per_policy()) cdata->gdbs_data = NULL; - cpufreq_put_global_kobject(); - } cdata->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index dca22de98d94..338bf0e59bb8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -149,8 +149,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) /* 
/sys/devices/system/cpu/cpufreq: entry point for global variables */ extern struct kobject *cpufreq_global_kobject; -int cpufreq_get_global_kobject(void); -void cpufreq_put_global_kobject(void); int cpufreq_sysfs_create_file(const struct attribute *attr); void cpufreq_sysfs_remove_file(const struct attribute *attr); -- cgit v1.2.3 From c82bd44437f5d53d1654d9e36a9e4e55610f6624 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:35:23 +0530 Subject: cpufreq: remove cpufreq_sysfs_{create|remove}_file() They don't do anything special now, remove the unnecessary wrapper. Reviewed-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 22 +++++----------------- include/linux/cpufreq.h | 2 -- 2 files changed, 5 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c1fd57db50bd..04222e7bbc73 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -873,21 +873,6 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -struct kobject *cpufreq_global_kobject; -EXPORT_SYMBOL(cpufreq_global_kobject); - -int cpufreq_sysfs_create_file(const struct attribute *attr) -{ - return sysfs_create_file(cpufreq_global_kobject, attr); -} -EXPORT_SYMBOL(cpufreq_sysfs_create_file); - -void cpufreq_sysfs_remove_file(const struct attribute *attr) -{ - sysfs_remove_file(cpufreq_global_kobject, attr); -} -EXPORT_SYMBOL(cpufreq_sysfs_remove_file); - static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) { struct device *cpu_dev; @@ -2390,7 +2375,7 @@ static int create_boost_sysfs_file(void) if (!cpufreq_driver->set_boost) cpufreq_driver->set_boost = cpufreq_boost_set_sw; - ret = cpufreq_sysfs_create_file(&boost.attr); + ret = sysfs_create_file(cpufreq_global_kobject, &boost.attr); if (ret) pr_err("%s: cannot register global BOOST sysfs file\n", __func__); @@ -2401,7 +2386,7 @@ static int create_boost_sysfs_file(void) static void remove_boost_sysfs_file(void) { if (cpufreq_boost_supported()) - cpufreq_sysfs_remove_file(&boost.attr); + sysfs_remove_file(cpufreq_global_kobject, &boost.attr); } int cpufreq_enable_boost_support(void) @@ -2549,6 +2534,9 @@ static struct syscore_ops cpufreq_syscore_ops = { .shutdown = cpufreq_suspend, }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + static int __init cpufreq_core_init(void) { if (cpufreq_disabled()) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 338bf0e59bb8..9623218d996a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -149,8 +149,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) /* /sys/devices/system/cpu/cpufreq: entry point for global variables */ extern struct kobject *cpufreq_global_kobject; -int cpufreq_sysfs_create_file(const struct attribute *attr); -void cpufreq_sysfs_remove_file(const struct attribute *attr); #ifdef CONFIG_CPU_FREQ unsigned int cpufreq_get(unsigned int cpu); -- cgit v1.2.3 From 96bdda61f58b70431bbe8a3e49794c8210f7691b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:35:24 +0530 Subject: cpufreq: create cpu/cpufreq/policyX directories The cpufreq sysfs interface had been a bit inconsistent as one of the CPUs for a policy had a real directory within its sysfs 'cpuX' directory and all other CPUs had links to it. 
That also made the code a bit complex, as we needed to take care of moving the sysfs directory if the CPU containing the real directory was physically hot-unplugged. Solve this by creating per-policy 'policyX' directories in the /sys/devices/system/cpu/cpufreq/ directory, where X is the CPU for which the policy was first created. This also removes the need to keep kobj_cpu, so remove it now. Suggested-by: Saravana Kannan Signed-off-by: Viresh Kumar Reviewed-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 34 ++++------------------------------ include/linux/cpufreq.h | 1 - 2 files changed, 4 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 04222e7bbc73..4fa2215cc6ec 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -910,9 +910,6 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) /* Some related CPUs might not be present (physically hotplugged) */ for_each_cpu(j, policy->real_cpus) { - if (j == policy->kobj_cpu) - continue; - ret = add_cpu_dev_symlink(policy, j); if (ret) break; @@ -926,12 +923,8 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) unsigned int j; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu(j, policy->real_cpus) { - if (j == policy->kobj_cpu) - continue; - + for_each_cpu(j, policy->real_cpus) remove_cpu_dev_symlink(policy, j); - } } static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) @@ -1047,8 +1040,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) goto err_free_rcpumask; - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &dev->kobj, - "cpufreq"); + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + cpufreq_global_kobject, "policy%u", cpu); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); goto err_free_real_cpus; @@ -1062,10 +1055,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) INIT_WORK(&policy->update, handle_update); policy->cpu = cpu; - - /* Set this once on allocation */ - policy->kobj_cpu = cpu; - return policy; err_free_real_cpus: @@ -1417,22 +1406,7 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return; } - if (cpu != policy->kobj_cpu) { - remove_cpu_dev_symlink(policy, cpu); - } else { - /* - * The CPU owning the policy object is going away. Move it to - * another suitable CPU. 
- */ - unsigned int new_cpu = cpumask_first(policy->real_cpus); - struct device *new_dev = get_cpu_device(new_cpu); - - dev_dbg(dev, "%s: Moving policy object to CPU%u\n", __func__, new_cpu); - - sysfs_remove_link(&new_dev->kobj, "cpufreq"); - policy->kobj_cpu = new_cpu; - WARN_ON(kobject_move(&policy->kobj, &new_dev->kobj)); - } + remove_cpu_dev_symlink(policy, cpu); } static void handle_update(struct work_struct *work) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9623218d996a..ef4c5b1a860f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,7 +65,6 @@ struct cpufreq_policy { unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu managing this policy, must be online */ - unsigned int kobj_cpu; /* cpu managing sysfs files, can be offline */ struct clk *clk; struct cpufreq_cpuinfo cpuinfo;/* see above */ -- cgit v1.2.3 From 44511fb9e55ada760822b0b0d7be9d150576f17f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 23 Oct 2015 11:48:16 +0200 Subject: efi: Use correct type for struct efi_memory_map::phys_map We have been getting away with using a void* for the physical address of the UEFI memory map, since, even on 32-bit platforms with 64-bit physical addresses, no truncation takes place if the memory map has been allocated by the firmware (which only uses 1:1 virtually addressable memory), which is usually the case. However, commit: 0f96a99dab36 ("efi: Add "efi_fake_mem" boot option") adds code that clones and modifies the UEFI memory map, and the clone may live above 4 GB on 32-bit platforms. This means our use of void* for struct efi_memory_map::phys_map has graduated from 'incorrect but working' to 'incorrect and broken', and we need to fix it. So redefine struct efi_memory_map::phys_map as phys_addr_t, and get rid of a bunch of casts that are now unneeded. 
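To illustrate the truncation, here is a minimal userspace sketch (not kernel code: uint64_t stands in for phys_addr_t, uint32_t stands in for a 32-bit void *, and the address is a made-up example above 4 GB):

  #include <stdint.h>
  #include <stdio.h>

  typedef uint64_t phys_addr_t;  /* 64-bit physical addresses, as with PAE/LPAE */

  int main(void)
  {
          phys_addr_t phys = 0x100100000ULL;  /* memory map clone above 4 GB */
          uint32_t as_ptr = (uint32_t)phys;   /* what a 32-bit void * would hold */

          printf("phys=%#llx as_ptr=%#x\n", (unsigned long long)phys, as_ptr);
          /* prints: phys=0x100100000 as_ptr=0x100000 -- the high bits are gone */
          return 0;
  }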
Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: izumi.taku@jp.fujitsu.com Cc: kamezawa.hiroyu@jp.fujitsu.com Cc: linux-efi@vger.kernel.org Cc: matt.fleming@intel.com Link: http://lkml.kernel.org/r/1445593697-1342-1-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/efi.c | 4 ++-- arch/x86/platform/efi/efi.c | 4 ++-- drivers/firmware/efi/efi.c | 8 ++++---- include/linux/efi.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4b7df346e388..61eb1d17586a 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -208,7 +208,7 @@ void __init efi_init(void) memblock_reserve(params.mmap & PAGE_MASK, PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); - memmap.phys_map = (void *)params.mmap; + memmap.phys_map = params.mmap; memmap.map = early_memremap(params.mmap, params.mmap_size); memmap.map_end = memmap.map + params.mmap_size; memmap.desc_size = params.desc_size; @@ -282,7 +282,7 @@ static int __init arm64_enable_runtime_services(void) pr_info("Remapping and enabling EFI services.\n"); mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, + memmap.map = (__force void *)ioremap_cache(memmap.phys_map, mapsize); if (!memmap.map) { pr_err("Failed to remap EFI memory map\n"); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3e1d09e885c0..ad285404ea7f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -194,7 +194,7 @@ static void __init do_add_efi_memmap(void) int __init efi_memblock_x86_reserve_range(void) { struct efi_info *e = &boot_params.efi_info; - unsigned long pmap; + phys_addr_t pmap; if (efi_enabled(EFI_PARAVIRT)) return 0; @@ -209,7 +209,7 @@ int __init efi_memblock_x86_reserve_range(void) #else pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); #endif - memmap.phys_map = (void *)pmap; + memmap.phys_map = pmap; memmap.nr_map = e->efi_memmap_size / e->efi_memdesc_size; memmap.desc_size = e->efi_memdesc_size; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 31fc864eb037..027ca212179f 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -254,7 +254,7 @@ subsys_initcall(efisubsys_init); int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) { struct efi_memory_map *map = efi.memmap; - void *p, *e; + phys_addr_t p, e; if (!efi_enabled(EFI_MEMMAP)) { pr_err_once("EFI_MEMMAP is not enabled.\n"); @@ -286,10 +286,10 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) * So just always get our own virtual map on the CPU. 
* */ - md = early_memremap((phys_addr_t)p, sizeof (*md)); + md = early_memremap(p, sizeof (*md)); if (!md) { - pr_err_once("early_memremap(%p, %zu) failed.\n", - p, sizeof (*md)); + pr_err_once("early_memremap(%pa, %zu) failed.\n", + &p, sizeof (*md)); return -ENOMEM; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 4d01c1033fce..569b5a866bb1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -680,7 +680,7 @@ typedef struct { } efi_system_table_t; struct efi_memory_map { - void *phys_map; + phys_addr_t phys_map; void *map; void *map_end; int nr_map; -- cgit v1.2.3 From 1bd5dfe41b994a6e793363894befef76626965a9 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 26 Oct 2015 20:23:53 +0200 Subject: ARM: OMAP1: fix incorrect INT_DMA_LCD Commit 685e2d08c54b ("ARM: OMAP1: Change interrupt numbering for sparse IRQ") turned on SPARSE_IRQ on OMAP1, but forgot to change the number of INT_DMA_LCD. This broke the boot at least on Nokia 770, where the device hangs during framebuffer initialization. Fix by defining INT_DMA_LCD like the other interrupts. Cc: stable@vger.kernel.org # v4.2+ Fixes: 685e2d08c54b ("ARM: OMAP1: Change interrupt numbering for sparse IRQ") Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- include/linux/omap-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h index e5a70132a240..88fa8af2b937 100644 --- a/include/linux/omap-dma.h +++ b/include/linux/omap-dma.h @@ -17,7 +17,7 @@ #include -#define INT_DMA_LCD 25 +#define INT_DMA_LCD (NR_IRQS_LEGACY + 25) #define OMAP1_DMA_TOUT_IRQ (1 << 0) #define OMAP_DMA_DROP_IRQ (1 << 1) -- cgit v1.2.3 From 412a15c0fe537c59c794d4e8134580b9cb984a0c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:36 +0300 Subject: svcrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. 
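Condensed from the hunks below, the new registration flow has this general shape (a sketch with error handling and page bookkeeping trimmed; all names are taken from the patch):

  /* 1) Describe the pages with a scatterlist instead of a fastreg page list */
  sg_init_table(frmr->sg, nents);
  for (pno = 0; pno < nents; pno++)
          sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], len, pg_off);

  /* 2) DMA-map the whole scatterlist in one call */
  dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
                            frmr->sg, frmr->sg_nents, frmr->direction);

  /* 3) Let the driver build the page list behind the MR */
  n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);

  /* 4) Post IB_WR_REG_MR instead of the old IB_WR_FAST_REG_MR */
  reg_wr.wr.opcode = IB_WR_REG_MR;
  reg_wr.mr = frmr->mr;
  reg_wr.key = frmr->mr->lkey;
  reg_wr.access = frmr->access_flags;
  ret = svc_rdma_send(xprt, &reg_wr.wr);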
Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 6 +-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 76 ++++++++++++++++++-------------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 34 +++++--------- 3 files changed, 55 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..1e4438ea2380 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge { }; struct svc_rdma_fastreg_mr { struct ib_mr *mr; - void *kva; - struct ib_fast_reg_page_list *page_list; - int page_list_len; + struct scatterlist *sg; + int sg_nents; unsigned long access_flags; - unsigned long map_len; enum dma_data_direction direction; struct list_head frmr_list; }; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7be42d0da19e..cb0991345816 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -220,12 +220,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, { struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; u8 key; - int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); - int ret, read, pno; + int ret, read, pno, dma_nents, n; u32 pg_off = *page_offset; u32 pg_no = *page_no; @@ -234,16 +234,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; - pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); + read = min_t(int, nents << PAGE_SHIFT, rs_length); - frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = pages_needed << PAGE_SHIFT; - frmr->page_list_len = pages_needed; + frmr->sg_nents = nents; - for (pno = 0; pno < pages_needed; pno++) { + for (pno = 0; pno < nents; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; @@ -251,17 +249,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, head->arg.len += len; if (!pg_off) head->count++; + + sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], + len, pg_off); + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; - frmr->page_list->page_list[pno] = - ib_dma_map_page(xprt->sc_cm_id->device, - head->arg.pages[pg_no], 0, - PAGE_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[pno]); - if (ret) - goto err; - atomic_inc(&xprt->sc_dma_used); /* adjust offset and wrap to next page if needed */ pg_off += len; @@ -277,28 +270,42 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, + frmr->direction); + if (!dma_nents) { + pr_err("svcrdma: failed to dma map sg %p\n", + frmr->sg); + return -ENOMEM; + } + 
atomic_inc(&xprt->sc_dma_used); + + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", + frmr->mr, n, frmr->sg_nents); + return n < 0 ? n : -EINVAL; + } + /* Bump the key */ key = (u8)(frmr->mr->lkey & 0x000000FF); ib_update_fast_reg_key(frmr->mr, ++key); - ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].addr = frmr->mr->iova; ctxt->sge[0].lkey = frmr->mr->lkey; - ctxt->sge[0].length = read; + ctxt->sge[0].length = frmr->mr->length; ctxt->count = 1; ctxt->read_hdr = head; - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.iova_start = (unsigned long)frmr->kva; - fastreg_wr.page_list = frmr->page_list; - fastreg_wr.page_list_len = frmr->page_list_len; - fastreg_wr.page_shift = PAGE_SHIFT; - fastreg_wr.length = frmr->map_len; - fastreg_wr.access_flags = frmr->access_flags; - fastreg_wr.rkey = frmr->mr->lkey; - fastreg_wr.wr.next = &read_wr.wr; + /* Prepare REG WR */ + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = 0; + reg_wr.wr.send_flags = IB_SEND_SIGNALED; + reg_wr.wr.num_sge = 0; + reg_wr.mr = frmr->mr; + reg_wr.key = frmr->mr->lkey; + reg_wr.access = frmr->access_flags; + reg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); @@ -324,7 +331,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr.wr); + ret = svc_rdma_send(xprt, ®_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -338,7 +345,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - svc_rdma_unmap_dma(ctxt); + ib_dma_unmap_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4a41122d586f..a266e870d870 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) { struct ib_mr *mr; - struct ib_fast_reg_page_list *pl; + struct scatterlist *sg; struct svc_rdma_fastreg_mr *frmr; u32 num_sg; @@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) if (IS_ERR(mr)) goto err_free_frmr; - pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, - num_sg); - if (IS_ERR(pl)) + sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL); + if (!sg) goto err_free_mr; + sg_init_table(sg, RPCSVC_MAXPAGES); + frmr->mr = mr; - frmr->page_list = pl; + frmr->sg = sg; INIT_LIST_HEAD(&frmr->frmr_list); return frmr; @@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) frmr = list_entry(xprt->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); + kfree(frmr->sg); ib_dereg_mr(frmr->mr); - ib_free_fast_reg_page_list(frmr->page_list); kfree(frmr); } } @@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) frmr = list_entry(rdma->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); 
list_del_init(&frmr->frmr_list); - frmr->map_len = 0; - frmr->page_list_len = 0; + frmr->sg_nents = 0; } spin_unlock_bh(&rdma->sc_frmr_q_lock); if (frmr) @@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) return rdma_alloc_frmr(rdma); } -static void frmr_unmap_dma(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - int page_no; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - dma_addr_t addr = frmr->page_list->page_list[page_no]; - if (ib_dma_mapping_error(frmr->mr->device, addr)) - continue; - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE, - frmr->direction); - } -} - void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, struct svc_rdma_fastreg_mr *frmr) { if (frmr) { - frmr_unmap_dma(rdma, frmr); + ib_dma_unmap_sg(rdma->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); + atomic_dec(&rdma->sc_dma_used); spin_lock_bh(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); -- cgit v1.2.3 From cd9e9808d18fe7107c306f6e71c8be7230ee42b4 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Wed, 28 Oct 2015 19:54:55 +0100 Subject: lightnvm: Support for Open-Channel SSDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Open-channel SSDs are devices that share responsibilities with the host in order to implement and maintain features that typical SSDs keep strictly in firmware. These include (i) the Flash Translation Layer (FTL), (ii) bad block management, and (iii) hardware units such as the flash controller, the interface controller, and large amounts of flash chips. In this way, Open-channel SSDs expose direct access to their physical flash storage, while keeping a subset of the internal features of SSDs. LightNVM is a specification that gives support to Open-channel SSDs. LightNVM allows the host to manage data placement, garbage collection, and parallelism. Device-specific responsibilities such as bad block management, FTL extensions to support atomic IOs, or metadata persistence are still handled by the device. The implementation of LightNVM consists of two parts: core and (multiple) targets. The core implements functionality shared across targets. This includes initialization, teardown and statistics. The targets implement the interface that exposes physical flash to user-space applications. Examples of such targets include key-value store, object-store, as well as traditional block devices, which can be application-specific. 
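As a quick illustration of the target-side API introduced below, a hypothetical target would fill in a struct nvm_tgt_type and hand it to the core (the mytgt_* names are invented for this sketch; the fields are the ones this patch defines):

  static struct nvm_tgt_type tt_mytgt = {
          .name           = "mytgt",
          .version        = {1, 0, 0},

          /* target entry points */
          .make_rq        = mytgt_make_rq,
          .capacity       = mytgt_capacity,
          .end_io         = mytgt_end_io,

          /* module-specific init/teardown */
          .init           = mytgt_init,
          .exit           = mytgt_exit,
  };

  static int __init mytgt_module_init(void)
  {
          return nvm_register_target(&tt_mytgt);
  }

  static void __exit mytgt_module_exit(void)
  {
          nvm_unregister_target(&tt_mytgt);
  }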
Contributions in this patch from: Javier Gonzalez Dongsheng Yang Jesper Madsen Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- Documentation/ioctl/ioctl-number.txt | 1 + MAINTAINERS | 8 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/lightnvm/Kconfig | 28 ++ drivers/lightnvm/Makefile | 5 + drivers/lightnvm/core.c | 826 +++++++++++++++++++++++++++++++++++ include/linux/lightnvm.h | 522 ++++++++++++++++++++++ include/uapi/linux/lightnvm.h | 130 ++++++ 9 files changed, 1523 insertions(+) create mode 100644 drivers/lightnvm/Kconfig create mode 100644 drivers/lightnvm/Makefile create mode 100644 drivers/lightnvm/core.c create mode 100644 include/linux/lightnvm.h create mode 100644 include/uapi/linux/lightnvm.h (limited to 'include/linux') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index df1b25eb8382..8a44d44cf901 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -149,6 +149,7 @@ Code Seq#(hex) Include File Comments 'K' all linux/kd.h 'L' 00-1F linux/loop.h conflict! 'L' 10-1F drivers/scsi/mpt2sas/mpt2sas_ctl.h conflict! +'L' 20-2F linux/lightnvm.h 'L' E0-FF linux/ppdd.h encrypted disk device driver 'M' all linux/soundcard.h conflict! diff --git a/MAINTAINERS b/MAINTAINERS index f1d5a59432fc..d8be12c57f84 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6243,6 +6243,14 @@ F: drivers/nvdimm/pmem.c F: include/linux/pmem.h F: arch/*/include/asm/pmem.h +LIGHTNVM PLATFORM SUPPORT +M: Matias Bjorling +W: http://github/OpenChannelSSD +S: Maintained +F: drivers/lightnvm/ +F: include/linux/lightnvm.h +F: include/uapi/linux/lightnvm.h + LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras W: http://www.ibm.com/linux/ltc/projects/ppc diff --git a/drivers/Kconfig b/drivers/Kconfig index e69ec82ac80a..3a5ab4d5873d 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -44,6 +44,8 @@ source "drivers/net/Kconfig" source "drivers/isdn/Kconfig" +source "drivers/lightnvm/Kconfig" + # input before char - char/joystick depends on it. As does USB. source "drivers/input/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 42f9dd5f07c8..7f1b7c5a1cfd 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ obj-$(CONFIG_IDE) += ide/ obj-$(CONFIG_SCSI) += scsi/ +obj-$(CONFIG_NVM) += lightnvm/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_TARGET_CORE) += target/ diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig new file mode 100644 index 000000000000..d4f309f127cd --- /dev/null +++ b/drivers/lightnvm/Kconfig @@ -0,0 +1,28 @@ +# +# Open-Channel SSD NVM configuration +# + +menuconfig NVM + bool "Open-Channel SSD target support" + depends on BLOCK + help + Say Y here to get to enable Open-channel SSDs. + + Open-Channel SSDs implement a set of extension to SSDs, that + exposes direct access to the underlying non-volatile memory. + + If you say N, all options in this submenu will be skipped and disabled + only do this if you know what you are doing. + +if NVM + +config NVM_DEBUG + bool "Open-Channel SSD debugging support" + ---help--- + Exposes a debug management interface to create/remove targets at: + + /sys/module/lnvm/parameters/configure_debug + + It is required to create/remove targets without IOCTLs. 
+ +endif # NVM diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile new file mode 100644 index 000000000000..38185e990d5d --- /dev/null +++ b/drivers/lightnvm/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Open-Channel SSDs. +# + +obj-$(CONFIG_NVM) := core.o diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c new file mode 100644 index 000000000000..f659e605a406 --- /dev/null +++ b/drivers/lightnvm/core.c @@ -0,0 +1,826 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen. All rights reserved. + * Initial release: Matias Bjorling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, + * USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(nvm_targets); +static LIST_HEAD(nvm_mgrs); +static LIST_HEAD(nvm_devices); +static DECLARE_RWSEM(nvm_lock); + +static struct nvm_tgt_type *nvm_find_target_type(const char *name) +{ + struct nvm_tgt_type *tt; + + list_for_each_entry(tt, &nvm_targets, list) + if (!strcmp(name, tt->name)) + return tt; + + return NULL; +} + +int nvm_register_target(struct nvm_tgt_type *tt) +{ + int ret = 0; + + down_write(&nvm_lock); + if (nvm_find_target_type(tt->name)) + ret = -EEXIST; + else + list_add(&tt->list, &nvm_targets); + up_write(&nvm_lock); + + return ret; +} +EXPORT_SYMBOL(nvm_register_target); + +void nvm_unregister_target(struct nvm_tgt_type *tt) +{ + if (!tt) + return; + + down_write(&nvm_lock); + list_del(&tt->list); + up_write(&nvm_lock); +} +EXPORT_SYMBOL(nvm_unregister_target); + +void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags, + dma_addr_t *dma_handler) +{ + return dev->ops->dev_dma_alloc(dev->q, dev->ppalist_pool, mem_flags, + dma_handler); +} +EXPORT_SYMBOL(nvm_dev_dma_alloc); + +void nvm_dev_dma_free(struct nvm_dev *dev, void *ppa_list, + dma_addr_t dma_handler) +{ + dev->ops->dev_dma_free(dev->ppalist_pool, ppa_list, dma_handler); +} +EXPORT_SYMBOL(nvm_dev_dma_free); + +static struct nvmm_type *nvm_find_mgr_type(const char *name) +{ + struct nvmm_type *mt; + + list_for_each_entry(mt, &nvm_mgrs, list) + if (!strcmp(name, mt->name)) + return mt; + + return NULL; +} + +int nvm_register_mgr(struct nvmm_type *mt) +{ + int ret = 0; + + down_write(&nvm_lock); + if (nvm_find_mgr_type(mt->name)) + ret = -EEXIST; + else + list_add(&mt->list, &nvm_mgrs); + up_write(&nvm_lock); + + return ret; +} +EXPORT_SYMBOL(nvm_register_mgr); + +void nvm_unregister_mgr(struct nvmm_type *mt) +{ + if (!mt) + return; + + down_write(&nvm_lock); + list_del(&mt->list); + up_write(&nvm_lock); +} +EXPORT_SYMBOL(nvm_unregister_mgr); + +static struct nvm_dev *nvm_find_nvm_dev(const char *name) +{ + struct nvm_dev *dev; + + list_for_each_entry(dev, &nvm_devices, devices) + if (!strcmp(name, dev->name)) + return dev; + + return NULL; +} + +struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun, + unsigned long flags) +{ + return dev->mt->get_blk(dev, lun, 
flags); +} +EXPORT_SYMBOL(nvm_get_blk); + +/* Assumes that all valid pages have already been moved on release to bm */ +void nvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) +{ + return dev->mt->put_blk(dev, blk); +} +EXPORT_SYMBOL(nvm_put_blk); + +int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) +{ + return dev->mt->submit_io(dev, rqd); +} +EXPORT_SYMBOL(nvm_submit_io); + +int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk) +{ + return dev->mt->erase_blk(dev, blk, 0); +} +EXPORT_SYMBOL(nvm_erase_blk); + +static void nvm_core_free(struct nvm_dev *dev) +{ + kfree(dev); +} + +static int nvm_core_init(struct nvm_dev *dev) +{ + struct nvm_id *id = &dev->identity; + struct nvm_id_group *grp = &id->groups[0]; + + /* device values */ + dev->nr_chnls = grp->num_ch; + dev->luns_per_chnl = grp->num_lun; + dev->pgs_per_blk = grp->num_pg; + dev->blks_per_lun = grp->num_blk; + dev->nr_planes = grp->num_pln; + dev->sec_size = grp->csecs; + dev->oob_size = grp->sos; + dev->sec_per_pg = grp->fpg_sz / grp->csecs; + dev->addr_mode = id->ppat; + dev->addr_format = id->ppaf; + + dev->plane_mode = NVM_PLANE_SINGLE; + dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size; + + if (grp->mpos & 0x020202) + dev->plane_mode = NVM_PLANE_DOUBLE; + if (grp->mpos & 0x040404) + dev->plane_mode = NVM_PLANE_QUAD; + + /* calculated values */ + dev->sec_per_pl = dev->sec_per_pg * dev->nr_planes; + dev->sec_per_blk = dev->sec_per_pl * dev->pgs_per_blk; + dev->sec_per_lun = dev->sec_per_blk * dev->blks_per_lun; + dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls; + + dev->total_blocks = dev->nr_planes * + dev->blks_per_lun * + dev->luns_per_chnl * + dev->nr_chnls; + dev->total_pages = dev->total_blocks * dev->pgs_per_blk; + INIT_LIST_HEAD(&dev->online_targets); + + return 0; +} + +static void nvm_free(struct nvm_dev *dev) +{ + if (!dev) + return; + + if (dev->mt) + dev->mt->unregister_mgr(dev); + + nvm_core_free(dev); +} + +static int nvm_init(struct nvm_dev *dev) +{ + struct nvmm_type *mt; + int ret = 0; + + if (!dev->q || !dev->ops) + return -EINVAL; + + if (dev->ops->identity(dev->q, &dev->identity)) { + pr_err("nvm: device could not be identified\n"); + ret = -EINVAL; + goto err; + } + + pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n", + dev->identity.ver_id, dev->identity.vmnt, + dev->identity.cgrps); + + if (dev->identity.ver_id != 1) { + pr_err("nvm: device not supported by kernel."); + goto err; + } + + if (dev->identity.cgrps != 1) { + pr_err("nvm: only one group configuration supported."); + goto err; + } + + ret = nvm_core_init(dev); + if (ret) { + pr_err("nvm: could not initialize core structures.\n"); + goto err; + } + + /* register with device with a supported manager */ + list_for_each_entry(mt, &nvm_mgrs, list) { + ret = mt->register_mgr(dev); + if (ret < 0) + goto err; /* initialization failed */ + if (ret > 0) { + dev->mt = mt; + break; /* successfully initialized */ + } + } + + if (!ret) { + pr_info("nvm: no compatible manager found.\n"); + return 0; + } + + pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n", + dev->name, dev->sec_per_pg, dev->nr_planes, + dev->pgs_per_blk, dev->blks_per_lun, dev->nr_luns, + dev->nr_chnls); + return 0; +err: + nvm_free(dev); + pr_err("nvm: failed to initialize nvm\n"); + return ret; +} + +static void nvm_exit(struct nvm_dev *dev) +{ + if (dev->ppalist_pool) + dev->ops->destroy_dma_pool(dev->ppalist_pool); + nvm_free(dev); + + pr_info("nvm: successfully unloaded\n"); +} + +int nvm_register(struct request_queue *q, char *disk_name, + struct 
nvm_dev_ops *ops) +{ + struct nvm_dev *dev; + int ret; + + if (!ops->identity) + return -EINVAL; + + dev = kzalloc(sizeof(struct nvm_dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->q = q; + dev->ops = ops; + strncpy(dev->name, disk_name, DISK_NAME_LEN); + + ret = nvm_init(dev); + if (ret) + goto err_init; + + down_write(&nvm_lock); + list_add(&dev->devices, &nvm_devices); + up_write(&nvm_lock); + + if (dev->ops->max_phys_sect > 1) { + dev->ppalist_pool = dev->ops->create_dma_pool(dev->q, + "ppalist"); + if (!dev->ppalist_pool) { + pr_err("nvm: could not create ppa pool\n"); + return -ENOMEM; + } + } else if (dev->ops->max_phys_sect > 256) { + pr_info("nvm: max sectors supported is 256.\n"); + return -EINVAL; + } + + return 0; +err_init: + kfree(dev); + return ret; +} +EXPORT_SYMBOL(nvm_register); + +void nvm_unregister(char *disk_name) +{ + struct nvm_dev *dev = nvm_find_nvm_dev(disk_name); + + if (!dev) { + pr_err("nvm: could not find device %s to unregister\n", + disk_name); + return; + } + + nvm_exit(dev); + + down_write(&nvm_lock); + list_del(&dev->devices); + up_write(&nvm_lock); +} +EXPORT_SYMBOL(nvm_unregister); + +static const struct block_device_operations nvm_fops = { + .owner = THIS_MODULE, +}; + +static int nvm_create_target(struct nvm_dev *dev, + struct nvm_ioctl_create *create) +{ + struct nvm_ioctl_create_simple *s = &create->conf.s; + struct request_queue *tqueue; + struct nvmm_type *mt; + struct gendisk *tdisk; + struct nvm_tgt_type *tt; + struct nvm_target *t; + void *targetdata; + int ret = 0; + + if (!dev->mt) { + /* register with device with a supported NVM manager */ + list_for_each_entry(mt, &nvm_mgrs, list) { + ret = mt->register_mgr(dev); + if (ret < 0) + return ret; /* initialization failed */ + if (ret > 0) { + dev->mt = mt; + break; /* successfully initialized */ + } + } + + if (!ret) { + pr_info("nvm: no compatible nvm manager found.\n"); + return -ENODEV; + } + } + + tt = nvm_find_target_type(create->tgttype); + if (!tt) { + pr_err("nvm: target type %s not found\n", create->tgttype); + return -EINVAL; + } + + down_write(&nvm_lock); + list_for_each_entry(t, &dev->online_targets, list) { + if (!strcmp(create->tgtname, t->disk->disk_name)) { + pr_err("nvm: target name already exists.\n"); + up_write(&nvm_lock); + return -EINVAL; + } + } + up_write(&nvm_lock); + + t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); + if (!t) + return -ENOMEM; + + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + if (!tqueue) + goto err_t; + blk_queue_make_request(tqueue, tt->make_rq); + + tdisk = alloc_disk(0); + if (!tdisk) + goto err_queue; + + sprintf(tdisk->disk_name, "%s", create->tgtname); + tdisk->flags = GENHD_FL_EXT_DEVT; + tdisk->major = 0; + tdisk->first_minor = 0; + tdisk->fops = &nvm_fops; + tdisk->queue = tqueue; + + targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end); + if (IS_ERR(targetdata)) + goto err_init; + + tdisk->private_data = targetdata; + tqueue->queuedata = targetdata; + + blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); + + set_capacity(tdisk, tt->capacity(targetdata)); + add_disk(tdisk); + + t->type = tt; + t->disk = tdisk; + + down_write(&nvm_lock); + list_add_tail(&t->list, &dev->online_targets); + up_write(&nvm_lock); + + return 0; +err_init: + put_disk(tdisk); +err_queue: + blk_cleanup_queue(tqueue); +err_t: + kfree(t); + return -ENOMEM; +} + +static void nvm_remove_target(struct nvm_target *t) +{ + struct nvm_tgt_type *tt = t->type; + struct gendisk *tdisk = t->disk; + struct request_queue *q = 
tdisk->queue; + + lockdep_assert_held(&nvm_lock); + + del_gendisk(tdisk); + if (tt->exit) + tt->exit(tdisk->private_data); + + blk_cleanup_queue(q); + + put_disk(tdisk); + + list_del(&t->list); + kfree(t); +} + +static int __nvm_configure_create(struct nvm_ioctl_create *create) +{ + struct nvm_dev *dev; + struct nvm_ioctl_create_simple *s; + + dev = nvm_find_nvm_dev(create->dev); + if (!dev) { + pr_err("nvm: device not found\n"); + return -EINVAL; + } + + if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { + pr_err("nvm: config type not valid\n"); + return -EINVAL; + } + s = &create->conf.s; + + if (s->lun_begin > s->lun_end || s->lun_end > dev->nr_luns) { + pr_err("nvm: lun out of bound (%u:%u > %u)\n", + s->lun_begin, s->lun_end, dev->nr_luns); + return -EINVAL; + } + + return nvm_create_target(dev, create); +} + +static int __nvm_configure_remove(struct nvm_ioctl_remove *remove) +{ + struct nvm_target *t = NULL; + struct nvm_dev *dev; + int ret = -1; + + down_write(&nvm_lock); + list_for_each_entry(dev, &nvm_devices, devices) + list_for_each_entry(t, &dev->online_targets, list) { + if (!strcmp(remove->tgtname, t->disk->disk_name)) { + nvm_remove_target(t); + ret = 0; + break; + } + } + up_write(&nvm_lock); + + if (ret) { + pr_err("nvm: target \"%s\" doesn't exist.\n", remove->tgtname); + return -EINVAL; + } + + return 0; +} + +#ifdef CONFIG_NVM_DEBUG +static int nvm_configure_show(const char *val) +{ + struct nvm_dev *dev; + char opcode, devname[DISK_NAME_LEN]; + int ret; + + ret = sscanf(val, "%c %32s", &opcode, devname); + if (ret != 2) { + pr_err("nvm: invalid command. Use \"opcode devicename\".\n"); + return -EINVAL; + } + + dev = nvm_find_nvm_dev(devname); + if (!dev) { + pr_err("nvm: device not found\n"); + return -EINVAL; + } + + if (!dev->mt) + return 0; + + dev->mt->free_blocks_print(dev); + + return 0; +} + +static int nvm_configure_remove(const char *val) +{ + struct nvm_ioctl_remove remove; + char opcode; + int ret; + + ret = sscanf(val, "%c %256s", &opcode, remove.tgtname); + if (ret != 2) { + pr_err("nvm: invalid command. Use \"d targetname\".\n"); + return -EINVAL; + } + + remove.flags = 0; + + return __nvm_configure_remove(&remove); +} + +static int nvm_configure_create(const char *val) +{ + struct nvm_ioctl_create create; + char opcode; + int lun_begin, lun_end, ret; + + ret = sscanf(val, "%c %256s %256s %48s %u:%u", &opcode, create.dev, + create.tgtname, create.tgttype, + &lun_begin, &lun_end); + if (ret != 6) { + pr_err("nvm: invalid command. 
Use \"opcode device name tgttype lun_begin:lun_end\".\n"); + return -EINVAL; + } + + create.flags = 0; + create.conf.type = NVM_CONFIG_TYPE_SIMPLE; + create.conf.s.lun_begin = lun_begin; + create.conf.s.lun_end = lun_end; + + return __nvm_configure_create(&create); +} + + +/* Exposes administrative interface through /sys/module/lnvm/configure_by_str */ +static int nvm_configure_by_str_event(const char *val, + const struct kernel_param *kp) +{ + char opcode; + int ret; + + ret = sscanf(val, "%c", &opcode); + if (ret != 1) { + pr_err("nvm: string must have the format of \"cmd ...\"\n"); + return -EINVAL; + } + + switch (opcode) { + case 'a': + return nvm_configure_create(val); + case 'd': + return nvm_configure_remove(val); + case 's': + return nvm_configure_show(val); + default: + pr_err("nvm: invalid command\n"); + return -EINVAL; + } + + return 0; +} + +static int nvm_configure_get(char *buf, const struct kernel_param *kp) +{ + int sz = 0; + char *buf_start = buf; + struct nvm_dev *dev; + + buf += sprintf(buf, "available devices:\n"); + down_write(&nvm_lock); + list_for_each_entry(dev, &nvm_devices, devices) { + if (sz > 4095 - DISK_NAME_LEN) + break; + buf += sprintf(buf, " %32s\n", dev->name); + } + up_write(&nvm_lock); + + return buf - buf_start - 1; +} + +static const struct kernel_param_ops nvm_configure_by_str_event_param_ops = { + .set = nvm_configure_by_str_event, + .get = nvm_configure_get, +}; + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "lnvm." + +module_param_cb(configure_debug, &nvm_configure_by_str_event_param_ops, NULL, + 0644); + +#endif /* CONFIG_NVM_DEBUG */ + +static long nvm_ioctl_info(struct file *file, void __user *arg) +{ + struct nvm_ioctl_info *info; + struct nvm_tgt_type *tt; + int tgt_iter = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + info = memdup_user(arg, sizeof(struct nvm_ioctl_info)); + if (IS_ERR(info)) + return -EFAULT; + + info->version[0] = NVM_VERSION_MAJOR; + info->version[1] = NVM_VERSION_MINOR; + info->version[2] = NVM_VERSION_PATCH; + + down_write(&nvm_lock); + list_for_each_entry(tt, &nvm_targets, list) { + struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter]; + + tgt->version[0] = tt->version[0]; + tgt->version[1] = tt->version[1]; + tgt->version[2] = tt->version[2]; + strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX); + + tgt_iter++; + } + + info->tgtsize = tgt_iter; + up_write(&nvm_lock); + + if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) + return -EFAULT; + + kfree(info); + return 0; +} + +static long nvm_ioctl_get_devices(struct file *file, void __user *arg) +{ + struct nvm_ioctl_get_devices *devices; + struct nvm_dev *dev; + int i = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL); + if (!devices) + return -ENOMEM; + + down_write(&nvm_lock); + list_for_each_entry(dev, &nvm_devices, devices) { + struct nvm_ioctl_device_info *info = &devices->info[i]; + + sprintf(info->devname, "%s", dev->name); + if (dev->mt) { + info->bmversion[0] = dev->mt->version[0]; + info->bmversion[1] = dev->mt->version[1]; + info->bmversion[2] = dev->mt->version[2]; + sprintf(info->bmname, "%s", dev->mt->name); + } else { + sprintf(info->bmname, "none"); + } + + i++; + if (i > 31) { + pr_err("nvm: max 31 devices can be reported.\n"); + break; + } + } + up_write(&nvm_lock); + + devices->nr_devices = i; + + if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices))) + return -EFAULT; + + kfree(devices); + return 0; +} + +static long 
nvm_ioctl_dev_create(struct file *file, void __user *arg) +{ + struct nvm_ioctl_create create; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) + return -EFAULT; + + create.dev[DISK_NAME_LEN - 1] = '\0'; + create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0'; + create.tgtname[DISK_NAME_LEN - 1] = '\0'; + + if (create.flags != 0) { + pr_err("nvm: no flags supported\n"); + return -EINVAL; + } + + return __nvm_configure_create(&create); +} + +static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) +{ + struct nvm_ioctl_remove remove; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) + return -EFAULT; + + remove.tgtname[DISK_NAME_LEN - 1] = '\0'; + + if (remove.flags != 0) { + pr_err("nvm: no flags supported\n"); + return -EINVAL; + } + + return __nvm_configure_remove(&remove); +} + +static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + + switch (cmd) { + case NVM_INFO: + return nvm_ioctl_info(file, argp); + case NVM_GET_DEVICES: + return nvm_ioctl_get_devices(file, argp); + case NVM_DEV_CREATE: + return nvm_ioctl_dev_create(file, argp); + case NVM_DEV_REMOVE: + return nvm_ioctl_dev_remove(file, argp); + } + return 0; +} + +static const struct file_operations _ctl_fops = { + .open = nonseekable_open, + .unlocked_ioctl = nvm_ctl_ioctl, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice _nvm_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "lightnvm", + .nodename = "lightnvm/control", + .fops = &_ctl_fops, +}; + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +static int __init nvm_mod_init(void) +{ + int ret; + + ret = misc_register(&_nvm_misc); + if (ret) + pr_err("nvm: misc_register failed for control device"); + + return ret; +} + +static void __exit nvm_mod_exit(void) +{ + misc_deregister(&_nvm_misc); +} + +MODULE_AUTHOR("Matias Bjorling "); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION("0.1"); +module_init(nvm_mod_init); +module_exit(nvm_mod_exit); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h new file mode 100644 index 000000000000..122b176600fa --- /dev/null +++ b/include/linux/lightnvm.h @@ -0,0 +1,522 @@ +#ifndef NVM_H +#define NVM_H + +enum { + NVM_IO_OK = 0, + NVM_IO_REQUEUE = 1, + NVM_IO_DONE = 2, + NVM_IO_ERR = 3, + + NVM_IOTYPE_NONE = 0, + NVM_IOTYPE_GC = 1, +}; + +#ifdef CONFIG_NVM + +#include +#include +#include +#include + +enum { + /* HW Responsibilities */ + NVM_RSP_L2P = 1 << 0, + NVM_RSP_ECC = 1 << 1, + + /* Physical Adressing Mode */ + NVM_ADDRMODE_LINEAR = 0, + NVM_ADDRMODE_CHANNEL = 1, + + /* Plane programming mode for LUN */ + NVM_PLANE_SINGLE = 0, + NVM_PLANE_DOUBLE = 1, + NVM_PLANE_QUAD = 2, + + /* Status codes */ + NVM_RSP_SUCCESS = 0x0, + NVM_RSP_NOT_CHANGEABLE = 0x1, + NVM_RSP_ERR_FAILWRITE = 0x40ff, + NVM_RSP_ERR_EMPTYPAGE = 0x42ff, + + /* Device opcodes */ + NVM_OP_HBREAD = 0x02, + NVM_OP_HBWRITE = 0x81, + NVM_OP_PWRITE = 0x91, + NVM_OP_PREAD = 0x92, + NVM_OP_ERASE = 0x90, + + /* PPA Command Flags */ + NVM_IO_SNGL_ACCESS = 0x0, + NVM_IO_DUAL_ACCESS = 0x1, + NVM_IO_QUAD_ACCESS = 0x2, + + NVM_IO_SUSPEND = 0x80, + NVM_IO_SLC_MODE = 0x100, + NVM_IO_SCRAMBLE_DISABLE = 0x200, +}; + +struct nvm_id_group { + u8 mtype; + u8 fmtype; + u16 res16; + u8 num_ch; + u8 num_lun; + u8 num_pln; + u16 num_blk; + u16 num_pg; + u16 fpg_sz; + u16 csecs; + u16 sos; + u32 trdt; + u32 trdm; + u32 tprt; + u32 tprm; + u32 tbet; + u32 
tbem; + u32 mpos; + u16 cpar; + u8 res[913]; +} __packed; + +struct nvm_addr_format { + u8 ch_offset; + u8 ch_len; + u8 lun_offset; + u8 lun_len; + u8 pln_offset; + u8 pln_len; + u8 blk_offset; + u8 blk_len; + u8 pg_offset; + u8 pg_len; + u8 sect_offset; + u8 sect_len; + u8 res[4]; +}; + +struct nvm_id { + u8 ver_id; + u8 vmnt; + u8 cgrps; + u8 res[5]; + u32 cap; + u32 dom; + struct nvm_addr_format ppaf; + u8 ppat; + u8 resv[224]; + struct nvm_id_group groups[4]; +} __packed; + +struct nvm_target { + struct list_head list; + struct nvm_tgt_type *type; + struct gendisk *disk; +}; + +struct nvm_tgt_instance { + struct nvm_tgt_type *tt; +}; + +#define ADDR_EMPTY (~0ULL) + +#define NVM_VERSION_MAJOR 1 +#define NVM_VERSION_MINOR 0 +#define NVM_VERSION_PATCH 0 + +#define NVM_SEC_BITS (8) +#define NVM_PL_BITS (6) +#define NVM_PG_BITS (16) +#define NVM_BLK_BITS (16) +#define NVM_LUN_BITS (10) +#define NVM_CH_BITS (8) + +struct ppa_addr { + union { + /* Channel-based PPA format in nand 4x2x2x2x8x10 */ + struct { + sector_t ch : 4; + sector_t sec : 2; /* 4 sectors per page */ + sector_t pl : 2; /* 4 planes per LUN */ + sector_t lun : 2; /* 4 LUNs per channel */ + sector_t pg : 8; /* 256 pages per block */ + sector_t blk : 10;/* 1024 blocks per plane */ + sector_t resved : 36; + } chnl; + + /* Generic structure for all addresses */ + struct { + sector_t sec : NVM_SEC_BITS; + sector_t pl : NVM_PL_BITS; + sector_t pg : NVM_PG_BITS; + sector_t blk : NVM_BLK_BITS; + sector_t lun : NVM_LUN_BITS; + sector_t ch : NVM_CH_BITS; + } g; + + sector_t ppa; + }; +} __packed; + +struct nvm_rq { + struct nvm_tgt_instance *ins; + struct nvm_dev *dev; + + struct bio *bio; + + union { + struct ppa_addr ppa_addr; + dma_addr_t dma_ppa_list; + }; + + struct ppa_addr *ppa_list; + + void *metadata; + dma_addr_t dma_metadata; + + uint8_t opcode; + uint16_t nr_pages; + uint16_t flags; +}; + +static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) +{ + return pdu - sizeof(struct nvm_rq); +} + +static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) +{ + return rqdata + 1; +} + +struct nvm_block; + +typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); +typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *); +typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); +typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, + nvm_l2p_update_fn *, void *); +typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int, + nvm_bb_update_fn *, void *); +typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); +typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); +typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *); +typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *); +typedef void (nvm_destroy_dma_pool_fn)(void *); +typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t, + dma_addr_t *); +typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); + +struct nvm_dev_ops { + nvm_id_fn *identity; + nvm_get_l2p_tbl_fn *get_l2p_tbl; + nvm_op_bb_tbl_fn *get_bb_tbl; + nvm_op_set_bb_fn *set_bb; + + nvm_submit_io_fn *submit_io; + nvm_erase_blk_fn *erase_block; + + nvm_create_dma_pool_fn *create_dma_pool; + nvm_destroy_dma_pool_fn *destroy_dma_pool; + nvm_dev_dma_alloc_fn *dev_dma_alloc; + nvm_dev_dma_free_fn *dev_dma_free; + + uint8_t max_phys_sect; +}; + +struct nvm_lun { + int id; + + int lun_id; + int chnl_id; + + unsigned int nr_free_blocks; /* Number of unused blocks */ + struct 
nvm_block *blocks; + + spinlock_t lock; +}; + +struct nvm_block { + struct list_head list; + struct nvm_lun *lun; + unsigned long id; + + void *priv; + int type; +}; + +struct nvm_dev { + struct nvm_dev_ops *ops; + + struct list_head devices; + struct list_head online_targets; + + /* Media manager */ + struct nvmm_type *mt; + void *mp; + + /* Device information */ + int nr_chnls; + int nr_planes; + int luns_per_chnl; + int sec_per_pg; /* only sectors for a single page */ + int pgs_per_blk; + int blks_per_lun; + int sec_size; + int oob_size; + int addr_mode; + struct nvm_addr_format addr_format; + + /* Calculated/Cached values. These do not reflect the actual usable + * blocks at run-time. + */ + int max_rq_size; + int plane_mode; /* drive device in single, double or quad mode */ + + int sec_per_pl; /* all sectors across planes */ + int sec_per_blk; + int sec_per_lun; + + unsigned long total_pages; + unsigned long total_blocks; + int nr_luns; + unsigned max_pages_per_blk; + + void *ppalist_pool; + + struct nvm_id identity; + + /* Backend device */ + struct request_queue *q; + char name[DISK_NAME_LEN]; +}; + +/* fallback conversion */ +static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev, + struct ppa_addr r) +{ + struct ppa_addr l; + + l.ppa = r.g.sec + + r.g.pg * dev->sec_per_pg + + r.g.blk * (dev->pgs_per_blk * + dev->sec_per_pg) + + r.g.lun * (dev->blks_per_lun * + dev->pgs_per_blk * + dev->sec_per_pg) + + r.g.ch * (dev->blks_per_lun * + dev->pgs_per_blk * + dev->luns_per_chnl * + dev->sec_per_pg); + + return l; +} + +/* fallback conversion */ +static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev, + struct ppa_addr r) +{ + struct ppa_addr l; + int secs, pgs, blks, luns; + sector_t ppa = r.ppa; + + l.ppa = 0; + + div_u64_rem(ppa, dev->sec_per_pg, &secs); + l.g.sec = secs; + + sector_div(ppa, dev->sec_per_pg); + div_u64_rem(ppa, dev->sec_per_blk, &pgs); + l.g.pg = pgs; + + sector_div(ppa, dev->pgs_per_blk); + div_u64_rem(ppa, dev->blks_per_lun, &blks); + l.g.blk = blks; + + sector_div(ppa, dev->blks_per_lun); + div_u64_rem(ppa, dev->luns_per_chnl, &luns); + l.g.lun = luns; + + sector_div(ppa, dev->luns_per_chnl); + l.g.ch = ppa; + + return l; +} + +static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r) +{ + struct ppa_addr l; + + l.ppa = 0; + + l.chnl.sec = r.g.sec; + l.chnl.pl = r.g.pl; + l.chnl.pg = r.g.pg; + l.chnl.blk = r.g.blk; + l.chnl.lun = r.g.lun; + l.chnl.ch = r.g.ch; + + return l; +} + +static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r) +{ + struct ppa_addr l; + + l.ppa = 0; + + l.g.sec = r.chnl.sec; + l.g.pl = r.chnl.pl; + l.g.pg = r.chnl.pg; + l.g.blk = r.chnl.blk; + l.g.lun = r.chnl.lun; + l.g.ch = r.chnl.ch; + + return l; +} + +static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev, + struct ppa_addr gppa) +{ + switch (dev->addr_mode) { + case NVM_ADDRMODE_LINEAR: + return __linear_to_generic_addr(dev, gppa); + case NVM_ADDRMODE_CHANNEL: + return __chnl_to_generic_addr(gppa); + default: + BUG(); + } + return gppa; +} + +static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev, + struct ppa_addr gppa) +{ + switch (dev->addr_mode) { + case NVM_ADDRMODE_LINEAR: + return __generic_to_linear_addr(dev, gppa); + case NVM_ADDRMODE_CHANNEL: + return __generic_to_chnl_addr(gppa); + default: + BUG(); + } + return gppa; +} + +static inline int ppa_empty(struct ppa_addr ppa_addr) +{ + return (ppa_addr.ppa == ADDR_EMPTY); +} + +static inline void ppa_set_empty(struct ppa_addr *ppa_addr) +{ + ppa_addr->ppa 
= ADDR_EMPTY; +} + +static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, + struct nvm_block *blk) +{ + struct ppa_addr ppa; + struct nvm_lun *lun = blk->lun; + + ppa.ppa = 0; + ppa.g.blk = blk->id % dev->blks_per_lun; + ppa.g.lun = lun->lun_id; + ppa.g.ch = lun->chnl_id; + + return ppa; +} + +typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); +typedef sector_t (nvm_tgt_capacity_fn)(void *); +typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int); +typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int); +typedef void (nvm_tgt_exit_fn)(void *); + +struct nvm_tgt_type { + const char *name; + unsigned int version[3]; + + /* target entry points */ + nvm_tgt_make_rq_fn *make_rq; + nvm_tgt_capacity_fn *capacity; + nvm_tgt_end_io_fn *end_io; + + /* module-specific init/teardown */ + nvm_tgt_init_fn *init; + nvm_tgt_exit_fn *exit; + + /* For internal use */ + struct list_head list; +}; + +extern int nvm_register_target(struct nvm_tgt_type *); +extern void nvm_unregister_target(struct nvm_tgt_type *); + +extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); +extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); + +typedef int (nvmm_register_fn)(struct nvm_dev *); +typedef void (nvmm_unregister_fn)(struct nvm_dev *); +typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *, + struct nvm_lun *, unsigned long); +typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvmm_end_io_fn)(struct nvm_rq *, int); +typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, + unsigned long); +typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); +typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *); + +struct nvmm_type { + const char *name; + unsigned int version[3]; + + nvmm_register_fn *register_mgr; + nvmm_unregister_fn *unregister_mgr; + + /* Block administration callbacks */ + nvmm_get_blk_fn *get_blk; + nvmm_put_blk_fn *put_blk; + nvmm_open_blk_fn *open_blk; + nvmm_close_blk_fn *close_blk; + nvmm_flush_blk_fn *flush_blk; + + nvmm_submit_io_fn *submit_io; + nvmm_end_io_fn *end_io; + nvmm_erase_blk_fn *erase_blk; + + /* Configuration management */ + nvmm_get_lun_fn *get_lun; + + /* Statistics */ + nvmm_free_blocks_print_fn *free_blocks_print; + struct list_head list; +}; + +extern int nvm_register_mgr(struct nvmm_type *); +extern void nvm_unregister_mgr(struct nvmm_type *); + +extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *, + unsigned long); +extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *); + +extern int nvm_register(struct request_queue *, char *, + struct nvm_dev_ops *); +extern void nvm_unregister(char *); + +extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); +extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *); +#else /* CONFIG_NVM */ +struct nvm_dev_ops; + +static inline int nvm_register(struct request_queue *q, char *disk_name, + struct nvm_dev_ops *ops) +{ + return -EINVAL; +} +static inline void nvm_unregister(char *disk_name) {} +#endif /* CONFIG_NVM */ +#endif /* LIGHTNVM.H */ diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h new file mode 100644 index 000000000000..928f98997d8a --- 
/dev/null +++ b/include/uapi/linux/lightnvm.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2015 CNEX Labs. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +#ifndef _UAPI_LINUX_LIGHTNVM_H +#define _UAPI_LINUX_LIGHTNVM_H + +#ifdef __KERNEL__ +#include +#include +#else /* __KERNEL__ */ +#include +#include +#define DISK_NAME_LEN 32 +#endif /* __KERNEL__ */ + +#include +#include + +#define NVM_TTYPE_NAME_MAX 48 +#define NVM_TTYPE_MAX 63 + +#define NVM_CTRL_FILE "/dev/lightnvm/control" + +struct nvm_ioctl_info_tgt { + __u32 version[3]; + __u32 reserved; + char tgtname[NVM_TTYPE_NAME_MAX]; +}; + +struct nvm_ioctl_info { + __u32 version[3]; /* in/out - major, minor, patch */ + __u16 tgtsize; /* number of targets */ + __u16 reserved16; /* pad to 4K page */ + __u32 reserved[12]; + struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX]; +}; + +enum { + NVM_DEVICE_ACTIVE = 1 << 0, +}; + +struct nvm_ioctl_device_info { + char devname[DISK_NAME_LEN]; + char bmname[NVM_TTYPE_NAME_MAX]; + __u32 bmversion[3]; + __u32 flags; + __u32 reserved[8]; +}; + +struct nvm_ioctl_get_devices { + __u32 nr_devices; + __u32 reserved[31]; + struct nvm_ioctl_device_info info[31]; +}; + +struct nvm_ioctl_create_simple { + __u32 lun_begin; + __u32 lun_end; +}; + +enum { + NVM_CONFIG_TYPE_SIMPLE = 0, +}; + +struct nvm_ioctl_create_conf { + __u32 type; + union { + struct nvm_ioctl_create_simple s; + }; +}; + +struct nvm_ioctl_create { + char dev[DISK_NAME_LEN]; /* open-channel SSD device */ + char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */ + char tgtname[DISK_NAME_LEN]; /* dev to expose target as */ + + __u32 flags; + + struct nvm_ioctl_create_conf conf; +}; + +struct nvm_ioctl_remove { + char tgtname[DISK_NAME_LEN]; + + __u32 flags; +}; + + +/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ +enum { + /* top level cmds */ + NVM_INFO_CMD = 0x20, + NVM_GET_DEVICES_CMD, + + /* device level cmds */ + NVM_DEV_CREATE_CMD, + NVM_DEV_REMOVE_CMD, +}; + +#define NVM_IOCTL 'L' /* 0x4c */ + +#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \ + struct nvm_ioctl_info) +#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \ + struct nvm_ioctl_get_devices) +#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \ + struct nvm_ioctl_create) +#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \ + struct nvm_ioctl_remove) + +#define NVM_VERSION_MAJOR 1 +#define NVM_VERSION_MINOR 0 +#define NVM_VERSION_PATCHLEVEL 0 + +#endif -- cgit v1.2.3 From 1e0d69a9cc9172d7896c2113f983a74f6e8ff303 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 28 Oct 2015 13:21:03 +0100 Subject: Revert "Merge branch 'ipv6-overflow-arith'" Linus dislikes these changes. To not hold up the net-merge let's revert it for now and fix the bug like Linus suggested. This reverts commit ec3661b42257d9a06cf0d318175623ac7a660113, reversing changes made to c80dbe04612986fd6104b4a1be21681b113b5ac9. 
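For context, the helper being deleted below guarded ip6_fragment() against unsigned underflow when the headers are larger than the MTU. A small userspace sketch of the wrap it caught (the helper body is copied from the removed include/linux/overflow-arith.h):

  #include <stdbool.h>
  #include <stdio.h>

  static inline bool overflow_usub(unsigned int a, unsigned int b,
                                   unsigned int *res)
  {
          *res = a - b;
          return *res > a ? true : false;
  }

  int main(void)
  {
          unsigned int mtu;

          if (!overflow_usub(1280, 48, &mtu))
                  printf("ok, mtu=%u\n", mtu);        /* ok, mtu=1232 */
          if (overflow_usub(40, 48, &mtu))
                  printf("wrapped, mtu=%u\n", mtu);   /* wrapped, mtu=4294967288 */
          return 0;
  }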
Cc: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/compiler-gcc.h | 4 ---- include/linux/overflow-arith.h | 18 ------------------ net/ipv6/ip6_output.c | 6 +----- 3 files changed, 1 insertion(+), 27 deletions(-) delete mode 100644 include/linux/overflow-arith.h (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 82c159e0532a..dfaa7b3e9ae9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,10 +237,6 @@ #define KASAN_ABI_VERSION 3 #endif -#if GCC_VERSION >= 50000 -#define CC_HAVE_BUILTIN_OVERFLOW -#endif - #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h deleted file mode 100644 index e12ccf854a70..000000000000 --- a/include/linux/overflow-arith.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - -#ifdef CC_HAVE_BUILTIN_OVERFLOW - -#define overflow_usub __builtin_usub_overflow - -#else - -static inline bool overflow_usub(unsigned int a, unsigned int b, - unsigned int *res) -{ - *res = a - b; - return *res > a ? true : false; -} - -#endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8dddb45c433e..d03d6da772f3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -28,7 +28,6 @@ #include #include -#include #include #include #include @@ -585,10 +584,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } - - if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) || - mtu <= 7) - goto fail_toobig; + mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); -- cgit v1.2.3 From b7c08cf85c9a3a4b05474b7acacc9fbce8fb3eaf Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Wed, 28 Oct 2015 15:13:42 +0200 Subject: spi: pxa2xx: Add support for Intel Broxton LPSS SPI in Intel Broxton is otherwise the same as in Intel Sunrisepoint, but it supports up to four chip selects per port and has different FIFO thresholds. This patch adds support for two Broxton SoC variants.
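As a hedged sketch of how the new table entry is consumed (the function body below is inferred from the driver context shown in the diff that follows, where the enum comment requires the LPSS types to stay sorted with lpss_platforms[]; it is not part of this patch):

        static inline const struct lpss_config
        *lpss_get_config(const struct driver_data *drv_data)
        {
                /* LPSS_*_SSP enum values index lpss_platforms[] directly */
                return &lpss_platforms[drv_data->ssp_type - LPSS_LPT_SSP];
        }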
Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 20 ++++++++++++++++++++ include/linux/pxa2xx_ssp.h | 1 + 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index a5c2dce7d0a3..f759c082f0f7 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -116,6 +116,16 @@ static const struct lpss_config lpss_platforms[] = { .tx_threshold_lo = 32, .tx_threshold_hi = 56, }, + { /* LPSS_BXT_SSP */ + .offset = 0x200, + .reg_general = -1, + .reg_ssp = 0x20, + .reg_cs_ctrl = 0x24, + .reg_capabilities = 0xfc, + .rx_threshold = 1, + .tx_threshold_lo = 16, + .tx_threshold_hi = 48, + }, }; static inline const struct lpss_config @@ -130,6 +140,7 @@ static bool is_lpss_ssp(const struct driver_data *drv_data) case LPSS_LPT_SSP: case LPSS_BYT_SSP: case LPSS_SPT_SSP: + case LPSS_BXT_SSP: return true; default: return false; @@ -1152,6 +1163,7 @@ static int setup(struct spi_device *spi) case LPSS_LPT_SSP: case LPSS_BYT_SSP: case LPSS_SPT_SSP: + case LPSS_BXT_SSP: config = lpss_get_config(drv_data); tx_thres = config->tx_threshold_lo; tx_hi_thres = config->tx_threshold_hi; @@ -1313,6 +1325,14 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = { /* SPT-H */ { PCI_VDEVICE(INTEL, 0xa129), LPSS_SPT_SSP }, { PCI_VDEVICE(INTEL, 0xa12a), LPSS_SPT_SSP }, + /* BXT */ + { PCI_VDEVICE(INTEL, 0x0ac2), LPSS_BXT_SSP }, + { PCI_VDEVICE(INTEL, 0x0ac4), LPSS_BXT_SSP }, + { PCI_VDEVICE(INTEL, 0x0ac6), LPSS_BXT_SSP }, + /* APL */ + { PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP }, + { PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP }, + { PCI_VDEVICE(INTEL, 0x5ac6), LPSS_BXT_SSP }, { }, }; diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 92273776bce6..c2f2574ff61c 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -198,6 +198,7 @@ enum pxa_ssp_type { LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */ LPSS_BYT_SSP, LPSS_SPT_SSP, + LPSS_BXT_SSP, }; struct ssp_device { -- cgit v1.2.3 From a519435a96597d8cd96123246fea4ae5a6c90b02 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 20 Oct 2015 16:34:16 +0200 Subject: dma-buf/fence: add fence_wait_any_timeout function v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Waiting for the first fence in an array of fences to signal. This is useful for device driver specific resource managers and also Vulkan needs something similar. v2: more parameter checks, handling for timeout==0, remove NULL entry support, better callback removal. 
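A usage sketch (not part of this patch; caller and fence names are illustrative): the caller must hold a reference on every fence in the array for the whole wait, and the return value follows the convention in the kernel-doc added below.

        struct fence *fences[2] = { fence_a, fence_b };  /* both referenced by the caller */
        signed long r;

        r = fence_wait_any_timeout(fences, 2, true, msecs_to_jiffies(1000));
        if (r == 0)
                ;       /* timed out: no fence signaled within one second */
        else if (r < 0)
                ;       /* -ERESTARTSYS if interrupted, -EINVAL for custom wait ops */
        else
                ;       /* at least one fence signaled; r is the remaining timeout in jiffies */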
Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Maarten Lankhorst --- drivers/dma-buf/fence.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fence.h | 3 +- 2 files changed, 100 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 50ef8bd8708b..7b05dbe9b296 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -397,6 +397,104 @@ out: } EXPORT_SYMBOL(fence_default_wait); +static bool +fence_test_signaled_any(struct fence **fences, uint32_t count) +{ + int i; + + for (i = 0; i < count; ++i) { + struct fence *fence = fences[i]; + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return true; + } + return false; +} + +/** + * fence_wait_any_timeout - sleep until any fence gets signaled + * or until timeout elapses + * @fences: [in] array of fences to wait on + * @count: [in] number of fences to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT + * + * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if + * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies + * on success. + * + * Synchronous waits for the first fence in the array to be signaled. The + * caller needs to hold a reference to all fences in the array, otherwise a + * fence might be freed before return, resulting in undefined behavior. + */ +signed long +fence_wait_any_timeout(struct fence **fences, uint32_t count, + bool intr, signed long timeout) +{ + struct default_wait_cb *cb; + signed long ret = timeout; + unsigned i; + + if (WARN_ON(!fences || !count || timeout < 0)) + return -EINVAL; + + if (timeout == 0) { + for (i = 0; i < count; ++i) + if (fence_is_signaled(fences[i])) + return 1; + + return 0; + } + + cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL); + if (cb == NULL) { + ret = -ENOMEM; + goto err_free_cb; + } + + for (i = 0; i < count; ++i) { + struct fence *fence = fences[i]; + + if (fence->ops->wait != fence_default_wait) { + ret = -EINVAL; + goto fence_rm_cb; + } + + cb[i].task = current; + if (fence_add_callback(fence, &cb[i].base, + fence_default_wait_cb)) { + /* This fence is already signaled */ + goto fence_rm_cb; + } + } + + while (ret > 0) { + if (intr) + set_current_state(TASK_INTERRUPTIBLE); + else + set_current_state(TASK_UNINTERRUPTIBLE); + + if (fence_test_signaled_any(fences, count)) + break; + + ret = schedule_timeout(ret); + + if (ret > 0 && intr && signal_pending(current)) + ret = -ERESTARTSYS; + } + + __set_current_state(TASK_RUNNING); + +fence_rm_cb: + while (i-- > 0) + fence_remove_callback(fences[i], &cb[i].base); + +err_free_cb: + kfree(cb); + + return ret; +} +EXPORT_SYMBOL(fence_wait_any_timeout); + /** * fence_init - Initialize a custom fence. 
* @fence: [in] the fence to initialize diff --git a/include/linux/fence.h b/include/linux/fence.h index 39efee130d2b..a4084d6bb851 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -305,7 +305,8 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) } signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); - +signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, + bool intr, signed long timeout); /** * fence_wait - sleep until the fence gets signaled -- cgit v1.2.3 From 6c455ac17bcf4beae6c094a1007b976b60b4bb57 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 21 Oct 2015 12:58:17 +0200 Subject: dma-buf/fence: add fence_is_later() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return true when fence 1 is later than fence 2 without checking if any of them are signaled. Useful for driver specific resource handling based on fences. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- include/linux/fence.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index a4084d6bb851..bb522011383b 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -279,6 +279,22 @@ fence_is_signaled(struct fence *fence) return false; } +/** + * fence_is_later - return if f1 is chronologically later than f2 + * @f1: [in] the first fence from the same context + * @f2: [in] the second fence from the same context + * + * Returns true if f1 is chronologically later than f2. Both fences must be + * from the same context, since a seqno is not re-used across contexts. + */ +static inline bool fence_is_later(struct fence *f1, struct fence *f2) +{ + if (WARN_ON(f1->context != f2->context)) + return false; + + return f1->seqno - f2->seqno < INT_MAX; +} + /** * fence_later - return the chronologically later fence * @f1: [in] the first fence from the same context @@ -298,10 +314,10 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) * set if enable_signaling wasn't called, and enabling that here is * overkill. */ - if (f2->seqno - f1->seqno <= INT_MAX) - return fence_is_signaled(f2) ? NULL : f2; - else + if (fence_is_later(f1, f2)) return fence_is_signaled(f1) ? NULL : f1; + else + return fence_is_signaled(f2) ? NULL : f2; } signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); -- cgit v1.2.3 From d9e4ad5badf4ccbfddee208c898fb8fd0c8836b1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 28 Oct 2015 16:14:31 +0900 Subject: Document that IRQ_NONE should be returned when IRQ not actually handled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our IRQ storm detection works when an interrupt handler returns IRQ_NONE for thousands of consecutive interrupts in a second. It doesn't hurt to occasionally return IRQ_NONE when the interrupt is actually genuine. Drivers should only be returning IRQ_HANDLED if they have actually *done* something to stop an interrupt from happening — it doesn't just mean "this really *was* my device". 
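A minimal sketch of the convention being documented (device, register and field names here are hypothetical, not from any real driver):

        static irqreturn_t foo_isr(int irq, void *dev_id)
        {
                struct foo_dev *foo = dev_id;
                u32 stat = readl(foo->base + FOO_INT_STATUS);   /* hypothetical status register */

                if (!stat)
                        return IRQ_NONE;        /* nothing was pending for us */

                writel(stat, foo->base + FOO_INT_ACK);  /* actually quiesce the source */
                return IRQ_HANDLED;
        }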
Signed-off-by: David Woodhouse Cc: davem@davemloft.net Link: http://lkml.kernel.org/r/1446016471.3405.201.camel@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/irqreturn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index e374e369fb2f..eb1bdcf95f2e 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -3,7 +3,7 @@ /** * enum irqreturn - * @IRQ_NONE interrupt was not from this device + * @IRQ_NONE interrupt was not from this device or was not handled * @IRQ_HANDLED interrupt was handled by this device * @IRQ_WAKE_THREAD handler requests to wake the handler thread */ -- cgit v1.2.3 From 6bb3b3acc3d096b938887c90f4bb19a639b99852 Mon Sep 17 00:00:00 2001 From: Qipeng Zha Date: Tue, 15 Sep 2015 00:39:18 +0800 Subject: mfd: intel_soc_pmic: Add support for Broxton WC PMIC IRQ control registers of the Intel Broxton Whisky Cove PMIC are separated into two parts, so add a secondary IRQ chip. The new device member will be used by the PMC IPC regmap APIs. Signed-off-by: Qipeng Zha Signed-off-by: Lee Jones --- include/linux/mfd/intel_soc_pmic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index abcbfcf32d10..cf619dbeace2 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -25,6 +25,8 @@ struct intel_soc_pmic { int irq; struct regmap *regmap; struct regmap_irq_chip_data *irq_chip_data; + struct regmap_irq_chip_data *irq_chip_data_level2; + struct device *dev; }; #endif /* __INTEL_SOC_PMIC_H__ */ -- cgit v1.2.3 From 39d047c0b1c812e9f0014e7100e372e61f2de3de Mon Sep 17 00:00:00 2001 From: Qipeng Zha Date: Tue, 15 Sep 2015 00:39:19 +0800 Subject: mfd: add Intel Broxton Whiskey Cove PMIC driver Add MFD core driver for Intel Broxton Whiskey Cove PMIC, which is accessed via hardware IPC rather than as a generic I2C device. Signed-off-by: Qipeng Zha Signed-off-by: Lee Jones --- drivers/mfd/Makefile | 1 + drivers/mfd/intel_soc_pmic_bxtwc.c | 478 +++++++++++++++++++++++++++++++++++++ include/linux/mfd/intel_bxtwc.h | 69 ++++++ 3 files changed, 548 insertions(+) create mode 100644 drivers/mfd/intel_soc_pmic_bxtwc.c create mode 100644 include/linux/mfd/intel_bxtwc.h (limited to 'include/linux') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index a59e3fcc8626..d6c21e3c409f 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -190,5 +190,6 @@ obj-$(CONFIG_MFD_RT5033) += rt5033.o obj-$(CONFIG_MFD_SKY81452) += sky81452.o intel-soc-pmic-objs := intel_soc_pmic_core.o intel_soc_pmic_crc.o +intel-soc-pmic-$(CONFIG_INTEL_PMC_IPC) += intel_soc_pmic_bxtwc.o obj-$(CONFIG_INTEL_SOC_PMIC) += intel-soc-pmic.o obj-$(CONFIG_MFD_MT6397) += mt6397-core.o diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c new file mode 100644 index 000000000000..40acaff2e1c3 --- /dev/null +++ b/drivers/mfd/intel_soc_pmic_bxtwc.c @@ -0,0 +1,478 @@ +/* + * MFD core driver for Intel Broxton Whiskey Cove PMIC + * + * Copyright (C) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation.
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* PMIC device registers */ +#define REG_ADDR_MASK 0xFF00 +#define REG_ADDR_SHIFT 8 +#define REG_OFFSET_MASK 0xFF + +/* Interrupt Status Registers */ +#define BXTWC_IRQLVL1 0x4E02 +#define BXTWC_PWRBTNIRQ 0x4E03 + +#define BXTWC_THRM0IRQ 0x4E04 +#define BXTWC_THRM1IRQ 0x4E05 +#define BXTWC_THRM2IRQ 0x4E06 +#define BXTWC_BCUIRQ 0x4E07 +#define BXTWC_ADCIRQ 0x4E08 +#define BXTWC_CHGR0IRQ 0x4E09 +#define BXTWC_CHGR1IRQ 0x4E0A +#define BXTWC_GPIOIRQ0 0x4E0B +#define BXTWC_GPIOIRQ1 0x4E0C +#define BXTWC_CRITIRQ 0x4E0D + +/* Interrupt MASK Registers */ +#define BXTWC_MIRQLVL1 0x4E0E +#define BXTWC_MPWRTNIRQ 0x4E0F + +#define BXTWC_MTHRM0IRQ 0x4E12 +#define BXTWC_MTHRM1IRQ 0x4E13 +#define BXTWC_MTHRM2IRQ 0x4E14 +#define BXTWC_MBCUIRQ 0x4E15 +#define BXTWC_MADCIRQ 0x4E16 +#define BXTWC_MCHGR0IRQ 0x4E17 +#define BXTWC_MCHGR1IRQ 0x4E18 +#define BXTWC_MGPIO0IRQ 0x4E19 +#define BXTWC_MGPIO1IRQ 0x4E1A +#define BXTWC_MCRITIRQ 0x4E1B + +/* Whiskey Cove PMIC share same ACPI ID between different platforms */ +#define BROXTON_PMIC_WC_HRV 4 + +/* Manage in two IRQ chips since mask registers are not consecutive */ +enum bxtwc_irqs { + /* Level 1 */ + BXTWC_PWRBTN_LVL1_IRQ = 0, + BXTWC_TMU_LVL1_IRQ, + BXTWC_THRM_LVL1_IRQ, + BXTWC_BCU_LVL1_IRQ, + BXTWC_ADC_LVL1_IRQ, + BXTWC_CHGR_LVL1_IRQ, + BXTWC_GPIO_LVL1_IRQ, + BXTWC_CRIT_LVL1_IRQ, + + /* Level 2 */ + BXTWC_PWRBTN_IRQ, +}; + +enum bxtwc_irqs_level2 { + /* Level 2 */ + BXTWC_THRM0_IRQ = 0, + BXTWC_THRM1_IRQ, + BXTWC_THRM2_IRQ, + BXTWC_BCU_IRQ, + BXTWC_ADC_IRQ, + BXTWC_CHGR0_IRQ, + BXTWC_CHGR1_IRQ, + BXTWC_GPIO0_IRQ, + BXTWC_GPIO1_IRQ, + BXTWC_CRIT_IRQ, +}; + +static const struct regmap_irq bxtwc_regmap_irqs[] = { + REGMAP_IRQ_REG(BXTWC_PWRBTN_LVL1_IRQ, 0, BIT(0)), + REGMAP_IRQ_REG(BXTWC_TMU_LVL1_IRQ, 0, BIT(1)), + REGMAP_IRQ_REG(BXTWC_THRM_LVL1_IRQ, 0, BIT(2)), + REGMAP_IRQ_REG(BXTWC_BCU_LVL1_IRQ, 0, BIT(3)), + REGMAP_IRQ_REG(BXTWC_ADC_LVL1_IRQ, 0, BIT(4)), + REGMAP_IRQ_REG(BXTWC_CHGR_LVL1_IRQ, 0, BIT(5)), + REGMAP_IRQ_REG(BXTWC_GPIO_LVL1_IRQ, 0, BIT(6)), + REGMAP_IRQ_REG(BXTWC_CRIT_LVL1_IRQ, 0, BIT(7)), + REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 1, 0x03), +}; + +static const struct regmap_irq bxtwc_regmap_irqs_level2[] = { + REGMAP_IRQ_REG(BXTWC_THRM0_IRQ, 0, 0xff), + REGMAP_IRQ_REG(BXTWC_THRM1_IRQ, 1, 0xbf), + REGMAP_IRQ_REG(BXTWC_THRM2_IRQ, 2, 0xff), + REGMAP_IRQ_REG(BXTWC_BCU_IRQ, 3, 0x1f), + REGMAP_IRQ_REG(BXTWC_ADC_IRQ, 4, 0xff), + REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x1f), + REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 6, 0x1f), + REGMAP_IRQ_REG(BXTWC_GPIO0_IRQ, 7, 0xff), + REGMAP_IRQ_REG(BXTWC_GPIO1_IRQ, 8, 0x3f), + REGMAP_IRQ_REG(BXTWC_CRIT_IRQ, 9, 0x03), +}; + +static struct regmap_irq_chip bxtwc_regmap_irq_chip = { + .name = "bxtwc_irq_chip", + .status_base = BXTWC_IRQLVL1, + .mask_base = BXTWC_MIRQLVL1, + .irqs = bxtwc_regmap_irqs, + .num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs), + .num_regs = 2, +}; + +static struct regmap_irq_chip bxtwc_regmap_irq_chip_level2 = { + .name = "bxtwc_irq_chip_level2", + .status_base = BXTWC_THRM0IRQ, + .mask_base = BXTWC_MTHRM0IRQ, + .irqs = bxtwc_regmap_irqs_level2, + .num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_level2), + .num_regs = 10, +}; + +static struct resource gpio_resources[] = { + 
DEFINE_RES_IRQ_NAMED(BXTWC_GPIO0_IRQ, "GPIO0"), + DEFINE_RES_IRQ_NAMED(BXTWC_GPIO1_IRQ, "GPIO1"), +}; + +static struct resource adc_resources[] = { + DEFINE_RES_IRQ_NAMED(BXTWC_ADC_IRQ, "ADC"), +}; + +static struct resource charger_resources[] = { + DEFINE_RES_IRQ_NAMED(BXTWC_CHGR0_IRQ, "CHARGER"), + DEFINE_RES_IRQ_NAMED(BXTWC_CHGR1_IRQ, "CHARGER1"), +}; + +static struct resource thermal_resources[] = { + DEFINE_RES_IRQ(BXTWC_THRM0_IRQ), + DEFINE_RES_IRQ(BXTWC_THRM1_IRQ), + DEFINE_RES_IRQ(BXTWC_THRM2_IRQ), +}; + +static struct resource bcu_resources[] = { + DEFINE_RES_IRQ_NAMED(BXTWC_BCU_IRQ, "BCU"), +}; + +static struct mfd_cell bxt_wc_dev[] = { + { + .name = "bxt_wcove_gpadc", + .num_resources = ARRAY_SIZE(adc_resources), + .resources = adc_resources, + }, + { + .name = "bxt_wcove_thermal", + .num_resources = ARRAY_SIZE(thermal_resources), + .resources = thermal_resources, + }, + { + .name = "bxt_wcove_ext_charger", + .num_resources = ARRAY_SIZE(charger_resources), + .resources = charger_resources, + }, + { + .name = "bxt_wcove_bcu", + .num_resources = ARRAY_SIZE(bcu_resources), + .resources = bcu_resources, + }, + { + .name = "bxt_wcove_gpio", + .num_resources = ARRAY_SIZE(gpio_resources), + .resources = gpio_resources, + }, + { + .name = "bxt_wcove_region", + }, +}; + +static int regmap_ipc_byte_reg_read(void *context, unsigned int reg, + unsigned int *val) +{ + int ret; + int i2c_addr; + u8 ipc_in[2]; + u8 ipc_out[4]; + struct intel_soc_pmic *pmic = context; + + if (reg & REG_ADDR_MASK) + i2c_addr = (reg & REG_ADDR_MASK) >> REG_ADDR_SHIFT; + else { + i2c_addr = BXTWC_DEVICE1_ADDR; + if (!i2c_addr) { + dev_err(pmic->dev, "I2C address not set\n"); + return -EINVAL; + } + } + reg &= REG_OFFSET_MASK; + + ipc_in[0] = reg; + ipc_in[1] = i2c_addr; + ret = intel_pmc_ipc_command(PMC_IPC_PMIC_ACCESS, + PMC_IPC_PMIC_ACCESS_READ, + ipc_in, sizeof(ipc_in), (u32 *)ipc_out, 1); + if (ret) { + dev_err(pmic->dev, "Failed to read from PMIC\n"); + return ret; + } + *val = ipc_out[0]; + + return 0; +} + +static int regmap_ipc_byte_reg_write(void *context, unsigned int reg, + unsigned int val) +{ + int ret; + int i2c_addr; + u8 ipc_in[3]; + struct intel_soc_pmic *pmic = context; + + if (reg & REG_ADDR_MASK) + i2c_addr = (reg & REG_ADDR_MASK) >> REG_ADDR_SHIFT; + else { + i2c_addr = BXTWC_DEVICE1_ADDR; + if (!i2c_addr) { + dev_err(pmic->dev, "I2C address not set\n"); + return -EINVAL; + } + } + reg &= REG_OFFSET_MASK; + + ipc_in[0] = reg; + ipc_in[1] = i2c_addr; + ipc_in[2] = val; + ret = intel_pmc_ipc_command(PMC_IPC_PMIC_ACCESS, + PMC_IPC_PMIC_ACCESS_WRITE, + ipc_in, sizeof(ipc_in), NULL, 0); + if (ret) { + dev_err(pmic->dev, "Failed to write to PMIC\n"); + return ret; + } + + return 0; +} + +/* sysfs interfaces to r/w PMIC registers, required by initial script */ +static unsigned long bxtwc_reg_addr; +static ssize_t bxtwc_reg_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%lx\n", bxtwc_reg_addr); +} + +static ssize_t bxtwc_reg_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + if (kstrtoul(buf, 0, &bxtwc_reg_addr)) { + dev_err(dev, "Invalid register address\n"); + return -EINVAL; + } + return (ssize_t)count; +} + +static ssize_t bxtwc_val_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + unsigned int val; + struct intel_soc_pmic *pmic = dev_get_drvdata(dev); + + ret = regmap_read(pmic->regmap, bxtwc_reg_addr, &val); + if (ret < 0) { + dev_err(dev, "Failed to read 0x%lx\n", 
bxtwc_reg_addr); + return -EIO; + } + + return sprintf(buf, "0x%02x\n", val); +} + +static ssize_t bxtwc_val_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + unsigned int val; + struct intel_soc_pmic *pmic = dev_get_drvdata(dev); + + if (kstrtoul(buf, 0, (unsigned long *)&val)) { + dev_err(dev, "Invalid register value\n"); + return -EINVAL; + } + + ret = regmap_write(pmic->regmap, bxtwc_reg_addr, val); + if (ret) { + dev_err(dev, "Failed to write value 0x%02x to address 0x%lx", + val, bxtwc_reg_addr); + return -EIO; + } + return count; +} + +static DEVICE_ATTR(addr, S_IWUSR | S_IRUSR, bxtwc_reg_show, bxtwc_reg_store); +static DEVICE_ATTR(val, S_IWUSR | S_IRUSR, bxtwc_val_show, bxtwc_val_store); +static struct attribute *bxtwc_attrs[] = { + &dev_attr_addr.attr, + &dev_attr_val.attr, + NULL +}; + +static const struct attribute_group bxtwc_group = { + .attrs = bxtwc_attrs, +}; + +static const struct regmap_config bxtwc_regmap_config = { + .reg_bits = 16, + .val_bits = 8, + .reg_write = regmap_ipc_byte_reg_write, + .reg_read = regmap_ipc_byte_reg_read, +}; + +static int bxtwc_probe(struct platform_device *pdev) +{ + int ret; + acpi_handle handle; + acpi_status status; + unsigned long long hrv; + struct intel_soc_pmic *pmic; + + handle = ACPI_HANDLE(&pdev->dev); + status = acpi_evaluate_integer(handle, "_HRV", NULL, &hrv); + if (ACPI_FAILURE(status)) { + dev_err(&pdev->dev, "Failed to get PMIC hardware revision\n"); + return -ENODEV; + } + if (hrv != BROXTON_PMIC_WC_HRV) { + dev_err(&pdev->dev, "Invalid PMIC hardware revision: %llu\n", + hrv); + return -ENODEV; + } + + pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL); + if (!pmic) + return -ENOMEM; + + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_err(&pdev->dev, "Invalid IRQ\n"); + return ret; + } + pmic->irq = ret; + + dev_set_drvdata(&pdev->dev, pmic); + pmic->dev = &pdev->dev; + + pmic->regmap = devm_regmap_init(&pdev->dev, NULL, pmic, + &bxtwc_regmap_config); + if (IS_ERR(pmic->regmap)) { + ret = PTR_ERR(pmic->regmap); + dev_err(&pdev->dev, "Failed to initialise regmap: %d\n", ret); + return ret; + } + + ret = regmap_add_irq_chip(pmic->regmap, pmic->irq, + IRQF_ONESHOT | IRQF_SHARED, + 0, &bxtwc_regmap_irq_chip, + &pmic->irq_chip_data); + if (ret) { + dev_err(&pdev->dev, "Failed to add IRQ chip\n"); + return ret; + } + + ret = regmap_add_irq_chip(pmic->regmap, pmic->irq, + IRQF_ONESHOT | IRQF_SHARED, + 0, &bxtwc_regmap_irq_chip_level2, + &pmic->irq_chip_data_level2); + if (ret) { + dev_err(&pdev->dev, "Failed to add secondary IRQ chip\n"); + goto err_irq_chip_level2; + } + + ret = mfd_add_devices(&pdev->dev, PLATFORM_DEVID_NONE, bxt_wc_dev, + ARRAY_SIZE(bxt_wc_dev), NULL, 0, + NULL); + if (ret) { + dev_err(&pdev->dev, "Failed to add devices\n"); + goto err_mfd; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &bxtwc_group); + if (ret) { + dev_err(&pdev->dev, "Failed to create sysfs group %d\n", ret); + goto err_sysfs; + } + + return 0; + +err_sysfs: + mfd_remove_devices(&pdev->dev); +err_mfd: + regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data_level2); +err_irq_chip_level2: + regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data); + + return ret; +} + +static int bxtwc_remove(struct platform_device *pdev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(&pdev->dev); + + sysfs_remove_group(&pdev->dev.kobj, &bxtwc_group); + mfd_remove_devices(&pdev->dev); + regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data); + regmap_del_irq_chip(pmic->irq, 
pmic->irq_chip_data_level2); + + return 0; +} + +static void bxtwc_shutdown(struct platform_device *pdev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(&pdev->dev); + + disable_irq(pmic->irq); +} + +#ifdef CONFIG_PM_SLEEP +static int bxtwc_suspend(struct device *dev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(dev); + + disable_irq(pmic->irq); + + return 0; +} + +static int bxtwc_resume(struct device *dev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(dev); + + enable_irq(pmic->irq); + return 0; +} +#endif +static SIMPLE_DEV_PM_OPS(bxtwc_pm_ops, bxtwc_suspend, bxtwc_resume); + +static const struct acpi_device_id bxtwc_acpi_ids[] = { + { "INT34D3", }, + { } +}; +MODULE_DEVICE_TABLE(acpi, bxtwc_acpi_ids); + +static struct platform_driver bxtwc_driver = { + .probe = bxtwc_probe, + .remove = bxtwc_remove, + .shutdown = bxtwc_shutdown, + .driver = { + .name = "BXTWC PMIC", + .pm = &bxtwc_pm_ops, + .acpi_match_table = ACPI_PTR(bxtwc_acpi_ids), + }, +}; + +module_platform_driver(bxtwc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Qipeng Zha"); diff --git a/include/linux/mfd/intel_bxtwc.h b/include/linux/mfd/intel_bxtwc.h new file mode 100644 index 000000000000..1a0ee9d6efe9 --- /dev/null +++ b/include/linux/mfd/intel_bxtwc.h @@ -0,0 +1,69 @@ +/* + * intel_bxtwc.h - Header file for Intel Broxton Whiskey Cove PMIC + * + * Copyright (C) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details.
+ */ + +#include + +#ifndef __INTEL_BXTWC_H__ +#define __INTEL_BXTWC_H__ + +/* BXT WC devices */ +#define BXTWC_DEVICE1_ADDR 0x4E +#define BXTWC_DEVICE2_ADDR 0x4F +#define BXTWC_DEVICE3_ADDR 0x5E + +/* device1 Registers */ +#define BXTWC_CHIPID 0x4E00 +#define BXTWC_CHIPVER 0x4E01 + +#define BXTWC_SCHGRIRQ0_ADDR 0x5E1A +#define BXTWC_CHGRCTRL0_ADDR 0x5E16 +#define BXTWC_CHGRCTRL1_ADDR 0x5E17 +#define BXTWC_CHGRCTRL2_ADDR 0x5E18 +#define BXTWC_CHGRSTATUS_ADDR 0x5E19 +#define BXTWC_THRMBATZONE_ADDR 0x4F22 + +#define BXTWC_USBPATH_ADDR 0x5E19 +#define BXTWC_USBPHYCTRL_ADDR 0x5E07 +#define BXTWC_USBIDCTRL_ADDR 0x5E05 +#define BXTWC_USBIDEN_MASK 0x01 +#define BXTWC_USBIDSTAT_ADDR 0x00FF +#define BXTWC_USBSRCDETSTATUS_ADDR 0x5E29 + +#define BXTWC_DBGUSBBC1_ADDR 0x5FE0 +#define BXTWC_DBGUSBBC2_ADDR 0x5FE1 +#define BXTWC_DBGUSBBCSTAT_ADDR 0x5FE2 + +#define BXTWC_WAKESRC_ADDR 0x4E22 +#define BXTWC_WAKESRC2_ADDR 0x4EE5 +#define BXTWC_CHRTTADDR_ADDR 0x5E22 +#define BXTWC_CHRTTDATA_ADDR 0x5E23 + +#define BXTWC_STHRMIRQ0_ADDR 0x4F19 +#define WC_MTHRMIRQ1_ADDR 0x4E12 +#define WC_STHRMIRQ1_ADDR 0x4F1A +#define WC_STHRMIRQ2_ADDR 0x4F1B + +#define BXTWC_THRMZN0H_ADDR 0x4F44 +#define BXTWC_THRMZN0L_ADDR 0x4F45 +#define BXTWC_THRMZN1H_ADDR 0x4F46 +#define BXTWC_THRMZN1L_ADDR 0x4F47 +#define BXTWC_THRMZN2H_ADDR 0x4F48 +#define BXTWC_THRMZN2L_ADDR 0x4F49 +#define BXTWC_THRMZN3H_ADDR 0x4F4A +#define BXTWC_THRMZN3L_ADDR 0x4F4B +#define BXTWC_THRMZN4H_ADDR 0x4F4C +#define BXTWC_THRMZN4L_ADDR 0x4F4D + +#endif -- cgit v1.2.3 From 0386af30d3d99d942dd68a8c64beb4f03958e74f Mon Sep 17 00:00:00 2001 From: Steve Twiss Date: Thu, 8 Oct 2015 16:17:51 +0100 Subject: mfd: da9053: Addition of extra registers for GPIOs 8-13 Definitions for GPIO registers 8, 9, 10, 11, 12 and 13 are added into the register header file. - DA9052_GPIO_8_9_REG 25 - DA9052_GPIO_10_11_REG 26 - DA9052_GPIO_12_13_REG 27 A modification is also made to the MFD core code to define these registers as readable and writable. The functions for da9052_reg_readable() and da9052_reg_writeable() have had their case statements altered to include these new registers. 
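A short usage sketch (not part of this patch; it assumes a struct da9052 instance named da9052, which exposes a regmap): with the new case labels in place, accesses to the added registers pass the readable/writeable checks instead of being rejected.

        unsigned int val;
        int ret = regmap_read(da9052->regmap, DA9052_GPIO_8_9_REG, &val);  /* now permitted */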
Signed-off-by: Steve Twiss Signed-off-by: Lee Jones --- drivers/mfd/da9052-core.c | 6 ++++++ include/linux/mfd/da9052/reg.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c index 46e3840c7a37..c0bf68a3e614 100644 --- a/drivers/mfd/da9052-core.c +++ b/drivers/mfd/da9052-core.c @@ -51,6 +51,9 @@ static bool da9052_reg_readable(struct device *dev, unsigned int reg) case DA9052_GPIO_2_3_REG: case DA9052_GPIO_4_5_REG: case DA9052_GPIO_6_7_REG: + case DA9052_GPIO_8_9_REG: + case DA9052_GPIO_10_11_REG: + case DA9052_GPIO_12_13_REG: case DA9052_GPIO_14_15_REG: case DA9052_ID_0_1_REG: case DA9052_ID_2_3_REG: @@ -178,6 +181,9 @@ static bool da9052_reg_writeable(struct device *dev, unsigned int reg) case DA9052_GPIO_2_3_REG: case DA9052_GPIO_4_5_REG: case DA9052_GPIO_6_7_REG: + case DA9052_GPIO_8_9_REG: + case DA9052_GPIO_10_11_REG: + case DA9052_GPIO_12_13_REG: case DA9052_GPIO_14_15_REG: case DA9052_ID_0_1_REG: case DA9052_ID_2_3_REG: diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h index c4dd3a8add21..5010f978725c 100644 --- a/include/linux/mfd/da9052/reg.h +++ b/include/linux/mfd/da9052/reg.h @@ -65,6 +65,9 @@ #define DA9052_GPIO_2_3_REG 22 #define DA9052_GPIO_4_5_REG 23 #define DA9052_GPIO_6_7_REG 24 +#define DA9052_GPIO_8_9_REG 25 +#define DA9052_GPIO_10_11_REG 26 +#define DA9052_GPIO_12_13_REG 27 #define DA9052_GPIO_14_15_REG 28 /* POWER SEQUENCER CONTROL REGISTERS */ -- cgit v1.2.3 From ce6a5acc93876f619f32f8f60c7c6e549e46d962 Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Mon, 29 Jun 2015 09:19:39 +0800 Subject: mfd: rtsx: Add support for rts522A rts522a (rts5227s) is derived from rts5227 and is mostly the same as rts5227. Add it to mfd/rts5227.c to support this chip. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 7 ++-- drivers/mfd/rts5227.c | 77 ++++++++++++++++++++++++++++++++++++++++++-- drivers/mfd/rtsx_pcr.c | 5 +++ drivers/mfd/rtsx_pcr.h | 3 ++ include/linux/mfd/rtsx_pci.h | 6 ++++ 5 files changed, 93 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d6514fb35f78..2db1337432cf 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -736,9 +736,10 @@ config MFD_RTSX_PCI select MFD_CORE help This supports for Realtek PCI-Express card reader including rts5209, - rts5229, rtl8411, etc. Realtek card reader supports access to many - types of memory cards, such as Memory Stick, Memory Stick Pro, - Secure Digital and MultiMediaCard. + rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, etc. + Realtek card reader supports access to many types of memory cards, + such as Memory Stick, Memory Stick Pro, Secure Digital and + MultiMediaCard.
config MFD_RT5033 tristate "Richtek RT5033 Power Management IC" diff --git a/drivers/mfd/rts5227.c b/drivers/mfd/rts5227.c index c5a65298c781..ff296a4bf3d2 100644 --- a/drivers/mfd/rts5227.c +++ b/drivers/mfd/rts5227.c @@ -26,6 +26,14 @@ #include "rtsx_pcr.h" +static u8 rts5227_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + static void rts5227_fill_driving(struct rtsx_pcr *pcr, u8 voltage) { u8 driving_3v3[4][3] = { @@ -88,7 +96,7 @@ static void rts5227_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0); if (pm_state == HOST_ENTER_S3) - rtsx_pci_write_register(pcr, PM_CTRL3, 0x10, 0x10); + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x10); rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); } @@ -121,7 +129,7 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0xB8); else rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0x88); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x10, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, pcr->reg_pm_ctrl3, 0x10, 0x00); return rtsx_pci_send_cmd(pcr, 100); } @@ -294,8 +302,73 @@ void rts5227_init_params(struct rtsx_pcr *pcr) pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 7, 7); + pcr->ic_version = rts5227_get_ic_version(pcr); pcr->sd_pull_ctl_enable_tbl = rts5227_sd_pull_ctl_enable_tbl; pcr->sd_pull_ctl_disable_tbl = rts5227_sd_pull_ctl_disable_tbl; pcr->ms_pull_ctl_enable_tbl = rts5227_ms_pull_ctl_enable_tbl; pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = PM_CTRL3; +} + +static int rts522a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, D3_DELINK_MODE_EN, + 0x00); + if (err < 0) + return err; + + if (is_version(pcr, 0x522A, IC_VER_A)) { + err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, + PHY_RCR2_INIT_27S); + if (err) + return err; + + rtsx_pci_write_phy_register(pcr, PHY_RCR1, PHY_RCR1_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD0, PHY_FLD0_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD3, PHY_FLD3_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD4, PHY_FLD4_INIT_27S); + } + + return 0; +} + +static int rts522a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5227_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG, + FUNC_FORCE_UPME_XMT_DBG); + rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04); + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 0xFF, 0x11); + + return 0; +} + +/* rts522a operations mainly derived from rts5227, except phy/hw init setting. 
+ */ +static const struct pcr_ops rts522a_pcr_ops = { + .fetch_vendor_settings = rts5227_fetch_vendor_settings, + .extra_init_hw = rts522a_extra_init_hw, + .optimize_phy = rts522a_optimize_phy, + .turn_on_led = rts5227_turn_on_led, + .turn_off_led = rts5227_turn_off_led, + .enable_auto_blink = rts5227_enable_auto_blink, + .disable_auto_blink = rts5227_disable_auto_blink, + .card_power_on = rts5227_card_power_on, + .card_power_off = rts5227_card_power_off, + .switch_output_voltage = rts5227_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5227_force_power_down, +}; + +void rts522a_init_params(struct rtsx_pcr *pcr) +{ + rts5227_init_params(pcr); + + pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; } diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index b98cf1de0a55..f3820d08c9a3 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -55,6 +55,7 @@ static const struct pci_device_id rtsx_pci_ids[] = { { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x522A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5287), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5286), PCI_CLASS_OTHERS << 16, 0xFF0000 }, @@ -1098,6 +1099,10 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) rts5227_init_params(pcr); break; + case 0x522A: + rts522a_init_params(pcr); + break; + case 0x5249: rts5249_init_params(pcr); break; diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h index ce48842570d7..931d1ae3ce32 100644 --- a/drivers/mfd/rtsx_pcr.h +++ b/drivers/mfd/rtsx_pcr.h @@ -27,6 +27,8 @@ #define MIN_DIV_N_PCR 80 #define MAX_DIV_N_PCR 208 +#define RTS522A_PM_CTRL3 0xFF7E + #define RTS524A_PME_FORCE_CTL 0xFF78 #define RTS524A_PM_CTRL3 0xFF7E @@ -38,6 +40,7 @@ void rts5229_init_params(struct rtsx_pcr *pcr); void rtl8411_init_params(struct rtsx_pcr *pcr); void rtl8402_init_params(struct rtsx_pcr *pcr); void rts5227_init_params(struct rtsx_pcr *pcr); +void rts522a_init_params(struct rtsx_pcr *pcr); void rts5249_init_params(struct rtsx_pcr *pcr); void rts524a_init_params(struct rtsx_pcr *pcr); void rts525a_init_params(struct rtsx_pcr *pcr); diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index ff843e7ca23d..7eb7cbac0a9a 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -589,6 +589,7 @@ #define FORCE_ASPM_NO_ASPM 0x00 #define PM_CLK_FORCE_CTL 0xFE58 #define FUNC_FORCE_CTL 0xFE59 +#define FUNC_FORCE_UPME_XMT_DBG 0x02 #define PERST_GLITCH_WIDTH 0xFE5C #define CHANGE_LINK_STATE 0xFE5B #define RESET_LOAD_REG 0xFE5E @@ -712,6 +713,7 @@ #define PHY_RCR1 0x02 #define PHY_RCR1_ADP_TIME_4 0x0400 #define PHY_RCR1_VCO_COARSE 0x001F +#define PHY_RCR1_INIT_27S 0x0A1F #define PHY_SSCCR2 0x02 #define PHY_SSCCR2_PLL_NCODE 0x0A00 #define PHY_SSCCR2_TIME0 0x001C @@ -724,6 +726,7 @@ #define PHY_RCR2_FREQSEL_12 0x0040 #define PHY_RCR2_CDR_SC_12P 0x0010 #define PHY_RCR2_CALIB_LATE 0x0002 +#define PHY_RCR2_INIT_27S 0xC152 #define PHY_SSCCR3 0x03 #define PHY_SSCCR3_STEP_IN 0x2740 #define PHY_SSCCR3_CHECK_DELAY 0x0008 @@ -800,12 +803,14 @@ #define PHY_ANA1A_RXT_BIST 0x0500 #define PHY_ANA1A_TXR_BIST 0x0040 #define PHY_ANA1A_REV 0x0006 +#define PHY_FLD0_INIT_27S 0x2546 #define PHY_FLD1 0x1B #define PHY_FLD2 0x1C #define PHY_FLD3 0x1D #define PHY_FLD3_TIMER_4 
0x0800 #define PHY_FLD3_TIMER_6 0x0020 #define PHY_FLD3_RXDELINK 0x0004 +#define PHY_FLD3_INIT_27S 0x0004 #define PHY_ANA1D 0x1D #define PHY_ANA1D_DEBUG_ADDR 0x0004 #define _PHY_FLD0 0x1D @@ -824,6 +829,7 @@ #define PHY_FLD4_BER_COUNT 0x00E0 #define PHY_FLD4_BER_TIMER 0x000A #define PHY_FLD4_BER_CHK_EN 0x0001 +#define PHY_FLD4_INIT_27S 0x5C7F #define PHY_DIG1E 0x1E #define PHY_DIG1E_REV 0x4000 #define PHY_DIG1E_D0_X_D1 0x1000 -- cgit v1.2.3 From 8a97d4287e2659f3460a8dec61ccc935154726c0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 7 Oct 2015 09:44:41 +0900 Subject: mfd: sec-core: Disable buck voltage reset on watchdog falling edge The WRSTBI bit (disabled by default but enabled by bootloader), when set, is responsible for resetting voltages to default values of certain bucks on falling edge of Warm Reset Input pin from AP. However on some boards (with S2MPS13) the pin is pulled down so any suspend will effectively trigger the reset of bucks supplying the power to the little and big cores. In the same time when resuming, these bucks must provide voltage greater or equal to voltage before suspend to match the frequency chosen by cpufreq. If voltage (default value of voltage after reset) is lower than one set by cpufreq before suspend, then system will hang during resuming. Signed-off-by: Krzysztof Kozlowski Reported-by: Bartlomiej Zolnierkiewicz Tested-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones --- drivers/mfd/sec-core.c | 26 ++++++++++++++++++++++++++ include/linux/mfd/samsung/core.h | 2 ++ include/linux/mfd/samsung/s2mps13.h | 1 + 3 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c index 589e5efc2d7f..2626fc0b5b8c 100644 --- a/drivers/mfd/sec-core.c +++ b/drivers/mfd/sec-core.c @@ -262,6 +262,29 @@ static void sec_pmic_dump_rev(struct sec_pmic_dev *sec_pmic) dev_dbg(sec_pmic->dev, "Revision: 0x%x\n", val); } +static void sec_pmic_configure(struct sec_pmic_dev *sec_pmic) +{ + int err; + + if (sec_pmic->device_type != S2MPS13X) + return; + + if (sec_pmic->pdata->disable_wrstbi) { + /* + * If WRSTBI pin is pulled down this feature must be disabled + * because each Suspend to RAM will trigger buck voltage reset + * to default values. + */ + err = regmap_update_bits(sec_pmic->regmap_pmic, + S2MPS13_REG_WRSTBI, + S2MPS13_REG_WRSTBI_MASK, 0x0); + if (err) + dev_warn(sec_pmic->dev, + "Cannot initialize WRSTBI config: %d\n", + err); + } +} + #ifdef CONFIG_OF /* * Only the common platform data elements for s5m8767 are parsed here from the @@ -289,6 +312,8 @@ static struct sec_platform_data *sec_pmic_i2c_parse_dt_pdata( pd->manual_poweroff = of_property_read_bool(dev->of_node, "samsung,s2mps11-acokb-ground"); + pd->disable_wrstbi = of_property_read_bool(dev->of_node, + "samsung,s2mps11-wrstbi-ground"); return pd; } #else @@ -434,6 +459,7 @@ static int sec_pmic_probe(struct i2c_client *i2c, goto err_mfd; device_init_wakeup(sec_pmic->dev, sec_pmic->wakeup); + sec_pmic_configure(sec_pmic); sec_pmic_dump_rev(sec_pmic); return ret; diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index aa78957e092f..a06098639399 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -134,6 +134,8 @@ struct sec_platform_data { int buck4_init; /* Whether or not manually set PWRHOLD to low during shutdown. */ bool manual_poweroff; + /* Disable the WRSTBI (buck voltage warm reset) when probing? 
*/ + bool disable_wrstbi; }; /** diff --git a/include/linux/mfd/samsung/s2mps13.h b/include/linux/mfd/samsung/s2mps13.h index b1fd675fa36f..239e977ba45d 100644 --- a/include/linux/mfd/samsung/s2mps13.h +++ b/include/linux/mfd/samsung/s2mps13.h @@ -184,5 +184,6 @@ enum s2mps13_regulators { * Let's assume that default value will be set. */ #define S2MPS13_BUCK_RAMP_DELAY 12500 +#define S2MPS13_REG_WRSTBI_MASK BIT(5) #endif /* __LINUX_MFD_S2MPS13_H */ -- cgit v1.2.3 From a76caf55e5b356ba20a5a43ac4d9f7a04b20941d Mon Sep 17 00:00:00 2001 From: Ørjan Eide Date: Thu, 10 Sep 2015 18:09:30 +0100 Subject: thermal: Add devfreq cooling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a generic thermal cooling device for devfreq, that is similar to cpu_cooling. The device must use devfreq. In order to use the power extension of the cooling device, it must have registered its OPPs using the OPP library. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Ørjan Eide Signed-off-by: Eduardo Valentin --- drivers/thermal/Kconfig | 14 + drivers/thermal/Makefile | 3 + drivers/thermal/devfreq_cooling.c | 563 ++++++++++++++++++++++++++++++++++++++ include/linux/devfreq_cooling.h | 81 ++++++ 4 files changed, 661 insertions(+) create mode 100644 drivers/thermal/devfreq_cooling.c create mode 100644 include/linux/devfreq_cooling.h (limited to 'include/linux') diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 5aabc4bc0d75..90629f69bb22 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -147,6 +147,20 @@ config CLOCK_THERMAL device that is configured to use this cooling mechanism will be controlled to reduce clock frequency whenever temperature is high. +config DEVFREQ_THERMAL + bool "Generic device cooling support" + depends on PM_DEVFREQ + depends on PM_OPP + help + This implements the generic devfreq cooling mechanism through + frequency reduction for devices using devfreq. + + This will throttle the device by limiting the maximum allowed DVFS + frequency corresponding to the cooling level. + + In order to use the power extensions of the cooling device, + devfreq should use the simple_ondemand governor. + If you want this support, you should say Y here. config THERMAL_EMULATION diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 26f160809959..cfae6a654793 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -22,6 +22,9 @@ thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o # clock cooling thermal_sys-$(CONFIG_CLOCK_THERMAL) += clock_cooling.o +# devfreq cooling +thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o + # platform thermal drivers obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c new file mode 100644 index 000000000000..a032c5d5c374 --- /dev/null +++ b/drivers/thermal/devfreq_cooling.c @@ -0,0 +1,563 @@ +/* + * devfreq_cooling: Thermal cooling device implementation for devices using + * devfreq + * + * Copyright (C) 2014-2015 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * TODO: + * - If OPPs are added or removed after devfreq cooling has + * registered, the devfreq cooling won't react to it. + */ + +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(devfreq_lock); +static DEFINE_IDR(devfreq_idr); + +/** + * struct devfreq_cooling_device - Devfreq cooling device + * @id: unique integer value corresponding to each + * devfreq_cooling_device registered. + * @cdev: Pointer to associated thermal cooling device. + * @devfreq: Pointer to associated devfreq device. + * @cooling_state: Current cooling state. + * @power_table: Pointer to table with maximum power draw for each + * cooling state. State is the index into the table, and + * the power is in mW. + * @freq_table: Pointer to a table with the frequencies sorted in descending + * order. You can index the table by cooling device state + * @freq_table_size: Size of the @freq_table and @power_table + * @power_ops: Pointer to devfreq_cooling_power, used to generate the + * @power_table. + */ +struct devfreq_cooling_device { + int id; + struct thermal_cooling_device *cdev; + struct devfreq *devfreq; + unsigned long cooling_state; + u32 *power_table; + u32 *freq_table; + size_t freq_table_size; + struct devfreq_cooling_power *power_ops; +}; + +/** + * get_idr - function to get a unique id. + * @idr: struct idr * handle used to create a id. + * @id: int * value generated by this function. + * + * This function will populate @id with an unique + * id, using the idr API. + * + * Return: 0 on success, an error code on failure. + */ +static int get_idr(struct idr *idr, int *id) +{ + int ret; + + mutex_lock(&devfreq_lock); + ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); + mutex_unlock(&devfreq_lock); + if (unlikely(ret < 0)) + return ret; + *id = ret; + + return 0; +} + +/** + * release_idr - function to free the unique id. + * @idr: struct idr * handle used for creating the id. + * @id: int value representing the unique id. + */ +static void release_idr(struct idr *idr, int id) +{ + mutex_lock(&devfreq_lock); + idr_remove(idr, id); + mutex_unlock(&devfreq_lock); +} + +/** + * partition_enable_opps() - disable all opps above a given state + * @dfc: Pointer to devfreq we are operating on + * @cdev_state: cooling device state we're setting + * + * Go through the OPPs of the device, enabling all OPPs until + * @cdev_state and disabling those frequencies above it. + */ +static int partition_enable_opps(struct devfreq_cooling_device *dfc, + unsigned long cdev_state) +{ + int i; + struct device *dev = dfc->devfreq->dev.parent; + + for (i = 0; i < dfc->freq_table_size; i++) { + struct dev_pm_opp *opp; + int ret = 0; + unsigned int freq = dfc->freq_table[i]; + bool want_enable = i >= cdev_state ? 
true : false; + + rcu_read_lock(); + opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); + rcu_read_unlock(); + + if (PTR_ERR(opp) == -ERANGE) + continue; + else if (IS_ERR(opp)) + return PTR_ERR(opp); + + if (want_enable) + ret = dev_pm_opp_enable(dev, freq); + else + ret = dev_pm_opp_disable(dev, freq); + + if (ret) + return ret; + } + + return 0; +} + +static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + + *state = dfc->freq_table_size - 1; + + return 0; +} + +static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + + *state = dfc->cooling_state; + + return 0; +} + +static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + struct devfreq *df = dfc->devfreq; + struct device *dev = df->dev.parent; + int ret; + + if (state == dfc->cooling_state) + return 0; + + dev_dbg(dev, "Setting cooling state %lu\n", state); + + if (state >= dfc->freq_table_size) + return -EINVAL; + + ret = partition_enable_opps(dfc, state); + if (ret) + return ret; + + dfc->cooling_state = state; + + return 0; +} + +/** + * freq_get_state() - get the cooling state corresponding to a frequency + * @dfc: Pointer to devfreq cooling device + * @freq: frequency in Hz + * + * Return: the cooling state associated with the @freq, or + * THERMAL_CSTATE_INVALID if it wasn't found. + */ +static unsigned long +freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq) +{ + int i; + + for (i = 0; i < dfc->freq_table_size; i++) { + if (dfc->freq_table[i] == freq) + return i; + } + + return THERMAL_CSTATE_INVALID; +} + +/** + * get_static_power() - calculate the static power + * @dfc: Pointer to devfreq cooling device + * @freq: Frequency in Hz + * + * Calculate the static power in milliwatts using the supplied + * get_static_power(). The current voltage is calculated using the + * OPP library. If no get_static_power() was supplied, assume the + * static power is negligible. + */ +static unsigned long +get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) +{ + struct devfreq *df = dfc->devfreq; + struct device *dev = df->dev.parent; + unsigned long voltage; + struct dev_pm_opp *opp; + + if (!dfc->power_ops->get_static_power) + return 0; + + rcu_read_lock(); + + opp = dev_pm_opp_find_freq_exact(dev, freq, true); + if (IS_ERR(opp) && (PTR_ERR(opp) == -ERANGE)) + opp = dev_pm_opp_find_freq_exact(dev, freq, false); + + voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ + + rcu_read_unlock(); + + if (voltage == 0) { + dev_warn_ratelimited(dev, + "Failed to get voltage for frequency %lu: %ld\n", + freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); + return 0; + } + + return dfc->power_ops->get_static_power(voltage); +} + +/** + * get_dynamic_power - calculate the dynamic power + * @dfc: Pointer to devfreq cooling device + * @freq: Frequency in Hz + * @voltage: Voltage in millivolts + * + * Calculate the dynamic power in milliwatts consumed by the device at + * frequency @freq and voltage @voltage. If the get_dynamic_power() + * was supplied as part of the devfreq_cooling_power struct, then that + * function is used. Otherwise, a simple power model (Pdyn = Coeff * + * Voltage^2 * Frequency) is used. 
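+ * + * Worked example (coefficient chosen only for illustration, not taken + * from this patch): with dyn_power_coeff = 120, freq = 500000000 + * (500 MHz) and voltage = 900 mV, power = 120 * 500 * 900 * 900 / + * 1000000000, which integer division truncates to 48 mW.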
+ */ +static unsigned long +get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq, + unsigned long voltage) +{ + unsigned long power; + u32 freq_mhz; + struct devfreq_cooling_power *dfc_power = dfc->power_ops; + + if (dfc_power->get_dynamic_power) + return dfc_power->get_dynamic_power(freq, voltage); + + freq_mhz = freq / 1000000; + power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage; + do_div(power, 1000000000); + + return power; +} + +static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 *power) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + struct devfreq *df = dfc->devfreq; + struct devfreq_dev_status *status = &df->last_status; + unsigned long state; + unsigned long freq = status->current_frequency; + u32 dyn_power, static_power; + + /* Get dynamic power for state */ + state = freq_get_state(dfc, freq); + if (state == THERMAL_CSTATE_INVALID) + return -EAGAIN; + + dyn_power = dfc->power_table[state]; + + /* Scale dynamic power for utilization */ + dyn_power = (dyn_power * status->busy_time) / status->total_time; + + /* Get static power */ + static_power = get_static_power(dfc, freq); + + *power = dyn_power + static_power; + + return 0; +} + +static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + unsigned long state, + u32 *power) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + unsigned long freq; + u32 static_power; + + if (state < 0 || state >= dfc->freq_table_size) + return -EINVAL; + + freq = dfc->freq_table[state]; + static_power = get_static_power(dfc, freq); + + *power = dfc->power_table[state] + static_power; + return 0; +} + +static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 power, unsigned long *state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + struct devfreq *df = dfc->devfreq; + struct devfreq_dev_status *status = &df->last_status; + unsigned long freq = status->current_frequency; + unsigned long busy_time; + s32 dyn_power; + u32 static_power; + int i; + + static_power = get_static_power(dfc, freq); + + dyn_power = power - static_power; + dyn_power = dyn_power > 0 ? dyn_power : 0; + + /* Scale dynamic power for utilization */ + busy_time = status->busy_time ?: 1; + dyn_power = (dyn_power * status->total_time) / busy_time; + + /* + * Find the first cooling state that is within the power + * budget for dynamic power. + */ + for (i = 0; i < dfc->freq_table_size - 1; i++) + if (dyn_power >= dfc->power_table[i]) + break; + + *state = i; + return 0; +} + +static struct thermal_cooling_device_ops devfreq_cooling_ops = { + .get_max_state = devfreq_cooling_get_max_state, + .get_cur_state = devfreq_cooling_get_cur_state, + .set_cur_state = devfreq_cooling_set_cur_state, +}; + +/** + * devfreq_cooling_gen_tables() - Generate power and freq tables. + * @dfc: Pointer to devfreq cooling device. + * + * Generate power and frequency tables: the power table hold the + * device's maximum power usage at each cooling state (OPP). The + * static and dynamic power using the appropriate voltage and + * frequency for the state, is acquired from the struct + * devfreq_cooling_power, and summed to make the maximum power draw. + * + * The frequency table holds the frequencies in descending order. + * That way its indexed by cooling device state. + * + * The tables are malloced, and pointers put in dfc. 
They must be + * freed when unregistering the devfreq cooling device. + * + * Return: 0 on success, negative error code on failure. + */ +static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc) +{ + struct devfreq *df = dfc->devfreq; + struct device *dev = df->dev.parent; + int ret, num_opps; + unsigned long freq; + u32 *power_table = NULL; + u32 *freq_table; + int i; + + num_opps = dev_pm_opp_get_opp_count(dev); + + if (dfc->power_ops) { + power_table = kcalloc(num_opps, sizeof(*power_table), + GFP_KERNEL); + if (!power_table) + ret = -ENOMEM; + } + + freq_table = kcalloc(num_opps, sizeof(*freq_table), + GFP_KERNEL); + if (!freq_table) { + ret = -ENOMEM; + goto free_power_table; + } + + for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { + unsigned long power_dyn, voltage; + struct dev_pm_opp *opp; + + rcu_read_lock(); + + opp = dev_pm_opp_find_freq_floor(dev, &freq); + if (IS_ERR(opp)) { + rcu_read_unlock(); + ret = PTR_ERR(opp); + goto free_tables; + } + + voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ + + rcu_read_unlock(); + + if (dfc->power_ops) { + power_dyn = get_dynamic_power(dfc, freq, voltage); + + dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n", + freq / 1000000, voltage, power_dyn, power_dyn); + + power_table[i] = power_dyn; + } + + freq_table[i] = freq; + } + + if (dfc->power_ops) + dfc->power_table = power_table; + + dfc->freq_table = freq_table; + dfc->freq_table_size = num_opps; + + return 0; + +free_tables: + kfree(freq_table); +free_power_table: + kfree(power_table); + + return ret; +} + +/** + * of_devfreq_cooling_register_power() - Register devfreq cooling device, + * with OF and power information. + * @np: Pointer to OF device_node. + * @df: Pointer to devfreq device. + * @dfc_power: Pointer to devfreq_cooling_power. + * + * Register a devfreq cooling device. The available OPPs must be + * registered on the device. + * + * If @dfc_power is provided, the cooling device is registered with the + * power extensions. For the power extensions to work correctly, + * devfreq should use the simple_ondemand governor, other governors + * are not currently supported. 
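+ * + * Return: a pointer to the new devfreq cooling device on success, or an + * ERR_PTR()-encoded error code on failure (matching the error paths in + * the function body below).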
+ */ +struct devfreq_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power) +{ + struct thermal_cooling_device *cdev; + struct devfreq_cooling_device *dfc; + char dev_name[THERMAL_NAME_LENGTH]; + int err; + + dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); + if (!dfc) + return ERR_PTR(-ENOMEM); + + dfc->devfreq = df; + + if (dfc_power) { + dfc->power_ops = dfc_power; + + devfreq_cooling_ops.get_requested_power = + devfreq_cooling_get_requested_power; + devfreq_cooling_ops.state2power = devfreq_cooling_state2power; + devfreq_cooling_ops.power2state = devfreq_cooling_power2state; + } + + err = devfreq_cooling_gen_tables(dfc); + if (err) + goto free_dfc; + + err = get_idr(&devfreq_idr, &dfc->id); + if (err) + goto free_tables; + + snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); + + cdev = thermal_of_cooling_device_register(np, dev_name, dfc, + &devfreq_cooling_ops); + if (IS_ERR(cdev)) { + err = PTR_ERR(cdev); + dev_err(df->dev.parent, + "Failed to register devfreq cooling device (%d)\n", + err); + goto release_idr; + } + + dfc->cdev = cdev; + + return dfc; + +release_idr: + release_idr(&devfreq_idr, dfc->id); +free_tables: + kfree(dfc->power_table); + kfree(dfc->freq_table); +free_dfc: + kfree(dfc); + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); + +/** + * of_devfreq_cooling_register() - Register devfreq cooling device, + * with OF information. + * @np: Pointer to OF device_node. + * @df: Pointer to devfreq device. + */ +struct devfreq_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) +{ + return of_devfreq_cooling_register_power(np, df, NULL); +} +EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); + +/** + * devfreq_cooling_register() - Register devfreq cooling device. + * @df: Pointer to devfreq device. + */ +struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df) +{ + return of_devfreq_cooling_register(NULL, df); +} +EXPORT_SYMBOL_GPL(devfreq_cooling_register); + +/** + * devfreq_cooling_unregister() - Unregister devfreq cooling device. + * @dfc: Pointer to devfreq cooling device to unregister. + */ +void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc) +{ + if (!dfc) + return; + + thermal_cooling_device_unregister(dfc->cdev); + release_idr(&devfreq_idr, dfc->id); + kfree(dfc->power_table); + kfree(dfc->freq_table); + + kfree(dfc); +} +EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h new file mode 100644 index 000000000000..ee5f0ec9290b --- /dev/null +++ b/include/linux/devfreq_cooling.h @@ -0,0 +1,81 @@ +/* + * devfreq_cooling: Thermal cooling device implementation for devices using + * devfreq + * + * Copyright (C) 2014-2015 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */
+
+#ifndef __DEVFREQ_COOLING_H__
+#define __DEVFREQ_COOLING_H__
+
+#include <linux/devfreq.h>
+#include <linux/thermal.h>
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+
+/**
+ * struct devfreq_cooling_power - Devfreq cooling power ops
+ * @get_static_power:	Take voltage, in mV, and return the static power
+ *			in mW.  If NULL, the static power is assumed
+ *			to be 0.
+ * @get_dynamic_power:	Take voltage, in mV, and frequency, in Hz, and
+ *			return the dynamic power draw in mW.  If NULL,
+ *			a simple power model is used.
+ * @dyn_power_coeff:	Coefficient for the simple dynamic power model in
+ *			mW/(MHz mV mV).
+ *			If get_dynamic_power() is NULL, then the
+ *			dynamic power is calculated as
+ *			@dyn_power_coeff * frequency * voltage^2
+ */
+struct devfreq_cooling_power {
+	unsigned long (*get_static_power)(unsigned long voltage);
+	unsigned long (*get_dynamic_power)(unsigned long freq,
+					   unsigned long voltage);
+	unsigned long dyn_power_coeff;
+};
+
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power);
+struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
+struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df);
+void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc);
+
+#else /* !CONFIG_DEVFREQ_THERMAL */
+
+static inline struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct devfreq_cooling_device *
+devfreq_cooling_register(struct devfreq *df)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline void
+devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+{
+}
+
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* __DEVFREQ_COOLING_H__ */
-- 
cgit v1.2.3


From df5c7386f62d2db95ca48005087195e9a15e2b1f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Tue, 13 Oct 2015 20:09:19 +0300
Subject: dmaengine: dw: some Intel devices have no memcpy support

Provide a flag to choose whether the device supports memory-to-memory
transfers.  This is not true for at least the iDMA32 controller, which
might be supported in the future.  Besides that, Intel BayTrail and
Braswell users should not use this feature due to HW-specific
behaviour.
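For illustration, a minimal sketch of platform data for a controller
that must not advertise memory-to-memory support.  Only is_memcpy is
introduced by this patch; the device and the other field values are
assumptions, not taken from it:

	/* Hypothetical board file: a DesignWare DMA controller that
	 * should not expose DMA_MEMCPY to the general DMA framework.
	 */
	#include <linux/platform_data/dma-dw.h>

	static struct dw_dma_platform_data example_dma_pdata = {
		.nr_channels		= 8,
		.is_private		= true,
		.is_memcpy		= false, /* no mem-to-mem transfers */
		.chan_allocation_order	= CHAN_ALLOCATION_ASCENDING,
		.chan_priority		= CHAN_PRIORITY_ASCENDING,
	};

With this in place, dw_dma_probe() below skips setting DMA_MEMCPY in
the capability mask and the channels are only offered for slave
transfers.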
Signed-off-by: Andy Shevchenko
Acked-by: Viresh Kumar
Signed-off-by: Vinod Koul
---
 drivers/dma/dw/core.c                | 6 +++++-
 include/linux/platform_data/dma-dw.h | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index f16d1ed99ba9..41e9554b884d 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1541,6 +1541,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
 
 		/* Fill platform data with the default values */
 		pdata->is_private = true;
+		pdata->is_memcpy = true;
 		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
 		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
 	} else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
@@ -1653,10 +1654,13 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
 	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
 
-	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+	/* Set capabilities */
 	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
 	if (pdata->is_private)
 		dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
+	if (pdata->is_memcpy)
+		dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
 	dw->dma.dev = chip->dev;
 	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
 	dw->dma.device_free_chan_resources = dwc_free_chan_resources;
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 87ac14c584f2..03b6095d3b18 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -37,6 +37,7 @@ struct dw_dma_slave {
 * @nr_channels:	Number of channels supported by hardware (max 8)
 * @is_private:	The device channels should be marked as private and not for
 *	use by the general purpose DMA channel allocator.
+ * @is_memcpy:	The device channels do support memory-to-memory transfers.
 * @chan_allocation_order: Allocate channels starting from 0 or 7
 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
 * @block_size: Maximum block size supported by the controller
@@ -47,6 +48,7 @@ struct dw_dma_slave {
 struct dw_dma_platform_data {
	unsigned int	nr_channels;
	bool		is_private;
+	bool		is_memcpy;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
	unsigned char	chan_allocation_order;
-- 
cgit v1.2.3


From e56f81e0b01ef4e45292d8c1e19edd4d09724e14 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 15 Oct 2015 14:10:50 +0200
Subject: dm: refactor ioctl handling

This moves the call to blkdev_ioctl and the argument checking to DM
core code, and only leaves a callout to find the block device to
operate on in the targets.  This simplifies the code and allows us to
pass through ioctl-like commands using other methods in the next patch.

Also split out a helper around calling the prepare_ioctl method that
will be reused for persistent reservation handling.
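As a sketch of the new callout shape: a single-device target now only
reports its backing block device, and returns 1 when DM core must do
extra validation because the target does not map the whole device.
The "example_" names are assumptions for illustration; the pattern
mirrors the converted targets in the diff below:

	static int example_prepare_ioctl(struct dm_target *ti,
					 struct block_device **bdev,
					 fmode_t *mode)
	{
		struct example_c *ec = ti->private;

		*bdev = ec->dev->bdev;
		*mode = ec->dev->mode;

		/*
		 * Only pass ioctls through if the device sizes match
		 * exactly; otherwise ask core to verify the command.
		 */
		if (ec->start ||
		    ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
			return 1;
		return 0;
	}

DM core then issues the ioctl itself against the returned bdev, so the
targets no longer duplicate the scsi_verify_blk_ioctl() and
__blkdev_driver_ioctl() calls.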
Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-flakey.c | 16 ++++++------- drivers/md/dm-linear.c | 14 +++++------ drivers/md/dm-log-writes.c | 13 +++++----- drivers/md/dm-mpath.c | 29 ++++++++++------------- drivers/md/dm-switch.c | 21 +++++++---------- drivers/md/dm-verity.c | 15 ++++++------ drivers/md/dm.c | 55 +++++++++++++++++++++++++++++++++---------- include/linux/device-mapper.h | 6 ++--- 8 files changed, 94 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 645e8b4f808e..09e2afcafd2d 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -373,20 +373,20 @@ static void flakey_status(struct dm_target *ti, status_type_t type, } } -static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) +static int flakey_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode) { struct flakey_c *fc = ti->private; - struct dm_dev *dev = fc->dev; - int r = 0; + + *bdev = fc->dev->bdev; /* * Only pass ioctls through if the device sizes match exactly. */ if (fc->start || - ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); + ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) + return 1; + return 0; } static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) @@ -405,7 +405,7 @@ static struct target_type flakey_target = { .map = flakey_map, .end_io = flakey_end_io, .status = flakey_status, - .ioctl = flakey_ioctl, + .prepare_ioctl = flakey_prepare_ioctl, .iterate_devices = flakey_iterate_devices, }; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 436f5c9b6aea..de52864d60fa 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -116,21 +116,21 @@ static void linear_status(struct dm_target *ti, status_type_t type, } } -static int linear_ioctl(struct dm_target *ti, unsigned int cmd, - unsigned long arg) +static int linear_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode) { struct linear_c *lc = (struct linear_c *) ti->private; struct dm_dev *dev = lc->dev; - int r = 0; + + *bdev = dev->bdev; /* * Only pass ioctls through if the device sizes match exactly. */ if (lc->start || ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); + return 1; + return 0; } static int linear_iterate_devices(struct dm_target *ti, @@ -149,7 +149,7 @@ static struct target_type linear_target = { .dtr = linear_dtr, .map = linear_map, .status = linear_status, - .ioctl = linear_ioctl, + .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, }; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index b2912dbac8bc..624589d51c2c 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -714,20 +714,19 @@ static void log_writes_status(struct dm_target *ti, status_type_t type, } } -static int log_writes_ioctl(struct dm_target *ti, unsigned int cmd, - unsigned long arg) +static int log_writes_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode) { struct log_writes_c *lc = ti->private; struct dm_dev *dev = lc->dev; - int r = 0; + *bdev = dev->bdev; /* * Only pass ioctls through if the device sizes match exactly. 
*/ if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); + return 1; + return 0; } static int log_writes_iterate_devices(struct dm_target *ti, @@ -782,7 +781,7 @@ static struct target_type log_writes_target = { .map = log_writes_map, .end_io = normal_end_io, .status = log_writes_status, - .ioctl = log_writes_ioctl, + .prepare_ioctl = log_writes_prepare_ioctl, .message = log_writes_message, .iterate_devices = log_writes_iterate_devices, .io_hints = log_writes_io_hints, diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index bdc96cd838b8..77066a199984 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1533,18 +1533,14 @@ out: return r; } -static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, - unsigned long arg) +static int multipath_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode) { struct multipath *m = ti->private; struct pgpath *pgpath; - struct block_device *bdev; - fmode_t mode; unsigned long flags; int r; - bdev = NULL; - mode = 0; r = 0; spin_lock_irqsave(&m->lock, flags); @@ -1555,23 +1551,17 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, pgpath = m->current_pgpath; if (pgpath) { - bdev = pgpath->path.dev->bdev; - mode = pgpath->path.dev->mode; + *bdev = pgpath->path.dev->bdev; + *mode = pgpath->path.dev->mode; } if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) r = -ENOTCONN; - else if (!bdev) + else if (!*bdev) r = -EIO; spin_unlock_irqrestore(&m->lock, flags); - /* - * Only pass ioctls through if the device sizes match exactly. - */ - if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - if (r == -ENOTCONN && !fatal_signal_pending(current)) { spin_lock_irqsave(&m->lock, flags); if (!m->current_pg) { @@ -1584,7 +1574,12 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, dm_table_run_md_queue_async(m->ti->table); } - return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); + /* + * Only pass ioctls through if the device sizes match exactly. + */ + if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) + return 1; + return r; } static int multipath_iterate_devices(struct dm_target *ti, @@ -1700,7 +1695,7 @@ static struct target_type multipath_target = { .resume = multipath_resume, .status = multipath_status, .message = multipath_message, - .ioctl = multipath_ioctl, + .prepare_ioctl = multipath_prepare_ioctl, .iterate_devices = multipath_iterate_devices, .busy = multipath_busy, }; diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 50fca469cafd..b1285753a5d4 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -511,27 +511,24 @@ static void switch_status(struct dm_target *ti, status_type_t type, * * Passthrough all ioctls to the path for sector 0 */ -static int switch_ioctl(struct dm_target *ti, unsigned cmd, - unsigned long arg) +static int switch_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode) { struct switch_ctx *sctx = ti->private; - struct block_device *bdev; - fmode_t mode; unsigned path_nr; - int r = 0; path_nr = switch_get_path_nr(sctx, 0); - bdev = sctx->path_list[path_nr].dmdev->bdev; - mode = sctx->path_list[path_nr].dmdev->mode; + *bdev = sctx->path_list[path_nr].dmdev->bdev; + *mode = sctx->path_list[path_nr].dmdev->mode; /* * Only pass ioctls through if the device sizes match exactly. 
*/ - if (ti->len + sctx->path_list[path_nr].start != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (ti->len + sctx->path_list[path_nr].start != + i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) + return 1; + return 0; } static int switch_iterate_devices(struct dm_target *ti, @@ -560,7 +557,7 @@ static struct target_type switch_target = { .map = switch_map, .message = switch_message, .status = switch_status, - .ioctl = switch_ioctl, + .prepare_ioctl = switch_prepare_ioctl, .iterate_devices = switch_iterate_devices, }; diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index edc624bccf9a..ccf41886ebcf 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -631,18 +631,17 @@ static void verity_status(struct dm_target *ti, status_type_t type, } } -static int verity_ioctl(struct dm_target *ti, unsigned cmd, - unsigned long arg) +static int verity_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode) { struct dm_verity *v = ti->private; - int r = 0; + + *bdev = v->data_dev->bdev; if (v->data_start || ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? : __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode, - cmd, arg); + return 1; + return 0; } static int verity_iterate_devices(struct dm_target *ti, @@ -965,7 +964,7 @@ static struct target_type verity_target = { .dtr = verity_dtr, .map = verity_map, .status = verity_status, - .ioctl = verity_ioctl, + .prepare_ioctl = verity_prepare_ioctl, .iterate_devices = verity_iterate_devices, .io_hints = verity_io_hints, }; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 77ebb985154c..9b3fe5be0cee 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -555,18 +555,16 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return dm_get_geometry(md, geo); } -static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +static int dm_get_live_table_for_ioctl(struct mapped_device *md, + struct dm_target **tgt, struct block_device **bdev, + fmode_t *mode, int *srcu_idx) { - struct mapped_device *md = bdev->bd_disk->private_data; - int srcu_idx; struct dm_table *map; - struct dm_target *tgt; - int r = -ENOTTY; + int r; retry: - map = dm_get_live_table(md, &srcu_idx); - + r = -ENOTTY; + map = dm_get_live_table(md, srcu_idx); if (!map || !dm_table_get_size(map)) goto out; @@ -574,8 +572,9 @@ retry: if (dm_table_get_num_targets(map) != 1) goto out; - tgt = dm_table_get_target(map, 0); - if (!tgt->type->ioctl) + *tgt = dm_table_get_target(map, 0); + + if (!(*tgt)->type->prepare_ioctl) goto out; if (dm_suspended_md(md)) { @@ -583,16 +582,46 @@ retry: goto out; } - r = tgt->type->ioctl(tgt, cmd, arg); + r = (*tgt)->type->prepare_ioctl(*tgt, bdev, mode); + if (r < 0) + goto out; -out: - dm_put_live_table(md, srcu_idx); + return r; +out: + dm_put_live_table(md, *srcu_idx); if (r == -ENOTCONN) { msleep(10); goto retry; } + return r; +} + +static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct mapped_device *md = bdev->bd_disk->private_data; + struct dm_target *tgt; + int srcu_idx, r; + + r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); + if (r < 0) + return r; + + if (r > 0) { + /* + * Target determined this ioctl is being issued against + * a logical partition of the parent bdev; so extra + * 
validation is needed. + */ + r = scsi_verify_blk_ioctl(NULL, cmd); + if (r) + goto out; + } + r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); +out: + dm_put_live_table(md, srcu_idx); return r; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 76d23fa8c7d3..ec1c61c87d89 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -79,8 +79,8 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); -typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd, - unsigned long arg); +typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, + struct block_device **bdev, fmode_t *mode); /* * These iteration functions are typically used to check (and combine) @@ -156,7 +156,7 @@ struct target_type { dm_resume_fn resume; dm_status_fn status; dm_message_fn message; - dm_ioctl_fn ioctl; + dm_prepare_ioctl_fn prepare_ioctl; dm_busy_fn busy; dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; -- cgit v1.2.3 From f3f86e33dc3da437fa4f204588ce7c78ea756982 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Oct 2015 16:53:57 -0700 Subject: vfs: Fix pathological performance case for __alloc_fd() Al Viro points out that: > > * [Linux-specific aside] our __alloc_fd() can degrade quite badly > > with some use patterns. The cacheline pingpong in the bitmap is probably > > inevitable, unless we accept considerably heavier memory footprint, > > but we also have a case when alloc_fd() takes O(n) and it's _not_ hard > > to trigger - close(3);open(...); will have the next open() after that > > scanning the entire in-use bitmap. And Eric Dumazet has a somewhat realistic multithreaded microbenchmark that opens and closes a lot of sockets with minimal work per socket. This patch largely fixes it. We keep a 2nd-level bitmap of the open file bitmaps, showing which words are already full. So then we can traverse that second-level bitmap to efficiently skip already allocated file descriptors. On his benchmark, this improves performance by up to an order of magnitude, by avoiding the excessive open file bitmap scanning. Tested-and-acked-by: Eric Dumazet Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/file.c | 39 +++++++++++++++++++++++++++++++++++---- include/linux/fdtable.h | 2 ++ 2 files changed, 37 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/file.c b/fs/file.c index 6c672ad329e9..6f6eb2b03af5 100644 --- a/fs/file.c +++ b/fs/file.c @@ -56,6 +56,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu) __free_fdtable(container_of(rcu, struct fdtable, rcu)); } +#define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) +#define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) + /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. 
@@ -77,6 +80,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) memset((char *)(nfdt->open_fds) + cpy, 0, set); memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); memset((char *)(nfdt->close_on_exec) + cpy, 0, set); + + cpy = BITBIT_SIZE(ofdt->max_fds); + set = BITBIT_SIZE(nfdt->max_fds) - cpy; + memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); + memset(cpy+(char *)nfdt->full_fds_bits, 0, set); } static struct fdtable * alloc_fdtable(unsigned int nr) @@ -115,12 +123,14 @@ static struct fdtable * alloc_fdtable(unsigned int nr) fdt->fd = data; data = alloc_fdmem(max_t(size_t, - 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); + 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES)); if (!data) goto out_arr; fdt->open_fds = data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = data; + data += nr / BITS_PER_BYTE; + fdt->full_fds_bits = data; return fdt; @@ -229,14 +239,18 @@ static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) __clear_bit(fd, fdt->close_on_exec); } -static inline void __set_open_fd(int fd, struct fdtable *fdt) +static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) { __set_bit(fd, fdt->open_fds); + fd /= BITS_PER_LONG; + if (!~fdt->open_fds[fd]) + __set_bit(fd, fdt->full_fds_bits); } -static inline void __clear_open_fd(int fd, struct fdtable *fdt) +static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) { __clear_bit(fd, fdt->open_fds); + __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits); } static int count_open_files(struct fdtable *fdt) @@ -280,6 +294,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt->max_fds = NR_OPEN_DEFAULT; new_fdt->close_on_exec = newf->close_on_exec_init; new_fdt->open_fds = newf->open_fds_init; + new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; spin_lock(&oldf->file_lock); @@ -323,6 +338,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8); memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8); + memcpy(new_fdt->full_fds_bits, old_fdt->full_fds_bits, BITBIT_SIZE(open_files)); for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; @@ -454,10 +470,25 @@ struct files_struct init_files = { .fd = &init_files.fd_array[0], .close_on_exec = init_files.close_on_exec_init, .open_fds = init_files.open_fds_init, + .full_fds_bits = init_files.full_fds_bits_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), }; +static unsigned long find_next_fd(struct fdtable *fdt, unsigned long start) +{ + unsigned long maxfd = fdt->max_fds; + unsigned long maxbit = maxfd / BITS_PER_LONG; + unsigned long bitbit = start / BITS_PER_LONG; + + bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; + if (bitbit > maxfd) + return maxfd; + if (bitbit > start) + start = bitbit; + return find_next_zero_bit(fdt->open_fds, maxfd, start); +} + /* * allocate a file descriptor, mark it busy. */ @@ -476,7 +507,7 @@ repeat: fd = files->next_fd; if (fd < fdt->max_fds) - fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); + fd = find_next_fd(fdt, fd); /* * N.B. 
For clone tasks sharing a files structure, this test
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 674e3e226465..5295535b60c6 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -26,6 +26,7 @@ struct fdtable {
 	struct file __rcu **fd;      /* current fd array */
 	unsigned long *close_on_exec;
 	unsigned long *open_fds;
+	unsigned long *full_fds_bits;
 	struct rcu_head rcu;
 };
 
@@ -59,6 +60,7 @@ struct files_struct {
 	int next_fd;
 	unsigned long close_on_exec_init[1];
 	unsigned long open_fds_init[1];
+	unsigned long full_fds_bits_init[1];
 	struct file __rcu * fd_array[NR_OPEN_DEFAULT];
 };
-- 
cgit v1.2.3


From a5ad88ce8c7fae7ddc72ee49a11a75aa837788e0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 1 Nov 2015 17:09:15 -0800
Subject: mm: get rid of 'vmalloc_info' from /proc/meminfo

It turns out that at least some versions of glibc end up reading
/proc/meminfo at every single startup, because glibc wants to know the
amount of memory the machine has.  And while that's arguably insane,
it's just how things are.

And it turns out that it's not all that expensive most of the time, but
the vmalloc information statistics (amount of virtual memory used in
the vmalloc space, and the biggest remaining chunk) can be rather
expensive to compute.

The 'get_vmalloc_info()' function actually showed up on my profiles as
4% of the CPU usage of "make test" in the git source repository,
because the git tests are lots of very short-lived shell-scripts etc.

It turns out that apparently this same silly vmalloc info gathering
shows up on the Facebook servers too, according to Dave Jones.  So it's
not just "make test" for git.

We had two patches to just cache the information (one by me, one by
Ingo) to mitigate this issue, but the whole vmalloc information is of
rather dubious value to begin with, and people who *actually* want to
know what the situation is wrt the vmalloc area should just look at the
much more complete /proc/vmallocinfo instead.

In fact, according to my testing - and perhaps more importantly,
according to that big search engine in the sky: Google - there is
nothing out there that actually cares about those two expensive fields:
VmallocUsed and VmallocChunk.

So let's try to just remove them entirely.  Actually, this just removes
the computation and reports the numbers as zero for now, just to try to
be minimally intrusive.

If this breaks anything, we'll obviously have to re-introduce the code
to compute this all and add the caching patches on top.  But if given
the option, I'd really prefer to just remove this bad idea entirely
rather than add even more code to work around our historical mistake
that likely nobody really cares about.
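For userspace that only needs the machine's memory size (the glibc
case above), a minimal sketch that avoids /proc/meminfo entirely by
using the sysinfo(2) system call:

	/* Query total RAM without parsing /proc/meminfo. */
	#include <stdio.h>
	#include <sys/sysinfo.h>

	int main(void)
	{
		struct sysinfo si;

		if (sysinfo(&si))
			return 1;

		/* totalram is in units of mem_unit bytes */
		printf("total RAM: %llu bytes\n",
		       (unsigned long long)si.totalram * si.mem_unit);
		return 0;
	}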
Signed-off-by: Linus Torvalds
---
 fs/proc/meminfo.c       |  7 ++-----
 include/linux/vmalloc.h | 12 ------------
 mm/vmalloc.c            | 47 -----------------------------------------------
 3 files changed, 2 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index d3ebf2e61853..9155a5a0d3b9 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -27,7 +27,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 {
 	struct sysinfo i;
 	unsigned long committed;
-	struct vmalloc_info vmi;
 	long cached;
 	long available;
 	unsigned long pagecache;
@@ -49,8 +48,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	if (cached < 0)
 		cached = 0;
 
-	get_vmalloc_info(&vmi);
-
 	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
 		pages[lru] = global_page_state(NR_LRU_BASE + lru);
 
@@ -191,8 +188,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(vm_commit_limit()),
 		K(committed),
 		(unsigned long)VMALLOC_TOTAL >> 10,
-		vmi.used >> 10,
-		vmi.largest_chunk >> 10
+		0ul, // used to be vmalloc 'used'
+		0ul  // used to be vmalloc 'largest_chunk'
 #ifdef CONFIG_MEMORY_FAILURE
 		, atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
 #endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 0ec598381f97..3bff87a25a42 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -182,22 +182,10 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 # endif
 #endif
 
-struct vmalloc_info {
-	unsigned long	used;
-	unsigned long	largest_chunk;
-};
-
 #ifdef CONFIG_MMU
 #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
-extern void get_vmalloc_info(struct vmalloc_info *vmi);
 #else
-
 #define VMALLOC_TOTAL 0UL
-#define get_vmalloc_info(vmi)			\
-do {						\
-	(vmi)->used = 0;			\
-	(vmi)->largest_chunk = 0;		\
-} while (0)
 #endif
 
 #endif /* _LINUX_VMALLOC_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2faaa2976447..af3a519e40c2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2688,52 +2688,5 @@ static int __init proc_vmalloc_init(void)
 }
 module_init(proc_vmalloc_init);
 
-void get_vmalloc_info(struct vmalloc_info *vmi)
-{
-	struct vmap_area *va;
-	unsigned long free_area_size;
-	unsigned long prev_end;
-
-	vmi->used = 0;
-	vmi->largest_chunk = 0;
-
-	prev_end = VMALLOC_START;
-
-	rcu_read_lock();
-
-	if (list_empty(&vmap_area_list)) {
-		vmi->largest_chunk = VMALLOC_TOTAL;
-		goto out;
-	}
-
-	list_for_each_entry_rcu(va, &vmap_area_list, list) {
-		unsigned long addr = va->va_start;
-
-		/*
-		 * Some archs keep another range for modules in vmalloc space
-		 */
-		if (addr < VMALLOC_START)
-			continue;
-		if (addr >= VMALLOC_END)
-			break;
-
-		if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
-			continue;
-
-		vmi->used += (va->va_end - va->va_start);
-
-		free_area_size = addr - prev_end;
-		if (vmi->largest_chunk < free_area_size)
-			vmi->largest_chunk = free_area_size;
-
-		prev_end = va->va_end;
-	}
-
-	if (VMALLOC_END - prev_end > vmi->largest_chunk)
-		vmi->largest_chunk = VMALLOC_END - prev_end;
-
-out:
-	rcu_read_unlock();
-}
 #endif
-- 
cgit v1.2.3


From 42e5c3e2725ba0c0affc1fc8a6aa1d5cf31ecb75 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Sat, 24 Oct 2015 17:27:35 -0400
Subject: SUNRPC: Abstract backchannel operations

xprt_{setup,destroy}_backchannel() won't be adequate for RPC/RDMA
bi-direction.  In particular, receive buffers have to be
pre-registered and posted in order to receive incoming backchannel
requests.

Add a virtual function call to allow the insertion of appropriate
backchannel setup and destruction methods for each transport.
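To illustrate why setup must be transport-specific: an RDMA transport
has to provision receive buffers before any backchannel call can
arrive, which the socket path never does.  A hypothetical outline (the
example_rdma_* names are assumptions, not from this patch):

	/* Sketch of an RDMA transport's bc_setup method: each
	 * backchannel request needs a receive buffer registered with
	 * the device and posted ahead of time.
	 */
	static int example_rdma_bc_setup(struct rpc_xprt *xprt,
					 unsigned int reqs)
	{
		unsigned int i;
		int rc;

		for (i = 0; i < reqs; i++) {
			/* example_rdma_post_recv() is an assumed helper
			 * that allocates, registers and posts one
			 * receive buffer.
			 */
			rc = example_rdma_post_recv(xprt);
			if (rc < 0)
				return rc;
		}
		return 0;
	}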
In addition, freeing a backchannel request is a little different for RPC/RDMA. Introduce an rpc_xprt_op to handle the difference. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/bc_xprt.h | 5 +++++ include/linux/sunrpc/xprt.h | 5 +++++ net/sunrpc/backchannel_rqst.c | 24 ++++++++++++++++++++++-- net/sunrpc/xprtsock.c | 5 +++++ 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 8df43c9f11dc..4397a4824c81 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -38,6 +38,11 @@ void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +/* Socket backchannel transport methods */ +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); +void xprt_free_bc_rqst(struct rpc_rqst *req); + /* * Determine if a shared backchannel is in use */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0fb9acbb4780..3f79c4a4ce74 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -136,6 +136,11 @@ struct rpc_xprt_ops { int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); void (*inject_disconnect)(struct rpc_xprt *xprt); + int (*bc_setup)(struct rpc_xprt *xprt, + unsigned int min_reqs); + void (*bc_free_rqst)(struct rpc_rqst *rqst); + void (*bc_destroy)(struct rpc_xprt *xprt, + unsigned int max_reqs); }; /* diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 6255d141133b..229956bf8457 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -137,6 +137,14 @@ out_free: * callback requests can be up to 4096 bytes in size. */ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) +{ + if (!xprt->ops->bc_setup) + return 0; + return xprt->ops->bc_setup(xprt, min_reqs); +} +EXPORT_SYMBOL_GPL(xprt_setup_backchannel); + +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) { struct rpc_rqst *req; struct list_head tmp_list; @@ -192,7 +200,6 @@ out_free: dprintk("RPC: setup backchannel transport failed\n"); return -ENOMEM; } -EXPORT_SYMBOL_GPL(xprt_setup_backchannel); /** * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. @@ -204,6 +211,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel); * of reqs specified by the caller. */ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) +{ + if (xprt->ops->bc_destroy) + xprt->ops->bc_destroy(xprt, max_reqs); +} +EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); + +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs) { struct rpc_rqst *req = NULL, *tmp = NULL; @@ -227,7 +241,6 @@ out: dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? 
"true" : "false"); } -EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) { @@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; + xprt->ops->bc_free_rqst(req); +} + +void xprt_free_bc_rqst(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + dprintk("RPC: free backchannel req=%p\n", req); req->rq_connect_cookie = xprt->connect_cookie - 1; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1a85e0ed0b48..44a81e4c6783 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2580,6 +2580,11 @@ static struct rpc_xprt_ops xs_tcp_ops = { .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, .inject_disconnect = xs_inject_disconnect, +#ifdef CONFIG_SUNRPC_BACKCHANNEL + .bc_setup = xprt_setup_bc, + .bc_free_rqst = xprt_free_bc_rqst, + .bc_destroy = xprt_destroy_bc, +#endif }; /* -- cgit v1.2.3 From 9468431962616c2449d47c482208a5967e011bf9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:16 -0400 Subject: svcrdma: Add backward direction service for RPC/RDMA transport On NFSv4.1 mount points, the Linux NFS client uses this transport endpoint to receive backward direction calls and route replies back to the NFSv4.1 server. Signed-off-by: Chuck Lever Acked-by: "J. Bruce Fields" Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc_rdma.h | 6 +++- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprtrdma/svc_rdma.c | 6 ++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 58 ++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..fb4013edcf57 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -228,9 +228,13 @@ extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); -extern struct svc_xprt_class svc_rdma_class; extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); +extern struct svc_xprt_class svc_rdma_class; +#ifdef CONFIG_SUNRPC_BACKCHANNEL +extern struct svc_xprt_class svc_rdma_bc_class; +#endif + /* svc_rdma.c */ extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3f79c4a4ce74..82c083946ef0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -158,6 +158,7 @@ enum xprt_transports { XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, }; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 2cd252f023a5..1b7051bdbdc8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -239,6 +239,9 @@ void svc_rdma_cleanup(void) unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; } +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_unreg_xprt_class(&svc_rdma_bc_class); +#endif svc_unreg_xprt_class(&svc_rdma_class); kmem_cache_destroy(svc_rdma_map_cachep); kmem_cache_destroy(svc_rdma_ctxt_cachep); @@ -286,6 +289,9 @@ int svc_rdma_init(void) /* Register RDMA with the SVC transport switch */ 
svc_reg_xprt_class(&svc_rdma_class); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_reg_xprt_class(&svc_rdma_bc_class); +#endif return 0; err1: kmem_cache_destroy(svc_rdma_map_cachep); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fcc3eb80c265..a133b1e5b5f6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -56,6 +56,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -95,6 +96,63 @@ struct svc_xprt_class svc_rdma_class = { .xcl_ident = XPRT_TRANSPORT_RDMA, }; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *, + struct sockaddr *, int, int); +static void svc_rdma_bc_detach(struct svc_xprt *); +static void svc_rdma_bc_free(struct svc_xprt *); + +static struct svc_xprt_ops svc_rdma_bc_ops = { + .xpo_create = svc_rdma_bc_create, + .xpo_detach = svc_rdma_bc_detach, + .xpo_free = svc_rdma_bc_free, + .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, + .xpo_secure_port = svc_rdma_secure_port, +}; + +struct svc_xprt_class svc_rdma_bc_class = { + .xcl_name = "rdma-bc", + .xcl_owner = THIS_MODULE, + .xcl_ops = &svc_rdma_bc_ops, + .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN) +}; + +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, + struct net *net, + struct sockaddr *sa, int salen, + int flags) +{ + struct svcxprt_rdma *cma_xprt; + struct svc_xprt *xprt; + + cma_xprt = rdma_create_xprt(serv, 0); + if (!cma_xprt) + return ERR_PTR(-ENOMEM); + xprt = &cma_xprt->sc_xprt; + + svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + serv->sv_bc_xprt = xprt; + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + return xprt; +} + +static void svc_rdma_bc_detach(struct svc_xprt *xprt) +{ + dprintk("svcrdma: %s(%p)\n", __func__, xprt); +} + +static void svc_rdma_bc_free(struct svc_xprt *xprt) +{ + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + if (xprt) + kfree(rdma); +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; -- cgit v1.2.3 From 3d4e204d81eec30abffe55d01912e07ce81eef12 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 29 Sep 2015 22:43:32 +0800 Subject: ring_buffer: ring_buffer_empty{cpu}() can return boolean Make ring_buffer_empty() and ring_buffer_empty_cpu() return bool. No functional change. 
Link: http://lkml.kernel.org/r/1443537816-5788-5-git-send-email-bywxiaobai@163.com

Signed-off-by: Yaowei Bai
Signed-off-by: Steven Rostedt
---
 include/linux/ring_buffer.h |  4 ++--
 kernel/trace/ring_buffer.c  | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e2c13cd863bd..4acc552e9279 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -154,8 +154,8 @@ ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 }
 #endif
 
-int ring_buffer_empty(struct ring_buffer *buffer);
-int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
+bool ring_buffer_empty(struct ring_buffer *buffer);
+bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
 
 void ring_buffer_record_disable(struct ring_buffer *buffer);
 void ring_buffer_record_enable(struct ring_buffer *buffer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 26a948f3187f..fc9ce12666be 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4267,7 +4267,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset);
 * ring_buffer_empty - is the ring buffer empty?
 * @buffer: The ring buffer to test
 */
-int ring_buffer_empty(struct ring_buffer *buffer)
+bool ring_buffer_empty(struct ring_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
@@ -4285,10 +4285,10 @@ int ring_buffer_empty(struct ring_buffer *buffer)
 		local_irq_restore(flags);
 
 		if (!ret)
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_empty);
 
@@ -4297,7 +4297,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
 * @buffer: The ring buffer
 * @cpu: The CPU buffer to test
 */
-int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
+bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
@@ -4305,7 +4305,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 	int ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
-		return 1;
+		return true;
 
 	cpu_buffer = buffer->buffers[cpu];
 	local_irq_save(flags);
-- 
cgit v1.2.3


From 3c99c2cef75eb5bfc05c5728e4560f3ee656d47e Mon Sep 17 00:00:00 2001
From: Javi Merino
Date: Mon, 2 Nov 2015 19:03:03 +0000
Subject: thermal: devfreq_cooling: use a thermal_cooling_device for register
 and unregister

Be consistent with what other cooling devices do and return a struct
thermal_cooling_device * on register.  Also, for the unregister, accept
a struct thermal_cooling_device * as a parameter.

Cc: Zhang Rui
Cc: Eduardo Valentin
Signed-off-by: Javi Merino
Signed-off-by: Eduardo Valentin
---
 drivers/thermal/devfreq_cooling.c | 16 ++++++++++------
 include/linux/devfreq_cooling.h   | 16 ++++++++--------
 2 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index a27206815066..31e40a9a9fd0 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -467,7 +467,7 @@ free_power_table:
 * devfreq should use the simple_ondemand governor; other governors
 * are not currently supported.
*/ -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { @@ -513,7 +513,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, dfc->cdev = cdev; - return dfc; + return cdev; release_idr: release_idr(&devfreq_idr, dfc->id); @@ -533,7 +533,7 @@ EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); * @np: Pointer to OF device_node. * @df: Pointer to devfreq device. */ -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) { return of_devfreq_cooling_register_power(np, df, NULL); @@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); * devfreq_cooling_register() - Register devfreq cooling device. * @df: Pointer to devfreq device. */ -struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df) +struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) { return of_devfreq_cooling_register(NULL, df); } @@ -554,11 +554,15 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_register); * devfreq_cooling_unregister() - Unregister devfreq cooling device. * @dfc: Pointer to devfreq cooling device to unregister. */ -void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc) +void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) { - if (!dfc) + struct devfreq_cooling_device *dfc; + + if (!cdev) return; + dfc = cdev->devdata; + thermal_cooling_device_unregister(dfc->cdev); release_idr(&devfreq_idr, dfc->id); kfree(dfc->power_table); diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index ee5f0ec9290b..7adf6cc4b305 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -43,37 +43,37 @@ struct devfreq_cooling_power { unsigned long dyn_power_coeff; }; -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power); -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); -struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df); -void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc); +struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df); +void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); #else /* !CONFIG_DEVFREQ_THERMAL */ -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { return ERR_PTR(-EINVAL); } -static inline struct devfreq_cooling_device * +static inline struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) { return ERR_PTR(-EINVAL); } -static inline struct devfreq_cooling_device * +static inline struct thermal_cooling_device * devfreq_cooling_register(struct devfreq *df) { return ERR_PTR(-EINVAL); } static inline void -devfreq_cooling_unregister(struct devfreq_cooling_device *dfc) +devfreq_cooling_unregister(struct thermal_cooling_device *dfc) { } -- cgit v1.2.3 From 76566773a1f1c2295ed901b6f1241cfe10d99029 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:32 -0400 Subject: NFS: Enable client side NFSv4.1 backchannel to use other transports Forechannel transports get 
their own "bc_up" method to create an endpoint for the backchannel service. Signed-off-by: Chuck Lever [Anna Schumaker: Add forward declaration of struct net to xprt.h] Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 40 ++++++++------------------------------- include/linux/sunrpc/xprt.h | 3 +++ net/sunrpc/xprtrdma/backchannel.c | 21 ++++++++++++++++++++ net/sunrpc/xprtrdma/transport.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + net/sunrpc/xprtsock.c | 12 ++++++++++++ 6 files changed, 46 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f7c0a7538a..a7f2e6e33305 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv) } #if defined(CONFIG_NFS_V4_1) -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); -} - /* * The callback service for NFSv4.1 callbacks */ @@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - return 0; -} - static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { @@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc svc_shutdown_net(serv, net); } -static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, + struct net *net, struct rpc_xprt *xprt) { struct nfs_net *nn = net_generic(net, nfs_net_id); int ret; @@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n goto err_bind; } - switch (minorversion) { - case 0: - ret = nfs4_callback_up_net(serv, net); - break; - case 1: - case 2: - ret = nfs41_callback_up_net(serv, net); - break; - default: - printk(KERN_ERR "NFS: unknown callback version: %d\n", - minorversion); - ret = -EINVAL; - break; - } + ret = -EPROTONOSUPPORT; + if (minorversion == 0) + ret = nfs4_callback_up_net(serv, net); + else if (xprt->ops->bc_up) + ret = xprt->ops->bc_up(serv, net); if (ret < 0) { printk(KERN_ERR "NFS: callback service start failed\n"); @@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_create; } - ret = nfs_callback_up_net(minorversion, serv, net); + ret = nfs_callback_up_net(minorversion, serv, net, xprt); if (ret < 0) goto err_net; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 82c083946ef0..69ef5b3ab038 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -54,6 +54,8 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; struct seq_file; +struct svc_serv; +struct net; /* * This describes a complete RPC request @@ -138,6 +140,7 @@ struct rpc_xprt_ops { void (*inject_disconnect)(struct rpc_xprt *xprt); int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); + int (*bc_up)(struct svc_serv *serv, struct net *net); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); diff --git a/net/sunrpc/xprtrdma/backchannel.c 
b/net/sunrpc/xprtrdma/backchannel.c index 0b3387fe3f0d..2dcb44f69e53 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "xprt_rdma.h" @@ -173,6 +174,26 @@ out_err: return -ENOMEM; } +/** + * xprt_rdma_bc_up - Create transport endpoint for backchannel service + * @serv: server endpoint + * @net: network namespace + * + * The "xprt" is an implied argument: it supplies the name of the + * backchannel transport class. + * + * Returns zero on success, negative errno on failure + */ +int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0); + if (ret < 0) + return ret; + return 0; +} + /** * rpcrdma_bc_marshal_reply - Send backwards direction reply * @rqst: buffer containing RPC reply data diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 845278e63be0..8c545f7d7525 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -708,6 +708,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .inject_disconnect = xprt_rdma_inject_disconnect, #if defined(CONFIG_SUNRPC_BACKCHANNEL) .bc_setup = xprt_rdma_bc_setup, + .bc_up = xprt_rdma_bc_up, .bc_free_rqst = xprt_rdma_bc_free_rqst, .bc_destroy = xprt_rdma_bc_destroy, #endif diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index eb87d96e80ca..f8dd17be9f43 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -520,6 +520,7 @@ void xprt_rdma_cleanup(void); */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); +int xprt_rdma_bc_up(struct svc_serv *, struct net *); int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); int rpcrdma_bc_marshal_reply(struct rpc_rqst *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 44a81e4c6783..dc4706711224 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1306,6 +1306,17 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, xs_tcp_read_reply(xprt, desc) : xs_tcp_read_callback(xprt, desc); } + +static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); + if (ret < 0) + return ret; + return 0; +} #else static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) @@ -2582,6 +2593,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .inject_disconnect = xs_inject_disconnect, #ifdef CONFIG_SUNRPC_BACKCHANNEL .bc_setup = xprt_setup_bc, + .bc_up = xs_tcp_bc_up, .bc_free_rqst = xprt_free_bc_rqst, .bc_destroy = xprt_destroy_bc, #endif -- cgit v1.2.3 From 79dbd1baa651cece408e68a1b445f3628c4b5bdc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2015 22:23:56 +0100 Subject: libceph: msg signing callouts don't need con argument We can use msg->con instead - at the point we sign an outgoing message or check the signature on the incoming one, msg->con is always set. We wouldn't know how to sign a message without an associated session (i.e. msg->con == NULL) and being able to sign a message using an explicitly provided authorizer is of no use. 
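The resulting callback shape, sketched for a hypothetical client (the
"example" names are assumptions; the real MDS and OSD versions follow
in the diff below):

	/* With the connection reachable through msg->con, a signing
	 * callback no longer needs a ceph_connection argument.
	 * example_session and its s_auth handshake field are assumed
	 * here for illustration.
	 */
	static int example_sign_message(struct ceph_msg *msg)
	{
		struct example_session *s = msg->con->private;

		return ceph_auth_sign_message(&s->s_auth, msg);
	}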
Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 14 ++++++++------ include/linux/ceph/messenger.h | 5 ++--- net/ceph/messenger.c | 4 ++-- net/ceph/osd_client.c | 14 ++++++++------ 4 files changed, 20 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 89838a226fe9..e7b130a637f9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3942,17 +3942,19 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, return msg; } -static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_sign_message(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_sign_message(auth, msg); } -static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_check_message_signature(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_check_message_signature(auth, msg); } @@ -3965,8 +3967,8 @@ static const struct ceph_connection_operations mds_con_ops = { .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, .alloc_msg = mds_alloc_msg, - .sign_message = sign_message, - .check_message_signature = check_message_signature, + .sign_message = mds_sign_message, + .check_message_signature = mds_check_message_signature, }; /* eof */ diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b2371d9b51fa..3687ff0f0133 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -43,10 +43,9 @@ struct ceph_connection_operations { struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); - int (*sign_message) (struct ceph_connection *con, struct ceph_msg *msg); - int (*check_message_signature) (struct ceph_connection *con, - struct ceph_msg *msg); + int (*sign_message) (struct ceph_msg *msg); + int (*check_message_signature) (struct ceph_msg *msg); }; /* use format string %s%d */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index fce6ad636613..805f6f82139f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1205,7 +1205,7 @@ static void prepare_write_message_footer(struct ceph_connection *con) con->out_kvec[v].iov_base = &m->footer; if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) - con->ops->sign_message(con, m); + con->ops->sign_message(m); else m->footer.sig = 0; con->out_kvec[v].iov_len = sizeof(m->footer); @@ -2422,7 +2422,7 @@ static int read_partial_message(struct ceph_connection *con) } if (need_sign && con->ops->check_message_signature && - con->ops->check_message_signature(con, m)) { + con->ops->check_message_signature(m)) { pr_err("read_partial_message %p signature check failed\n", m); return -EBADMSG; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 191bc21cecea..118e4ce37ecc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2979,17 +2979,19 @@ static int invalidate_authorizer(struct ceph_connection *con) return ceph_monc_validate_auth(&osdc->client->monc); } -static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +static int osd_sign_message(struct ceph_msg *msg) { - struct ceph_osd *o = con->private; + struct ceph_osd *o = msg->con->private; struct ceph_auth_handshake 
*auth = &o->o_auth; + return ceph_auth_sign_message(auth, msg); } -static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +static int osd_check_message_signature(struct ceph_msg *msg) { - struct ceph_osd *o = con->private; + struct ceph_osd *o = msg->con->private; struct ceph_auth_handshake *auth = &o->o_auth; + return ceph_auth_check_message_signature(auth, msg); } @@ -3001,7 +3003,7 @@ static const struct ceph_connection_operations osd_con_ops = { .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .alloc_msg = alloc_msg, - .sign_message = sign_message, - .check_message_signature = check_message_signature, + .sign_message = osd_sign_message, + .check_message_signature = osd_check_message_signature, .fault = osd_reset, }; -- cgit v1.2.3 From 859bff51dc5e92ddfb5eb6f17b8040d9311095bb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 28 Oct 2015 23:50:58 +0100 Subject: libceph: stop duplicating client fields in messenger supported_features and required_features serve no purpose at all, while nocrc and tcp_nodelay belong to ceph_options::flags. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + include/linux/ceph/messenger.h | 11 +---------- net/ceph/ceph_common.c | 6 +----- net/ceph/messenger.c | 26 +++++++++----------------- 4 files changed, 12 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 397c5cd09794..a7caafe03d3c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -137,6 +137,7 @@ struct ceph_client { #endif }; +#define from_msgr(ms) container_of(ms, struct ceph_client, msgr) /* diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 3687ff0f0133..71b1d6cdcb5d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -57,8 +57,6 @@ struct ceph_messenger { atomic_t stopping; possible_net_t net; - bool nocrc; - bool tcp_nodelay; /* * the global_seq counts connections i (attempt to) initiate @@ -66,9 +64,6 @@ struct ceph_messenger { */ u32 global_seq; spinlock_t global_seq_lock; - - u64 supported_features; - u64 required_features; }; enum ceph_msg_data_type { @@ -267,11 +262,7 @@ extern void ceph_msgr_exit(void); extern void ceph_msgr_flush(void); extern void ceph_messenger_init(struct ceph_messenger *msgr, - struct ceph_entity_addr *myaddr, - u64 supported_features, - u64 required_features, - bool nocrc, - bool tcp_nodelay); + struct ceph_entity_addr *myaddr); extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 54a00d66509e..d1494d1a8592 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -596,11 +596,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, if (ceph_test_opt(client, MYIP)) myaddr = &client->options->my_addr; - ceph_messenger_init(&client->msgr, myaddr, - client->supported_features, - client->required_features, - ceph_test_opt(client, NOCRC), - ceph_test_opt(client, TCP_NODELAY)); + ceph_messenger_init(&client->msgr, myaddr); /* subsystems */ err = ceph_monc_init(&client->monc, client); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 805f6f82139f..11108076bac3 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -509,7 +509,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) return 
ret; } - if (con->msgr->tcp_nodelay) { + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { int optval = 1; ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, @@ -1432,7 +1432,8 @@ static int prepare_write_connect(struct ceph_connection *con) dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); - con->out_connect.features = cpu_to_le64(con->msgr->supported_features); + con->out_connect.features = + cpu_to_le64(from_msgr(con->msgr)->supported_features); con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -1527,7 +1528,7 @@ static int write_partial_message_data(struct ceph_connection *con) { struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; - bool do_datacrc = !con->msgr->nocrc; + bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc; dout("%s %p msg %p\n", __func__, con, msg); @@ -2005,8 +2006,8 @@ static int process_banner(struct ceph_connection *con) static int process_connect(struct ceph_connection *con) { - u64 sup_feat = con->msgr->supported_features; - u64 req_feat = con->msgr->required_features; + u64 sup_feat = from_msgr(con->msgr)->supported_features; + u64 req_feat = from_msgr(con->msgr)->required_features; u64 server_feat = ceph_sanitize_features( le64_to_cpu(con->in_reply.features)); int ret; @@ -2232,7 +2233,7 @@ static int read_partial_msg_data(struct ceph_connection *con) { struct ceph_msg *msg = con->in_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; - const bool do_datacrc = !con->msgr->nocrc; + bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); struct page *page; size_t page_offset; size_t length; @@ -2277,7 +2278,7 @@ static int read_partial_message(struct ceph_connection *con) int end; int ret; unsigned int front_len, middle_len, data_len; - bool do_datacrc = !con->msgr->nocrc; + bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); u64 seq; u32 crc; @@ -2951,15 +2952,8 @@ static void con_fault(struct ceph_connection *con) * initialize a new messenger instance */ void ceph_messenger_init(struct ceph_messenger *msgr, - struct ceph_entity_addr *myaddr, - u64 supported_features, - u64 required_features, - bool nocrc, - bool tcp_nodelay) + struct ceph_entity_addr *myaddr) { - msgr->supported_features = supported_features; - msgr->required_features = required_features; - spin_lock_init(&msgr->global_seq_lock); if (myaddr) @@ -2969,8 +2963,6 @@ void ceph_messenger_init(struct ceph_messenger *msgr, msgr->inst.addr.type = 0; get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); encode_my_addr(msgr); - msgr->nocrc = nocrc; - msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); -- cgit v1.2.3 From a51983e4dd2d4d63912aab939f657c4cd476e21a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 28 Oct 2015 23:52:06 +0100 Subject: libceph: add nocephx_sign_messages option Support for message signing was merged into 3.19, along with nocephx_require_signatures option. But, all that option does is allow the kernel client to talk to clusters that don't support MSG_AUTH feature bit. That's pretty useless, given that it's been supported since bobtail. 
Meanwhile, if one disables message signing on the server side with "cephx sign messages = false", it becomes impossible to use the kernel client since it expects messages to be signed if MSG_AUTH was negotiated. Add nocephx_sign_messages option to support this use case. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 3 ++- net/ceph/auth_x.c | 7 +++++++ net/ceph/ceph_common.c | 12 ++++++++++++ net/ceph/messenger.c | 2 +- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index a7caafe03d3c..3e3799cdc6e6 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -29,8 +29,9 @@ #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ -#define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */ +#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */ #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ +#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 3a544ca6b5ce..10d87753ed87 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "crypto.h" @@ -698,6 +699,9 @@ static int ceph_x_sign_message(struct ceph_auth_handshake *auth, { int ret; + if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, msg, &msg->footer.sig); if (ret < 0) @@ -712,6 +716,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth, __le64 sig_check; int ret; + if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, msg, &sig_check); if (ret < 0) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index d1494d1a8592..6b4d3a1684de 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -245,6 +245,8 @@ enum { Opt_nocrc, Opt_cephx_require_signatures, Opt_nocephx_require_signatures, + Opt_cephx_sign_messages, + Opt_nocephx_sign_messages, Opt_tcp_nodelay, Opt_notcp_nodelay, }; @@ -267,6 +269,8 @@ static match_table_t opt_tokens = { {Opt_nocrc, "nocrc"}, {Opt_cephx_require_signatures, "cephx_require_signatures"}, {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, + {Opt_cephx_sign_messages, "cephx_sign_messages"}, + {Opt_nocephx_sign_messages, "nocephx_sign_messages"}, {Opt_tcp_nodelay, "tcp_nodelay"}, {Opt_notcp_nodelay, "notcp_nodelay"}, {-1, NULL} @@ -491,6 +495,12 @@ ceph_parse_options(char *options, const char *dev_name, case Opt_nocephx_require_signatures: opt->flags |= CEPH_OPT_NOMSGAUTH; break; + case Opt_cephx_sign_messages: + opt->flags &= ~CEPH_OPT_NOMSGSIGN; + break; + case Opt_nocephx_sign_messages: + opt->flags |= CEPH_OPT_NOMSGSIGN; + break; case Opt_tcp_nodelay: opt->flags |= CEPH_OPT_TCP_NODELAY; @@ -534,6 +544,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) seq_puts(m, "nocrc,"); if (opt->flags & CEPH_OPT_NOMSGAUTH) seq_puts(m, "nocephx_require_signatures,"); + if (opt->flags & CEPH_OPT_NOMSGSIGN) + seq_puts(m, "nocephx_sign_messages,"); if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) seq_puts(m, "notcp_nodelay,"); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 
11108076bac3..0cc5608b2c8f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2677,7 +2677,7 @@ more: if (ret <= 0) { switch (ret) { case -EBADMSG: - con->error_msg = "bad crc"; + con->error_msg = "bad crc/signature"; /* fall through */ case -EBADE: ret = -EIO; -- cgit v1.2.3 From a15920bea0428cd22291637f6c72542b1843e65f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 2 Nov 2015 17:42:42 -0500 Subject: tracepoints: Fix documentation of RCU lockdep checks The documentation on top of __DECLARE_TRACE() does not match its implementation since the condition check has been added to the RCU lockdep checks. Update the documentation to match the implementation. Link: http://lkml.kernel.org/r/1446504164-21563-1-git-send-email-mathieu.desnoyers@efficios.com CC: Dave Hansen Fixes: a05d59a56733 "tracing: Add condition check to RCU lockdep checks" Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 6b79537a42b1..696a339c592c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -184,10 +184,11 @@ extern void syscall_unregfunc(void); * structure. Force alignment to the same alignment as the section start. * * When lockdep is enabled, we make sure to always do the RCU portions of - * the tracepoint code, regardless of whether tracing is on or we match the - * condition. This lets us find RCU issues triggered with tracepoints even - * when this tracepoint is off. This code has no purpose other than poking - * RCU a bit. + * the tracepoint code, regardless of whether tracing is on. However, + * don't check if the condition is false, due to interaction with idle + * instrumentation. This lets us find RCU issues triggered with tracepoints + * even when this tracepoint is off. This code has no purpose other than + * poking RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ -- cgit v1.2.3 From c210129760a010b555372ef74f4e1a46d4eb8a22 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Oct 2015 14:58:07 +0100 Subject: bpf: align and clean bpf_{map,prog}_get helpers Add a bpf_map_get() function that we're going to use later on and align/clean the remaining helpers a bit so that they are a bit more consistent: - __bpf_map_get() and __bpf_prog_get() that both work on the fd struct, check whether the descriptor is eBPF and return the pointer to the map/prog stored in the private data. Also, we can return f.file->private_data directly; the function signature is enough documentation already. - bpf_map_get() and bpf_prog_get() that both work on u32 user fd, call their respective __bpf_map_get()/__bpf_prog_get() variants, and take a reference. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 41 +++++++++++++++++++++++------------------ kernel/bpf/verifier.c | 3 +-- 3 files changed, 25 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75718fa28260..0b5fb6acef64 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,7 +167,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(struct fd f); +struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2b89ef0a9757..3fff82ca68fa 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -162,19 +162,29 @@ free_map: /* if error is returned, fd is released. * On success caller should complete fd access with matching fdput() */ -struct bpf_map *bpf_map_get(struct fd f) +struct bpf_map *__bpf_map_get(struct fd f) { - struct bpf_map *map; - if (!f.file) return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_map_fops) { fdput(f); return ERR_PTR(-EINVAL); } - map = f.file->private_data; + return f.file->private_data; +} + +static struct bpf_map *bpf_map_get(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_map *map; + + map = __bpf_map_get(f); + if (IS_ERR(map)) + return map; + + atomic_inc(&map->refcnt); + fdput(f); return map; } @@ -202,7 +212,7 @@ static int map_lookup_elem(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -261,7 +271,7 @@ static int map_update_elem(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -314,7 +324,7 @@ static int map_delete_elem(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -355,7 +365,7 @@ static int map_get_next_key(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -549,21 +559,16 @@ static int bpf_prog_new_fd(struct bpf_prog *prog) O_RDWR | O_CLOEXEC); } -static struct bpf_prog *get_prog(struct fd f) +static struct bpf_prog *__bpf_prog_get(struct fd f) { - struct bpf_prog *prog; - if (!f.file) return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_prog_fops) { fdput(f); return ERR_PTR(-EINVAL); } - prog = f.file->private_data; - - return prog; + return f.file->private_data; } /* called by sockets/tracing/seccomp before attaching program to an event @@ -574,13 +579,13 @@ struct bpf_prog *bpf_prog_get(u32 ufd) struct fd f = fdget(ufd); struct bpf_prog *prog; - prog = get_prog(f); - + prog = __bpf_prog_get(f); if (IS_ERR(prog)) return prog; atomic_inc(&prog->aux->refcnt); fdput(f); + return prog; } EXPORT_SYMBOL_GPL(bpf_prog_get); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b56cf51f8d42..fdc88c5a60e3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1989,8 +1989,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) } f = fdget(insn->imm); - - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) { verbose("fd %d is not pointing to valid bpf_map\n", insn->imm); -- cgit v1.2.3 From b2197755b2633e164a439682fb05a9b5ea48f706 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Oct 2015 
14:58:09 +0100 Subject: bpf: add support for persistent maps/progs This work adds support for "persistent" eBPF maps/programs. The term "persistent" is to be understood to mean that maps/programs have a facility that lets them survive process termination. This is desired by various eBPF subsystem users. Just to name one example: tc classifier/action. Whenever tc parses the ELF object, extracts and loads maps/progs into the kernel, these file descriptors will be out of reach after the tc instance exits. So a subsequent tc invocation won't be able to access/relocate on this resource, and therefore maps cannot easily be shared, f.e. between the ingress and egress networking data path. The current workaround is that Unix domain sockets (UDS) need to be instrumented in order to pass the created eBPF map/program file descriptors to a third party management daemon through UDS' socket passing facility. This makes it a bit complicated to deploy shared eBPF maps or programs (programs f.e. for tail calls) among various processes. We've been brainstorming on how we could tackle this issue and various approaches have been tried out so far, which can be read up on in the reference below. The architecture we eventually ended up with is a minimal file system that can hold map/prog objects. The file system is a per mount namespace singleton, and the default mount point is /sys/fs/bpf/. Any subsequent mounts within a given namespace will point to the same instance. The file system allows for creating a user-defined directory structure. The objects for maps/progs are created/fetched through bpf(2) with two new commands (BPF_OBJ_PIN/BPF_OBJ_GET). I.e. a bpf file descriptor along with a pathname is passed to bpf(2), which in turn creates (we call it eBPF object pinning) the file system nodes. Only the pathname is passed to bpf(2) for getting a new BPF file descriptor to an existing node. The user can use that to access maps and progs later on, through bpf(2). Removal of file system nodes is managed through normal VFS functions such as unlink(2), etc. The file system code is kept to a minimum and can be further extended later on. The next step I'm working on is to add dump eBPF map/prog commands to bpf(2), so that a specification from a given file descriptor can be retrieved. This can be used by things like CRIU, but applications can also inspect the metadata after calling BPF_OBJ_GET. Big thanks also to Alexei and Hannes, who contributed significantly to the design discussion that eventually led us to this architecture. Reference: https://lkml.org/lkml/2015/10/15/925 Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S.
Miller --- include/linux/bpf.h | 7 + include/uapi/linux/bpf.h | 45 +----- include/uapi/linux/magic.h | 1 + kernel/bpf/Makefile | 4 +- kernel/bpf/inode.c | 387 +++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 30 +++- 6 files changed, 433 insertions(+), 41 deletions(-) create mode 100644 kernel/bpf/inode.c (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0b5fb6acef64..de464e6683b6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,11 +167,18 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); +struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; +int bpf_map_new_fd(struct bpf_map *map); +int bpf_prog_new_fd(struct bpf_prog *prog); + +int bpf_obj_pin_user(u32 ufd, const char __user *pathname); +int bpf_obj_get_user(const char __user *pathname); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e032426cfb7..9ea2d22fa2cb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -63,50 +63,16 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* BPF syscall commands */ +/* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { - /* create a map with given type and attributes - * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size) - * returns fd or negative error - * map is deleted when fd is closed - */ BPF_MAP_CREATE, - - /* lookup key in a given map - * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key, attr->value - * returns zero and stores found elem into value - * or negative error - */ BPF_MAP_LOOKUP_ELEM, - - /* create or update key/value pair in a given map - * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key, attr->value, attr->flags - * returns zero or negative error - */ BPF_MAP_UPDATE_ELEM, - - /* find and delete elem by key in a given map - * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key - * returns zero or negative error - */ BPF_MAP_DELETE_ELEM, - - /* lookup key in a given map and return next key - * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key, attr->next_key - * returns zero and stores next key or negative error - */ BPF_MAP_GET_NEXT_KEY, - - /* verify and load eBPF program - * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size) - * Using attr->prog_type, attr->insns, attr->license - * returns fd or negative error - */ BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, }; enum bpf_map_type { @@ -160,6 +126,11 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; + }; } __attribute__((aligned(8))); /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 7b1425a6b370..accb036bbc9c 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -75,5 +75,6 @@ #define ANON_INODE_FS_MAGIC 0x09041934 #define BTRFS_TEST_MAGIC 0x73727279 #define NSFS_MAGIC 
0x6e736673 +#define BPF_FS_MAGIC 0xcafe4a11 #endif /* __LINUX_MAGIC_H__ */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e6983be12bd3..13272582eee0 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,2 +1,4 @@ obj-y := core.o -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o + +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c new file mode 100644 index 000000000000..be6d726e31c9 --- /dev/null +++ b/kernel/bpf/inode.c @@ -0,0 +1,387 @@ +/* + * Minimal file system backend for holding eBPF maps and programs, + * used by bpf(2) object pinning. + * + * Authors: + * + * Daniel Borkmann + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum bpf_type { + BPF_TYPE_UNSPEC = 0, + BPF_TYPE_PROG, + BPF_TYPE_MAP, +}; + +static void *bpf_any_get(void *raw, enum bpf_type type) +{ + switch (type) { + case BPF_TYPE_PROG: + atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); + break; + case BPF_TYPE_MAP: + atomic_inc(&((struct bpf_map *)raw)->refcnt); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return raw; +} + +static void bpf_any_put(void *raw, enum bpf_type type) +{ + switch (type) { + case BPF_TYPE_PROG: + bpf_prog_put(raw); + break; + case BPF_TYPE_MAP: + bpf_map_put(raw); + break; + default: + WARN_ON_ONCE(1); + break; + } +} + +static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) +{ + void *raw; + + *type = BPF_TYPE_MAP; + raw = bpf_map_get(ufd); + if (IS_ERR(raw)) { + *type = BPF_TYPE_PROG; + raw = bpf_prog_get(ufd); + } + + return raw; +} + +static const struct inode_operations bpf_dir_iops; + +static const struct inode_operations bpf_prog_iops = { }; +static const struct inode_operations bpf_map_iops = { }; + +static struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) +{ + struct inode *inode; + + switch (mode & S_IFMT) { + case S_IFDIR: + case S_IFREG: + break; + default: + return ERR_PTR(-EINVAL); + } + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOSPC); + + inode->i_ino = get_next_ino(); + inode->i_atime = CURRENT_TIME; + inode->i_mtime = inode->i_atime; + inode->i_ctime = inode->i_atime; + + inode_init_owner(inode, dir, mode); + + return inode; +} + +static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) +{ + *type = BPF_TYPE_UNSPEC; + if (inode->i_op == &bpf_prog_iops) + *type = BPF_TYPE_PROG; + else if (inode->i_op == &bpf_map_iops) + *type = BPF_TYPE_MAP; + else + return -EACCES; + + return 0; +} + +static bool bpf_dname_reserved(const struct dentry *dentry) +{ + return strchr(dentry->d_name.name, '.'); +} + +static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + + if (bpf_dname_reserved(dentry)) + return -EPERM; + + inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &bpf_dir_iops; + inode->i_fop = &simple_dir_operations; + + inc_nlink(inode); + inc_nlink(dir); + + d_instantiate(dentry, inode); + dget(dentry); + + return 0; +} + +static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, + umode_t mode, const struct inode_operations *iops) +{ + struct 
inode *inode; + + if (bpf_dname_reserved(dentry)) + return -EPERM; + + inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = iops; + inode->i_private = dentry->d_fsdata; + + d_instantiate(dentry, inode); + dget(dentry); + + return 0; +} + +static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, + dev_t devt) +{ + enum bpf_type type = MINOR(devt); + + if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || + dentry->d_fsdata == NULL) + return -EPERM; + + switch (type) { + case BPF_TYPE_PROG: + return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); + case BPF_TYPE_MAP: + return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); + default: + return -EPERM; + } +} + +static const struct inode_operations bpf_dir_iops = { + .lookup = simple_lookup, + .mknod = bpf_mkobj, + .mkdir = bpf_mkdir, + .rmdir = simple_rmdir, + .unlink = simple_unlink, +}; + +static int bpf_obj_do_pin(const struct filename *pathname, void *raw, + enum bpf_type type) +{ + struct dentry *dentry; + struct inode *dir; + struct path path; + umode_t mode; + dev_t devt; + int ret; + + dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); + devt = MKDEV(UNNAMED_MAJOR, type); + + ret = security_path_mknod(&path, dentry, mode, devt); + if (ret) + goto out; + + dir = d_inode(path.dentry); + if (dir->i_op != &bpf_dir_iops) { + ret = -EPERM; + goto out; + } + + dentry->d_fsdata = raw; + ret = vfs_mknod(dir, dentry, mode, devt); + dentry->d_fsdata = NULL; +out: + done_path_create(&path, dentry); + return ret; +} + +int bpf_obj_pin_user(u32 ufd, const char __user *pathname) +{ + struct filename *pname; + enum bpf_type type; + void *raw; + int ret; + + pname = getname(pathname); + if (IS_ERR(pname)) + return PTR_ERR(pname); + + raw = bpf_fd_probe_obj(ufd, &type); + if (IS_ERR(raw)) { + ret = PTR_ERR(raw); + goto out; + } + + ret = bpf_obj_do_pin(pname, raw, type); + if (ret != 0) + bpf_any_put(raw, type); +out: + putname(pname); + return ret; +} + +static void *bpf_obj_do_get(const struct filename *pathname, + enum bpf_type *type) +{ + struct inode *inode; + struct path path; + void *raw; + int ret; + + ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); + if (ret) + return ERR_PTR(ret); + + inode = d_backing_inode(path.dentry); + ret = inode_permission(inode, MAY_WRITE); + if (ret) + goto out; + + ret = bpf_inode_type(inode, type); + if (ret) + goto out; + + raw = bpf_any_get(inode->i_private, *type); + touch_atime(&path); + + path_put(&path); + return raw; +out: + path_put(&path); + return ERR_PTR(ret); +} + +int bpf_obj_get_user(const char __user *pathname) +{ + enum bpf_type type = BPF_TYPE_UNSPEC; + struct filename *pname; + int ret = -ENOENT; + void *raw; + + pname = getname(pathname); + if (IS_ERR(pname)) + return PTR_ERR(pname); + + raw = bpf_obj_do_get(pname, &type); + if (IS_ERR(raw)) { + ret = PTR_ERR(raw); + goto out; + } + + if (type == BPF_TYPE_PROG) + ret = bpf_prog_new_fd(raw); + else if (type == BPF_TYPE_MAP) + ret = bpf_map_new_fd(raw); + else + goto out; + + if (ret < 0) + bpf_any_put(raw, type); +out: + putname(pname); + return ret; +} + +static void bpf_evict_inode(struct inode *inode) +{ + enum bpf_type type; + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); + + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); +} + +static const struct super_operations bpf_super_ops = 
{ + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .evict_inode = bpf_evict_inode, +}; + +static int bpf_fill_super(struct super_block *sb, void *data, int silent) +{ + static struct tree_descr bpf_rfiles[] = { { "" } }; + struct inode *inode; + int ret; + + ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); + if (ret) + return ret; + + sb->s_op = &bpf_super_ops; + + inode = sb->s_root->d_inode; + inode->i_op = &bpf_dir_iops; + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= S_ISVTX | S_IRWXUGO; + + return 0; +} + +static struct dentry *bpf_mount(struct file_system_type *type, int flags, + const char *dev_name, void *data) +{ + return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); +} + +static struct file_system_type bpf_fs_type = { + .owner = THIS_MODULE, + .name = "bpf", + .mount = bpf_mount, + .kill_sb = kill_litter_super, + .fs_flags = FS_USERNS_MOUNT, +}; + +MODULE_ALIAS_FS("bpf"); + +static int __init bpf_init(void) +{ + int ret; + + ret = sysfs_create_mount_point(fs_kobj, "bpf"); + if (ret) + return ret; + + ret = register_filesystem(&bpf_fs_type); + if (ret) + sysfs_remove_mount_point(fs_kobj, "bpf"); + + return ret; +} +fs_initcall(bpf_init); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d7783cb04d86..0d3313d02a7e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -111,7 +111,7 @@ static const struct file_operations bpf_map_fops = { .release = bpf_map_release, }; -static int bpf_map_new_fd(struct bpf_map *map) +int bpf_map_new_fd(struct bpf_map *map) { return anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); @@ -174,7 +174,7 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -static struct bpf_map *bpf_map_get(u32 ufd) +struct bpf_map *bpf_map_get(u32 ufd) { struct fd f = fdget(ufd); struct bpf_map *map; @@ -548,7 +548,7 @@ static const struct file_operations bpf_prog_fops = { .release = bpf_prog_release, }; -static int bpf_prog_new_fd(struct bpf_prog *prog) +int bpf_prog_new_fd(struct bpf_prog *prog) { return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); @@ -674,6 +674,24 @@ free_prog_nouncharge: return err; } +#define BPF_OBJ_LAST_FIELD bpf_fd + +static int bpf_obj_pin(const union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_OBJ)) + return -EINVAL; + + return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname)); +} + +static int bpf_obj_get(const union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) + return -EINVAL; + + return bpf_obj_get_user(u64_to_ptr(attr->pathname)); +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -734,6 +752,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_PROG_LOAD: err = bpf_prog_load(&attr); break; + case BPF_OBJ_PIN: + err = bpf_obj_pin(&attr); + break; + case BPF_OBJ_GET: + err = bpf_obj_get(&attr); + break; default: err = -EINVAL; break; -- cgit v1.2.3 From fd867d51f889aec11cca235ebb008578780d052d Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 2 Nov 2015 21:55:59 -0500 Subject: net/core: generic support for disabling netdev features down stack There are some netdev features, which when disabled on an upper device, such as a bonding master or a bridge, must be disabled and cannot be re-enabled on underlying devices. 
This is a rework of an earlier, more heavy-handed approach, which simply disables and prevents re-enabling of netdev features listed in a new define in include/net/netdev_features.h, NETIF_F_UPPER_DISABLES. When an upper device disables a flag in that feature mask, the disabling will propagate down the stack, and any lower device that has any upper device with one of those flags disabled should not be able to enable said flag. Initially, only LRO is included for proof of concept, and because this code effectively does the same thing as dev_disable_lro(), though it will also activate from the ethtool path, which was one of the goals here. [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -K bond0 lro off [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: off [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: off dmesg dump: [ 1033.277986] bond0: Disabling feature 0x0000000000008000 on lower dev p5p2. [ 1034.067949] bnx2x 0000:06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 1034.753612] bond0: Disabling feature 0x0000000000008000 on lower dev p5p1. [ 1035.591019] bnx2x 0000:06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 This has been successfully tested with bnx2x, qlcnic and netxen network cards as slaves in a bond interface. Turning LRO on or off on the master also turns it on or off on each of the slaves, new slaves are added with LRO in the same state as the master, and LRO can't be toggled on the slaves. Also, this should largely remove the need for dev_disable_lro(), and most, if not all, of its call sites can be replaced by simply making sure NETIF_F_LRO isn't included in the relevant device's feature flags. Note that this patch is driven by bug reports from users saying it was confusing that bonds and slaves had different settings for the same features, and while it won't be 100% in sync if a lower device doesn't support a feature like LRO, I think this is a good step in the right direction. CC: "David S. Miller" CC: Eric Dumazet CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Jiri Pirko CC: Nikolay Aleksandrov CC: Michal Kubecek CC: Alexander Duyck CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 11 +++++++++ net/core/dev.c | 50 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9672781c593d..0f5837a9b1ba 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -125,6 +125,11 @@ enum { #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define for_each_netdev_feature(mask_addr, feature) \ + int bit; \ + for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) \ + feature = __NETIF_F_BIT(bit); + /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ @@ -167,6 +172,12 @@ enum { */ #define NETIF_F_ALL_FOR_ALL (NETIF_F_NOCACHE_COPY | NETIF_F_FSO) +/* + * If upper/master device has these features disabled, they must be disabled + * on all lower/slave devices as well.
+ */ +#define NETIF_F_UPPER_DISABLES NETIF_F_LRO + /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) diff --git a/net/core/dev.c b/net/core/dev.c index 13f49f81ae13..c4d2b430788d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6288,6 +6288,44 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } +static netdev_features_t netdev_sync_upper_features(struct net_device *lower, + struct net_device *upper, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + + for_each_netdev_feature(&upper_disables, feature) { + if (!(upper->wanted_features & feature) + && (features & feature)) { + netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n", + &feature, upper->name); + features &= ~feature; + } + } + + return features; +} + +static void netdev_sync_lower_features(struct net_device *upper, + struct net_device *lower, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + + for_each_netdev_feature(&upper_disables, feature) { + if (!(features & feature) && (lower->features & feature)) { + netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", + &feature, lower->name); + lower->wanted_features &= ~feature; + netdev_update_features(lower); + + if (unlikely(lower->features & feature)) + netdev_WARN(upper, "failed to disable %pNF on %s!\n", + &feature, lower->name); + } + } +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -6357,7 +6395,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, int __netdev_update_features(struct net_device *dev) { + struct net_device *upper, *lower; netdev_features_t features; + struct list_head *iter; int err = 0; ASSERT_RTNL(); @@ -6370,6 +6410,10 @@ int __netdev_update_features(struct net_device *dev) /* driver might be less strict about feature dependencies */ features = netdev_fix_features(dev, features); + /* some features can't be enabled if they're off on an upper device */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) + features = netdev_sync_upper_features(dev, upper, features); + if (dev->features == features) return 0; @@ -6386,6 +6430,12 @@ int __netdev_update_features(struct net_device *dev) return -1; } + /* some features must be disabled on lower devices when disabled + * on an upper device (think: bonding master or bridge) + */ + netdev_for_each_lower_dev(dev, lower, iter) + netdev_sync_lower_features(dev, lower, features); + if (!err) dev->features = features; -- cgit v1.2.3 From 2976b17989094e97567510be3ea91fc2f0c7aab3 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Sat, 26 Sep 2015 23:02:34 +0200 Subject: leds: netxbig: add device tree binding This patch adds device tree support for the netxbig LEDs. This also introduces an additional DT binding for the GPIO extension bus (netxbig-gpio-ext) used to configure the LEDs. Since this bus could also be used to control other devices, it seems more suitable to have it in a separate DT binding.
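To give a feel for the binding (a sketch with made-up delay and register values; the authoritative examples are in the documentation added below), a "timers" entry is a <mode delay_on delay_off> triplet and a "mode-val" entry a <mode value> pair, using the constants from the new include/dt-bindings/leds/leds-netxbig.h:

	timers = <NETXBIG_LED_TIMER1 500 500
		  NETXBIG_LED_TIMER2 500 1000>;
	mode-val = <NETXBIG_LED_OFF 0
		    NETXBIG_LED_ON 1
		    NETXBIG_LED_SATA 3>;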
Signed-off-by: Simon Guinot Acked-by: Linus Walleij Signed-off-by: Jacek Anaszewski --- .../devicetree/bindings/gpio/netxbig-gpio-ext.txt | 22 ++ .../devicetree/bindings/leds/leds-netxbig.txt | 92 +++++++ drivers/leds/leds-netxbig.c | 280 +++++++++++++++++++-- include/dt-bindings/leds/leds-netxbig.h | 18 ++ .../linux/platform_data/leds-kirkwood-netxbig.h | 1 + 5 files changed, 391 insertions(+), 22 deletions(-) create mode 100644 Documentation/devicetree/bindings/gpio/netxbig-gpio-ext.txt create mode 100644 Documentation/devicetree/bindings/leds/leds-netxbig.txt create mode 100644 include/dt-bindings/leds/leds-netxbig.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/gpio/netxbig-gpio-ext.txt b/Documentation/devicetree/bindings/gpio/netxbig-gpio-ext.txt new file mode 100644 index 000000000000..50ec2e690701 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/netxbig-gpio-ext.txt @@ -0,0 +1,22 @@ +Binding for the GPIO extension bus found on some LaCie/Seagate boards +(Example: 2Big/5Big Network v2, 2Big NAS). + +Required properties: +- compatible: "lacie,netxbig-gpio-ext". +- addr-gpios: GPIOs representing the address register (LSB -> MSB). +- data-gpios: GPIOs representing the data register (LSB -> MSB). +- enable-gpio: latches the new configuration (address, data) on rising edge. + +Example: + +netxbig_gpio_ext: netxbig-gpio-ext { + compatible = "lacie,netxbig-gpio-ext"; + + addr-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH + &gpio1 16 GPIO_ACTIVE_HIGH + &gpio1 17 GPIO_ACTIVE_HIGH>; + data-gpios = <&gpio1 12 GPIO_ACTIVE_HIGH + &gpio1 13 GPIO_ACTIVE_HIGH + &gpio1 14 GPIO_ACTIVE_HIGH>; + enable-gpio = <&gpio0 29 GPIO_ACTIVE_HIGH>; +}; diff --git a/Documentation/devicetree/bindings/leds/leds-netxbig.txt b/Documentation/devicetree/bindings/leds/leds-netxbig.txt new file mode 100644 index 000000000000..5ef92a26d768 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/leds-netxbig.txt @@ -0,0 +1,92 @@ +Binding for the CPLD LEDs (GPIO extension bus) found on some LaCie/Seagate +boards (Example: 2Big/5Big Network v2, 2Big NAS). + +Required properties: +- compatible: "lacie,netxbig-leds". +- gpio-ext: Phandle for the gpio-ext bus. + +Optional properties: +- timers: Timer array. Each timer entry is represented by three integers: + Mode (gpio-ext bus), delay_on and delay_off. + +Each LED is represented as a sub-node of the netxbig-leds device. + +Required sub-node properties: +- mode-addr: Mode register address on gpio-ext bus. +- mode-val: Mode to value mapping. Each entry is represented by two integers: + A mode and the corresponding value on the gpio-ext bus. +- bright-addr: Brightness register address on gpio-ext bus. +- max-brightness: Maximum brightness value. + +Optional sub-node properties: +- label: Name for this LED. If omitted, the label is taken from the node name. +- linux,default-trigger: Trigger assigned to the LED.
+ +Example: + +netxbig-leds { + compatible = "lacie,netxbig-leds"; + + gpio-ext = &gpio_ext; + + timers = ; + + blue-power { + label = "netxbig:blue:power"; + mode-addr = <0>; + mode-val = ; + bright-addr = <1>; + max-brightness = <7>; + }; + red-power { + label = "netxbig:red:power"; + mode-addr = <0>; + mode-val = ; + bright-addr = <1>; + max-brightness = <7>; + }; + blue-sata0 { + label = "netxbig:blue:sata0"; + mode-addr = <3>; + mode-val = ; + bright-addr = <2>; + max-brightness = <7>; + }; + red-sata0 { + label = "netxbig:red:sata0"; + mode-addr = <3>; + mode-val = ; + bright-addr = <2>; + max-brightness = <7>; + }; + blue-sata1 { + label = "netxbig:blue:sata1"; + mode-addr = <4>; + mode-val = ; + bright-addr = <2>; + max-brightness = <7>; + }; + red-sata1 { + label = "netxbig:red:sata1"; + mode-addr = <4>; + mode-val = ; + bright-addr = <2>; + max-brightness = <7>; + }; +}; diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c index 25e419752a7b..e1f3c2632278 100644 --- a/drivers/leds/leds-netxbig.c +++ b/drivers/leds/leds-netxbig.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,11 @@ struct netxbig_led_data { spinlock_t lock; }; +struct netxbig_led_priv { + struct netxbig_led_platform_data *pdata; + struct netxbig_led_data leds_data[]; +}; + static int netxbig_led_get_timer_mode(enum netxbig_led_mode *mode, unsigned long delay_on, unsigned long delay_off, @@ -304,12 +310,12 @@ static void delete_netxbig_led(struct netxbig_led_data *led_dat) led_classdev_unregister(&led_dat->cdev); } -static int -create_netxbig_led(struct platform_device *pdev, - struct netxbig_led_data *led_dat, - const struct netxbig_led *template) +static int create_netxbig_led(struct platform_device *pdev, int led, + struct netxbig_led_priv *priv) { - struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct netxbig_led_platform_data *pdata = priv->pdata; + struct netxbig_led_data *led_dat = &priv->leds_data[led]; + const struct netxbig_led *template = &priv->pdata->leds[led]; spin_lock_init(&led_dat->lock); led_dat->gpio_ext = pdata->gpio_ext; @@ -333,7 +339,7 @@ create_netxbig_led(struct platform_device *pdev, led_dat->mode_addr = template->mode_addr; led_dat->mode_val = template->mode_val; led_dat->bright_addr = template->bright_addr; - led_dat->bright_max = (1 << pdata->gpio_ext->num_data) - 1; + led_dat->bright_max = template->bright_max; led_dat->timer = pdata->timer; led_dat->num_timer = pdata->num_timer; /* @@ -346,38 +352,269 @@ create_netxbig_led(struct platform_device *pdev, return led_classdev_register(&pdev->dev, &led_dat->cdev); } +#ifdef CONFIG_OF_GPIO +static int gpio_ext_get_of_pdata(struct device *dev, struct device_node *np, + struct netxbig_gpio_ext *gpio_ext) +{ + int *addr, *data; + int num_addr, num_data; + int ret; + int i; + + ret = of_gpio_named_count(np, "addr-gpios"); + if (ret < 0) { + dev_err(dev, + "Failed to count GPIOs in DT property addr-gpios\n"); + return ret; + } + num_addr = ret; + addr = devm_kzalloc(dev, num_addr * sizeof(*addr), GFP_KERNEL); + if (!addr) + return -ENOMEM; + + for (i = 0; i < num_addr; i++) { + ret = of_get_named_gpio(np, "addr-gpios", i); + if (ret < 0) + return ret; + addr[i] = ret; + } + gpio_ext->addr = addr; + gpio_ext->num_addr = num_addr; + + ret = of_gpio_named_count(np, "data-gpios"); + if (ret < 0) { + dev_err(dev, + "Failed to count GPIOs in DT property data-gpios\n"); + return ret; + } + num_data = ret; + data = devm_kzalloc(dev, num_data * sizeof(*data), 
GFP_KERNEL); + if (!data) + return -ENOMEM; + + for (i = 0; i < num_data; i++) { + ret = of_get_named_gpio(np, "data-gpios", i); + if (ret < 0) + return ret; + data[i] = ret; + } + gpio_ext->data = data; + gpio_ext->num_data = num_data; + + ret = of_get_named_gpio(np, "enable-gpio", 0); + if (ret < 0) { + dev_err(dev, + "Failed to get GPIO from DT property enable-gpio\n"); + return ret; + } + gpio_ext->enable = ret; + + return 0; +} + +static int netxbig_leds_get_of_pdata(struct device *dev, + struct netxbig_led_platform_data *pdata) +{ + struct device_node *np = dev->of_node; + struct device_node *gpio_ext_np; + struct device_node *child; + struct netxbig_gpio_ext *gpio_ext; + struct netxbig_led_timer *timers; + struct netxbig_led *leds, *led; + int num_timers; + int num_leds = 0; + int ret; + int i; + + /* GPIO extension */ + gpio_ext_np = of_parse_phandle(np, "gpio-ext", 0); + if (!gpio_ext_np) { + dev_err(dev, "Failed to get DT handle gpio-ext\n"); + return -EINVAL; + } + + gpio_ext = devm_kzalloc(dev, sizeof(*gpio_ext), GFP_KERNEL); + if (!gpio_ext) + return -ENOMEM; + ret = gpio_ext_get_of_pdata(dev, gpio_ext_np, gpio_ext); + if (ret) + return ret; + of_node_put(gpio_ext_np); + pdata->gpio_ext = gpio_ext; + + /* Timers (optional) */ + ret = of_property_count_u32_elems(np, "timers"); + if (ret > 0) { + if (ret % 3) + return -EINVAL; + num_timers = ret / 3; + timers = devm_kzalloc(dev, num_timers * sizeof(*timers), + GFP_KERNEL); + if (!timers) + return -ENOMEM; + for (i = 0; i < num_timers; i++) { + u32 tmp; + + of_property_read_u32_index(np, "timers", 3 * i, + &timers[i].mode); + if (timers[i].mode >= NETXBIG_LED_MODE_NUM) + return -EINVAL; + of_property_read_u32_index(np, "timers", + 3 * i + 1, &tmp); + timers[i].delay_on = tmp; + of_property_read_u32_index(np, "timers", + 3 * i + 2, &tmp); + timers[i].delay_off = tmp; + } + pdata->timer = timers; + pdata->num_timer = num_timers; + } + + /* LEDs */ + num_leds = of_get_child_count(np); + if (!num_leds) { + dev_err(dev, "No LED subnodes found in DT\n"); + return -ENODEV; + } + + leds = devm_kzalloc(dev, num_leds * sizeof(*leds), GFP_KERNEL); + if (!leds) + return -ENOMEM; + + led = leds; + for_each_child_of_node(np, child) { + const char *string; + int *mode_val; + int num_modes; + + ret = of_property_read_u32(child, "mode-addr", + &led->mode_addr); + if (ret) + goto err_node_put; + + ret = of_property_read_u32(child, "bright-addr", + &led->bright_addr); + if (ret) + goto err_node_put; + + ret = of_property_read_u32(child, "max-brightness", + &led->bright_max); + if (ret) + goto err_node_put; + + mode_val = + devm_kzalloc(dev, + NETXBIG_LED_MODE_NUM * sizeof(*mode_val), + GFP_KERNEL); + if (!mode_val) { + ret = -ENOMEM; + goto err_node_put; + } + + for (i = 0; i < NETXBIG_LED_MODE_NUM; i++) + mode_val[i] = NETXBIG_LED_INVALID_MODE; + + ret = of_property_count_u32_elems(child, "mode-val"); + if (ret < 0 || ret % 2) { + ret = -EINVAL; + goto err_node_put; + } + num_modes = ret / 2; + if (num_modes > NETXBIG_LED_MODE_NUM) { + ret = -EINVAL; + goto err_node_put; + } + + for (i = 0; i < num_modes; i++) { + int mode; + int val; + + of_property_read_u32_index(child, + "mode-val", 2 * i, &mode); + of_property_read_u32_index(child, + "mode-val", 2 * i + 1, &val); + if (mode >= NETXBIG_LED_MODE_NUM) { + ret = -EINVAL; + goto err_node_put; + } + mode_val[mode] = val; + } + led->mode_val = mode_val; + + if (!of_property_read_string(child, "label", &string)) + led->name = string; + else + led->name = child->name; + + if 
(!of_property_read_string(child, + "linux,default-trigger", &string)) + led->default_trigger = string; + + led++; + } + + pdata->leds = leds; + pdata->num_leds = num_leds; + + return 0; + +err_node_put: + of_node_put(child); + return ret; +} + +static const struct of_device_id of_netxbig_leds_match[] = { + { .compatible = "lacie,netxbig-leds", }, + {}, +}; +#else +static inline int +netxbig_leds_get_of_pdata(struct device *dev, + struct netxbig_led_platform_data *pdata) +{ + return -ENODEV; +} +#endif /* CONFIG_OF_GPIO */ + static int netxbig_led_probe(struct platform_device *pdev) { struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); - struct netxbig_led_data *leds_data; + struct netxbig_led_priv *priv; int i; int ret; - if (!pdata) - return -EINVAL; + if (!pdata) { + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + ret = netxbig_leds_get_of_pdata(&pdev->dev, pdata); + if (ret) + return ret; + } - leds_data = devm_kzalloc(&pdev->dev, - sizeof(struct netxbig_led_data) * pdata->num_leds, GFP_KERNEL); - if (!leds_data) + priv = devm_kzalloc(&pdev->dev, sizeof(*priv) + + pdata->num_leds * sizeof(struct netxbig_led_data), + GFP_KERNEL); + if (!priv) return -ENOMEM; + priv->pdata = pdata; ret = gpio_ext_init(pdata->gpio_ext); if (ret < 0) return ret; for (i = 0; i < pdata->num_leds; i++) { - ret = create_netxbig_led(pdev, &leds_data[i], &pdata->leds[i]); + ret = create_netxbig_led(pdev, i, priv); if (ret < 0) goto err_free_leds; } - - platform_set_drvdata(pdev, leds_data); + platform_set_drvdata(pdev, priv); return 0; err_free_leds: for (i = i - 1; i >= 0; i--) - delete_netxbig_led(&leds_data[i]); + delete_netxbig_led(&priv->leds_data[i]); gpio_ext_free(pdata->gpio_ext); return ret; @@ -385,14 +622,12 @@ err_free_leds: static int netxbig_led_remove(struct platform_device *pdev) { - struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); - struct netxbig_led_data *leds_data; + struct netxbig_led_priv *priv = platform_get_drvdata(pdev); + struct netxbig_led_platform_data *pdata = priv->pdata; int i; - leds_data = platform_get_drvdata(pdev); - for (i = 0; i < pdata->num_leds; i++) - delete_netxbig_led(&leds_data[i]); + delete_netxbig_led(&priv->leds_data[i]); gpio_ext_free(pdata->gpio_ext); @@ -403,7 +638,8 @@ static struct platform_driver netxbig_led_driver = { .probe = netxbig_led_probe, .remove = netxbig_led_remove, .driver = { - .name = "leds-netxbig", + .name = "leds-netxbig", + .of_match_table = of_match_ptr(of_netxbig_leds_match), }, }; diff --git a/include/dt-bindings/leds/leds-netxbig.h b/include/dt-bindings/leds/leds-netxbig.h new file mode 100644 index 000000000000..92658b0310b2 --- /dev/null +++ b/include/dt-bindings/leds/leds-netxbig.h @@ -0,0 +1,18 @@ +/* + * This header provides constants for netxbig LED bindings. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. 
+ */ + +#ifndef _DT_BINDINGS_LEDS_NETXBIG_H +#define _DT_BINDINGS_LEDS_NETXBIG_H + +#define NETXBIG_LED_OFF 0 +#define NETXBIG_LED_ON 1 +#define NETXBIG_LED_SATA 2 +#define NETXBIG_LED_TIMER1 3 +#define NETXBIG_LED_TIMER2 4 + +#endif /* _DT_BINDINGS_LEDS_NETXBIG_H */ diff --git a/include/linux/platform_data/leds-kirkwood-netxbig.h b/include/linux/platform_data/leds-kirkwood-netxbig.h index d2be19a51acd..3c85a735c380 100644 --- a/include/linux/platform_data/leds-kirkwood-netxbig.h +++ b/include/linux/platform_data/leds-kirkwood-netxbig.h @@ -40,6 +40,7 @@ struct netxbig_led { int mode_addr; int *mode_val; int bright_addr; + int bright_max; }; struct netxbig_led_platform_data { -- cgit v1.2.3 From 1e935949111e77b2b1b6fa550e88ff0573c2f4c7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 29 Sep 2015 01:27:24 -0700 Subject: watchdog: Always evaluate new timeout against min_timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now, a new timeout value is only evaluated against min_timeout if max_timeout is provided. This does not really make sense; a driver can have a minimum timeout even if it does not have a maximum timeout. Ensure that it is not smaller than min_timeout, even if max_timeout is not set. Signed-off-by: Guenter Roeck Acked-by: Uwe Kleine-König Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index d74a0e907b9e..e90e3ea5ebeb 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -119,8 +119,15 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway /* Use the following function to check if a timeout value is invalid */ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t) { - return ((wdd->max_timeout != 0) && - (t < wdd->min_timeout || t > wdd->max_timeout)); + /* + * The timeout is invalid if + * - the requested value is smaller than the configured minimum timeout, + * or + * - a maximum timeout is configured, and the requested value is larger + * than the maximum timeout. + */ + return t < wdd->min_timeout || + (wdd->max_timeout && t > wdd->max_timeout); } /* Use the following functions to manipulate watchdog driver specific data */ -- cgit v1.2.3 From 5f94c943d5dd4b51f3248193e622dd5fcdbb8b11 Mon Sep 17 00:00:00 2001 From: Stefan Sørensen Date: Tue, 3 Nov 2015 09:34:07 +0100 Subject: ptp: Change ptp_class to a proper bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the definition of PTP_CLASS_L2 to not have any bits overlapping with the other defined protocol values, allowing the PTP_CLASS_* definitions to be used for simple filtering on packet type. Signed-off-by: Stefan Sørensen Signed-off-by: David S.
Miller --- include/linux/ptp_classify.h | 7 ++++--- net/core/ptp_classifier.c | 16 ++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 159c987b1853..a079656b614c 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -32,9 +32,9 @@ #define PTP_CLASS_VMASK 0x0f /* max protocol version is 15 */ #define PTP_CLASS_IPV4 0x10 /* event in an IPV4 UDP packet */ #define PTP_CLASS_IPV6 0x20 /* event in an IPV6 UDP packet */ -#define PTP_CLASS_L2 0x30 /* event in a L2 packet */ -#define PTP_CLASS_PMASK 0x30 /* mask for the packet type field */ -#define PTP_CLASS_VLAN 0x40 /* event in a VLAN tagged packet */ +#define PTP_CLASS_L2 0x40 /* event in a L2 packet */ +#define PTP_CLASS_PMASK 0x70 /* mask for the packet type field */ +#define PTP_CLASS_VLAN 0x80 /* event in a VLAN tagged packet */ #define PTP_CLASS_V1_IPV4 (PTP_CLASS_V1 | PTP_CLASS_IPV4) #define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /* probably DNE */ @@ -42,6 +42,7 @@ #define PTP_CLASS_V2_IPV6 (PTP_CLASS_V2 | PTP_CLASS_IPV6) #define PTP_CLASS_V2_L2 (PTP_CLASS_V2 | PTP_CLASS_L2) #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN) +#define PTP_CLASS_L4 (PTP_CLASS_IPV4 | PTP_CLASS_IPV6) #define PTP_EV_PORT 319 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 4eab4a94a59d..703cf76aa7c2 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -58,7 +58,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * or #0xc0 ; PTP_CLASS_VLAN|PTP_CLASS_L2 * ret a ; return PTP class * * ; PTP over UDP over IPv4 over 802.1Q over Ethernet @@ -73,7 +73,7 @@ * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? * ldh [x + 26] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * or #0x90 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 * ret a ; return PTP class * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE * @@ -86,7 +86,7 @@ * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? 
* ldh [66] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 + * or #0xa0 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * @@ -98,7 +98,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [14] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x30 ; PTP_CLASS_L2 + * or #0x40 ; PTP_CLASS_L2 * ret a ; return PTP class * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE */ @@ -150,7 +150,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000070 }, + { 0x44, 0, 0, 0x000000c0 }, { 0x16, 0, 0, 0x00000000 }, { 0x15, 0, 12, 0x00000800 }, { 0x30, 0, 0, 0x0000001b }, @@ -162,7 +162,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x48, 0, 0, 0x0000001a }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000050 }, + { 0x44, 0, 0, 0x00000090 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 8, 0x000086dd }, @@ -172,7 +172,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x28, 0, 0, 0x00000042 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000060 }, + { 0x44, 0, 0, 0x000000a0 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, @@ -181,7 +181,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x00000000 }, { 0x28, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000030 }, + { 0x44, 0, 0, 0x00000040 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; -- cgit v1.2.3 From 5ba3f7d61a3a9e6d94462b207d302931b53d8c61 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 3 Nov 2015 10:15:59 -0500 Subject: net/core: fix for_each_netdev_feature As pointed out by Nikolay and further explained by Geert, the initial for_each_netdev_feature macro was broken, as feature would get set outside of the block of code it was intended to run in, thus only ever working for the first feature bit in the mask. While less pretty this way, this is tested and confirmed functional with multiple feature bits set in NETIF_F_UPPER_DISABLES. [root@dell-per730-01 ~]# ethtool -K bond0 lro off ... [ 242.761394] bond0: Disabling feature 0x0000000000008000 on lower dev p5p2. [ 243.552178] bnx2x 0000:06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 244.353978] bond0: Disabling feature 0x0000000000008000 on lower dev p5p1. [ 245.147420] bnx2x 0000:06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 [root@dell-per730-01 ~]# ethtool -K bond0 gro off ... [ 251.925645] bond0: Disabling feature 0x0000000000004000 on lower dev p5p2. [ 252.713693] bnx2x 0000:06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 253.499085] bond0: Disabling feature 0x0000000000004000 on lower dev p5p1. [ 254.290922] bnx2x 0000:06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 Fixes: fd867d51f ("net/core: generic support for disabling netdev features down stack") CC: "David S. Miller" CC: Eric Dumazet CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Jiri Pirko CC: Nikolay Aleksandrov CC: Michal Kubecek CC: Alexander Duyck CC: Geert Uytterhoeven CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/netdev_features.h | 6 ++---- net/core/dev.c | 8 ++++++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 0f5837a9b1ba..f0d87347df19 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -125,10 +125,8 @@ enum { #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define for_each_netdev_feature(mask_addr, feature) \ - int bit; \ - for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) \ - feature = __NETIF_F_BIT(bit); +#define for_each_netdev_feature(mask_addr, bit) \ + for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/dev.c b/net/core/dev.c index c4d2b430788d..8ce3f74cd6b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6293,8 +6293,10 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower, { netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; netdev_features_t feature; + int feature_bit; - for_each_netdev_feature(&upper_disables, feature) { + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); if (!(upper->wanted_features & feature) && (features & feature)) { netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n", @@ -6311,8 +6313,10 @@ static void netdev_sync_lower_features(struct net_device *upper, { netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; netdev_features_t feature; + int feature_bit; - for_each_netdev_feature(&upper_disables, feature) { + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", &feature, lower->name); -- cgit v1.2.3 From b7ceb7d50048d0dd4830f106f0fb7f5424031598 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Mon, 2 Nov 2015 17:12:27 +0100 Subject: lightnvm: refactor phys addrs type to u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_LBDAF is not set, struct ppa_addr exceeds its underlying type on 32-bit architectures: ppa_addr requires a 64-bit integer to hold the generic ppa format. We therefore refactor it to u64 and replace the sector_t usages with u64 for physical addresses.
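[Editor's illustration, not part of the patch: a standalone sketch of why the packed-address union needs a fixed 64-bit base type for its bitfields. The struct name and the width macros are stand-ins mirroring the generic-format fields above, not the kernel's definitions.]

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in widths modeled on the NVM_*_BITS generic format. */
	#define SEC_BITS 2
	#define PL_BITS  2
	#define PG_BITS  8
	#define BLK_BITS 10
	#define LUN_BITS 2
	#define CH_BITS  4

	struct ppa_sketch {
		union {
			struct {
				uint64_t sec : SEC_BITS;
				uint64_t pl  : PL_BITS;
				uint64_t pg  : PG_BITS;
				uint64_t blk : BLK_BITS;
				uint64_t lun : LUN_BITS;
				uint64_t ch  : CH_BITS;
			} g;
			uint64_t ppa;	/* the whole packed address */
		};
	};

	int main(void)
	{
		struct ppa_sketch p = { .ppa = 0 };

		/*
		 * With a 32-bit base type (sector_t without CONFIG_LBDAF) the
		 * fields cannot reliably share one 64-bit address word; with a
		 * u64 base the union view is well defined on both 32-bit and
		 * 64-bit hosts.
		 */
		p.g.ch = 3;
		p.g.blk = 1023;
		printf("packed ppa = 0x%016llx\n", (unsigned long long)p.ppa);
		return 0;
	}
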
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/rrpc.c | 17 ++++++++--------- drivers/lightnvm/rrpc.h | 6 +++--- include/linux/lightnvm.h | 28 ++++++++++++++-------------- 3 files changed, 25 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 22fcd629565d..64a888a5e9b3 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -116,15 +116,14 @@ static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk) return (rblk->next_page == rrpc->dev->pgs_per_blk); } -static sector_t block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk) +static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk) { struct nvm_block *blk = rblk->parent; return blk->id * rrpc->dev->pgs_per_blk; } -static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, - sector_t addr) +static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr) { struct ppa_addr paddr; @@ -231,7 +230,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) struct page *page; int slot; int nr_pgs_per_blk = rrpc->dev->pgs_per_blk; - sector_t phys_addr; + u64 phys_addr; DECLARE_COMPLETION_ONSTACK(wait); if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) @@ -464,7 +463,7 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc) } static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr, - struct rrpc_block *rblk, sector_t paddr) + struct rrpc_block *rblk, u64 paddr) { struct rrpc_addr *gp; struct rrpc_rev_addr *rev; @@ -486,9 +485,9 @@ static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr, return gp; } -static sector_t rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk) +static u64 rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk) { - sector_t addr = ADDR_EMPTY; + u64 addr = ADDR_EMPTY; spin_lock(&rblk->lock); if (block_is_full(rrpc, rblk)) @@ -516,7 +515,7 @@ static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr, struct rrpc_lun *rlun; struct rrpc_block *rblk; struct nvm_lun *lun; - sector_t paddr; + u64 paddr; rlun = rrpc_get_lun_rr(rrpc, is_gc); lun = rlun->parent; @@ -1144,7 +1143,7 @@ static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk) struct nvm_dev *dev = rrpc->dev; int offset; struct rrpc_addr *laddr; - sector_t paddr, pladdr; + u64 paddr, pladdr; for (offset = 0; offset < dev->pgs_per_blk; offset++) { paddr = block_to_addr(rrpc, rblk) + offset; diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index b5df08d7b8bf..a9696a06c38c 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h @@ -86,7 +86,7 @@ struct rrpc { struct nvm_dev *dev; struct gendisk *disk; - sector_t poffset; /* physical page offset */ + u64 poffset; /* physical page offset */ int lun_offset; int nr_luns; @@ -136,13 +136,13 @@ struct rrpc_block_gc { /* Logical to physical mapping */ struct rrpc_addr { - sector_t addr; + u64 addr; struct rrpc_block *rblk; }; /* Physical to logical mapping */ struct rrpc_rev_addr { - sector_t addr; + u64 addr; }; static inline sector_t rrpc_get_laddr(struct bio *bio) diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 122b176600fa..5ebd70d12f35 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -134,26 +134,26 @@ struct ppa_addr { union { /* Channel-based PPA format in nand 4x2x2x2x8x10 */ struct { - sector_t ch : 4; - sector_t sec : 2; /* 4 sectors per page */ - sector_t pl : 2; /* 4 
planes per LUN */ - sector_t lun : 2; /* 4 LUNs per channel */ - sector_t pg : 8; /* 256 pages per block */ - sector_t blk : 10;/* 1024 blocks per plane */ - sector_t resved : 36; + u64 ch : 4; + u64 sec : 2; /* 4 sectors per page */ + u64 pl : 2; /* 4 planes per LUN */ + u64 lun : 2; /* 4 LUNs per channel */ + u64 pg : 8; /* 256 pages per block */ + u64 blk : 10;/* 1024 blocks per plane */ + u64 resved : 36; } chnl; /* Generic structure for all addresses */ struct { - sector_t sec : NVM_SEC_BITS; - sector_t pl : NVM_PL_BITS; - sector_t pg : NVM_PG_BITS; - sector_t blk : NVM_BLK_BITS; - sector_t lun : NVM_LUN_BITS; - sector_t ch : NVM_CH_BITS; + u64 sec : NVM_SEC_BITS; + u64 pl : NVM_PL_BITS; + u64 pg : NVM_PG_BITS; + u64 blk : NVM_BLK_BITS; + u64 lun : NVM_LUN_BITS; + u64 ch : NVM_CH_BITS; } g; - sector_t ppa; + u64 ppa; }; } __packed; -- cgit v1.2.3 From 8fbcf237439f841e7e9c4675790e08ea1c295bd3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 Nov 2015 18:25:34 +0100 Subject: nfs: Remove unused xdr page offsets in getacl/setacl arguments The arguments passed around for getacl and setacl xdr encoding, struct nfs_setaclargs and struct nfs_getaclargs, both contain an array of pages, an offset into the first page, and the length of the page data. The offset is unused as it is always zero; remove it. Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ++--- fs/nfs/nfs4xdr.c | 4 ++-- include/linux/nfs_xdr.h | 2 -- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87a081c6299d..7ed8f2cd97f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4603,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) + struct page **pages) { struct page *newpage, **spages; int rc = 0; @@ -4747,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu goto out_free; args.acl_len = npages * PAGE_SIZE; - args.acl_pgbase = 0; dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -4839,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_return_delegation(inode); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9f656791a338..22a1ddd4fe96 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun *p = cpu_to_be32(FATTR4_WORD0_ACL); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); - xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len); } static void @@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, args->acl_pgbase, args->acl_len); + args->acl_pages, 0, args->acl_len); encode_nops(&hdr); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 
4728e7e5fc49..570d630f98ae 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -705,7 +705,6 @@ struct nfs_setaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; @@ -717,7 +716,6 @@ struct nfs_getaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; -- cgit v1.2.3 From bb99d8ccec7f83a2730a29d1ae7eee5ffa446a9e Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 30 Oct 2015 14:25:39 +0900 Subject: tracing: Allow arch-specific stack tracer A stack frame may be used in a different way depending on cpu architecture. Thus it is not always appropriate to slurp the stack contents, as current check_stack() does, in order to calculate a stack index (height) at a given function call. At least not on arm64. In addition, there is a possibility that we will mistakenly detect a stale stack frame which has not been overwritten. This patch makes check_stack() a weak function so that an arch-specific version can be implemented later. Link: http://lkml.kernel.org/r/1446182741-31019-5-git-send-email-takahiro.akashi@linaro.org Signed-off-by: AKASHI Takahiro Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 11 +++++++ kernel/trace/trace_stack.c | 80 +++++++++++++++++++++++++--------------------- 2 files changed, 54 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6cd8c0ee4b6f..b4c92ab9e08b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -263,7 +263,18 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER + +#define STACK_TRACE_ENTRIES 500 + +struct stack_trace; + +extern unsigned stack_trace_index[]; +extern struct stack_trace stack_trace_max; +extern unsigned long stack_trace_max_size; +extern arch_spinlock_t max_stack_lock; + extern int stack_tracer_enabled; +void stack_trace_print(void); int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b746399ab59c..50945a7939f4 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -16,24 +16,22 @@ #include "trace.h" -#define STACK_TRACE_ENTRIES 500 - static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; -static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; +unsigned stack_trace_index[STACK_TRACE_ENTRIES]; /* * Reserve one entry for the passed in ip. This will allow * us to remove most or all of the stack size overhead * added by the stack tracer itself.
*/ -static struct stack_trace max_stack_trace = { +struct stack_trace stack_trace_max = { .max_entries = STACK_TRACE_ENTRIES - 1, .entries = &stack_dump_trace[0], }; -static unsigned long max_stack_size; -static arch_spinlock_t max_stack_lock = +unsigned long stack_trace_max_size; +arch_spinlock_t max_stack_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static DEFINE_PER_CPU(int, trace_active); @@ -42,30 +40,38 @@ static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static int last_stack_tracer_enabled; -static inline void print_max_stack(void) +void stack_trace_print(void) { long i; int size; pr_emerg(" Depth Size Location (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries); + stack_trace_max.nr_entries); - for (i = 0; i < max_stack_trace.nr_entries; i++) { + for (i = 0; i < stack_trace_max.nr_entries; i++) { if (stack_dump_trace[i] == ULONG_MAX) break; - if (i+1 == max_stack_trace.nr_entries || + if (i+1 == stack_trace_max.nr_entries || stack_dump_trace[i+1] == ULONG_MAX) - size = stack_dump_index[i]; + size = stack_trace_index[i]; else - size = stack_dump_index[i] - stack_dump_index[i+1]; + size = stack_trace_index[i] - stack_trace_index[i+1]; - pr_emerg("%3ld) %8d %5d %pS\n", i, stack_dump_index[i], + pr_emerg("%3ld) %8d %5d %pS\n", i, stack_trace_index[i], size, (void *)stack_dump_trace[i]); } } -static inline void +/* + * When arch-specific code overides this function, the following + * data should be filled up, assuming max_stack_lock is held to + * prevent concurrent updates. + * stack_trace_index[] + * stack_trace_max + * stack_trace_max_size + */ +void __weak check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; @@ -78,7 +84,7 @@ check_stack(unsigned long ip, unsigned long *stack) /* Remove the frame of the tracer */ this_size -= frame_size; - if (this_size <= max_stack_size) + if (this_size <= stack_trace_max_size) return; /* we do not handle interrupt stacks yet */ @@ -93,18 +99,18 @@ check_stack(unsigned long ip, unsigned long *stack) this_size -= tracer_frame; /* a race could have already updated it */ - if (this_size <= max_stack_size) + if (this_size <= stack_trace_max_size) goto out; - max_stack_size = this_size; + stack_trace_max_size = this_size; - max_stack_trace.nr_entries = 0; - max_stack_trace.skip = 3; + stack_trace_max.nr_entries = 0; + stack_trace_max.skip = 3; - save_stack_trace(&max_stack_trace); + save_stack_trace(&stack_trace_max); /* Skip over the overhead of the stack tracer itself */ - for (i = 0; i < max_stack_trace.nr_entries; i++) { + for (i = 0; i < stack_trace_max.nr_entries; i++) { if (stack_dump_trace[i] == ip) break; } @@ -124,18 +130,18 @@ check_stack(unsigned long ip, unsigned long *stack) * loop will only happen once. This code only takes place * on a new max, so it is far from a fast path. 
*/ - while (i < max_stack_trace.nr_entries) { + while (i < stack_trace_max.nr_entries) { int found = 0; - stack_dump_index[x] = this_size; + stack_trace_index[x] = this_size; p = start; - for (; p < top && i < max_stack_trace.nr_entries; p++) { + for (; p < top && i < stack_trace_max.nr_entries; p++) { if (stack_dump_trace[i] == ULONG_MAX) break; if (*p == stack_dump_trace[i]) { stack_dump_trace[x] = stack_dump_trace[i++]; - this_size = stack_dump_index[x++] = + this_size = stack_trace_index[x++] = (top - p) * sizeof(unsigned long); found = 1; /* Start the search from here */ @@ -150,7 +156,7 @@ check_stack(unsigned long ip, unsigned long *stack) if (unlikely(!tracer_frame)) { tracer_frame = (p - stack) * sizeof(unsigned long); - max_stack_size -= tracer_frame; + stack_trace_max_size -= tracer_frame; } } } @@ -159,12 +165,12 @@ check_stack(unsigned long ip, unsigned long *stack) i++; } - max_stack_trace.nr_entries = x; + stack_trace_max.nr_entries = x; for (; x < i; x++) stack_dump_trace[x] = ULONG_MAX; if (task_stack_end_corrupted(current)) { - print_max_stack(); + stack_trace_print(); BUG(); } @@ -262,7 +268,7 @@ __next(struct seq_file *m, loff_t *pos) { long n = *pos - 1; - if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX) + if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX) return NULL; m->private = (void *)n; @@ -332,9 +338,9 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries); + stack_trace_max.nr_entries); - if (!stack_tracer_enabled && !max_stack_size) + if (!stack_tracer_enabled && !stack_trace_max_size) print_disabled(m); return 0; @@ -342,17 +348,17 @@ static int t_show(struct seq_file *m, void *v) i = *(long *)v; - if (i >= max_stack_trace.nr_entries || + if (i >= stack_trace_max.nr_entries || stack_dump_trace[i] == ULONG_MAX) return 0; - if (i+1 == max_stack_trace.nr_entries || + if (i+1 == stack_trace_max.nr_entries || stack_dump_trace[i+1] == ULONG_MAX) - size = stack_dump_index[i]; + size = stack_trace_index[i]; else - size = stack_dump_index[i] - stack_dump_index[i+1]; + size = stack_trace_index[i] - stack_trace_index[i+1]; - seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size); + seq_printf(m, "%3ld) %8d %5d ", i, stack_trace_index[i], size); trace_lookup_stack(m, i); @@ -442,7 +448,7 @@ static __init int stack_trace_init(void) return 0; trace_create_file("stack_max_size", 0644, d_tracer, - &max_stack_size, &stack_max_size_fops); + &stack_trace_max_size, &stack_max_size_fops); trace_create_file("stack_trace", 0444, d_tracer, NULL, &stack_trace_fops); -- cgit v1.2.3 From 80220fa72b917c64675f3ba4008d2c5a7b50b281 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 3 Nov 2015 09:00:15 +0100 Subject: watchdog: include: fix some typos Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index e90e3ea5ebeb..5f18dd9ec224 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -25,7 +25,7 @@ struct watchdog_device; * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. * @set_timeout:The routine for setting the watchdog devices timeout value. 
- * @get_timeleft:The routine that get's the time that's left before a reset. + * @get_timeleft:The routine that gets the time left before a reset. * @ref: The ref operation for dyn. allocated watchdog_device structs * @unref: The unref operation for dyn. allocated watchdog_device structs * @ioctl: The routines that handles extra ioctl calls. @@ -33,7 +33,7 @@ struct watchdog_device; * The watchdog_ops structure contains a list of low-level operations * that control a watchdog device. It also contains the module that owns * these operations. The start and stop function are mandatory, all other - * functions are optonal. + * functions are optional. */ struct watchdog_ops { struct module *owner; -- cgit v1.2.3 From 760d280084f8805e5de73e3591912d5db9da9dbe Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 3 Nov 2015 09:00:16 +0100 Subject: watchdog: include: add units for timeout values in kerneldoc Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 5f18dd9ec224..027b1f43f12d 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -24,8 +24,8 @@ struct watchdog_device; * @stop: The routine for stopping the watchdog device. * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. - * @set_timeout:The routine for setting the watchdog devices timeout value. - * @get_timeleft:The routine that gets the time left before a reset. + * @set_timeout:The routine for setting the watchdog devices timeout value (in seconds). + * @get_timeleft:The routine that gets the time left before a reset (in seconds). * @ref: The ref operation for dyn. allocated watchdog_device structs * @unref: The unref operation for dyn. allocated watchdog_device structs * @ioctl: The routines that handles extra ioctl calls. @@ -59,9 +59,9 @@ struct watchdog_ops { * @info: Pointer to a watchdog_info structure. * @ops: Pointer to the list of watchdog operations. * @bootstatus: Status of the watchdog device at boot. - * @timeout: The watchdog devices timeout value. - * @min_timeout:The watchdog devices minimum timeout value. - * @max_timeout:The watchdog devices maximum timeout value. + * @timeout: The watchdog devices timeout value (in seconds). + * @min_timeout:The watchdog devices minimum timeout value (in seconds). + * @max_timeout:The watchdog devices maximum timeout value (in seconds). * @driver-data:Pointer to the drivers private data. * @lock: Lock for watchdog core internal use only. * @status: Field that contains the devices internal status bits. -- cgit v1.2.3 From d332736df0c277905de06311ae084e2c76580a3f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 3 Nov 2015 14:50:15 -0500 Subject: tracing: Rename max_stack_lock to stack_trace_max_lock Now that max_stack_lock is a global variable, it requires a naming convention that is unlikely to collide. Rename it to the same naming convention that the other stack_trace variables have. 
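[Editor's illustration, not part of the patch: a hedged skeleton of the arch-specific check_stack() override that the earlier patch enables, holding the newly renamed lock while it fills the shared state, per the contract in the comment above the weak function. The file path is hypothetical and no real unwinder is shown.]

	/* hypothetical arch/<arch>/kernel/stacktrace.c */
	void check_stack(unsigned long ip, unsigned long *stack)
	{
		unsigned long flags;

		local_irq_save(flags);
		arch_spin_lock(&stack_trace_max_lock);

		/*
		 * Arch-specific frame walk goes here: fill in
		 * stack_trace_index[], stack_trace_max and
		 * stack_trace_max_size, which are declared in
		 * <linux/ftrace.h> and shared with the core tracer.
		 */

		arch_spin_unlock(&stack_trace_max_lock);
		local_irq_restore(flags);
	}
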
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/trace_stack.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b4c92ab9e08b..eae6548efbf0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -271,7 +271,7 @@ struct stack_trace; extern unsigned stack_trace_index[]; extern struct stack_trace stack_trace_max; extern unsigned long stack_trace_max_size; -extern arch_spinlock_t max_stack_lock; +extern arch_spinlock_t stack_trace_max_lock; extern int stack_tracer_enabled; void stack_trace_print(void); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 50945a7939f4..0bd212af406c 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -31,7 +31,7 @@ struct stack_trace stack_trace_max = { }; unsigned long stack_trace_max_size; -arch_spinlock_t max_stack_lock = +arch_spinlock_t stack_trace_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static DEFINE_PER_CPU(int, trace_active); @@ -65,7 +65,7 @@ void stack_trace_print(void) /* * When arch-specific code overides this function, the following - * data should be filled up, assuming max_stack_lock is held to + * data should be filled up, assuming stack_trace_max_lock is held to * prevent concurrent updates. * stack_trace_index[] * stack_trace_max @@ -92,7 +92,7 @@ check_stack(unsigned long ip, unsigned long *stack) return; local_irq_save(flags); - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); /* In case another CPU set the tracer_frame on us */ if (unlikely(!frame_size)) @@ -175,7 +175,7 @@ check_stack(unsigned long ip, unsigned long *stack) } out: - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); local_irq_restore(flags); } @@ -246,9 +246,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, cpu = smp_processor_id(); per_cpu(trace_active, cpu)++; - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); *ptr = val; - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); per_cpu(trace_active, cpu)--; local_irq_restore(flags); @@ -291,7 +291,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) cpu = smp_processor_id(); per_cpu(trace_active, cpu)++; - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); if (*pos == 0) return SEQ_START_TOKEN; @@ -303,7 +303,7 @@ static void t_stop(struct seq_file *m, void *p) { int cpu; - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); cpu = smp_processor_id(); per_cpu(trace_active, cpu)--; -- cgit v1.2.3 From 105ff3cbf225036b75a6a46c96d1ddce8e7bdc66 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 3 Nov 2015 17:22:17 -0800 Subject: atomic: remove all traces of READ_ONCE_CTRL() and atomic*_read_ctrl() This seems to be a mis-reading of how alpha memory ordering works, and is not backed up by the alpha architecture manual. The helper functions don't do anything special on any other architectures, and the arguments that support them being safe on other architectures also argue that they are safe on alpha. Basically, the "control dependency" is between a previous read and a subsequent write that is dependent on the value read. Even if the subsequent write is actually done speculatively, there is no way that such a speculative write could be made visible to other cpu's until it has been committed, which requires validating the speculation. 
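[Editor's illustration, not part of the original message: the load-store pattern being discussed, written out with the kernel's READ_ONCE()/WRITE_ONCE() macros; the variables a, b and q are placeholders.]

	q = READ_ONCE(a);		/* prior load */
	if (q)				/* conditional on the loaded value */
		WRITE_ONCE(b, 1);	/*
					 * dependent store: it cannot become
					 * visible to other CPUs before the
					 * load is validated, so the control
					 * dependency orders load -> store
					 * without an explicit barrier
					 */
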
Note that most weakly ordered architectures (very much including alpha) do not guarantee any ordering relationship between two loads that depend on each other via a control dependency: read A if (val == 1) read B because the conditional may be predicted, and the "read B" may be speculatively moved up to before reading the value A. So we require the user to insert a smp_rmb() between the two accesses to be correct: read A; if (A == 1) smp_rmb() read B Alpha is further special in that it can break that ordering even if the *address* of B depends on the read of A, because the cacheline that is read later may be stale unless you have a memory barrier in between the pointer read and the read of the value behind a pointer: read ptr read offset(ptr) whereas all other weakly ordered architectures guarantee that the data dependency (as opposed to just a control dependency) will order the two accesses. As a result, alpha needs a "smp_read_barrier_depends()" in between those two reads for them to be ordered. The control dependency that "READ_ONCE_CTRL()" and "atomic_read_ctrl()" had was a control dependency to a subsequent *write*, however, and nobody can finalize such a subsequent write without having actually done the read. And were you to write such a value to a "stale" cacheline (the way the unordered reads came to be), that would seem to lose the write entirely. So the things that make alpha able to re-order reads even more aggressively than other weak architectures do not seem to be relevant for a subsequent write. Alpha memory ordering may be strange, but there's no real indication that it is *that* strange. Also, the alpha architecture reference manual very explicitly talks about the definition of "Dependence Constraints" in section 5.6.1.7, where a preceding read dominates a subsequent write. Such a dependence constraint admittedly does not impose a BEFORE (alpha architecture term for globally visible ordering), but it does guarantee that there can be no "causal loop". I don't see how you could avoid such a loop if another cpu could see the stored value and then impact the value of the first read. Put another way: the read and the write could not be seen as being out of order wrt other cpus. So I do not see how these "x_ctrl()" functions can currently be necessary. I may have to eat my words at some point, but in the absence of clear proof that alpha actually needs this, or indeed even an explanation of how alpha could _possibly_ need it, I do not believe these functions are called for. And if it turns out that alpha really _does_ need a barrier for this case, that barrier still should not be "smp_read_barrier_depends()". We'd have to make up some new speciality barrier just for alpha, along with the documentation for why it really is necessary. Cc: Peter Zijlstra Cc: Paul E McKenney Cc: Dmitry Vyukov Cc: Will Deacon Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- Documentation/memory-barriers.txt | 54 ++++++++++++++++----------------- include/asm-generic/atomic-long.h | 1 - include/linux/atomic.h | 18 ------------- include/linux/compiler.h | 16 ------------ kernel/events/ring_buffer.c | 2 +- 5 files changed, 23 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index b5fe7657456e..aef9487303d0 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -617,16 +617,16 @@ case what's actually required is: However, stores are not speculated.
This means that ordering -is- provided for load-store control dependencies, as in the following example: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); if (q) { WRITE_ONCE(b, p); } Control dependencies pair normally with other types of barriers. That -said, please note that READ_ONCE_CTRL() is not optional! Without the -READ_ONCE_CTRL(), the compiler might combine the load from 'a' with -other loads from 'a', and the store to 'b' with other stores to 'b', -with possible highly counterintuitive effects on ordering. +said, please note that READ_ONCE() is not optional! Without the +READ_ONCE(), the compiler might combine the load from 'a' with other +loads from 'a', and the store to 'b' with other stores to 'b', with +possible highly counterintuitive effects on ordering. Worse yet, if the compiler is able to prove (say) that the value of variable 'a' is always non-zero, it would be well within its rights @@ -636,16 +636,12 @@ as follows: q = a; b = p; /* BUG: Compiler and CPU can both reorder!!! */ -Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends() -that DEC Alpha needs in order to respect control depedencies. Alternatively -use one of atomic{,64}_read_ctrl(). - -So don't leave out the READ_ONCE_CTRL(). +So don't leave out the READ_ONCE(). It is tempting to try to enforce ordering on identical stores on both branches of the "if" statement as follows: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); if (q) { barrier(); WRITE_ONCE(b, p); @@ -659,7 +655,7 @@ branches of the "if" statement as follows: Unfortunately, current compilers will transform this as follows at high optimization levels: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); barrier(); WRITE_ONCE(b, p); /* BUG: No ordering vs. load from a!!! */ if (q) { @@ -689,7 +685,7 @@ memory barriers, for example, smp_store_release(): In contrast, without explicit memory barriers, two-legged-if control ordering is guaranteed only when the stores differ, for example: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); if (q) { WRITE_ONCE(b, p); do_something(); @@ -698,14 +694,14 @@ ordering is guaranteed only when the stores differ, for example: do_something_else(); } -The initial READ_ONCE_CTRL() is still required to prevent the compiler -from proving the value of 'a'. +The initial READ_ONCE() is still required to prevent the compiler from +proving the value of 'a'. In addition, you need to be careful what you do with the local variable 'q', otherwise the compiler might be able to guess the value and again remove the needed conditional. For example: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); if (q % MAX) { WRITE_ONCE(b, p); do_something(); @@ -718,7 +714,7 @@ If MAX is defined to be 1, then the compiler knows that (q % MAX) is equal to zero, in which case the compiler is within its rights to transform the above code into the following: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); WRITE_ONCE(b, p); do_something_else(); @@ -729,7 +725,7 @@ is gone, and the barrier won't bring it back. Therefore, if you are relying on this ordering, you should make sure that MAX is greater than one, perhaps as follows: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ if (q % MAX) { WRITE_ONCE(b, p); @@ -746,7 +742,7 @@ of the 'if' statement. You must also be careful not to rely too much on boolean short-circuit evaluation. 
Consider this example: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); if (q || 1 > 0) WRITE_ONCE(b, 1); @@ -754,7 +750,7 @@ Because the first condition cannot fault and the second condition is always true, the compiler can transform this example as following, defeating control dependency: - q = READ_ONCE_CTRL(a); + q = READ_ONCE(a); WRITE_ONCE(b, 1); This example underscores the need to ensure that the compiler cannot @@ -768,7 +764,7 @@ x and y both being zero: CPU 0 CPU 1 ======================= ======================= - r1 = READ_ONCE_CTRL(x); r2 = READ_ONCE_CTRL(y); + r1 = READ_ONCE(x); r2 = READ_ONCE(y); if (r1 > 0) if (r2 > 0) WRITE_ONCE(y, 1); WRITE_ONCE(x, 1); @@ -797,11 +793,6 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html. In summary: - (*) Control dependencies must be headed by READ_ONCE_CTRL(), - atomic{,64}_read_ctrl(). Or, as a much less preferable alternative, - interpose smp_read_barrier_depends() between a READ_ONCE() and the - control-dependent write. - (*) Control dependencies can order prior loads against later stores. However, they do -not- guarantee any other sort of ordering: Not prior loads against later loads, nor prior stores against @@ -817,14 +808,13 @@ In summary: between the prior load and the subsequent store, and this conditional must involve the prior load. If the compiler is able to optimize the conditional away, it will have also optimized - away the ordering. Careful use of READ_ONCE_CTRL() READ_ONCE(), - and WRITE_ONCE() can help to preserve the needed conditional. + away the ordering. Careful use of READ_ONCE() and WRITE_ONCE() + can help to preserve the needed conditional. (*) Control dependencies require that the compiler avoid reordering the - dependency into nonexistence. Careful use of READ_ONCE_CTRL(), - atomic{,64}_read_ctrl() or smp_read_barrier_depends() can help to - preserve your control dependency. Please see the Compiler Barrier - section for more information. + dependency into nonexistence. Careful use of READ_ONCE() or + atomic{,64}_read() can help to preserve your control dependency. + Please see the Compiler Barrier section for more information. (*) Control dependencies pair normally with other types of barriers. diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index f91093c2984c..eb1973bad80b 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -43,7 +43,6 @@ static inline long atomic_long_read##mo(const atomic_long_t *l) \ } ATOMIC_LONG_READ_OP() ATOMIC_LONG_READ_OP(_acquire) -ATOMIC_LONG_READ_OP(_ctrl) #undef ATOMIC_LONG_READ_OP diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 27e580d232ca..301de78d65f7 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -4,15 +4,6 @@ #include #include -#ifndef atomic_read_ctrl -static inline int atomic_read_ctrl(const atomic_t *v) -{ - int val = atomic_read(v); - smp_read_barrier_depends(); /* Enforce control dependency. */ - return val; -} -#endif - /* * Relaxed variants of xchg, cmpxchg and some atomic operations. * @@ -561,15 +552,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) #include #endif -#ifndef atomic64_read_ctrl -static inline long long atomic64_read_ctrl(const atomic64_t *v) -{ - long long val = atomic64_read(v); - smp_read_barrier_depends(); /* Enforce control dependency. 
*/ - return val; -} -#endif - #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3d7810341b57..fe817432190c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -299,22 +299,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __u.__val; \ }) -/** - * READ_ONCE_CTRL - Read a value heading a control dependency - * @x: The value to be read, heading the control dependency - * - * Control dependencies are tricky. See Documentation/memory-barriers.txt - * for important information on how to use them. Note that in many cases, - * use of smp_load_acquire() will be much simpler. Control dependencies - * should be avoided except on the hottest of hotpaths. - */ -#define READ_ONCE_CTRL(x) \ -({ \ - typeof(x) __val = READ_ONCE(x); \ - smp_read_barrier_depends(); /* Enforce control dependency. */ \ - __val; \ -}) - #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 182bc30899d5..b5d1ea79c595 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -141,7 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle, perf_output_get_handle(handle); do { - tail = READ_ONCE_CTRL(rb->user_page->data_tail); + tail = READ_ONCE(rb->user_page->data_tail); offset = head = local_read(&rb->head); if (!rb->overwrite && unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) -- cgit v1.2.3 From 36734810488e618d48cc14782f7111b3dfaffb83 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Wed, 4 Nov 2015 08:23:51 -0500 Subject: audit: audit_dummy_context can be boolean This patch makes audit_dummy_context return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/audit.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b2abc996c25d..69844b680fcc 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -143,7 +143,7 @@ extern void __audit_inode_child(const struct inode *parent, extern void __audit_seccomp(unsigned long syscall, long signr, int code); extern void __audit_ptrace(struct task_struct *t); -static inline int audit_dummy_context(void) +static inline bool audit_dummy_context(void) { void *p = current->audit_context; return !p || *(int *)p; @@ -345,9 +345,9 @@ static inline void audit_syscall_entry(int major, unsigned long a0, { } static inline void audit_syscall_exit(void *pt_regs) { } -static inline int audit_dummy_context(void) +static inline bool audit_dummy_context(void) { - return 1; + return true; } static inline struct filename *audit_reusename(const __user char *name) { -- cgit v1.2.3 From 9fcf836b215ca5685030ecab3e35ecc14ee3bcfb Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Wed, 4 Nov 2015 08:23:51 -0500 Subject: audit: audit_string_contains_control can be boolean This patch makes audit_string_contains_control return bool to improve readability due to this particular function only using either one or zero as its return value. 
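[Editor's illustration, not taken from the patch: a hypothetical call site showing how the bool result reads naturally as a predicate when choosing between hex and quoted logging, in the style of the audit code below.]

	/* sketch: encode an untrusted string before logging it */
	if (audit_string_contains_control(string, len))
		audit_log_n_hex(ab, (const unsigned char *)string, len);	/* unprintable: hex-encode */
	else
		audit_log_n_string(ab, string, len);	/* safe to log quoted */
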
Signed-off-by: Yaowei Bai [PM: tweaked subject line] Signed-off-by: Paul Moore --- include/linux/audit.h | 2 +- kernel/audit.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 69844b680fcc..20eba1eb0a3c 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -457,7 +457,7 @@ extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp extern __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...); extern void audit_log_end(struct audit_buffer *ab); -extern int audit_string_contains_control(const char *string, +extern bool audit_string_contains_control(const char *string, size_t len); extern void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, diff --git a/kernel/audit.c b/kernel/audit.c index c4c98d87456f..648036f7690d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1583,14 +1583,14 @@ void audit_log_n_string(struct audit_buffer *ab, const char *string, * @string: string to be checked * @len: max length of the string to check */ -int audit_string_contains_control(const char *string, size_t len) +bool audit_string_contains_control(const char *string, size_t len) { const unsigned char *p; for (p = string; p < (const unsigned char *)string + len; p++) { if (*p == '"' || *p < 0x21 || *p > 0x7e) - return 1; + return true; } - return 0; + return false; } /** -- cgit v1.2.3 From c75efa974e013640496620f26f0b532cb5cb17f9 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:50 +0300 Subject: drivers/hv: share Hyper-V SynIC constants with userspace Moved Hyper-V synic contants from guest Hyper-V drivers private header into x86 arch uapi Hyper-V header. Added Hyper-V synic msr's flags into x86 arch uapi Hyper-V header. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/hyperv.h | 12 ++++++++++++ drivers/hv/hyperv_vmbus.h | 5 ----- include/linux/hyperv.h | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 2677a0aac2cc..040d4083c24f 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -257,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE { __s64 tsc_offset; } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; +/* Define the number of synthetic interrupt sources. */ +#define HV_SYNIC_SINT_COUNT (16) +/* Define the expected SynIC version. */ +#define HV_SYNIC_VERSION_1 (0x1) + +#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) +#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) +#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) +#define HV_SYNIC_SINT_MASKED (1ULL << 16) +#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) +#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 3d70e36c918e..3782636562a1 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -63,9 +63,6 @@ enum hv_cpuid_function { /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) - /* Define synthetic interrupt controller message constants. 
*/ #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) @@ -105,8 +102,6 @@ enum hv_message_type { HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 }; -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) #define HV_SYNIC_STIMER_COUNT (4) /* Define invalid partition identifier. */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 54733d5b503e..8fdc17b84739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -26,6 +26,7 @@ #define _HYPERV_H #include +#include #include #include -- cgit v1.2.3 From b97e6de9c96cefaa02a6a7464731ea504b45e150 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 19:16:47 +0100 Subject: KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not want to do too much work in atomic context, in particular not walking all the VCPUs of the virtual machine. So we want to distinguish the architecture-specific injection function for irqfd from kvm_set_msi. Since it's still empty, reuse the newly added kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 14 ++++++++------ include/linux/kvm_host.h | 7 +++---- virt/kvm/eventfd.c | 11 ++++------- 3 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8f4499c7ffc1..75dc633c48dc 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -124,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } -static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm) +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { struct kvm_lapic_irq irq; int r; + if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) + return -EWOULDBLOCK; + kvm_set_msi_irq(e, &irq); if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) @@ -165,10 +169,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) idx = srcu_read_lock(&kvm->irq_srcu); if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; + ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, + irq, level); } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87189a41d904..15c78f320678 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -830,10 +830,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); - -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status); - +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); diff --git a/virt/kvm/eventfd.c 
b/virt/kvm/eventfd.c index e29fd2640709..46dbc0a7dfc1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,7 +171,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } -int __attribute__((weak)) kvm_arch_set_irq( +int __attribute__((weak)) kvm_arch_set_irq_inatomic( struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, int irq_source_id, int level, @@ -201,12 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = irqfd->irq_entry; } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq.type == KVM_IRQ_ROUTING_MSI) - kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false); - else if (kvm_arch_set_irq(&irq, kvm, - KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false) == -EWOULDBLOCK) + if (kvm_arch_set_irq_inatomic(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3 From 8a22f234a81ab4d1de5d948c3478608f08a9b844 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 18:52:02 +0100 Subject: KVM: x86: move kvm_set_irq_inatomic to legacy device assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function is not used outside device assignment, and kvm_arch_set_irq_inatomic has a different prototype. Move it here and make it static to avoid confusion. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/assigned-dev.c | 37 +++++++++++++++++++++++++++++++++++++ arch/x86/kvm/irq_comm.c | 34 ---------------------------------- include/linux/kvm_host.h | 1 - 3 files changed, 37 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 1c17ee807ef7..9dc091acd5fb 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -21,6 +21,7 @@ #include #include "irq.h" #include "assigned-dev.h" +#include "trace/events/kvm.h" struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; @@ -131,6 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Deliver an IRQ in an atomic context if we can, or return a failure, + * user can retry in a process context. + * Return value: + * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. + * Other values - No need to retry. + */ +static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, + int level) +{ + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + struct kvm_kernel_irq_routing_entry *e; + int ret = -EINVAL; + int idx; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* + * Injection into either PIC or IOAPIC might need to scan all CPUs, + * which would need to be retried from thread context; when same GSI + * is connected to both PIC and IOAPIC, we'd have to report a + * partial failure here. + * Since there's no easy way to do this, we only support injecting MSI + * which is limited to 1:1 GSI mapping. 
+ */ + idx = srcu_read_lock(&kvm->irq_srcu); + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { + e = &entries[0]; + ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, + irq, level); + } + srcu_read_unlock(&kvm->irq_srcu, idx); + return ret; +} + + static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 75dc633c48dc..84b96d319909 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -142,40 +142,6 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, return -EWOULDBLOCK; } -/* - * Deliver an IRQ in an atomic context if we can, or return a failure, - * user can retry in a process context. - * Return value: - * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. - * Other values - No need to retry. - */ -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) -{ - struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - struct kvm_kernel_irq_routing_entry *e; - int ret = -EINVAL; - int idx; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* - * Injection into either PIC or IOAPIC might need to scan all CPUs, - * which would need to be retried from thread context; when same GSI - * is connected to both PIC and IOAPIC, we'd have to report a - * partial failure here. - * Since there's no easy way to do this, we only support injecting MSI - * which is limited to 1:1 GSI mapping. - */ - idx = srcu_read_lock(&kvm->irq_srcu); - if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { - e = &entries[0]; - ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, - irq, level); - } - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - int kvm_request_irq_source_id(struct kvm *kvm) { unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15c78f320678..242a6d2b53ff 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -827,7 +827,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, -- cgit v1.2.3 From 033291eccbdb1b70ffc02641edae19ac825dc75d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Oct 2015 15:08:48 -0600 Subject: vfio: Include No-IOMMU mode There is really no way to safely give a user full access to a DMA capable device without an IOMMU to protect the host system. There is also no way to provide DMA translation, for use cases such as device assignment to virtual machines. However, there are still those users that want userspace drivers even under those conditions. The UIO driver exists for this use case, but does not provide the degree of device access and programming that VFIO has. In an effort to avoid code duplication, this introduces a No-IOMMU mode for VFIO. This mode requires building VFIO with CONFIG_VFIO_NOIOMMU and enabling the "enable_unsafe_noiommu_mode" option on the vfio driver. This should make it very clear that this mode is not safe. Additionally, CAP_SYS_RAWIO privileges are necessary to work with groups and containers using this mode. 
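[Editor's illustration, not part of the patch: a rough userspace sketch of wiring a no-iommu group to a container, assuming the uapi additions from this patch. The group number is hypothetical, error handling is omitted, and the caller needs CAP_SYS_RAWIO plus the enable_unsafe_noiommu_mode option.]

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/vfio.h>

	int main(void)
	{
		int container = open("/dev/vfio/vfio", O_RDWR);
		/* no-iommu groups use the special name described below */
		int group = open("/dev/vfio/noiommu-0", O_RDWR);

		ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
		/* only the special no-iommu backend is valid for this container */
		ioctl(container, VFIO_SET_IOMMU, VFIO_NOIOMMU_IOMMU);
		return 0;
	}
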
Groups making use of this support are named /dev/vfio/noiommu-$GROUP and can only make use of the special VFIO_NOIOMMU_IOMMU for the container. Use of this mode, specifically binding a device without a native IOMMU group to a VFIO bus driver will taint the kernel and should therefore not be considered supported. This patch includes no-iommu support for the vfio-pci bus driver only. Signed-off-by: Alex Williamson Acked-by: Michael S. Tsirkin --- drivers/vfio/Kconfig | 15 ++++ drivers/vfio/pci/vfio_pci.c | 8 +- drivers/vfio/vfio.c | 186 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/vfio.h | 3 + include/uapi/linux/vfio.h | 7 ++ 5 files changed, 209 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 454017928ed0..b6d3cdc2791b 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -31,5 +31,20 @@ menuconfig VFIO If you don't know what to do here, say N. +menuconfig VFIO_NOIOMMU + bool "VFIO No-IOMMU support" + depends on VFIO + help + VFIO is built on the ability to isolate devices using the IOMMU. + Only with an IOMMU can userspace access to DMA capable devices be + considered secure. VFIO No-IOMMU mode enables IOMMU groups for + devices without IOMMU backing for the purpose of re-using the VFIO + infrastructure in a non-secure mode. Use of this mode will result + in an unsupportable kernel and will therefore taint the kernel. + Device assignment to virtual machines is also not possible with + this mode since there is no IOMMU to provide DMA translation. + + If you don't know what to do here, say N. + source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 964ad572aaee..32b88bd2c82c 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; - group = iommu_group_get(&pdev->dev); + group = vfio_iommu_group_get(&pdev->dev); if (!group) return -EINVAL; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { - iommu_group_put(group); + vfio_iommu_group_put(group, &pdev->dev); return -ENOMEM; } @@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) { - iommu_group_put(group); + vfio_iommu_group_put(group, &pdev->dev); kfree(vdev); return ret; } @@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev) if (!vdev) return; - iommu_group_put(pdev->dev.iommu_group); + vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); kfree(vdev); if (vfio_pci_is_vga(pdev)) { diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index ab056f7af54d..de632da2e22f 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -62,6 +62,7 @@ struct vfio_container { struct rw_semaphore group_lock; struct vfio_iommu_driver *iommu_driver; void *iommu_data; + bool noiommu; }; struct vfio_unbound_dev { @@ -84,6 +85,7 @@ struct vfio_group { struct list_head unbound_list; struct mutex unbound_lock; atomic_t opened; + bool noiommu; }; struct vfio_device { @@ -95,6 +97,147 @@ struct vfio_device { void *device_data; }; +#ifdef CONFIG_VFIO_NOIOMMU +static bool noiommu __read_mostly; +module_param_named(enable_unsafe_noiommu_support, + noiommu, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. 
This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); +#endif + +/* + * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe + * and remove functions, any use cases other than acquiring the first + * reference for the purpose of calling vfio_add_group_dev() or removing + * that symmetric reference after vfio_del_group_dev() should use the raw + * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put() + * removes the device from the dummy group and cannot be nested. + */ +struct iommu_group *vfio_iommu_group_get(struct device *dev) +{ + struct iommu_group *group; + int __maybe_unused ret; + + group = iommu_group_get(dev); + +#ifdef CONFIG_VFIO_NOIOMMU + /* + * With noiommu enabled, an IOMMU group will be created for a device + * that doesn't already have one and doesn't have an iommu_ops on their + * bus. We use iommu_present() again in the main code to detect these + * fake groups. + */ + if (group || !noiommu || iommu_present(dev->bus)) + return group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return NULL; + + iommu_group_set_name(group, "vfio-noiommu"); + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + if (ret) + return NULL; + + /* + * Where to taint? At this point we've added an IOMMU group for a + * device that is not backed by iommu_ops, therefore any iommu_ + * callback using iommu_ops can legitimately Oops. So, while we may + * be about to give a DMA capable device to a user without IOMMU + * protection, which is clearly taint-worthy, let's go ahead and do + * it here. + */ + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); +#endif + + return group; +} +EXPORT_SYMBOL_GPL(vfio_iommu_group_get); + +void vfio_iommu_group_put(struct iommu_group *group, struct device *dev) +{ +#ifdef CONFIG_VFIO_NOIOMMU + if (!iommu_present(dev->bus)) + iommu_group_remove_device(dev); +#endif + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(vfio_iommu_group_put); + +#ifdef CONFIG_VFIO_NOIOMMU +static void *vfio_noiommu_open(unsigned long arg) +{ + if (arg != VFIO_NOIOMMU_IOMMU) + return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO)) + return ERR_PTR(-EPERM); + + return NULL; +} + +static void vfio_noiommu_release(void *iommu_data) +{ +} + +static long vfio_noiommu_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + if (cmd == VFIO_CHECK_EXTENSION) + return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0; + + return -ENOTTY; +} + +static int vfio_iommu_present(struct device *dev, void *unused) +{ + return iommu_present(dev->bus) ? 1 : 0; +} + +static int vfio_noiommu_attach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + return iommu_group_for_each_dev(iommu_group, NULL, + vfio_iommu_present) ? 
-EINVAL : 0; +} + +static void vfio_noiommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ +} + +static struct vfio_iommu_driver_ops vfio_noiommu_ops = { + .name = "vfio-noiommu", + .owner = THIS_MODULE, + .open = vfio_noiommu_open, + .release = vfio_noiommu_release, + .ioctl = vfio_noiommu_ioctl, + .attach_group = vfio_noiommu_attach_group, + .detach_group = vfio_noiommu_detach_group, +}; + +static struct vfio_iommu_driver vfio_noiommu_driver = { + .ops = &vfio_noiommu_ops, +}; + +/* + * Wrap IOMMU drivers, the noiommu driver is the one and only driver for + * noiommu groups (and thus containers) and not available for normal groups. + */ +#define vfio_for_each_iommu_driver(con, pos) \ + for (pos = con->noiommu ? &vfio_noiommu_driver : \ + list_first_entry(&vfio.iommu_drivers_list, \ + struct vfio_iommu_driver, vfio_next); \ + (con->noiommu ? pos != NULL : \ + &pos->vfio_next != &vfio.iommu_drivers_list); \ + pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next)) +#else +#define vfio_for_each_iommu_driver(con, pos) \ + list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next) +#endif + + /** * IOMMU driver registration */ @@ -199,7 +342,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group) /** * Group objects - create, release, get, put, search */ -static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) +static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, + bool noiommu) { struct vfio_group *group, *tmp; struct device *dev; @@ -217,6 +361,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) atomic_set(&group->container_users, 0); atomic_set(&group->opened, 0); group->iommu_group = iommu_group; + group->noiommu = noiommu; group->nb.notifier_call = vfio_iommu_group_notifier; @@ -252,7 +397,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.group_devt), minor), - group, "%d", iommu_group_id(iommu_group)); + group, "%s%d", noiommu ? 
"noiommu-" : "", + iommu_group_id(iommu_group)); if (IS_ERR(dev)) { vfio_free_group_minor(minor); vfio_group_unlock_and_free(group); @@ -640,7 +786,8 @@ int vfio_add_group_dev(struct device *dev, group = vfio_group_get_from_iommu(iommu_group); if (!group) { - group = vfio_create_group(iommu_group); + group = vfio_create_group(iommu_group, + !iommu_present(dev->bus)); if (IS_ERR(group)) { iommu_group_put(iommu_group); return PTR_ERR(group); @@ -852,8 +999,7 @@ static long vfio_ioctl_check_extension(struct vfio_container *container, */ if (!driver) { mutex_lock(&vfio.iommu_drivers_lock); - list_for_each_entry(driver, &vfio.iommu_drivers_list, - vfio_next) { + vfio_for_each_iommu_driver(container, driver) { if (!try_module_get(driver->ops->owner)) continue; @@ -922,7 +1068,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, } mutex_lock(&vfio.iommu_drivers_lock); - list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { + vfio_for_each_iommu_driver(container, driver) { void *data; if (!try_module_get(driver->ops->owner)) @@ -1187,6 +1333,9 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) if (atomic_read(&group->container_users)) return -EINVAL; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) + return -EPERM; + f = fdget(container_fd); if (!f.file) return -EBADF; @@ -1202,6 +1351,13 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) down_write(&container->group_lock); + /* Real groups and fake groups cannot mix */ + if (!list_empty(&container->group_list) && + container->noiommu != group->noiommu) { + ret = -EPERM; + goto unlock_out; + } + driver = container->iommu_driver; if (driver) { ret = driver->ops->attach_group(container->iommu_data, @@ -1211,6 +1367,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) } group->container = container; + container->noiommu = group->noiommu; list_add(&group->container_next, &container->group_list); /* Get a reference on the container and mark a user within the group */ @@ -1241,6 +1398,9 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) !group->container->iommu_driver || !vfio_group_viable(group)) return -EINVAL; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) + return -EPERM; + device = vfio_device_get_from_name(group, buf); if (!device) return -ENODEV; @@ -1283,6 +1443,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) fd_install(ret, filep); + if (group->noiommu) + dev_warn(device->dev, "vfio-noiommu device opened by user " + "(%s:%d)\n", current->comm, task_pid_nr(current)); + return ret; } @@ -1371,6 +1535,11 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep) if (!group) return -ENODEV; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) { + vfio_group_put(group); + return -EPERM; + } + /* Do we need multiple instances of the group open? Seems not. 
*/ opened = atomic_cmpxchg(&group->opened, 0, 1); if (opened) { @@ -1533,6 +1702,11 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep) if (!atomic_inc_not_zero(&group->container_users)) return ERR_PTR(-EINVAL); + if (group->noiommu) { + atomic_dec(&group->container_users); + return ERR_PTR(-EPERM); + } + if (!group->container->iommu_driver || !vfio_group_viable(group)) { atomic_dec(&group->container_users); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ddb440975382..610a86a892b8 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,6 +44,9 @@ struct vfio_device_ops { void (*request)(void *device_data, unsigned int count); }; +extern struct iommu_group *vfio_iommu_group_get(struct device *dev); +extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); + extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 9fd7b5d8df2f..751b69f858c8 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -38,6 +38,13 @@ #define VFIO_SPAPR_TCE_v2_IOMMU 7 +/* + * The No-IOMMU IOMMU offers no translation or isolation for devices and + * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU + * code will taint the host kernel and should be used with extreme caution. + */ +#define VFIO_NOIOMMU_IOMMU 8 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between -- cgit v1.2.3 From d618382ba5f1a4905db63f4980bf7b0a5826de9d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Nov 2015 11:30:57 -0800 Subject: iommu-common: Fix error code used in iommu_tbl_range_{alloc,free}(). The value returned from iommu_tbl_range_alloc() (and the one passed in as a fourth argument to iommu_tbl_range_free) is not a DMA address, it is rather an index into the IOMMU page table. Therefore using DMA_ERROR_CODE is not appropriate. Use a more type matching error code define, IOMMU_ERROR_CODE, and update all users of this interface. Reported-by: Andre Przywara Signed-off-by: David S. 
Miller --- arch/sparc/kernel/iommu.c | 12 ++++++------ arch/sparc/kernel/ldc.c | 2 +- arch/sparc/kernel/pci_sun4v.c | 18 +++++++++--------- include/linux/iommu-common.h | 1 + lib/iommu-common.c | 10 +++------- 5 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 5320689c06e9..37686828c3d9 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -161,7 +161,7 @@ static inline iopte_t *alloc_npages(struct device *dev, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) return NULL; return iommu->page_table + entry; @@ -253,7 +253,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; - iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) @@ -426,7 +426,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, iommu_free_ctx(iommu, ctx); spin_unlock_irqrestore(&iommu->lock, flags); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, @@ -492,7 +492,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, &handle, (unsigned long)(-1), 0); /* Handle failure */ - if (unlikely(entry == DMA_ERROR_CODE)) { + if (unlikely(entry == IOMMU_ERROR_CODE)) { if (printk_ratelimit()) printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" " npages %lx\n", iommu, paddr, npages); @@ -571,7 +571,7 @@ iommu_map_failed: iopte_make_dummy(iommu, base + j); iommu_tbl_range_free(&iommu->tbl, vaddr, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -648,7 +648,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, iopte_make_dummy(iommu, base + i); iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 1ae5eb1bb045..59d503866431 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1953,7 +1953,7 @@ static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu, entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table, npages, NULL, (unsigned long)-1, 0); - if (unlikely(entry < 0)) + if (unlikely(entry == IOMMU_ERROR_CODE)) return NULL; return iommu->page_table + entry; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d2fe57dad433..836e8cef47e2 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -159,7 +159,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) goto range_alloc_fail; *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); @@ -187,7 +187,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, return ret; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, 
IOMMU_ERROR_CODE); range_alloc_fail: free_pages(first_page, order); @@ -226,7 +226,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, devhandle = pbm->devhandle; entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) free_pages((unsigned long)cpu, order); @@ -256,7 +256,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) goto bad; bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); @@ -288,7 +288,7 @@ bad: return DMA_ERROR_CODE; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); return DMA_ERROR_CODE; } @@ -317,7 +317,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, bus_addr &= IO_PAGE_MASK; entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, @@ -376,7 +376,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, &handle, (unsigned long)(-1), 0); /* Handle failure */ - if (unlikely(entry == DMA_ERROR_CODE)) { + if (unlikely(entry == IOMMU_ERROR_CODE)) { if (printk_ratelimit()) printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" " npages %lx\n", iommu, paddr, npages); @@ -451,7 +451,7 @@ iommu_map_failed: npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); iommu_tbl_range_free(&iommu->tbl, vaddr, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); /* XXX demap? 
XXX */ s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -496,7 +496,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, entry = ((dma_handle - tbl->table_map_base) >> shift); dma_4v_iommu_demap(&devhandle, entry, npages); iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h index bbced83b32ee..376a27c9cc6a 100644 --- a/include/linux/iommu-common.h +++ b/include/linux/iommu-common.h @@ -7,6 +7,7 @@ #define IOMMU_POOL_HASHBITS 4 #define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) +#define IOMMU_ERROR_CODE (~(unsigned long) 0) struct iommu_pool { unsigned long start; diff --git a/lib/iommu-common.c b/lib/iommu-common.c index ff19f66d3f7f..0f2c887be770 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -11,10 +11,6 @@ #include #include -#ifndef DMA_ERROR_CODE -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) -#endif - static unsigned long iommu_large_alloc = 15; static DEFINE_PER_CPU(unsigned int, iommu_hash_common); @@ -124,7 +120,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, /* Sanity check */ if (unlikely(npages == 0)) { WARN_ON_ONCE(1); - return DMA_ERROR_CODE; + return IOMMU_ERROR_CODE; } if (largealloc) { @@ -207,7 +203,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, goto again; } else { /* give up */ - n = DMA_ERROR_CODE; + n = IOMMU_ERROR_CODE; goto bail; } } @@ -259,7 +255,7 @@ void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, unsigned long flags; unsigned long shift = iommu->table_shift; - if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */ + if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */ entry = (dma_addr - iommu->table_map_base) >> shift; pool = get_pool(iommu, entry); -- cgit v1.2.3 From e02328f47bd75fde9decf9657ec7d769b370f857 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 8 Sep 2015 14:17:47 +0200 Subject: vga_switcheroo: Drop client power state VGA_SWITCHEROO_INIT hda_intel.c:azx_probe() defers initialization of an audio controller on the discrete GPU if the GPU is powered off. The power state of the GPU is determined by calling vga_switcheroo_get_client_state(). vga_switcheroo_get_client_state() returns VGA_SWITCHEROO_INIT if vga_switcheroo is not enabled, i.e. if no second GPU or no handler has registered. This can go wrong in the following scenario: - Driver for the integrated GPU is not loaded. - Driver for the discrete GPU registers with vga_switcheroo, uses driver power control to power down the GPU, handler cuts power to the GPU. - Driver for the audio controller gets loaded after the GPU was powered down, calls vga_switcheroo_get_client_state() which returns VGA_SWITCHEROO_INIT instead of VGA_SWITCHEROO_OFF. - Consequence: azx_probe() tries to initialize the audio controller even though the GPU is powered down. The power state VGA_SWITCHEROO_INIT was introduced by c8e9cf7bb240 ("vga_switcheroo: Add a helper function to get the client state"). It is not apparent what its benefit might be. The idea seems to be to initialize the audio controller even if the power state is VGA_SWITCHEROO_OFF (were vga_switcheroo enabled), but as shown above this can fail. Drop VGA_SWITCHEROO_INIT to solve this. 
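With VGA_SWITCHEROO_INIT gone, OFF is unambiguous to callers. A minimal sketch of how an audio-function probe might consume the simplified enum; the function name and the deferral policy here are illustrative assumptions, not the actual hda_intel.c code:

#include <linux/pci.h>
#include <linux/vga_switcheroo.h>

static int example_audio_probe(struct pci_dev *pdev)
{
	/* OFF now reliably means the GPU (and its audio function) is
	 * powered down, even before a handler has registered. */
	if (vga_switcheroo_get_client_state(pdev) == VGA_SWITCHEROO_OFF)
		return -EPROBE_DEFER;	/* assumption: retry once powered up */

	/* VGA_SWITCHEROO_ON or VGA_SWITCHEROO_NOT_FOUND: safe to init */
	return 0;
}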
Acked-by: Takashi Iwai Signed-off-by: Lukas Wunner Signed-off-by: Dave Airlie --- drivers/gpu/vga/vga_switcheroo.c | 2 -- include/linux/vga_switcheroo.h | 5 ----- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 56bbbd65ae8a..41edd5a3f100 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -356,8 +356,6 @@ enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *pdev) client = find_client_from_pci(&vgasr_priv.clients, pdev); if (!client) ret = VGA_SWITCHEROO_NOT_FOUND; - else if (!vgasr_priv.active) - ret = VGA_SWITCHEROO_INIT; else ret = client->pwr_state; mutex_unlock(&vgasr_mutex); diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 786bc931dbd1..69e1d4a1f1b3 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -39,10 +39,6 @@ struct pci_dev; * enum vga_switcheroo_state - client power state * @VGA_SWITCHEROO_OFF: off * @VGA_SWITCHEROO_ON: on - * @VGA_SWITCHEROO_INIT: client has registered with vga_switcheroo but - * vga_switcheroo is not enabled, i.e. no second client or no handler - * has registered. Only used in vga_switcheroo_get_client_state() which - * in turn is only called from hda_intel.c * @VGA_SWITCHEROO_NOT_FOUND: client has not registered with vga_switcheroo. * Only used in vga_switcheroo_get_client_state() which in turn is only * called from hda_intel.c @@ -53,7 +49,6 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, /* below are referred only from vga_switcheroo_get_client_state() */ - VGA_SWITCHEROO_INIT, VGA_SWITCHEROO_NOT_FOUND, }; -- cgit v1.2.3 From 8f25348b65cd073f77945f559ab1e5de83422cd1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 4 Nov 2015 14:59:06 +0100 Subject: net: add forgotten IFF_L3MDEV_SLAVE define Fixes: fee6d4c77 ("net: Add netif_is_l3_slave") Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4ac653b7b8ac..2c00772bd136 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1322,6 +1322,7 @@ enum netdev_priv_flags { #define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH +#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE /** * struct net_device - The DEVICE structure. -- cgit v1.2.3 From 9154301a47b33bdc273d8254c407792524367558 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 29 Sep 2015 15:52:59 -0700 Subject: HID: hid-input: allow input_configured callback return errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When configuring an input device via the input_configured callback we may encounter errors (for example, input_mt_init_slots() may fail). Instead of continuing with a half-initialized input device, let's allow the driver to indicate failures.
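Concretely, the new contract lets a driver propagate the failure; a minimal sketch, assuming a hypothetical driver and made-up MT parameters (the callback signature matches the hid.h change below):

#include <linux/hid.h>
#include <linux/input/mt.h>

static int example_input_configured(struct hid_device *hdev,
				    struct hid_input *hi)
{
	int ret;

	/* input_mt_init_slots() can fail; return the error instead of
	 * registering a half-initialized device. hidinput_connect()
	 * now cleans up when this returns non-zero. */
	ret = input_mt_init_slots(hi->input, 2, INPUT_MT_DIRECT);
	if (ret)
		return ret;

	return 0;
}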
Signed-off-by: Jaikumar Ganesh Signed-off-by: Arve Hjønnevåg Reviewed-by: Benjamin Tissoires Reviewed-by: David Herrmann Acked-by: Nikolai Kondrashov Acked-by: Andrew Duggan Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-appleir.c | 4 +++- drivers/hid/hid-elo.c | 4 +++- drivers/hid/hid-input.c | 10 ++++++---- drivers/hid/hid-lenovo.c | 4 +++- drivers/hid/hid-logitech-hidpp.c | 4 +++- drivers/hid/hid-magicmouse.c | 8 ++++++-- drivers/hid/hid-multitouch.c | 20 +++++++++++++++----- drivers/hid/hid-ntrig.c | 6 ++++-- drivers/hid/hid-rmi.c | 11 +++++++---- drivers/hid/hid-sony.c | 13 ++++++++++--- drivers/hid/hid-uclogic.c | 6 ++++-- include/linux/hid.h | 4 ++-- 12 files changed, 66 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-appleir.c b/drivers/hid/hid-appleir.c index 0e6a42d37eb6..07cbc70f00e7 100644 --- a/drivers/hid/hid-appleir.c +++ b/drivers/hid/hid-appleir.c @@ -256,7 +256,7 @@ out: return 0; } -static void appleir_input_configured(struct hid_device *hid, +static int appleir_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input_dev = hidinput->input; @@ -275,6 +275,8 @@ static void appleir_input_configured(struct hid_device *hid, for (i = 0; i < ARRAY_SIZE(appleir_key_table); i++) set_bit(appleir->keymap[i], input_dev->keybit); clear_bit(KEY_RESERVED, input_dev->keybit); + + return 0; } static int appleir_input_mapping(struct hid_device *hid, diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index 4e49462870ab..aad8c162a825 100644 --- a/drivers/hid/hid-elo.c +++ b/drivers/hid/hid-elo.c @@ -37,7 +37,7 @@ static bool use_fw_quirk = true; module_param(use_fw_quirk, bool, S_IRUGO); MODULE_PARM_DESC(use_fw_quirk, "Do periodic pokes for broken M firmwares (default = true)"); -static void elo_input_configured(struct hid_device *hdev, +static int elo_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; @@ -45,6 +45,8 @@ static void elo_input_configured(struct hid_device *hdev, set_bit(BTN_TOUCH, input->keybit); set_bit(ABS_PRESSURE, input->absbit); input_set_abs_params(input, ABS_PRESSURE, 0, 256, 0, 0); + + return 0; } static void elo_process_data(struct input_dev *input, const u8 *data, int size) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 53aeaf6252c7..2ba6bf69b7d0 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1510,8 +1510,9 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) * UGCI) cram a lot of unrelated inputs into the * same interface. 
*/ hidinput->report = report; - if (drv->input_configured) - drv->input_configured(hid, hidinput); + if (drv->input_configured && + drv->input_configured(hid, hidinput)) + goto out_cleanup; if (input_register_device(hidinput->input)) goto out_cleanup; hidinput = NULL; @@ -1532,8 +1533,9 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } if (hidinput) { - if (drv->input_configured) - drv->input_configured(hid, hidinput); + if (drv->input_configured && + drv->input_configured(hid, hidinput)) + goto out_cleanup; if (input_register_device(hidinput->input)) goto out_cleanup; } diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index e4bc6cb6d7fa..8979f1fd5208 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -848,7 +848,7 @@ static void lenovo_remove(struct hid_device *hdev) hid_hw_stop(hdev); } -static void lenovo_input_configured(struct hid_device *hdev, +static int lenovo_input_configured(struct hid_device *hdev, struct hid_input *hi) { switch (hdev->product) { @@ -863,6 +863,8 @@ static void lenovo_input_configured(struct hid_device *hdev, } break; } + + return 0; } diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 484196459305..a25f562f2d7b 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -1160,13 +1160,15 @@ static void hidpp_populate_input(struct hidpp_device *hidpp, m560_populate_input(hidpp, input, origin_is_hid_core); } -static void hidpp_input_configured(struct hid_device *hdev, +static int hidpp_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); struct input_dev *input = hidinput->input; hidpp_populate_input(hidpp, input, true); + + return 0; } static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 29a74c1efcb8..d6fa496d0ca2 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -471,18 +471,22 @@ static int magicmouse_input_mapping(struct hid_device *hdev, return 0; } -static void magicmouse_input_configured(struct hid_device *hdev, +static int magicmouse_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); + int ret; - int ret = magicmouse_setup_input(msc->input, hdev); + ret = magicmouse_setup_input(msc->input, hdev); if (ret) { hid_err(hdev, "magicmouse setup input failed (%d)\n", ret); /* clean msc->input to notify probe() of the failure */ msc->input = NULL; + return ret; } + + return 0; } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 426b2f1a3450..2ed42d8f805b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -725,12 +725,13 @@ static void mt_touch_report(struct hid_device *hid, struct hid_report *report) mt_sync_frame(td, report->field[0]->hidinput->input); } -static void mt_touch_input_configured(struct hid_device *hdev, +static int mt_touch_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; struct input_dev *input = hi->input; + int ret; if (!td->maxcontacts) td->maxcontacts = MT_DEFAULT_MAXCONTACT; @@ -752,9 +753,12 @@ static void mt_touch_input_configured(struct hid_device *hdev, if (td->is_buttonpad) __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); - input_mt_init_slots(input, td->maxcontacts, td->mt_flags); + ret = 
input_mt_init_slots(input, td->maxcontacts, td->mt_flags); + if (ret) + return ret; td->mt_flags = 0; + return 0; } static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, @@ -930,15 +934,19 @@ static void mt_post_parse(struct mt_device *td) cls->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } -static void mt_input_configured(struct hid_device *hdev, struct hid_input *hi) +static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); char *name; const char *suffix = NULL; struct hid_field *field = hi->report->field[0]; + int ret; - if (hi->report->id == td->mt_report_id) - mt_touch_input_configured(hdev, hi); + if (hi->report->id == td->mt_report_id) { + ret = mt_touch_input_configured(hdev, hi); + if (ret) + return ret; + } /* * some egalax touchscreens have "application == HID_DG_TOUCHSCREEN" @@ -989,6 +997,8 @@ static void mt_input_configured(struct hid_device *hdev, struct hid_input *hi) hi->input->name = name; } } + + return 0; } static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index 600f2075512f..756d1ef9bd99 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -859,14 +859,14 @@ not_claimed_input: return 1; } -static void ntrig_input_configured(struct hid_device *hid, +static int ntrig_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; if (hidinput->report->maxfield < 1) - return; + return 0; switch (hidinput->report->field[0]->application) { case HID_DG_PEN: @@ -890,6 +890,8 @@ static void ntrig_input_configured(struct hid_device *hid, "N-Trig MultiTouch"; break; } + + return 0; } static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id) diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 2c148129beb2..67cd059a8f46 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -1173,7 +1173,7 @@ static int rmi_populate(struct hid_device *hdev) return 0; } -static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) +static int rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct rmi_data *data = hid_get_drvdata(hdev); struct input_dev *input = hi->input; @@ -1185,10 +1185,10 @@ static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) hid_dbg(hdev, "Opening low level driver\n"); ret = hid_hw_open(hdev); if (ret) - return; + return ret; if (!(data->device_flags & RMI_DEVICE)) - return; + return 0; /* Allow incoming hid reports */ hid_device_io_start(hdev); @@ -1228,7 +1228,9 @@ static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 0x0f, 0, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 0x0f, 0, 0); - input_mt_init_slots(input, data->max_fingers, INPUT_MT_POINTER); + ret = input_mt_init_slots(input, data->max_fingers, INPUT_MT_POINTER); + if (ret < 0) + goto exit; if (data->button_count) { __set_bit(EV_KEY, input->evbit); @@ -1244,6 +1246,7 @@ static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) exit: hid_device_io_stop(hdev); hid_hw_close(hdev); + return ret; } static int rmi_input_mapping(struct hid_device *hdev, diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 661f94f8ab8b..774cd2210566 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -1360,20 +1360,27 @@ static int 
sony_register_touchpad(struct hid_input *hi, int touch_count, return 0; } -static void sony_input_configured(struct hid_device *hdev, +static int sony_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct sony_sc *sc = hid_get_drvdata(hdev); + int ret; /* * The Dualshock 4 touchpad supports 2 touches and has a * resolution of 1920x942 (44.86 dots/mm). */ if (sc->quirks & DUALSHOCK4_CONTROLLER) { - if (sony_register_touchpad(hidinput, 2, 1920, 942) != 0) + ret = sony_register_touchpad(hidinput, 2, 1920, 942); + if (ret) { hid_err(sc->hdev, - "Unable to initialize multi-touch slots\n"); + "Unable to initialize multi-touch slots: %d\n", + ret); + return ret; + } } + + return 0; } /* diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c index b905d501e752..85ac43517e3f 100644 --- a/drivers/hid/hid-uclogic.c +++ b/drivers/hid/hid-uclogic.c @@ -731,7 +731,7 @@ static int uclogic_input_mapping(struct hid_device *hdev, struct hid_input *hi, return 0; } -static void uclogic_input_configured(struct hid_device *hdev, +static int uclogic_input_configured(struct hid_device *hdev, struct hid_input *hi) { char *name; @@ -741,7 +741,7 @@ static void uclogic_input_configured(struct hid_device *hdev, /* no report associated (HID_QUIRK_MULTI_INPUT not set) */ if (!hi->report) - return; + return 0; field = hi->report->field[0]; @@ -774,6 +774,8 @@ static void uclogic_input_configured(struct hid_device *hdev, hi->input->name = name; } } + + return 0; } /** diff --git a/include/linux/hid.h b/include/linux/hid.h index f17980de2662..251a1d382e23 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -698,8 +698,8 @@ struct hid_driver { int (*input_mapped)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max); - void (*input_configured)(struct hid_device *hdev, - struct hid_input *hidinput); + int (*input_configured)(struct hid_device *hdev, + struct hid_input *hidinput); void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); -- cgit v1.2.3 From 805c4bc05705fb2b71ec970960b456eee9900953 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2015 11:07:13 -0800 Subject: tcp: fix req->saved_syn race For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix ireq->pktopts race"), we need to make sure we do not access req->saved_syn unless we own the request sock. This fixes races for listeners using TCP_SAVE_SYN option. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Ying Cai Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 7 +++++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_minisocks.c | 3 --- net/ipv6/tcp_ipv6.c | 20 ++++++++++++-------- 4 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c906f4534581..b386361ba3e8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -397,6 +397,13 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog) queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); } +static inline void tcp_move_syn(struct tcp_sock *tp, + struct request_sock *req) +{ + tp->saved_syn = req->saved_syn; + req->saved_syn = NULL; +} + static inline void tcp_saved_syn_free(struct tcp_sock *tp) { kfree(tp->saved_syn); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c2648bbac4b..59aff63b1776 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1326,6 +1326,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + if (*own_req) + tcp_move_syn(newtp, req_unhash); return newsk; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3575dd1e5b67..ac6b1961ffeb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -551,9 +551,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rack.mstamp.v64 = 0; newtp->rack.advanced = 0; - newtp->saved_syn = req->saved_syn; - req->saved_syn = NULL; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ea2f4d5440b5..c509e5562429 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1140,14 +1140,18 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * goto out; } *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - /* Clone pktoptions received with SYN, if we own the req */ - if (*own_req && ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); - consume_skb(ireq->pktopts); - ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); + if (*own_req) { + tcp_move_syn(newtp, req_unhash); + + /* Clone pktoptions received with SYN, if we own the req */ + if (ireq->pktopts) { + newnp->pktoptions = skb_clone(ireq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; + if (newnp->pktoptions) + skb_set_owner_r(newnp->pktoptions, newsk); + } } return newsk; -- cgit v1.2.3 From 720abae3d68ae966044497dd9ee5ccf3b16e700c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 5 Nov 2015 18:44:18 -0800 Subject: rcu: force alignment on struct callback_head/rcu_head Make struct callback_head aligned to the size of a pointer. On most architectures it happens naturally due to ABI requirements, but some architectures (like CRIS) have a weird ABI and we need to ask for it explicitly. The alignment is required to guarantee that bits 0 and 1 of @next will be clear under normal conditions -- as long as we use call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue the callback. This guarantee is important for a few reasons: - future call_rcu_lazy() will make use of lower bits in the pointer; - the structure shares storage space in struct page with @compound_head, which encodes PageTail() in bit 0. The guarantee is needed to avoid false-positive PageTail(). 
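Stated as a compile-time check, the invariant the patch establishes looks like this (a sketch of ours, not part of the patch):

#include <linux/types.h>

/* Pointer alignment keeps bits 0 and 1 of a queued rcu_head pointer
 * clear, so they remain usable as flag bits. */
_Static_assert(_Alignof(struct callback_head) >= sizeof(void *),
	       "callback_head must be at least pointer-aligned");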
A false-positive PageTail() caused a crash on crisv32[1]. It happened due to a misaligned task_struct->rcu, which was byte-aligned. [1] http://lkml.kernel.org/r/55FAEA67.9000102@roeck-us.net Signed-off-by: Kirill A. Shutemov Reported-by: Guenter Roeck Tested-by: Guenter Roeck Acked-by: Paul E. McKenney Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index c314989d9158..70d8500bddf1 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -205,11 +205,25 @@ struct ustat { * struct callback_head - callback structure for use with RCU and task_work * @next: next update requests in a list * @func: actual update function to call after the grace period. + * + * The struct is aligned to the size of a pointer. On most architectures it + * happens naturally due to ABI requirements, but some architectures (like + * CRIS) have a weird ABI and we need to ask for it explicitly. + * + * The alignment is required to guarantee that bits 0 and 1 of @next will be + * clear under normal conditions -- as long as we use call_rcu(), + * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue the callback. + * + * This guarantee is important for a few reasons: + * - future call_rcu_lazy() will make use of lower bits in the pointer; + * - the structure shares storage space in struct page with @compound_head, + * which encodes PageTail() in bit 0. The guarantee is needed to avoid + * false-positive PageTail(). */ struct callback_head { struct callback_head *next; void (*func)(struct callback_head *head); -}; +} __attribute__((aligned(sizeof(void *)))); #define rcu_head callback_head typedef void (*rcu_callback_t)(struct rcu_head *head); -- cgit v1.2.3 From 55537871ef666b4153fd1ef8782e4a13fee142cc Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 5 Nov 2015 18:44:41 -0800 Subject: kernel/watchdog.c: perform all-CPU backtrace in case of hard lockup In many cases of hardlockup reports, it's actually not possible to know why it triggered, because the CPU that got stuck is usually waiting on a resource (with IRQs disabled) that some other CPU is holding. IOW, we are often looking at the stacktrace of the victim and not the actual offender. Introduce a sysctl / cmdline parameter that makes it possible to have the hardlockup detector perform an all-CPU backtrace. Signed-off-by: Jiri Kosina Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Acked-by: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ Documentation/sysctl/kernel.txt | 12 ++++++++++++ include/linux/nmi.h | 1 + kernel/sysctl.c | 9 +++++++++ kernel/watchdog.c | 33 ++++++++++++++++++++++++++++----- 5 files changed, 55 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6263a2da3e2f..0231f4508abe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1269,6 +1269,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: such that (rxsize & ~0x1fffc0) == 0. Default: 1024 + hardlockup_all_cpu_backtrace= + [KNL] Should the hard-lockup detector generate + backtraces on all cpus. + Format: + hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on for 64-bit NUMA, off otherwise. 
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6fccb69c03e7..af70d1541d3a 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -33,6 +33,7 @@ show up in /proc/sys/kernel: - domainname - hostname - hotplug +- hardlockup_all_cpu_backtrace - hung_task_panic - hung_task_check_count - hung_task_timeout_secs @@ -292,6 +293,17 @@ Information Service) or YP (Yellow Pages) domainname. These two domain names are in general different. For a detailed discussion see the hostname(1) man page. +============================================================== +hardlockup_all_cpu_backtrace: + +This value controls the hard lockup detector behavior when a hard +lockup condition is detected as to whether or not to gather further +debug information. If enabled, arch-specific all-CPU stack dumping +will be initiated. + +0: do nothing. This is the default behavior. + +1: on detection capture more debug information. ============================================================== hotplug: diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 78488e099ce7..7ec5b86735f3 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -73,6 +73,7 @@ extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long *watchdog_cpumask_bits; extern int sysctl_softlockup_all_cpu_backtrace; +extern int sysctl_hardlockup_all_cpu_backtrace; struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 96c856b04081..1a5faa3e1521 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -898,6 +898,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "hardlockup_all_cpu_backtrace", + .data = &sysctl_hardlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #endif /* CONFIG_SMP */ #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 452e4ed507e5..f6b32b8cbffe 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -57,8 +57,10 @@ int __read_mostly watchdog_thresh = 10; #ifdef CONFIG_SMP int __read_mostly sysctl_softlockup_all_cpu_backtrace; +int __read_mostly sysctl_hardlockup_all_cpu_backtrace; #else #define sysctl_softlockup_all_cpu_backtrace 0 +#define sysctl_hardlockup_all_cpu_backtrace 0 #endif static struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -112,6 +114,7 @@ static unsigned long soft_lockup_nmi_warn; #ifdef CONFIG_HARDLOCKUP_DETECTOR static int hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; +static unsigned long hardlockup_allcpu_dumped; /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. 
In these @@ -173,6 +176,13 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str) return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); +static int __init hardlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_hardlockup_all_cpu_backtrace = + !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); #endif /* @@ -318,17 +328,30 @@ static void watchdog_overflow_callback(struct perf_event *event, */ if (is_hardlockup()) { int this_cpu = smp_processor_id(); + struct pt_regs *regs = get_irq_regs(); /* only print hardlockups once */ if (__this_cpu_read(hard_watchdog_warn) == true) return; - if (hardlockup_panic) - panic("Watchdog detected hard LOCKUP on cpu %d", - this_cpu); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); else - WARN(1, "Watchdog detected hard LOCKUP on cpu %d", - this_cpu); + dump_stack(); + + /* + * Perform all-CPU dump only once to avoid multiple hardlockups + * generating interleaving traces + */ + if (sysctl_hardlockup_all_cpu_backtrace && + !test_and_set_bit(0, &hardlockup_allcpu_dumped)) + trigger_allbutself_cpu_backtrace(); + + if (hardlockup_panic) + panic("Hard LOCKUP"); __this_cpu_write(hard_watchdog_warn, true); return; -- cgit v1.2.3 From ac1f591249d95372f3a5ab3828d4af5dfbf5efd3 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 5 Nov 2015 18:44:44 -0800 Subject: kernel/watchdog.c: add sysctl knob hardlockup_panic The only way to enable a hardlockup to panic the machine is to set 'nmi_watchdog=panic' on the kernel command line. This makes it awkward for end users and folks who want to run automated tests (like myself). Mimic the softlockup_panic knob and create a /proc/sys/kernel/hardlockup_panic knob. Signed-off-by: Don Zickus Cc: Ulrich Obergfell Acked-by: Jiri Kosina Reviewed-by: Aaron Tomlin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lockup-watchdogs.txt | 5 +++-- include/linux/sched.h | 1 + kernel/sysctl.c | 11 +++++++++++ kernel/watchdog.c | 2 +- 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt index 22dd6af2e4bd..4a6e33e1af61 100644 --- a/Documentation/lockup-watchdogs.txt +++ b/Documentation/lockup-watchdogs.txt @@ -20,8 +20,9 @@ kernel mode for more than 10 seconds (see "Implementation" below for details), without letting other interrupts have a chance to run. Similarly to the softlockup case, the current stack trace is displayed upon detection and the system will stay locked up unless the default -behavior is changed, which can be done through a compile time knob, -"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog" +behavior is changed, which can be done through a sysctl, +'hardlockup_panic', a compile time knob, "BOOTPARAM_HARDLOCKUP_PANIC", +and a kernel parameter, "nmi_watchdog" (see "Documentation/kernel-parameters.txt" for details). 
The panic option can be used in combination with panic_timeout (this diff --git a/include/linux/sched.h b/include/linux/sched.h index c115d617739d..5423b9c82fee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -384,6 +384,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a5faa3e1521..dc6858d6639e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -888,6 +888,17 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#ifdef CONFIG_HARDLOCKUP_DETECTOR + { + .procname = "hardlockup_panic", + .data = &hardlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif #ifdef CONFIG_SMP { .procname = "softlockup_all_cpu_backtrace", diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f6b32b8cbffe..0a23125369f1 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -112,7 +112,7 @@ static unsigned long soft_lockup_nmi_warn; * Should we panic when a soft-lockup or hard-lockup occurs: */ #ifdef CONFIG_HARDLOCKUP_DETECTOR -static int hardlockup_panic = +unsigned int __read_mostly hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; static unsigned long hardlockup_allcpu_dumped; /* -- cgit v1.2.3 From fda901241fb89449244537db4fb27b06e491b74f Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Thu, 5 Nov 2015 18:44:59 -0800 Subject: slab: convert slab_is_available() to boolean A good candidate to return a boolean result. Signed-off-by: Denis Kirjanov Cc: Christoph Lameter Reviewed-by: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- mm/slab_common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7e37d448ed91..7c82e3b307a3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -111,7 +111,7 @@ struct mem_cgroup; * struct kmem_cache related prototypes */ void __init kmem_cache_init(void); -int slab_is_available(void); +bool slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, diff --git a/mm/slab_common.c b/mm/slab_common.c index 5ce4faeb16fb..113a6fd597db 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -692,7 +692,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep) } EXPORT_SYMBOL(kmem_cache_shrink); -int slab_is_available(void) +bool slab_is_available(void) { return slab_state >= UP; } -- cgit v1.2.3 From a744fd17b5233360681ce03e43804406745b680b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Nov 2015 18:45:02 -0800 Subject: compiler.h: add support for function attribute assume_aligned gcc 4.9 added the function attribute assume_aligned, indicating to the caller that the returned pointer may be assumed to have a certain minimal alignment. This is useful if, for example, the return value is passed to memset(). Add a shorthand macro for that. 
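To make the semantics concrete, a short hedged sketch (both allocator declarations are hypothetical):

#include <stddef.h>

/* Callers may assume the result is 64-byte aligned: */
void *alloc_cacheline(size_t size) __assume_aligned(64);

/* With the optional second argument, the result is asserted to be
 * 16 bytes past a 64-byte boundary, i.e. 16 modulo 64: */
void *alloc_offset16(size_t size) __assume_aligned(64, 16);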
Signed-off-by: Rasmus Villemoes Cc: Christoph Lameter Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 17 +++++++++++++++++ include/linux/compiler.h | 8 ++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 8efb40e61d6e..02fa6176120d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -210,6 +210,23 @@ #define __visible __attribute__((externally_visible)) #endif + +#if GCC_VERSION >= 40900 +/* + * __assume_aligned(n, k): Tell the optimizer that the returned + * pointer can be assumed to be k modulo n. The second argument is + * optional (default 0), so we use a variadic macro to make the + * shorthand. + * + * Beware: Do not apply this to functions which may return + * ERR_PTRs. Also, it is probably unwise to apply it to functions + * returning extra information in the low bits (but in that case the + * compiler should see some alignment anyway, when the return value is + * massaged by 'flags = ptr & 3; ptr &= ~3;'). + */ +#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) +#endif + /* * GCC 'asm goto' miscompiles certain code sequences: * diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 52a459ff75f4..4dac1036594f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -417,6 +417,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define __visible #endif +/* + * Assume alignment of return value. + */ +#ifndef __assume_aligned +#define __assume_aligned(a, ...) +#endif + + /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -- cgit v1.2.3 From 8748dd5c98f7fe506ccb31a094833401ed120915 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Nov 2015 18:45:05 -0800 Subject: include/linux/compiler-gcc.h: hide assume_aligned attribute from sparse The patch "slab.h: sprinkle __assume_aligned attributes" causes *tons* of whinges if you do 'make C=2' with sparse 0.5.0: CHECK drivers/media/usb/pwc/pwc-if.c include/linux/slab.h:307:43: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:308:58: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:337:73: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:375:74: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:378:80: error: attribute '__assume_aligned__': unknown attribute sparse apparently pretends to be gcc >= 4.9, yet isn't prepared to handle all the function attributes supported by those gccs and complains loudly. So hide the definition of __assume_aligned from it (so that the generic one in compiler.h gets used). 
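The combined effect of this commit and the previous one is the usual two-header pattern, shown here side by side (condensed from the real headers):

/* include/linux/compiler-gcc.h: real gcc only, never sparse */
#if GCC_VERSION >= 40900 && !defined(__CHECKER__)
#define __assume_aligned(a, ...) \
	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
#endif

/* include/linux/compiler.h: empty fallback, which sparse now uses */
#ifndef __assume_aligned
#define __assume_aligned(a, ...)
#endif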
Signed-off-by: Rasmus Villemoes Reported-by: Valdis Kletnieks Tested-By: Valdis Kletnieks Cc: Christopher Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 02fa6176120d..0e3110a0b771 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -211,7 +211,7 @@ #endif -#if GCC_VERSION >= 40900 +#if GCC_VERSION >= 40900 && !defined(__CHECKER__) /* * __assume_aligned(n, k): Tell the optimizer that the returned * pointer can be assumed to be k modulo n. The second argument is -- cgit v1.2.3 From 0ab32b6f1b88444524e52429fab334ff96683a3f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 Nov 2015 18:46:03 -0800 Subject: uaccess: reimplement probe_kernel_address() using probe_kernel_read() probe_kernel_address() is basically the same as the (later added) probe_kernel_read(). The return value on EFAULT is a bit different: probe_kernel_address() returns number-of-bytes-not-copied whereas probe_kernel_read() returns -EFAULT. All callers have been checked, none cared. probe_kernel_read() can be overridden by the architecture whereas probe_kernel_address() cannot. parisc, blackfin and um do this, to insert additional checking. Hence this patch possibly fixes obscure bugs, although there are only two probe_kernel_address() callsites outside arch/. My first attempt involved removing probe_kernel_address() entirely and converting all callsites to use probe_kernel_read() directly, but that got tiresome. This patch shrinks mm/slab_common.o by 218 bytes. For a single probe_kernel_address() callsite. Cc: Steven Miao Cc: Jeff Dike Cc: Richard Weinberger Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/alignment.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- include/linux/uaccess.h | 40 ++++++++++------------------------------ mm/maccess.c | 5 +++++ 4 files changed, 17 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 00b7f7de28a1..7d5f4c736a16 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -803,7 +803,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address(instrptr, instr); + fault = probe_kernel_address((void *)instrptr, instr); instr = __mem_to_opcode_arm(instr); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index ebc1f412cf49..13b9bcf5485e 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -999,7 +999,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = get_user(regs->nip, &inst); pagefault_enable(); } else { - ret = probe_kernel_address(regs->nip, inst); + ret = probe_kernel_address((void *)regs->nip, inst); } if (mcheck_handle_load(regs, inst)) { diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d6f2c2c5b043..558129af828a 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -75,36 +75,6 @@ static inline unsigned long __copy_from_user_nocache(void *to, #endif /* ARCH_HAS_NOCACHE_UACCESS */ -/** - * probe_kernel_address(): safely attempt to read from a location - * @addr: address to read from - its type is type typeof(retval)* - * @retval: read into this variable - * - * Safely read from address @addr into variable @revtal. 
If a kernel fault - * happens, handle that and return -EFAULT. - * We ensure that the __get_user() is executed in atomic context so that - * do_page_fault() doesn't attempt to take mmap_sem. This makes - * probe_kernel_address() suitable for use within regions where the caller - * already holds mmap_sem, or other locks which nest inside mmap_sem. - * This must be a macro because __get_user() needs to know the types of the - * args. - * - * We don't include enough header files to be able to do the set_fs(). We - * require that the probe_kernel_address() caller will do that. - */ -#define probe_kernel_address(addr, retval) \ - ({ \ - long ret; \ - mm_segment_t old_fs = get_fs(); \ - \ - set_fs(KERNEL_DS); \ - pagefault_disable(); \ - ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ - pagefault_enable(); \ - set_fs(old_fs); \ - ret; \ - }) - /* * probe_kernel_read(): safely attempt to read from a location * @dst: pointer to the buffer that shall take the data @@ -131,4 +101,14 @@ extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +/** + * probe_kernel_address(): safely attempt to read from a location + * @addr: address to read from + * @retval: read into this variable + * + * Returns 0 on success, or -EFAULT. + */ +#define probe_kernel_address(addr, retval) \ + probe_kernel_read(&retval, addr, sizeof(retval)) + #endif /* __LINUX_UACCESS_H__ */ diff --git a/mm/maccess.c b/mm/maccess.c index 34fe24759ed1..1b13638d238d 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -13,6 +13,11 @@ * * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. + * + * We ensure that the copy_from_user is executed in atomic context so that + * do_page_fault() doesn't attempt to take mmap_sem. This makes + * probe_kernel_read() suitable for use within regions where the caller + * already holds mmap_sem, or other locks which nest inside mmap_sem. */ long __weak probe_kernel_read(void *dst, const void *src, size_t size) -- cgit v1.2.3 From 626ebc4100285be56fe3546f29b6afeb36b6871a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:09 -0800 Subject: memcg: flatten task_struct->memcg_oom task_struct->memcg_oom is a sub-struct containing fields which are used for async memcg oom handling. Most task_struct fields aren't packaged this way and it can lead to unnecessary alignment paddings. This patch flattens it. * task.memcg_oom.memcg -> task.memcg_in_oom * task.memcg_oom.gfp_mask -> task.memcg_oom_gfp_mask * task.memcg_oom.order -> task.memcg_oom_order * task.memcg_oom.may_oom -> task.memcg_may_oom In addition, task.memcg_may_oom is relocated to where other bitfields are which reduces the size of task_struct. 
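To see why the flattening can shrink task_struct, here is a rough userspace sketch; the field types loosely mirror the commit, but the layouts are illustrative and ABI-dependent (on common 64-bit ABIs the sub-struct variant prints a larger size because the lone bitfield forces its own padded storage unit).

#include <stdio.h>

struct packed_style {           /* before: flag buried in a sub-struct */
        unsigned other_flags:3; /* stand-in for neighbouring task flags */
        struct {
                void *memcg;
                unsigned gfp_mask;
                int order;
                unsigned may_oom:1;     /* needs its own padded storage unit */
        } memcg_oom;
};

struct flat_style {             /* after: flag joins the existing bitfield word */
        unsigned other_flags:3;
        unsigned memcg_may_oom:1;
        void *memcg_in_oom;
        unsigned memcg_oom_gfp_mask;
        int memcg_oom_order;
};

int main(void)
{
        printf("sub-struct layout: %zu bytes\n", sizeof(struct packed_style));
        printf("flattened layout:  %zu bytes\n", sizeof(struct flat_style));
        return 0;
}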
Signed-off-by: Tejun Heo Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 +++++----- include/linux/sched.h | 13 ++++++------- mm/memcontrol.c | 16 ++++++++-------- 3 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3e3318ddfc0e..56174c7199ee 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -406,19 +406,19 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, static inline void mem_cgroup_oom_enable(void) { - WARN_ON(current->memcg_oom.may_oom); - current->memcg_oom.may_oom = 1; + WARN_ON(current->memcg_may_oom); + current->memcg_may_oom = 1; } static inline void mem_cgroup_oom_disable(void) { - WARN_ON(!current->memcg_oom.may_oom); - current->memcg_oom.may_oom = 0; + WARN_ON(!current->memcg_may_oom); + current->memcg_may_oom = 0; } static inline bool task_in_memcg_oom(struct task_struct *p) { - return p->memcg_oom.memcg; + return p->memcg_in_oom; } bool mem_cgroup_oom_synchronize(bool wait); diff --git a/include/linux/sched.h b/include/linux/sched.h index 5423b9c82fee..17bf8b845aa0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1473,7 +1473,9 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; - +#ifdef CONFIG_MEMCG + unsigned memcg_may_oom:1; +#endif #ifdef CONFIG_MEMCG_KMEM unsigned memcg_kmem_skip_account:1; #endif @@ -1804,12 +1806,9 @@ struct task_struct { unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_MEMCG - struct memcg_oom_info { - struct mem_cgroup *memcg; - gfp_t gfp_mask; - int order; - unsigned int may_oom:1; - } memcg_oom; + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c57c4423c688..47bd7f13f526 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1661,7 +1661,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - if (!current->memcg_oom.may_oom) + if (!current->memcg_may_oom) return; /* * We are in the middle of the charge context here, so we @@ -1678,9 +1678,9 @@ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) * and when we know whether the fault was overall successful. 
*/ css_get(&memcg->css); - current->memcg_oom.memcg = memcg; - current->memcg_oom.gfp_mask = mask; - current->memcg_oom.order = order; + current->memcg_in_oom = memcg; + current->memcg_oom_gfp_mask = mask; + current->memcg_oom_order = order; } /** @@ -1702,7 +1702,7 @@ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) */ bool mem_cgroup_oom_synchronize(bool handle) { - struct mem_cgroup *memcg = current->memcg_oom.memcg; + struct mem_cgroup *memcg = current->memcg_in_oom; struct oom_wait_info owait; bool locked; @@ -1730,8 +1730,8 @@ bool mem_cgroup_oom_synchronize(bool handle) if (locked && !memcg->oom_kill_disable) { mem_cgroup_unmark_under_oom(memcg); finish_wait(&memcg_oom_waitq, &owait.wait); - mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask, - current->memcg_oom.order); + mem_cgroup_out_of_memory(memcg, current->memcg_oom_gfp_mask, + current->memcg_oom_order); } else { schedule(); mem_cgroup_unmark_under_oom(memcg); @@ -1748,7 +1748,7 @@ bool mem_cgroup_oom_synchronize(bool handle) memcg_oom_recover(memcg); } cleanup: - current->memcg_oom.memcg = NULL; + current->memcg_in_oom = NULL; css_put(&memcg->css); return true; } -- cgit v1.2.3 From b23afb93d317c65cef553b804f08dec8a7a0f7e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:11 -0800 Subject: memcg: punt high overage reclaim to return-to-userland path Currently, try_charge() tries to reclaim memory synchronously when the high limit is breached; however, if the allocation doesn't have __GFP_WAIT, synchronous reclaim is skipped. If a process performs only speculative allocations, it can blow way past the high limit. This is actually easily reproducible by simply doing "find /". slab/slub allocator tries speculative allocations first, so as long as there's memory which can be consumed without blocking, it can keep allocating memory regardless of the high limit. This patch makes try_charge() always punt the over-high reclaim to the return-to-userland path. If try_charge() detects that high limit is breached, it adds the overage to current->memcg_nr_pages_over_high and schedules execution of mem_cgroup_handle_over_high() which performs synchronous reclaim from the return-to-userland path. As long as kernel doesn't have a run-away allocation spree, this should provide enough protection while making kmemcg behave more consistently. It also has the following benefits. - All over-high reclaims can use GFP_KERNEL regardless of the specific gfp mask in use, e.g. GFP_NOFS, when the limit was breached. - It copes with prio inversion. Previously, a low-prio task with small memory.high might perform over-high reclaim with a bunch of locks held. If a higher prio task needed any of these locks, it would have to wait until the low prio task finished reclaim and released the locks. By handing over-high reclaim to the task exit path this issue can be avoided. 
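The control flow reduces to a simple pattern; the sketch below is a single-threaded userspace illustration with made-up names (the kernel counterparts are try_charge(), current->memcg_nr_pages_over_high and mem_cgroup_handle_over_high()).

#include <stdio.h>

static unsigned int nr_pages_over_high; /* cf. current->memcg_nr_pages_over_high */

/* hot path: never reclaim here, only record how much is owed */
static void charge(unsigned int nr_pages, unsigned int usage, unsigned int high)
{
        if (usage > high) {
                nr_pages_over_high += nr_pages;
                /* the kernel would also call set_notify_resume(current) */
        }
}

/* runs once on the way back to userspace, with a clean context */
static void handle_over_high(void)
{
        if (!nr_pages_over_high)
                return;
        printf("reclaiming %u pages (GFP_KERNEL-equivalent context)\n",
               nr_pages_over_high);
        nr_pages_over_high = 0;
}

int main(void)
{
        charge(32, 1024, 512);  /* speculative allocations past the high mark */
        charge(16, 1056, 512);
        handle_over_high();     /* cf. tracehook_notify_resume() */
        return 0;
}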
Signed-off-by: Tejun Heo Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ include/linux/sched.h | 3 +++ include/linux/tracehook.h | 3 +++ mm/memcontrol.c | 47 ++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 51 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 56174c7199ee..77bf42966200 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -401,6 +401,8 @@ static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return inactive * inactive_ratio < active; } +void mem_cgroup_handle_over_high(void); + void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); @@ -620,6 +622,10 @@ static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) { } +static inline void mem_cgroup_handle_over_high(void) +{ +} + static inline void mem_cgroup_oom_enable(void) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index 17bf8b845aa0..055f2ee3b0f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1809,6 +1809,9 @@ struct task_struct { struct mem_cgroup *memcg_in_oom; gfp_t memcg_oom_gfp_mask; int memcg_oom_order; + + /* number of pages to reclaim on returning to userland */ + unsigned int memcg_nr_pages_over_high; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 84d497297c5f..26c152122a42 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -50,6 +50,7 @@ #include #include #include +#include struct linux_binprm; /* @@ -188,6 +189,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) smp_mb__after_atomic(); if (unlikely(current->task_works)) task_work_run(); + + mem_cgroup_handle_over_high(); } #endif /* */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 47bd7f13f526..327dcda3ebf6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "internal.h" #include #include @@ -1972,6 +1973,31 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, return NOTIFY_OK; } +/* + * Scheduled by try_charge() to be executed from the userland return path + * and reclaims memory over the high limit. + */ +void mem_cgroup_handle_over_high(void) +{ + unsigned int nr_pages = current->memcg_nr_pages_over_high; + struct mem_cgroup *memcg, *pos; + + if (likely(!nr_pages)) + return; + + pos = memcg = get_mem_cgroup_from_mm(current->mm); + + do { + if (page_counter_read(&pos->memory) <= pos->high) + continue; + mem_cgroup_events(pos, MEMCG_HIGH, 1); + try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true); + } while ((pos = parent_mem_cgroup(pos))); + + css_put(&memcg->css); + current->memcg_nr_pages_over_high = 0; +} + static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, unsigned int nr_pages) { @@ -2080,17 +2106,22 @@ done_restock: css_get_many(&memcg->css, batch); if (batch > nr_pages) refill_stock(memcg, batch - nr_pages); - if (!(gfp_mask & __GFP_WAIT)) - goto done; + /* - * If the hierarchy is above the normal consumption range, - * make the charging task trim their excess contribution. + * If the hierarchy is above the normal consumption range, schedule + * reclaim on returning to userland. 
We can perform reclaim here + * if __GFP_WAIT but let's always punt for simplicity and so that + * GFP_KERNEL can consistently be used during reclaim. @memcg is + * not recorded as it most likely matches current's and won't + * change in the meantime. As high limit is checked again before + * reclaim, the cost of mismatch is negligible. */ do { - if (page_counter_read(&memcg->memory) <= memcg->high) - continue; - mem_cgroup_events(memcg, MEMCG_HIGH, 1); - try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); + if (page_counter_read(&memcg->memory) > memcg->high) { + current->memcg_nr_pages_over_high += nr_pages; + set_notify_resume(current); + break; + } } while ((memcg = parent_mem_cgroup(memcg))); done: return ret; -- cgit v1.2.3 From cbfb479809c1b8d871cb9a31832e065e900a24c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:14 -0800 Subject: memcg: collect kmem bypass conditions into __memcg_kmem_bypass() memcg_kmem_newpage_charge() and memcg_kmem_get_cache() are testing the same series of conditions to decide whether to bypass kmem accounting. Collect the tests into __memcg_kmem_bypass(). This is pure refactoring. Signed-off-by: Tejun Heo Reviewed-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 77bf42966200..d8174e8d6ab4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -777,20 +777,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, unsigned long nr_pages); void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); -/** - * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. - * @gfp: the gfp allocation flags. - * @memcg: a pointer to the memcg this was charged against. - * @order: allocation order. - * - * returns true if the memcg where the current task belongs can hold this - * allocation. - * - * We return true automatically if this allocation is not to be accounted to - * any memcg. - */ -static inline bool -memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) return true; @@ -812,6 +799,26 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) if (unlikely(fatal_signal_pending(current))) return true; + return false; +} + +/** + * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. + * @gfp: the gfp allocation flags. + * @memcg: a pointer to the memcg this was charged against. + * @order: allocation order. + * + * returns true if the memcg where the current task belongs can hold this + * allocation. + * + * We return true automatically if this allocation is not to be accounted to + * any memcg. 
+ */ +static inline bool +memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +{ + if (__memcg_kmem_bypass(gfp)) + return true; return __memcg_kmem_newpage_charge(gfp, memcg, order); } @@ -854,17 +861,8 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) static __always_inline struct kmem_cache * memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { - if (!memcg_kmem_enabled()) - return cachep; - if (gfp & __GFP_NOACCOUNT) - return cachep; - if (gfp & __GFP_NOFAIL) + if (__memcg_kmem_bypass(gfp)) return cachep; - if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) - return cachep; - if (unlikely(fatal_signal_pending(current))) - return cachep; - return __memcg_kmem_get_cache(cachep); } -- cgit v1.2.3 From 7f822c24c2b32a9feafb33e3b9a9e23ef4278c2c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:20 -0800 Subject: memcg: drop unnecessary cold-path tests from __memcg_kmem_bypass() __memcg_kmem_bypass() decides whether a kmem allocation should be bypassed to the root memcg. Some conditions that it tests are valid criteria regarding who should be held accountable; however, there are a couple of unnecessary tests for cold paths - __GFP_NOFAIL and fatal_signal_pending(). The previous patch updated try_charge() to handle both __GFP_NOFAIL and dying tasks correctly and the only thing these two tests are doing is making accounting less accurate and sprinkling tests for cold path conditions in the hot paths. There's nothing meaningful gained by these extra tests. This patch removes the two unnecessary tests from __memcg_kmem_bypass(). Signed-off-by: Tejun Heo Reviewed-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d8174e8d6ab4..641bb60dd7db 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -781,24 +781,10 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) return true; - if (gfp & __GFP_NOACCOUNT) return true; - /* - * __GFP_NOFAIL allocations will move on even if charging is not - * possible. Therefore we don't even try, and have this allocation - * unaccounted. We could in theory charge it forcibly, but we hope - * those allocations are rare, and won't be worth the trouble. - */ - if (gfp & __GFP_NOFAIL) - return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; - - /* If the test is dying, just let it go. */ - if (unlikely(fatal_signal_pending(current))) - return true; - return false; } -- cgit v1.2.3 From 35bd16a227534cb6ffc9b26a33061c2dcf91934b Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Thu, 5 Nov 2015 18:47:00 -0800 Subject: mm/memblock: make memblock_remove_range() static memblock_remove_range() is only used in mm/memblock.c, so we can make it static.
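The mechanics of such a cleanup, as a standalone sketch with placeholder names: once the prototype leaves the header, static gives the helper internal linkage, so nothing outside the file can grow a dependency on it and the compiler is free to inline or discard it.

#include <stdio.h>

/* previously: int remove_range(int base, int size); lived in a header */
static int remove_range(int base, int size)
{
        return base + size;     /* placeholder for the real bookkeeping */
}

int main(void)
{
        printf("%d\n", remove_range(8, 4));
        return 0;
}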
Signed-off-by: Alexander Kuleshov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ---- mm/memblock.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c518eb589260..24daf8fc4d7c 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -89,10 +89,6 @@ int memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, int nid, unsigned long flags); -int memblock_remove_range(struct memblock_type *type, - phys_addr_t base, - phys_addr_t size); - void __next_mem_range(u64 *idx, int nid, ulong flags, struct memblock_type *type_a, struct memblock_type *type_b, phys_addr_t *out_start, diff --git a/mm/memblock.c b/mm/memblock.c index 1c7b647e5897..d300f1329814 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -706,7 +706,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, return 0; } -int __init_memblock memblock_remove_range(struct memblock_type *type, +static int __init_memblock memblock_remove_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { int start_rgn, end_rgn; -- cgit v1.2.3 From b171e4093017d4d6e411f5e97823e5e4a21266a2 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 5 Nov 2015 18:47:06 -0800 Subject: mm/page_alloc: remove unused parameter in init_currently_empty_zone() Commit a2f3aa025766 ("[PATCH] Fix sparsemem on Cell") fixed an oops experienced on the Cell architecture when init-time functions, early_*(), are called at runtime by introducing an 'enum memmap_context' parameter to memmap_init_zone() and init_currently_empty_zone(). This parameter is intended to be used to tell whether the call of these two functions is being made on behalf of a hotplug event, or happening at boot-time. However, init_currently_empty_zone() does not use this parameter at all, so remove it. 
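The shape of the cleanup, reduced to a standalone sketch with invented names; building with -Wunused-parameter is one way to spot such candidates.

#include <stdio.h>

enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG };

/* after: the signature states exactly what the function consumes */
static int init_zone_new(unsigned long start_pfn, unsigned long size)
{
        printf("init zone: pfn %lu, %lu pages\n", start_pfn, size);
        return 0;
}

/* before: every caller had to pick a context value that was ignored */
static int init_zone_old(unsigned long start_pfn, unsigned long size,
                         enum memmap_context context)
{
        (void)context;          /* the body never reads it */
        return init_zone_new(start_pfn, size);
}

int main(void)
{
        return init_zone_old(0, 128, MEMMAP_EARLY);
}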
Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +-- mm/memory_hotplug.c | 4 ++-- mm/page_alloc.c | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d94347737292..2d7e660cdefe 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -823,8 +823,7 @@ enum memmap_context { MEMMAP_HOTPLUG, }; extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, - unsigned long size, - enum memmap_context context); + unsigned long size); extern void lruvec_init(struct lruvec *lruvec); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0780d118d26e..67d488ab495e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -339,8 +339,8 @@ static int __ref ensure_zone_is_initialized(struct zone *zone, unsigned long start_pfn, unsigned long num_pages) { if (!zone_is_initialized(zone)) - return init_currently_empty_zone(zone, start_pfn, num_pages, - MEMMAP_HOTPLUG); + return init_currently_empty_zone(zone, start_pfn, num_pages); + return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 805bbad2e24e..c60605df9b48 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4900,8 +4900,7 @@ static __meminit void zone_pcp_init(struct zone *zone) int __meminit init_currently_empty_zone(struct zone *zone, unsigned long zone_start_pfn, - unsigned long size, - enum memmap_context context) + unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; int ret; @@ -5413,8 +5412,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) set_pageblock_order(); setup_usemap(pgdat, zone, zone_start_pfn, size); - ret = init_currently_empty_zone(zone, zone_start_pfn, - size, MEMMAP_EARLY); + ret = init_currently_empty_zone(zone, zone_start_pfn, size); BUG_ON(ret); memmap_init(size, nid, j, zone_start_pfn); zone_start_pfn += size; -- cgit v1.2.3 From 600e19afc5f8a6c18ea49cee9511c5797db02391 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 5 Nov 2015 18:47:08 -0800 Subject: mm: use only per-device readahead limit The maximal readahead size is currently limited by two values: 1) a global 2MB constant (MAX_READAHEAD in max_sane_readahead()) 2) a configurable per-device value (bdi->ra_pages) There are devices which require a custom readahead limit. For instance, for RAIDs it's calculated as the number of devices multiplied by the chunk size, times 2. Readahead size can never be larger than the bdi->ra_pages * 2 value (POSIX_FADV_SEQUENTIAL doubles the readahead size). If so, why do we need two limits? I suggest completely removing this max_sane_readahead() stuff and using the per-device readahead limit everywhere. Also, using the right readahead size for RAID disks can significantly increase i/o performance: before: dd if=/dev/md2 of=/dev/null bs=100M count=100 100+0 records in 100+0 records out 10485760000 bytes (10 GB) copied, 12.9741 s, 808 MB/s after: $ dd if=/dev/md2 of=/dev/null bs=100M count=100 100+0 records in 100+0 records out 10485760000 bytes (10 GB) copied, 8.91317 s, 1.2 GB/s (This is an 8-disk RAID5 array.) This patch doesn't change sys_readahead and madvise(MADV_WILLNEED) behavior introduced by 6d2be915e589b58 ("mm/readahead.c: fix readahead failure for memoryless NUMA nodes and limit readahead pages").
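The resulting read-around arithmetic is compact enough to restate as a userspace sketch (names loosely mirror struct file_ra_state; the ra_pages value is made up).

#include <stdio.h>

struct ra_state {
        long start, size, async_size, ra_pages;
};

/* mirrors the post-patch mmap read-around: the window is derived from
 * the per-device ra_pages alone, with no global 2MB clamp */
static void read_around(struct ra_state *ra, long offset)
{
        ra->start = offset - ra->ra_pages / 2;
        if (ra->start < 0)
                ra->start = 0;
        ra->size = ra->ra_pages;
        ra->async_size = ra->ra_pages / 4;
}

int main(void)
{
        struct ra_state ra = { .ra_pages = 4096 };      /* a large RAID-style window */

        read_around(&ra, 100000);
        printf("start=%ld size=%ld async=%ld\n",
               ra.start, ra.size, ra.async_size);
        return 0;
}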
Signed-off-by: Roman Gushchin Cc: Raghavendra K T Cc: Jan Kara Cc: Wu Fengguang Cc: David Rientjes Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 -- mm/filemap.c | 8 +++----- mm/readahead.c | 14 ++------------ 3 files changed, 5 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 80001de019ba..fa08f3cf0f22 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2036,8 +2036,6 @@ void page_cache_async_readahead(struct address_space *mapping, pgoff_t offset, unsigned long size); -unsigned long max_sane_readahead(unsigned long nr); - /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); diff --git a/mm/filemap.c b/mm/filemap.c index 327910c2400c..1fe962b49f31 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1807,7 +1807,6 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, struct file *file, pgoff_t offset) { - unsigned long ra_pages; struct address_space *mapping = file->f_mapping; /* If we don't want any read-ahead, don't bother */ @@ -1836,10 +1835,9 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, /* * mmap read-around */ - ra_pages = max_sane_readahead(ra->ra_pages); - ra->start = max_t(long, 0, offset - ra_pages / 2); - ra->size = ra_pages; - ra->async_size = ra_pages / 4; + ra->start = max_t(long, 0, offset - ra->ra_pages / 2); + ra->size = ra->ra_pages; + ra->async_size = ra->ra_pages / 4; ra_submit(ra, mapping, file); } diff --git a/mm/readahead.c b/mm/readahead.c index 24682f6f4cfd..998ad592f408 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -213,7 +213,7 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) return -EINVAL; - nr_to_read = max_sane_readahead(nr_to_read); + nr_to_read = min(nr_to_read, inode_to_bdi(mapping->host)->ra_pages); while (nr_to_read) { int err; @@ -232,16 +232,6 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, return 0; } -#define MAX_READAHEAD ((512*4096)/PAGE_CACHE_SIZE) -/* - * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a - * sensible upper limit. - */ -unsigned long max_sane_readahead(unsigned long nr) -{ - return min(nr, MAX_READAHEAD); -} - /* * Set the initial window size, round to next power of 2 and square * for small size, x 4 for medium, and x 2 for large @@ -380,7 +370,7 @@ ondemand_readahead(struct address_space *mapping, bool hit_readahead_marker, pgoff_t offset, unsigned long req_size) { - unsigned long max = max_sane_readahead(ra->ra_pages); + unsigned long max = ra->ra_pages; pgoff_t prev_offset; /* -- cgit v1.2.3 From 5d317b2b6536592a9b51fe65faed43d65ca9158e Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 5 Nov 2015 18:47:14 -0800 Subject: mm: hugetlb: proc: add HugetlbPages field to /proc/PID/status Currently there's no easy way to get per-process usage of hugetlb pages, which is inconvenient because userspace applications which use hugetlb typically want to control their processes on the basis of how much memory (including hugetlb) they use. So this patch simply provides easy access to the info via /proc/PID/status.
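A small userspace consumer might read the new field like this (a sketch; it assumes a kernel with this patch applied and simply prints nothing on older kernels).

#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/self/status", "r");

        if (!f)
                return 1;
        while (fgets(line, sizeof(line), f))
                if (!strncmp(line, "HugetlbPages:", 13))
                        fputs(line, stdout);    /* e.g. "HugetlbPages:    2048 kB" */
        fclose(f);
        return 0;
}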
Signed-off-by: Naoya Horiguchi Acked-by: Joern Engel Acked-by: David Rientjes Acked-by: Michal Hocko Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 2 ++ fs/proc/task_mmu.c | 1 + include/linux/hugetlb.h | 19 +++++++++++++++++++ include/linux/mm_types.h | 3 +++ mm/hugetlb.c | 9 +++++++++ mm/rmap.c | 4 +++- 6 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a7d6c06f36c4..12ac0e4455d4 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -175,6 +175,7 @@ read the file /proc/PID/status: VmLib: 1412 kB VmPTE: 20 kb VmSwap: 0 kB + HugetlbPages: 0 kB Threads: 1 SigQ: 0/28578 SigPnd: 0000000000000000 @@ -238,6 +239,7 @@ Table 1-2: Contents of the status files (as of 4.1) VmPTE size of page table entries VmPMD size of second level page tables VmSwap size of swap usage (the number of referred swapents) + HugetlbPages size of hugetlb memory portions Threads number of threads SigQ number of signals queued/max. number for queue SigPnd bitmap of pending signals for the thread diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5988b83836fb..288185e24762 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -70,6 +70,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) ptes >> 10, pmds >> 10, swap << (PAGE_SHIFT-10)); + hugetlb_report_usage(m, mm); } unsigned long task_vsize(struct mm_struct *mm) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5e35379f58a5..685c262e0be8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -483,6 +483,17 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, #define hugepages_supported() (HPAGE_SHIFT != 0) #endif +void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm); + +static inline void hugetlb_count_add(long l, struct mm_struct *mm) +{ + atomic_long_add(l, &mm->hugetlb_usage); +} + +static inline void hugetlb_count_sub(long l, struct mm_struct *mm) +{ + atomic_long_sub(l, &mm->hugetlb_usage); +} #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page(v, a, r) NULL @@ -519,6 +530,14 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, { return &mm->page_table_lock; } + +static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m) +{ +} + +static inline void hugetlb_count_sub(long l, struct mm_struct *mm) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3d6baa7d4534..0a85da25a822 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -486,6 +486,9 @@ struct mm_struct { /* address of the bounds directory */ void __user *bd_addr; #endif +#ifdef CONFIG_HUGETLB_PAGE + atomic_long_t hugetlb_usage; +#endif }; static inline void mm_init_cpumask(struct mm_struct *mm) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9cc773483624..abfbe8ca3323 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2790,6 +2790,12 @@ void hugetlb_show_meminfo(void) 1UL << (huge_page_order(h) + PAGE_SHIFT - 10)); } +void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm) +{ + seq_printf(m, "HugetlbPages:\t%8lu kB\n", + atomic_long_read(&mm->hugetlb_usage) << (PAGE_SHIFT - 10)); +} + /* Return the number pages of memory we physically have, in PAGE_SIZE units. 
*/ unsigned long hugetlb_total_pages(void) { @@ -3025,6 +3031,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, get_page(ptepage); page_dup_rmap(ptepage); set_huge_pte_at(dst, addr, dst_pte, entry); + hugetlb_count_add(pages_per_huge_page(h), dst); } spin_unlock(src_ptl); spin_unlock(dst_ptl); @@ -3105,6 +3112,7 @@ again: if (huge_pte_dirty(pte)) set_page_dirty(page); + hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page); force_flush = !__tlb_remove_page(tlb, page); if (force_flush) { @@ -3509,6 +3517,7 @@ retry: && (vma->vm_flags & VM_SHARED))); set_huge_pte_at(mm, address, ptep, new_pte); + hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); diff --git a/mm/rmap.c b/mm/rmap.c index f5b5c1f3dcd7..d40e7aefb888 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1352,7 +1352,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, update_hiwater_rss(mm); if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { - if (!PageHuge(page)) { + if (PageHuge(page)) { + hugetlb_count_sub(1 << compound_order(page), mm); + } else { if (PageAnon(page)) dec_mm_counter(mm, MM_ANONPAGES); else -- cgit v1.2.3 From aa750fd71c242dba02ee2034e15fbd7d0cdb2461 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Thu, 5 Nov 2015 18:47:23 -0800 Subject: mm/filemap.c: make global sync not clear error status of individual inodes filemap_fdatawait() is a function to wait for on-going writeback to complete but also consume and clear error status of the mapping set during writeback. The latter functionality is critical for applications to detect writeback error with system calls like fsync(2)/fdatasync(2). However filemap_fdatawait() is also used by sync(2) or FIFREEZE ioctl, which don't check error status of individual mappings. As a result, fsync() may not be able to detect writeback error if events happen in the following order: Application System admin ---------------------------------------------------------- write data on page cache Run sync command writeback completes with error filemap_fdatawait() clears error fsync returns success (but the data is not on disk) This patch adds filemap_fdatawait_keep_errors() for call sites where writeback error is not handled so that they don't clear error status. Signed-off-by: Jun'ichi Nomura Acked-by: Andi Kleen Reviewed-by: Tejun Heo Cc: Fengguang Wu Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 7 +++++- fs/sync.c | 7 +++++- include/linux/fs.h | 1 + mm/filemap.c | 67 +++++++++++++++++++++++++++++++++++++++++++----------- 4 files changed, 67 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7378169e90be..206a68b1db1a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2149,7 +2149,12 @@ static void wait_sb_inodes(struct super_block *sb) iput(old_inode); old_inode = inode; - filemap_fdatawait(mapping); + /* + * We keep the error status of individual mapping so that + * applications can catch the writeback error using fsync(2). + * See filemap_fdatawait_keep_errors() for details. 
+ */ + filemap_fdatawait_keep_errors(mapping); cond_resched(); diff --git a/fs/sync.c b/fs/sync.c index fbc98ee62044..4ec430ae2b0d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -86,7 +86,12 @@ static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) static void fdatawait_one_bdev(struct block_device *bdev, void *arg) { - filemap_fdatawait(bdev->bd_inode->i_mapping); + /* + * We keep the error status of individual mapping so that + * applications can catch the writeback error using fsync(2). + * See filemap_fdatawait_keep_errors() for details. + */ + filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 72d8a844c692..9355f377fd46 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2422,6 +2422,7 @@ extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); extern int filemap_fdatawait(struct address_space *); +extern void filemap_fdatawait_keep_errors(struct address_space *); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); diff --git a/mm/filemap.c b/mm/filemap.c index 1fe962b49f31..884766da1165 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -331,23 +331,14 @@ int filemap_flush(struct address_space *mapping) } EXPORT_SYMBOL(filemap_flush); -/** - * filemap_fdatawait_range - wait for writeback to complete - * @mapping: address space structure to wait for - * @start_byte: offset in bytes where the range starts - * @end_byte: offset in bytes where the range ends (inclusive) - * - * Walk the list of under-writeback pages of the given address space - * in the given range and wait for all of them. - */ -int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, - loff_t end_byte) +static int __filemap_fdatawait_range(struct address_space *mapping, + loff_t start_byte, loff_t end_byte) { pgoff_t index = start_byte >> PAGE_CACHE_SHIFT; pgoff_t end = end_byte >> PAGE_CACHE_SHIFT; struct pagevec pvec; int nr_pages; - int ret2, ret = 0; + int ret = 0; if (end_byte < start_byte) goto out; @@ -374,6 +365,29 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, cond_resched(); } out: + return ret; +} + +/** + * filemap_fdatawait_range - wait for writeback to complete + * @mapping: address space structure to wait for + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Walk the list of under-writeback pages of the given address space + * in the given range and wait for all of them. Check error status of + * the address space and return it. + * + * Since the error status of the address space is cleared by this function, + * callers are responsible for checking the return value and handling and/or + * reporting the error. + */ +int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, + loff_t end_byte) +{ + int ret, ret2; + + ret = __filemap_fdatawait_range(mapping, start_byte, end_byte); ret2 = filemap_check_errors(mapping); if (!ret) ret = ret2; @@ -382,12 +396,39 @@ out: } EXPORT_SYMBOL(filemap_fdatawait_range); +/** + * filemap_fdatawait_keep_errors - wait for writeback without clearing errors + * @mapping: address space structure to wait for + * + * Walk the list of under-writeback pages of the given address space + * and wait for all of them. 
Unlike filemap_fdatawait(), this function + * does not clear error status of the address space. + * + * Use this function if callers don't handle errors themselves. Expected + * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2), + * fsfreeze(8) + */ +void filemap_fdatawait_keep_errors(struct address_space *mapping) +{ + loff_t i_size = i_size_read(mapping->host); + + if (i_size == 0) + return; + + __filemap_fdatawait_range(mapping, 0, i_size - 1); +} + /** * filemap_fdatawait - wait for all under-writeback pages to complete * @mapping: address space structure to wait for * * Walk the list of under-writeback pages of the given address space - * and wait for all of them. + * and wait for all of them. Check error status of the address space + * and return it. + * + * Since the error status of the address space is cleared by this function, + * callers are responsible for checking the return value and handling and/or + * reporting the error. */ int filemap_fdatawait(struct address_space *mapping) { -- cgit v1.2.3 From 13308ca9ef9acb325b52bd8562639b5844f3cbf2 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 5 Nov 2015 18:47:40 -0800 Subject: mm/memcontrol: make mem_cgroup_inactive_anon_is_low() return bool Make mem_cgroup_inactive_anon_is_low return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 641bb60dd7db..4142f94822ea 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -382,7 +382,7 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) return mz->lru_size[lru]; } -static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) +static inline bool mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { unsigned long inactive_ratio; unsigned long inactive; @@ -585,10 +585,10 @@ static inline bool mem_cgroup_disabled(void) return true; } -static inline int +static inline bool mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { - return 1; + return true; } static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) -- cgit v1.2.3 From fa6c7b46aaa0cc00846703e8c0ec1e1636ff25ba Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 5 Nov 2015 18:47:56 -0800 Subject: mm, compaction: export tracepoints status strings to userspace Some compaction tracepoints convert the integer return values to strings using the compaction_status_string array. This works for in-kernel printing, but not userspace trace printing of raw captured trace such as via trace-cmd report. This patch converts the private array to appropriate tracepoint macros that result in proper userspace support. 
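The EM()/EMe() construction used here is a classic X-macro: the list is written once and expanded twice, so the enum and the string table can never drift apart. Below is a self-contained sketch of the idea, without the TRACE_DEFINE_ENUM plumbing and with a shortened value list.

#include <stdio.h>

#define COMPACTION_STATUS                       \
        EM(COMPACT_DEFERRED,    "deferred")     \
        EM(COMPACT_SKIPPED,     "skipped")      \
        EMe(COMPACT_PARTIAL,    "partial")

/* first expansion: the enum itself */
#define EM(a, b)        a,
#define EMe(a, b)       a
enum compact_result { COMPACTION_STATUS };
#undef EM
#undef EMe

/* second expansion: a matching value/name table */
#define EM(a, b)        { a, b },
#define EMe(a, b)       { a, b }
static const struct {
        int val;
        const char *name;
} names[] = { COMPACTION_STATUS };

int main(void)
{
        for (unsigned int i = 0; i < sizeof(names) / sizeof(names[0]); i++)
                printf("%d -> %s\n", names[i].val, names[i].name);
        return 0;
}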
trace-cmd output before: transhuge-stres-4235 [000] 453.149280: mm_compaction_finished: node=0 zone=ffffffff81815d7a order=9 ret= after: transhuge-stres-4235 [000] 453.149280: mm_compaction_finished: node=0 zone=ffffffff81815d7a order=9 ret=partial Signed-off-by: Vlastimil Babka Reviewed-by: Steven Rostedt Cc: Joonsoo Kim Cc: Ingo Molnar Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 2 +- include/trace/events/compaction.h | 33 +++++++++++++++++++++++++++++++-- mm/compaction.c | 11 ----------- 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index aa8f61cf3a19..f14ba989092f 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -15,7 +15,7 @@ /* For more detailed tracepoint output */ #define COMPACT_NO_SUITABLE_PAGE 5 #define COMPACT_NOT_SUITABLE_ZONE 6 -/* When adding new state, please change compaction_status_string, too */ +/* When adding new states, please adjust include/trace/events/compaction.h */ /* Used to signal whether compaction detected need_sched() or lock contention */ /* No contention detected */ diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 9a6a3fe0fb51..1275a555199a 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -9,6 +9,35 @@ #include #include +#define COMPACTION_STATUS \ + EM( COMPACT_DEFERRED, "deferred") \ + EM( COMPACT_SKIPPED, "skipped") \ + EM( COMPACT_CONTINUE, "continue") \ + EM( COMPACT_PARTIAL, "partial") \ + EM( COMPACT_COMPLETE, "complete") \ + EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \ + EMe(COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") + +/* + * First define the enums in the above macros to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +COMPACTION_STATUS + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} + DECLARE_EVENT_CLASS(mm_compaction_isolate_template, TP_PROTO( @@ -161,7 +190,7 @@ TRACE_EVENT(mm_compaction_end, __entry->free_pfn, __entry->zone_end, __entry->sync ? 
"sync" : "async", - compaction_status_string[__entry->status]) + __print_symbolic(__entry->status, COMPACTION_STATUS)) ); TRACE_EVENT(mm_compaction_try_to_compact_pages, @@ -217,7 +246,7 @@ DECLARE_EVENT_CLASS(mm_compaction_suitable_template, __entry->nid, __entry->name, __entry->order, - compaction_status_string[__entry->ret]) + __print_symbolic(__entry->ret, COMPACTION_STATUS)) ); DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_finished, diff --git a/mm/compaction.c b/mm/compaction.c index a8e659398fdf..a5849c442a57 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -35,17 +35,6 @@ static inline void count_compact_events(enum vm_event_item item, long delta) #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA -#ifdef CONFIG_TRACEPOINTS -static const char *const compaction_status_string[] = { - "deferred", - "skipped", - "continue", - "partial", - "complete", - "no_suitable_page", - "not_suitable_zone", -}; -#endif #define CREATE_TRACE_POINTS #include -- cgit v1.2.3 From 2d1e10412c2388ff9b6afc60536eaa195a419289 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 5 Nov 2015 18:48:02 -0800 Subject: mm, compaction: distinguish contended status in tracepoints Compaction returns prematurely with COMPACT_PARTIAL when contended or has fatal signal pending. This is ok for the callers, but might be misleading in the traces, as the usual reason to return COMPACT_PARTIAL is that we think the allocation should succeed. After this patch we distinguish the premature ending condition in the mm_compaction_finished and mm_compaction_end tracepoints. The contended status covers the following reasons: - lock contention or need_resched() detected in async compaction - fatal signal pending - too many pages isolated in the zone (only for async compaction) Further distinguishing the exact reason seems unnecessary for now. 
Signed-off-by: Vlastimil Babka Cc: Joonsoo Kim Cc: Mel Gorman Cc: David Rientjes Cc: Steven Rostedt Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 1 + include/trace/events/compaction.h | 3 ++- mm/compaction.c | 9 ++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index f14ba989092f..4cd4ddf64cc7 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -15,6 +15,7 @@ /* For more detailed tracepoint output */ #define COMPACT_NO_SUITABLE_PAGE 5 #define COMPACT_NOT_SUITABLE_ZONE 6 +#define COMPACT_CONTENDED 7 /* When adding new states, please adjust include/trace/events/compaction.h */ /* Used to signal whether compaction detected need_sched() or lock contention */ diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index a43000d76688..c92d1e1cbad9 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -16,7 +16,8 @@ EM( COMPACT_PARTIAL, "partial") \ EM( COMPACT_COMPLETE, "complete") \ EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \ - EMe(COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") + EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \ + EMe(COMPACT_CONTENDED, "contended") #ifdef CONFIG_ZONE_DMA #define IFDEF_ZONE_DMA(X) X diff --git a/mm/compaction.c b/mm/compaction.c index a5849c442a57..de3e1e71cd9f 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1202,7 +1202,7 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc, unsigned long watermark; if (cc->contended || fatal_signal_pending(current)) - return COMPACT_PARTIAL; + return COMPACT_CONTENDED; /* Compaction run completes if the migrate and free scanner meet */ if (compact_scanners_met(cc)) { @@ -1393,7 +1393,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) switch (isolate_migratepages(zone, cc)) { case ISOLATE_ABORT: - ret = COMPACT_PARTIAL; + ret = COMPACT_CONTENDED; putback_movable_pages(&cc->migratepages); cc->nr_migratepages = 0; goto out; @@ -1424,7 +1424,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) * and we want compact_finished() to detect it */ if (err == -ENOMEM && !compact_scanners_met(cc)) { - ret = COMPACT_PARTIAL; + ret = COMPACT_CONTENDED; goto out; } } @@ -1477,6 +1477,9 @@ out: trace_mm_compaction_end(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn, sync, ret); + if (ret == COMPACT_CONTENDED) + ret = COMPACT_PARTIAL; + return ret; } -- cgit v1.2.3 From da39da3a54fed88e29024f2f1f6cd7357cd03a44 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Nov 2015 18:48:05 -0800 Subject: mm, oom: remove task_lock protecting comm printing The oom killer takes task_lock() in a couple of places solely to protect printing the task's comm. A process's comm, including current's comm, may change due to /proc/pid/comm or PR_SET_NAME. The comm will always be NULL-terminated, so the worst race scenario would only be during update. We can tolerate a comm being printed that is in the middle of an update to avoid taking the lock. Other locations in the kernel have already dropped task_lock() when printing comm, so this is consistent. 
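For context, comm really can change under a printer's feet: any thread may rename itself at runtime, as the userspace demo below shows. The kernel keeps the 16-byte buffer NUL-terminated throughout an update, which is why a torn read is tolerable.

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
        char name[16];                  /* TASK_COMM_LEN */

        prctl(PR_SET_NAME, "oom-demo"); /* comm changes mid-flight */
        prctl(PR_GET_NAME, name);
        printf("comm is now \"%s\"\n", name);
        return 0;
}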
Signed-off-by: David Rientjes Suggested-by: Oleg Nesterov Cc: Michal Hocko Cc: Vladimir Davydov Cc: Sergey Senozhatsky Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 4 ++-- kernel/cpuset.c | 14 +++++++------- mm/oom_kill.c | 8 +------- 3 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1b357997cac5..5a1311942358 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -93,7 +93,7 @@ extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); -extern void cpuset_print_task_mems_allowed(struct task_struct *p); +extern void cpuset_print_current_mems_allowed(void); /* * read_mems_allowed_begin is required when making decisions involving @@ -219,7 +219,7 @@ static inline void rebuild_sched_domains(void) partition_sched_domains(1, NULL, NULL); } -static inline void cpuset_print_task_mems_allowed(struct task_struct *p) +static inline void cpuset_print_current_mems_allowed(void) { } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f0acff0f66c9..9ef59a37c190 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2598,22 +2598,22 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, } /** - * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed - * @tsk: pointer to task_struct of some task. + * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed * - * Description: Prints @task's name, cpuset name, and cached copy of its + * Description: Prints current's name, cpuset name, and cached copy of its * mems_allowed to the kernel log. */ -void cpuset_print_task_mems_allowed(struct task_struct *tsk) +void cpuset_print_current_mems_allowed(void) { struct cgroup *cgrp; rcu_read_lock(); - cgrp = task_cs(tsk)->css.cgroup; - pr_info("%s cpuset=", tsk->comm); + cgrp = task_cs(current)->css.cgroup; + pr_info("%s cpuset=", current->comm); pr_cont_cgroup_name(cgrp); - pr_cont(" mems_allowed=%*pbl\n", nodemask_pr_args(&tsk->mems_allowed)); + pr_cont(" mems_allowed=%*pbl\n", + nodemask_pr_args(¤t->mems_allowed)); rcu_read_unlock(); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c170d9f0a158..58f3d2748ced 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -377,13 +377,11 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) static void dump_header(struct oom_control *oc, struct task_struct *p, struct mem_cgroup *memcg) { - task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " "oom_score_adj=%hd\n", current->comm, oc->gfp_mask, oc->order, current->signal->oom_score_adj); - cpuset_print_task_mems_allowed(current); - task_unlock(current); + cpuset_print_current_mems_allowed(); dump_stack(); if (memcg) mem_cgroup_print_oom_info(memcg, p); @@ -509,10 +507,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, if (__ratelimit(&oom_rs)) dump_header(oc, p, memcg); - task_lock(p); pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", message, task_pid_nr(p), p->comm, points); - task_unlock(p); /* * If any of p's children has a different mm and is eligible for kill, @@ -586,10 +582,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, if (fatal_signal_pending(p)) continue; - task_lock(p); /* Protect ->comm from prctl() */ pr_info("Kill process %d (%s) sharing same memory\n", task_pid_nr(p), p->comm); - task_unlock(p); do_send_sig_info(SIGKILL, SEND_SIG_FORCED, 
p, true); } rcu_read_unlock(); -- cgit v1.2.3 From 3ca65c19ddbb45f504edf92fe7126ecc94d56e36 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Nov 2015 18:48:29 -0800 Subject: mm: optimize PageHighMem() check This came up when implementing HIGHMEM/PAE40 for ARC. The kmap() / kmap_atomic() generated code seemed needlessly bloated due to the way the PageHighMem() macro is implemented. It derives the exact zone for the page and then does pointer subtraction with the first zone to infer the zone_type. The pointer arithmetic in turn generates the code bloat. PageHighMem(page) is_highmem(page_zone(page)) zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones Instead, use is_highmem_idx() to work on the zone_type available in the page flags ----- Before ----- 80756348: mov_s r13,r0 8075634a: ld_s r2,[r13,0] 8075634c: lsr_s r2,r2,30 8075634e: mpy r2,r2,0x2a4 80756352: add_s r2,r2,0x80aef880 80756358: ld_s r3,[r2,28] 8075635a: sub_s r2,r2,r3 8075635c: breq r2,0x2a4,80756378 80756364: breq r2,0x548,80756378 ----- After ----- 80756330: mov_s r13,r0 80756332: ld_s r2,[r13,0] 80756334: lsr_s r2,r2,30 80756336: sub_s r2,r2,1 80756338: brlo r2,2,80756348 For x86 defconfig build (32 bit only) it saves around 900 bytes. For ARC defconfig with HIGHMEM, it saved around 2K bytes. ---->8------- ./scripts/bloat-o-meter x86/vmlinux-defconfig-pre x86/vmlinux-defconfig-post add/remove: 0/0 grow/shrink: 0/36 up/down: 0/-934 (-934) function old new delta saveable_page 162 154 -8 saveable_highmem_page 154 146 -8 skb_gro_reset_offset 147 131 -16 ... ... __change_page_attr_set_clr 1715 1678 -37 setup_data_read 434 394 -40 mon_bin_event 1967 1927 -40 swsusp_save 1148 1105 -43 _set_pages_array 549 493 -56 ---->8------- (The Before/After disassembly above is for ARC kmap().) Signed-off-by: Vineet Gupta Acked-by: Michal Hocko Cc: Naoya Horiguchi Cc: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Michal Hocko Cc: Jennifer Herbert Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 416509e26d6d..a525e5067484 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -256,7 +256,7 @@ PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) * Must use a macro here due to header dependency issues. page_zone() is not * available at this point. */ -#define PageHighMem(__p) is_highmem(page_zone(__p)) +#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #else PAGEFLAG_FALSE(HighMem) #endif -- cgit v1.2.3 From c2d42c16ad83006a706d83e51a7268db04af733a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 Nov 2015 18:48:43 -0800 Subject: mm/vmstat.c: uninline node_page_state() With x86_64 (config http://ozlabs.org/~akpm/config-akpm2.txt) and old gcc (4.4.4), drivers/base/node.c:node_read_meminfo() is using 2344 bytes of stack. Uninlining node_page_state() reduces this to 440 bytes. The stack consumption issue is fixed by newer gcc (4.8.4); however, with that compiler this patch reduces the node.o text size from 7314 bytes to 4578.
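The trade-off generalizes beyond this file; here is a userspace sketch (illustrative arrays and names) of how forcing one out-of-line copy of a multi-zone summation keeps each callsite, and its stack frame, small.

#include <stdio.h>

#define NR_ZONES 4
static long stats[NR_ZONES][8];         /* illustrative per-zone counters */

/* one shared out-of-line copy instead of a per-callsite expansion */
__attribute__((noinline))
static long node_sum(int item)
{
        long v = 0;

        for (int z = 0; z < NR_ZONES; z++)
                v += stats[z][item];
        return v;
}

int main(void)
{
        stats[0][2] = 5;
        stats[3][2] = 7;
        printf("item 2 total: %ld\n", node_sum(2));
        return 0;
}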
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 24 +----------------------- mm/vmstat.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 82e7db7f7100..49dfe40b3673 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -161,30 +161,8 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, } #ifdef CONFIG_NUMA -/* - * Determine the per node value of a stat item. This function - * is called frequently in a NUMA machine, so try to be as - * frugal as possible. - */ -static inline unsigned long node_page_state(int node, - enum zone_stat_item item) -{ - struct zone *zones = NODE_DATA(node)->node_zones; - - return -#ifdef CONFIG_ZONE_DMA - zone_page_state(&zones[ZONE_DMA], item) + -#endif -#ifdef CONFIG_ZONE_DMA32 - zone_page_state(&zones[ZONE_DMA32], item) + -#endif -#ifdef CONFIG_HIGHMEM - zone_page_state(&zones[ZONE_HIGHMEM], item) + -#endif - zone_page_state(&zones[ZONE_NORMAL], item) + - zone_page_state(&zones[ZONE_MOVABLE], item); -} +extern unsigned long node_page_state(int node, enum zone_stat_item item); extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp); #else diff --git a/mm/vmstat.c b/mm/vmstat.c index fbf14485a049..ffcb4f58bf3e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -591,6 +591,28 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags) else __inc_zone_state(z, NUMA_OTHER); } + +/* + * Determine the per node value of a stat item. + */ +unsigned long node_page_state(int node, enum zone_stat_item item) +{ + struct zone *zones = NODE_DATA(node)->node_zones; + + return +#ifdef CONFIG_ZONE_DMA + zone_page_state(&zones[ZONE_DMA], item) + +#endif +#ifdef CONFIG_ZONE_DMA32 + zone_page_state(&zones[ZONE_DMA32], item) + +#endif +#ifdef CONFIG_HIGHMEM + zone_page_state(&zones[ZONE_HIGHMEM], item) + +#endif + zone_page_state(&zones[ZONE_NORMAL], item) + + zone_page_state(&zones[ZONE_MOVABLE], item); +} + #endif #ifdef CONFIG_COMPACTION -- cgit v1.2.3 From f7ae3a95eab4e6766768cef664d2d429f53bd5f4 Mon Sep 17 00:00:00 2001 From: yalin wang Date: Thu, 5 Nov 2015 18:48:50 -0800 Subject: include/linux/vm_event_item.h: change HIGHMEM_ZONE macro definition Change HIGHMEM_ZONE to be the same as the DMA_ZONE macro. Signed-off-by: yalin wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9246d32dc973..e623d392db0c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -14,12 +14,12 @@ #endif #ifdef CONFIG_HIGHMEM -#define HIGHMEM_ZONE(xx) , xx##_HIGH +#define HIGHMEM_ZONE(xx) xx##_HIGH, #else #define HIGHMEM_ZONE(xx) #endif -#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE +#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), -- cgit v1.2.3 From d05e83a6f861ad02c2fcba75d4c4cfe49e3bc90f Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:48:59 -0800 Subject: memcg: simplify charging kmem pages Charging kmem pages proceeds in two steps. 
First, we try to charge the allocation size to the memcg the current task belongs to; then we allocate a page and "commit" the charge, storing the pointer to the memcg in the page struct. Such a design looks overcomplicated, because there is not much sense in trying to charge the allocation before actually allocating a page: we won't be able to consume much memory over the limit even if we charge after doing the actual allocation; besides, we already charge user pages post factum, so being pedantic with kmem pages just looks pointless. So this patch simplifies the design by merging the "charge" and the "commit" steps into the same function, which takes the allocated page. Also, rename the charge and uncharge methods to memcg_kmem_charge and memcg_kmem_uncharge and make the charge method return an error code instead of bool, to conform to mem_cgroup_try_charge. Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 69 +++++++++++++--------------------------------- mm/memcontrol.c | 39 +++----------------------- mm/page_alloc.c | 18 ++++++------ 3 files changed, 32 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4142f94822ea..c95246627f87 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -752,11 +752,8 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) * conditions, but because they are pretty simple, they are expected to be * fast. */ -bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, - int order); -void __memcg_kmem_commit_charge(struct page *page, - struct mem_cgroup *memcg, int order); -void __memcg_kmem_uncharge_pages(struct page *page, int order); +int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); +void __memcg_kmem_uncharge(struct page *page, int order); /* * helper for acessing a memcg's index. It will be used as an index in the @@ -789,52 +786,30 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) } /** - * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. - * @gfp: the gfp allocation flags. - * @memcg: a pointer to the memcg this was charged against. - * @order: allocation order. + * memcg_kmem_charge: charge a kmem page + * @page: page to charge + * @gfp: reclaim mode + * @order: allocation order * - * returns true if the memcg where the current task belongs can hold this - * allocation. - * - * We return true automatically if this allocation is not to be accounted to - * any memcg. + * Returns 0 on success, an error code on failure. */ -static inline bool -memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +static __always_inline int memcg_kmem_charge(struct page *page, + gfp_t gfp, int order) { if (__memcg_kmem_bypass(gfp)) - return true; - return __memcg_kmem_newpage_charge(gfp, memcg, order); + return 0; + return __memcg_kmem_charge(page, gfp, order); } /** - * memcg_kmem_uncharge_pages: uncharge pages from memcg - * @page: pointer to struct page being freed - * @order: allocation order.
+ * memcg_kmem_uncharge: uncharge a kmem page + * @page: page to uncharge + * @order: allocation order */ -static inline void -memcg_kmem_uncharge_pages(struct page *page, int order) +static __always_inline void memcg_kmem_uncharge(struct page *page, int order) { if (memcg_kmem_enabled()) - __memcg_kmem_uncharge_pages(page, order); -} - -/** - * memcg_kmem_commit_charge: embeds correct memcg in a page - * @page: pointer to struct page recently allocated - * @memcg: the memcg structure we charged against - * @order: allocation order. - * - * Needs to be called after memcg_kmem_newpage_charge, regardless of success or - * failure of the allocation. if @page is NULL, this function will revert the - * charges. Otherwise, it will commit @page to @memcg. - */ -static inline void -memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) -{ - if (memcg_kmem_enabled() && memcg) - __memcg_kmem_commit_charge(page, memcg, order); + __memcg_kmem_uncharge(page, order); } /** @@ -878,18 +853,12 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) return false; } -static inline bool -memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) -{ - return true; -} - -static inline void memcg_kmem_uncharge_pages(struct page *page, int order) +static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { + return 0; } -static inline void -memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) +static inline void memcg_kmem_uncharge(struct page *page, int order) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a1c05ff5892d..e249279f60c1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2404,57 +2404,26 @@ void __memcg_kmem_put_cache(struct kmem_cache *cachep) css_put(&cachep->memcg_params.memcg->css); } -/* - * We need to verify if the allocation against current->mm->owner's memcg is - * possible for the given order. But the page is not allocated yet, so we'll - * need a further commit step to do the final arrangements. - * - * It is possible for the task to switch cgroups in this mean time, so at - * commit time, we can't rely on task conversion any longer. We'll then use - * the handle argument to return to the caller which cgroup we should commit - * against. We could also return the memcg directly and avoid the pointer - * passing, but a boolean return value gives better semantics considering - * the compiled-out case as well. - * - * Returning true means the allocation is possible. - */ -bool -__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order) +int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { struct mem_cgroup *memcg; int ret; - *_memcg = NULL; - memcg = get_mem_cgroup_from_mm(current->mm); if (!memcg_kmem_is_active(memcg)) { css_put(&memcg->css); - return true; + return 0; } ret = memcg_charge_kmem(memcg, gfp, 1 << order); - if (!ret) - *_memcg = memcg; css_put(&memcg->css); - return (ret == 0); -} - -void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, - int order) -{ - VM_BUG_ON(mem_cgroup_is_root(memcg)); - - /* The page allocation failed. 
Revert */ - if (!page) { - memcg_uncharge_kmem(memcg, 1 << order); - return; - } page->mem_cgroup = memcg; + return ret; } -void __memcg_kmem_uncharge_pages(struct page *page, int order) +void __memcg_kmem_uncharge(struct page *page, int order) { struct mem_cgroup *memcg = page->mem_cgroup; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 06e62300d627..446bb36ee59d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3428,24 +3428,24 @@ EXPORT_SYMBOL(__free_page_frag); struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order) { struct page *page; - struct mem_cgroup *memcg = NULL; - if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) - return NULL; page = alloc_pages(gfp_mask, order); - memcg_kmem_commit_charge(page, memcg, order); + if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) { + __free_pages(page, order); + page = NULL; + } return page; } struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { struct page *page; - struct mem_cgroup *memcg = NULL; - if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) - return NULL; page = alloc_pages_node(nid, gfp_mask, order); - memcg_kmem_commit_charge(page, memcg, order); + if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) { + __free_pages(page, order); + page = NULL; + } return page; } @@ -3455,7 +3455,7 @@ struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order) */ void __free_kmem_pages(struct page *page, unsigned int order) { - memcg_kmem_uncharge_pages(page, order); + memcg_kmem_uncharge(page, order); __free_pages(page, order); } -- cgit v1.2.3 From f3ccb2c42297757d2e9b820ad37960462df7b7c1 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:49:01 -0800 Subject: memcg: unify slab and other kmem pages charging We have memcg_kmem_charge and memcg_kmem_uncharge methods for charging and uncharging kmem pages to memcg, but currently they are not used for charging slab pages (i.e. they are only used for charging pages allocated with alloc_kmem_pages). The only reason why the slab subsystem uses special helpers, memcg_charge_slab and memcg_uncharge_slab, is that it needs to charge to the memcg of the kmem cache while memcg_charge_kmem charges to the memcg that the current task belongs to. To remove this disparity, this patch adds an extra argument to __memcg_kmem_charge that can be a pointer to a memcg or NULL. If it is not NULL, the function tries to charge to the memcg it points to; otherwise it charges to the current context. Next, it makes the slab subsystem use this function to charge slab pages. Since the memcg_charge_kmem and memcg_uncharge_kmem helpers are now used only in __memcg_kmem_charge and __memcg_kmem_uncharge, they are inlined. Since __memcg_kmem_charge stores a pointer to the memcg in the page struct, we don't need memcg_uncharge_slab anymore and can use free_kmem_pages. Besides, one can now detect which memcg a slab page belongs to by reading /proc/kpagecgroup. Note, this patch switches slab to a charge-after-alloc design. Since this design is already used for all other memcg charges, it should not make any difference.
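After this change every kmem allocation site, slab included, follows one shape; a condensed sketch of the pattern (the wrapper name is hypothetical, the calls mirror the alloc_kmem_pages() hunk above and the slab/slub hunks below):

static struct page *alloc_pages_charged(gfp_t gfp, unsigned int order)
{
	struct page *page = alloc_pages(gfp, order);

	/* Charge after the allocation; on failure undo the allocation
	 * instead of trying to predict the outcome up front. */
	if (page && memcg_kmem_charge(page, gfp, order)) {
		__free_pages(page, order);
		page = NULL;
	}
	return page;
}

As the message above notes, charging after the fact can only overshoot the limit by a single allocation, and it matches how user pages are already charged.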
[hannes@cmpxchg.org: better to have an outer function than a magic parameter for the memcg lookup] Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Signed-off-by: Johannes Weiner Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++-- mm/memcontrol.c | 71 +++++++++++++++++++++------------------------- mm/slab.c | 12 ++++---- mm/slab.h | 24 +++++----------- mm/slub.c | 12 ++++---- 5 files changed, 53 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c95246627f87..0ba4c86d3b40 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -752,6 +752,8 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) * conditions, but because they are pretty simple, they are expected to be * fast. */ +int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, + struct mem_cgroup *memcg); int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge(struct page *page, int order); @@ -770,10 +772,6 @@ void __memcg_kmem_put_cache(struct kmem_cache *cachep); struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr); -int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, - unsigned long nr_pages); -void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); - static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e249279f60c1..9575cff544fa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2215,34 +2215,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, } #ifdef CONFIG_MEMCG_KMEM -int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, - unsigned long nr_pages) -{ - struct page_counter *counter; - int ret = 0; - - ret = page_counter_try_charge(&memcg->kmem, nr_pages, &counter); - if (ret < 0) - return ret; - - ret = try_charge(memcg, gfp, nr_pages); - if (ret) - page_counter_uncharge(&memcg->kmem, nr_pages); - - return ret; -} - -void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages) -{ - page_counter_uncharge(&memcg->memory, nr_pages); - if (do_swap_account) - page_counter_uncharge(&memcg->memsw, nr_pages); - - page_counter_uncharge(&memcg->kmem, nr_pages); - - css_put_many(&memcg->css, nr_pages); -} - static int memcg_alloc_cache_id(void) { int id, size; @@ -2404,36 +2376,59 @@ void __memcg_kmem_put_cache(struct kmem_cache *cachep) css_put(&cachep->memcg_params.memcg->css); } -int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, + struct mem_cgroup *memcg) { - struct mem_cgroup *memcg; - int ret; - - memcg = get_mem_cgroup_from_mm(current->mm); + unsigned int nr_pages = 1 << order; + struct page_counter *counter; + int ret = 0; - if (!memcg_kmem_is_active(memcg)) { - css_put(&memcg->css); + if (!memcg_kmem_is_active(memcg)) return 0; + + ret = page_counter_try_charge(&memcg->kmem, nr_pages, &counter); + if (ret) + return ret; + + ret = try_charge(memcg, gfp, nr_pages); + if (ret) { + page_counter_uncharge(&memcg->kmem, nr_pages); + return ret; } - ret = memcg_charge_kmem(memcg, gfp, 1 << order); + page->mem_cgroup = memcg; + return 0; +} + +int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +{ + struct mem_cgroup *memcg; + int ret; + + memcg = get_mem_cgroup_from_mm(current->mm); + ret = 
__memcg_kmem_charge_memcg(page, gfp, order, memcg); css_put(&memcg->css); - page->mem_cgroup = memcg; return ret; } void __memcg_kmem_uncharge(struct page *page, int order) { struct mem_cgroup *memcg = page->mem_cgroup; + unsigned int nr_pages = 1 << order; if (!memcg) return; VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page); - memcg_uncharge_kmem(memcg, 1 << order); + page_counter_uncharge(&memcg->kmem, nr_pages); + page_counter_uncharge(&memcg->memory, nr_pages); + if (do_swap_account) + page_counter_uncharge(&memcg->memsw, nr_pages); + page->mem_cgroup = NULL; + css_put_many(&memcg->css, nr_pages); } struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr) diff --git a/mm/slab.c b/mm/slab.c index 461935bab9ef..272e809404d5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1593,16 +1593,17 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, if (cachep->flags & SLAB_RECLAIM_ACCOUNT) flags |= __GFP_RECLAIMABLE; - if (memcg_charge_slab(cachep, flags, cachep->gfporder)) - return NULL; - page = __alloc_pages_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); if (!page) { - memcg_uncharge_slab(cachep, cachep->gfporder); slab_out_of_memory(cachep, flags, nodeid); return NULL; } + if (memcg_charge_slab(page, flags, cachep->gfporder, cachep)) { + __free_pages(page, cachep->gfporder); + return NULL; + } + /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ if (page_is_pfmemalloc(page)) pfmemalloc_active = true; @@ -1654,8 +1655,7 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; - __free_pages(page, cachep->gfporder); - memcg_uncharge_slab(cachep, cachep->gfporder); + __free_kmem_pages(page, cachep->gfporder); } static void kmem_rcu_free(struct rcu_head *head) diff --git a/mm/slab.h b/mm/slab.h index bf51a8d254cf..27492eb678f7 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -236,23 +236,16 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) return s->memcg_params.root_cache; } -static __always_inline int memcg_charge_slab(struct kmem_cache *s, - gfp_t gfp, int order) +static __always_inline int memcg_charge_slab(struct page *page, + gfp_t gfp, int order, + struct kmem_cache *s) { if (!memcg_kmem_enabled()) return 0; if (is_root_cache(s)) return 0; - return memcg_charge_kmem(s->memcg_params.memcg, gfp, 1 << order); -} - -static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order) -{ - if (!memcg_kmem_enabled()) - return; - if (is_root_cache(s)) - return; - memcg_uncharge_kmem(s->memcg_params.memcg, 1 << order); + return __memcg_kmem_charge_memcg(page, gfp, order, + s->memcg_params.memcg); } extern void slab_init_memcg_params(struct kmem_cache *); @@ -289,15 +282,12 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) return s; } -static inline int memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, int order) +static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order, + struct kmem_cache *s) { return 0; } -static inline void memcg_uncharge_slab(struct kmem_cache *s, int order) -{ -} - static inline void slab_init_memcg_params(struct kmem_cache *s) { } diff --git a/mm/slub.c b/mm/slub.c index e1bb147827ef..423dbe77d145 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1328,16 +1328,15 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, flags |= __GFP_NOTRACK; - if (memcg_charge_slab(s, flags, order)) - return NULL; - if (node == NUMA_NO_NODE) page = alloc_pages(flags, order); 
else page = __alloc_pages_node(node, flags, order); - if (!page) - memcg_uncharge_slab(s, order); + if (page && memcg_charge_slab(page, flags, order, s)) { + __free_pages(page, order); + page = NULL; + } return page; } @@ -1476,8 +1475,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) page_mapcount_reset(page); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; - __free_pages(page, order); - memcg_uncharge_slab(s, order); + __free_kmem_pages(page, order); } #define need_reserve_slab_rcu \ -- cgit v1.2.3 From df4065516b0dbfa35ac0e9b8124d441221c0a285 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:49:04 -0800 Subject: memcg: simplify and inline __mem_cgroup_from_kmem Before the previous patch ("memcg: unify slab and other kmem pages charging"), __mem_cgroup_from_kmem had to handle two types of kmem: slab pages, whose memcg is reachable through the kmem cache, and pages allocated with alloc_kmem_pages, which keep the memcg in the page struct. Now that both store it in the page struct, we can unify it. Since the function becomes tiny after that, we can fold it into mem_cgroup_from_kmem. [hughd@google.com: move mem_cgroup_from_kmem into list_lru.c] Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 --------------- mm/list_lru.c | 10 ++++++++++ mm/memcontrol.c | 18 ------------------ 3 files changed, 10 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0ba4c86d3b40..998311e0c70c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -770,8 +770,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); void __memcg_kmem_put_cache(struct kmem_cache *cachep); -struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr); - static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) @@ -830,13 +828,6 @@ static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) { if (memcg_kmem_enabled()) __memcg_kmem_put_cache(cachep); } - -static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr) -{ - if (!memcg_kmem_enabled()) - return NULL; - return __mem_cgroup_from_kmem(ptr); -} #else #define for_each_memcg_cache_index(_idx) \ for (; NULL; ) @@ -882,11 +873,5 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) static inline void memcg_kmem_put_cache(struct kmem_cache *cachep) { } - -static inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr) -{ - return NULL; -} #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ - diff --git a/mm/list_lru.c b/mm/list_lru.c index 28237476b055..afc71ea9a381 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -63,6 +63,16 @@ list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) return &nlru->lru; } +static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr) +{ + struct page *page; + + if (!memcg_kmem_enabled()) + return NULL; + page = virt_to_head_page(ptr); + return page->mem_cgroup; +} + static inline struct list_lru_one * list_lru_from_kmem(struct list_lru_node *nlru, void *ptr) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9575cff544fa..c0111cb667b5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2430,24 +2430,6 @@ void __memcg_kmem_uncharge(struct page *page, int order) page->mem_cgroup = NULL; css_put_many(&memcg->css, nr_pages); } -struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr) -{
- struct mem_cgroup *memcg = NULL; - struct kmem_cache *cachep; - struct page *page; - - page = virt_to_head_page(ptr); - if (PageSlab(page)) { - cachep = page->slab_cache; - if (!is_root_cache(cachep)) - memcg = cachep->memcg_params.memcg; - } else - /* page allocated by alloc_kmem_pages */ - memcg = page->mem_cgroup; - - return memcg; -} #endif /* CONFIG_MEMCG_KMEM */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -- cgit v1.2.3 From 706874e9096e9d468eed9c2b03c8374e806535f3 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:49:27 -0800 Subject: mm: do not inc NR_PAGETABLE if ptlock_init failed If ALLOC_SPLIT_PTLOCKS is defined, ptlock_init may fail, in which case we shouldn't increment NR_PAGETABLE. Since small allocations, such as ptlock, normally do not fail (currently they can fail if kmemcg is used though), this patch does not really fix anything and should be considered as a code cleanup. Signed-off-by: Vladimir Davydov Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa08f3cf0f22..3c258f8eb9ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1606,8 +1606,10 @@ static inline void pgtable_init(void) static inline bool pgtable_page_ctor(struct page *page) { + if (!ptlock_init(page)) + return false; inc_zone_page_state(page, NR_PAGETABLE); - return ptlock_init(page); + return true; } static inline void pgtable_page_dtor(struct page *page) -- cgit v1.2.3 From 45637bab30d6e7651737f51aa99417baef4d114a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 5 Nov 2015 18:49:40 -0800 Subject: mm: rename mem_cgroup_migrate to mem_cgroup_replace_page After v4.3's commit 0610c25daa3e ("memcg: fix dirty page migration") mem_cgroup_migrate() doesn't have much to offer in page migration: convert migrate_misplaced_transhuge_page() to set_page_memcg() instead. Then rename mem_cgroup_migrate() to mem_cgroup_replace_page(), since its remaining callers are replace_page_cache_page() and shmem_replace_page(): both of whom passed lrucare true, so just eliminate that argument. Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc: "Kirill A. 
Shutemov" Cc: Rik van Riel Cc: Vlastimil Babka Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Sasha Levin Cc: Dmitry Vyukov Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ++----- mm/filemap.c | 2 +- mm/memcontrol.c | 29 ++++++++--------------------- mm/migrate.c | 5 ++--- mm/shmem.c | 2 +- 5 files changed, 14 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 998311e0c70c..c06c70b4404e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -297,8 +297,7 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); void mem_cgroup_uncharge(struct page *page); void mem_cgroup_uncharge_list(struct list_head *page_list); -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, - bool lrucare); +void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); @@ -537,9 +536,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_migrate(struct page *oldpage, - struct page *newpage, - bool lrucare) +static inline void mem_cgroup_replace_page(struct page *old, struct page *new) { } diff --git a/mm/filemap.c b/mm/filemap.c index 884766da1165..58e04e26f996 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -551,7 +551,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irqrestore(&mapping->tree_lock, flags); mem_cgroup_end_page_stat(memcg); - mem_cgroup_migrate(old, new, true); + mem_cgroup_replace_page(old, new); radix_tree_preload_end(); if (freepage) freepage(old); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c0111cb667b5..a44494f3a965 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4531,9 +4531,8 @@ static int mem_cgroup_move_account(struct page *page, goto out; /* - * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup - * of its source page while we change it: page migration takes - * both pages off the LRU, but page cache replacement doesn't. + * Prevent mem_cgroup_replace_page() from looking at + * page->mem_cgroup of its source page while we change it. */ if (!trylock_page(page)) goto out; @@ -5495,7 +5494,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) } /** - * mem_cgroup_migrate - migrate a charge to another page + * mem_cgroup_replace_page - migrate a charge to another page * @oldpage: currently charged page * @newpage: page to transfer the charge to * @lrucare: either or both pages might be on the LRU already @@ -5504,16 +5503,13 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) * * Both pages must be locked, @newpage->mapping must be set up. 
*/ -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, - bool lrucare) +void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) { struct mem_cgroup *memcg; int isolated; VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage); - VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage); VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage), newpage); @@ -5525,25 +5521,16 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, if (newpage->mem_cgroup) return; - /* - * Swapcache readahead pages can get migrated before being - * charged, and migration from compaction can happen to an - * uncharged page when the PFN walker finds a page that - * reclaim just put back on the LRU but has not released yet. - */ + /* Swapcache readahead pages can get replaced before being charged */ memcg = oldpage->mem_cgroup; if (!memcg) return; - if (lrucare) - lock_page_lru(oldpage, &isolated); - + lock_page_lru(oldpage, &isolated); oldpage->mem_cgroup = NULL; + unlock_page_lru(oldpage, isolated); - if (lrucare) - unlock_page_lru(oldpage, isolated); - - commit_charge(newpage, memcg, lrucare); + commit_charge(newpage, memcg, true); } /* diff --git a/mm/migrate.c b/mm/migrate.c index ed72c499df8a..836e4108eaef 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1831,8 +1830,8 @@ fail_putback: } mlock_migrate_page(new_page, page); - mem_cgroup_migrate(page, new_page, false); - + set_page_memcg(new_page, page_memcg(page)); + set_page_memcg(page, NULL); page_remove_rmap(page); spin_unlock(ptl); diff --git a/mm/shmem.c b/mm/shmem.c index 48ce82926d93..6529226b3814 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1023,7 +1023,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ oldpage = newpage; } else { - mem_cgroup_migrate(oldpage, newpage, true); + mem_cgroup_replace_page(oldpage, newpage); lru_cache_add_anon(newpage); *pagep = newpage; } -- cgit v1.2.3 From 6071ca5201066f4b2a61cfb693dd186d6bc6e9f3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 5 Nov 2015 18:50:26 -0800 Subject: mm: page_counter: let page_counter_try_charge() return bool page_counter_try_charge() currently returns 0 on success and -ENOMEM on failure, which is surprising behavior given the function name. Make it follow the expected pattern of try_stuff() functions that return a boolean true to indicate success, or false for failure. 
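With the conversion, a caller reads like any other try_ predicate. A minimal sketch (variable names are illustrative; the calling convention is the one introduced below):

	struct page_counter *fail;

	if (!page_counter_try_charge(counter, nr_pages, &fail))
		return -ENOMEM;	/* 'fail' points at the counter that hit its limit */

The memcontrol.c hunk below shows the same inversion applied to the nested memsw/memory charge in try_charge().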
Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Linus Torvalds --- include/linux/page_counter.h | 6 +++--- mm/hugetlb_cgroup.c | 3 ++- mm/memcontrol.c | 11 +++++------ mm/page_counter.c | 14 +++++++------- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 17fa4f8de3a6..7e62920a3a94 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -36,9 +36,9 @@ static inline unsigned long page_counter_read(struct page_counter *counter) void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages); void page_counter_charge(struct page_counter *counter, unsigned long nr_pages); -int page_counter_try_charge(struct page_counter *counter, - unsigned long nr_pages, - struct page_counter **fail); +bool page_counter_try_charge(struct page_counter *counter, + unsigned long nr_pages, + struct page_counter **fail); void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); int page_counter_limit(struct page_counter *counter, unsigned long limit); int page_counter_memparse(const char *buf, const char *max, diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 6e0057439a46..33d59abe91f1 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -186,7 +186,8 @@ again: } rcu_read_unlock(); - ret = page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter); + if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter)) + ret = -ENOMEM; css_put(&h_cg->css); done: *ptr = h_cg; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 59b100fde35f..d47de73d7c36 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2016,8 +2016,8 @@ retry: return 0; if (!do_swap_account || - !page_counter_try_charge(&memcg->memsw, batch, &counter)) { - if (!page_counter_try_charge(&memcg->memory, batch, &counter)) + page_counter_try_charge(&memcg->memsw, batch, &counter)) { + if (page_counter_try_charge(&memcg->memory, batch, &counter)) goto done_restock; if (do_swap_account) page_counter_uncharge(&memcg->memsw, batch); @@ -2381,14 +2381,13 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, { unsigned int nr_pages = 1 << order; struct page_counter *counter; - int ret = 0; + int ret; if (!memcg_kmem_is_active(memcg)) return 0; - ret = page_counter_try_charge(&memcg->kmem, nr_pages, &counter); - if (ret) - return ret; + if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) + return -ENOMEM; ret = try_charge(memcg, gfp, nr_pages); if (ret) { diff --git a/mm/page_counter.c b/mm/page_counter.c index 11b4beda14ba..7c6a63d2c27f 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -56,12 +56,12 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) * @nr_pages: number of pages to charge * @fail: points first counter to hit its limit, if any * - * Returns 0 on success, or -ENOMEM and @fail if the counter or one of - * its ancestors has hit its configured limit. + * Returns %true on success, or %false and @fail if the counter or one + * of its ancestors has hit its configured limit. 
*/ -int page_counter_try_charge(struct page_counter *counter, - unsigned long nr_pages, - struct page_counter **fail) +bool page_counter_try_charge(struct page_counter *counter, + unsigned long nr_pages, + struct page_counter **fail) { struct page_counter *c; @@ -99,13 +99,13 @@ int page_counter_try_charge(struct page_counter *counter, if (new > c->watermark) c->watermark = new; } - return 0; + return true; failed: for (c = counter; c != *fail; c = c->parent) page_counter_cancel(c, nr_pages); - return -ENOMEM; + return false; } /** -- cgit v1.2.3 From 5ba97bf9d8754bd984e81c1061fcda682681939e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 5 Nov 2015 18:50:40 -0800 Subject: mm: remove refresh_cpu_vm_stats() definition for !SMP kernel refresh_cpu_vm_stats(int cpu) is no longer referenced by !SMP kernel since Linux 3.12. Signed-off-by: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 49dfe40b3673..5dbc8b0ee567 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -247,7 +247,6 @@ static inline void __dec_zone_page_state(struct page *page, #define set_pgdat_percpu_threshold(pgdat, callback) { } -static inline void refresh_cpu_vm_stats(int cpu) { } static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } -- cgit v1.2.3 From a8ca5d0ecbdde5cc3d7accacbd69968b0c98764e Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:33 -0800 Subject: mm: mlock: add new mlock system call With the refactored mlock code, introduce a new system call for mlock. The new call will allow the user to specify what lock states are being added. mlock2 is trivial at the moment, but a follow on patch will add a new mlock state making it useful. Signed-off-by: Eric B Munson Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Heiko Carstens Cc: Geert Uytterhoeven Cc: Catalin Marinas Cc: Stephen Rothwell Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Kirill A. 
Shutemov Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 4 +++- kernel/sys_ni.c | 1 + mm/mlock.c | 8 ++++++++ 6 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index caa2c712d1e7..f17705e1332c 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -382,3 +382,4 @@ 373 i386 shutdown sys_shutdown 374 i386 userfaultfd sys_userfaultfd 375 i386 membarrier sys_membarrier +376 i386 mlock2 sys_mlock2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 278842fdf1f6..314a90bfc09c 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -331,6 +331,7 @@ 322 64 execveat stub_execveat 323 common userfaultfd sys_userfaultfd 324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a460e2ef2843..a156b82dd14c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -887,4 +887,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, asmlinkage long sys_membarrier(int cmd, int flags); +asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); + #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index ee124009e12a..1324b0292ec2 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -713,9 +713,11 @@ __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) __SYSCALL(__NR_userfaultfd, sys_userfaultfd) #define __NR_membarrier 283 __SYSCALL(__NR_membarrier, sys_membarrier) +#define __NR_mlock2 284 +__SYSCALL(__NR_mlock2, sys_mlock2) #undef __NR_syscalls -#define __NR_syscalls 284 +#define __NR_syscalls 285 /* * All syscalls below here should go away really, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a02decf15583..0623787ec67a 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -194,6 +194,7 @@ cond_syscall(sys_mlock); cond_syscall(sys_munlock); cond_syscall(sys_mlockall); cond_syscall(sys_munlockall); +cond_syscall(sys_mlock2); cond_syscall(sys_mincore); cond_syscall(sys_madvise); cond_syscall(sys_mremap); diff --git a/mm/mlock.c b/mm/mlock.c index fbd8c03f7b37..35dcf8fa7195 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -644,6 +644,14 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) return do_mlock(start, len, VM_LOCKED); } +SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) +{ + if (flags) + return -EINVAL; + + return do_mlock(start, len, VM_LOCKED); +} + SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; -- cgit v1.2.3 From de60f5f10c58d4f34b68622442c0e04180367f3f Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:36 -0800 Subject: mm: introduce VM_LOCKONFAULT The cost of faulting in all memory to be locked can be very high when working with large mappings. If only portions of the mapping will be used this can incur a high penalty for locking. 
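As a sketch of where this is heading: the mlock2() syscall added above still requires flags == 0, and a follow-on patch is expected to expose the new state through a lock-on-fault flag. A hypothetical user-space use, where MLOCK_ONFAULT and its value are assumptions about that follow-on patch:

#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_mlock2
#define __NR_mlock2 325		/* x86_64 number from the syscall table above */
#endif
#define MLOCK_ONFAULT 0x01	/* assumed flag value; not part of this patch */

/* Map a large file and lock only the pages that actually get touched. */
static void *map_lock_on_fault(int fd, size_t len)
{
	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);

	if (p != MAP_FAILED && syscall(__NR_mlock2, p, len, MLOCK_ONFAULT))
		/* older kernel (ENOSYS) or flag rejected: fall back */
		mlock(p, len);
	return p;
}

The concrete workloads motivating this follow.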
For the example of a large file, this is the usage pattern for a large statistical language model (probably applies to other statistical or graphical models as well). For the security example, consider any application transacting in data that cannot be swapped out (credit card data, medical records, etc.). This patch introduces the ability to request that pages are not pre-faulted, but are placed on the unevictable LRU when they are finally faulted in. The VM_LOCKONFAULT flag will be used together with VM_LOCKED and has no effect when set without VM_LOCKED. Setting the VM_LOCKONFAULT flag for a VMA will cause pages faulted into that VMA to be added to the unevictable LRU when they are faulted or if they are already present, but will not cause any missing pages to be faulted in. Exposing this new lock state means that we cannot overload the meaning of the FOLL_POPULATE flag any longer. Prior to this patch it was used to mean that the VMA for a fault was locked. This means we need the new FOLL_MLOCK flag to communicate the locked state of a VMA. FOLL_POPULATE will now only control whether the VMA should be populated, and in the case of VM_LOCKONFAULT it will not be set. Signed-off-by: Eric B Munson Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Jonathan Corbet Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Guenter Roeck Cc: Heiko Carstens Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ kernel/fork.c | 3 ++- mm/debug.c | 1 + mm/gup.c | 10 ++++++++-- mm/huge_memory.c | 2 +- mm/hugetlb.c | 4 ++-- mm/mlock.c | 2 +- mm/mmap.c | 2 +- 8 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c258f8eb9ae..906c46a05707 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -139,6 +139,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ +#define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ @@ -202,6 +203,9 @@ extern unsigned int kobjsize(const void *objp); /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE +/* This mask is used to clear all the VMA flags used by mlock */ +#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask..
@@ -2137,6 +2141,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ +#define FOLL_MLOCK 0x1000 /* lock present pages */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/kernel/fork.c b/kernel/fork.c index 6ac894244d39..a30fae45b486 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -454,7 +454,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) tmp->vm_mm = mm; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; - tmp->vm_flags &= ~(VM_LOCKED|VM_UFFD_MISSING|VM_UFFD_WP); + tmp->vm_flags &= + ~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP); tmp->vm_next = tmp->vm_prev = NULL; tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; file = tmp->vm_file; diff --git a/mm/debug.c b/mm/debug.c index 6c1b3ea61bfd..e784110fb51d 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -125,6 +125,7 @@ static const struct trace_print_flags vmaflags_names[] = { {VM_GROWSDOWN, "growsdown" }, {VM_PFNMAP, "pfnmap" }, {VM_DENYWRITE, "denywrite" }, + {VM_LOCKONFAULT, "lockonfault" }, {VM_LOCKED, "locked" }, {VM_IO, "io" }, {VM_SEQ_READ, "seqread" }, diff --git a/mm/gup.c b/mm/gup.c index a798293fc648..deafa2c91b36 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -129,7 +129,7 @@ retry: */ mark_page_accessed(page); } - if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { + if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * The preliminary mapping check is mainly to avoid the * pointless overhead of lock_page on the ZERO_PAGE @@ -299,6 +299,9 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, unsigned int fault_flags = 0; int ret; + /* mlock all present pages, but do not fault in new pages */ + if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) + return -ENOENT; /* For mm_populate(), just skip the stack guard page. 
*/ if ((*flags & FOLL_POPULATE) && (stack_guard_page_start(vma, address) || @@ -890,7 +893,10 @@ long populate_vma_page_range(struct vm_area_struct *vma, VM_BUG_ON_VMA(end > vma->vm_end, vma); VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); - gup_flags = FOLL_TOUCH | FOLL_POPULATE; + gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK; + if (vma->vm_flags & VM_LOCKONFAULT) + gup_flags &= ~FOLL_POPULATE; + /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3fd0311c3ba7..f5c08b46fef8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1307,7 +1307,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, pmd, _pmd, 1)) update_mmu_cache_pmd(vma, addr, pmd); } - if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { + if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { if (page->mapping && trylock_page(page)) { lru_add_drain(); if (page->mapping) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 241de2712b36..74ef0c6a25dd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4137,8 +4137,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, unsigned long s_end = sbase + PUD_SIZE; /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; + unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; + unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK; /* * match the virtual addresses, permission and the alignment of the diff --git a/mm/mlock.c b/mm/mlock.c index 35dcf8fa7195..ca3894113b97 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -422,7 +422,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, void munlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - vma->vm_flags &= ~VM_LOCKED; + vma->vm_flags &= VM_LOCKED_CLEAR_MASK; while (start < end) { struct page *page = NULL; diff --git a/mm/mmap.c b/mm/mmap.c index 220effde8ea3..2ce04a649f6b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1661,7 +1661,7 @@ out: vma == get_gate_vma(current->mm))) mm->locked_vm += (len >> PAGE_SHIFT); else - vma->vm_flags &= ~VM_LOCKED; + vma->vm_flags &= VM_LOCKED_CLEAR_MASK; } if (file) -- cgit v1.2.3 From 61b590b9ee4221173ad6990a1150c5c9db73564e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 23 Oct 2015 12:43:18 +0200 Subject: netfilter: ingress: don't use nf_hook_list_active nf_hook_list_active() always returns true once at least one device has NF_INGRESS hook enabled. Thus, don't use this function. Instead, inverse the test and use the static key to elide list_empty test if no NF_INGRESS hooks are active. 
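The idiom generalizes beyond netfilter; a sketch with hypothetical names (the real hunk below gates on nf_hooks_needed before touching the per-device nf_hooks_ingress list):

static struct static_key my_hooks_needed = STATIC_KEY_INIT_FALSE;

static inline bool my_hooks_active(const struct net_device *dev)
{
	/* Patched to a NOP while no hook is registered anywhere; without
	 * jump-label support this degrades to a plain load and branch. */
	if (!static_key_false(&my_hooks_needed))
		return false;
	return !list_empty(&dev->nf_hooks_ingress);
}

The key is incremented as hooks register and decremented as they unregister, so the global gate tracks demand while list_empty() tracks the individual device.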
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 187feabe557c..ba7ce8805fe3 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -5,10 +5,13 @@ #include #ifdef CONFIG_NETFILTER_INGRESS -static inline int nf_hook_ingress_active(struct sk_buff *skb) +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) { - return nf_hook_list_active(&skb->dev->nf_hooks_ingress, - NFPROTO_NETDEV, NF_NETDEV_INGRESS); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return !list_empty(&skb->dev->nf_hooks_ingress); } static inline int nf_hook_ingress(struct sk_buff *skb) -- cgit v1.2.3 From b4865988eab598e56e6e628b9b32441acd142b28 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 6 Nov 2015 18:35:57 +0100 Subject: netfilter: ingress: fix wrong input interface on hook The input and output interfaces in nf_hook_state_init() are flipped. This fixes iif matching on nftables. Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index ba7ce8805fe3..5fcd375ef175 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,8 +19,8 @@ static inline int nf_hook_ingress(struct sk_buff *skb) struct nf_hook_state state; nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, - skb->dev, NULL, dev_net(skb->dev), NULL); + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, + skb->dev, NULL, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } -- cgit v1.2.3 From 1b9863c6aa56d92126ec0d5c42eae25df52b7ca1 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:47 -0700 Subject: device property: Introducing enum dev_dma_attr A device could have one of the following DMA attributes:

* DMA not supported
* DMA non-coherent
* DMA coherent

So, this patch introduces enum dev_dma_attr. This will be used by new APIs introduced in later patches. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 463de52fe891..8eecf200bae5 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -27,6 +27,12 @@ enum dev_prop_type { DEV_PROP_MAX, }; +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); -- cgit v1.2.3 From b84f196d963c3159329f72ca1913b08679004a43 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:48 -0700 Subject: ACPI: Adding DMA Attribute APIs for ACPI Device Adding acpi_get_dma_attr() to query DMA attributes of ACPI devices. It returns the enum dev_dma_attr, which communicates DMA information more clearly.
This API replaces the acpi_check_dma(), which will be removed in subsequent patch. This patch also provides a convenient function, acpi_dma_supported(), to check DMA support of the specified ACPI device. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/acpi/acpi_bus.h | 3 +++ include/linux/acpi.h | 10 ++++++++++ 3 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index d1ce377db3e9..ed3d76fadccf 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1308,6 +1308,48 @@ void acpi_free_pnp_ids(struct acpi_device_pnp *pnp) kfree(pnp->unique_id); } +/** + * acpi_dma_supported - Check DMA support for the specified device. + * @adev: The pointer to acpi device + * + * Return false if DMA is not supported. Otherwise, return true + */ +bool acpi_dma_supported(struct acpi_device *adev) +{ + if (!adev) + return false; + + if (adev->flags.cca_seen) + return true; + + /* + * Per ACPI 6.0 sec 6.2.17, assume devices can do cache-coherent + * DMA on "Intel platforms". Presumably that includes all x86 and + * ia64, and other arches will set CONFIG_ACPI_CCA_REQUIRED=y. + */ + if (!IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED)) + return true; + + return false; +} + +/** + * acpi_get_dma_attr - Check the supported DMA attr for the specified device. + * @adev: The pointer to acpi device + * + * Return enum dev_dma_attr. + */ +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + if (!acpi_dma_supported(adev)) + return DEV_DMA_NOT_SUPPORTED; + + if (adev->flags.coherent_dma) + return DEV_DMA_COHERENT; + else + return DEV_DMA_NON_COHERENT; +} + static void acpi_init_coherency(struct acpi_device *adev) { unsigned long long cca = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8df990520304..e56e6520edce 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -596,6 +596,9 @@ struct acpi_pci_root { /* helper */ +bool acpi_dma_supported(struct acpi_device *adev); +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); + struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); int acpi_is_root_bridge(acpi_handle); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 496265b0f527..292af3b69ede 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -579,6 +579,16 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) return false; } +static inline bool acpi_dma_supported(struct acpi_device *adev) +{ + return false; +} + +static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + return DEV_DMA_NOT_SUPPORTED; +} + #define ACPI_PTR(_ptr) (NULL) #endif /* !CONFIG_ACPI */ -- cgit v1.2.3 From e5e558644bbb23cad03c586703331b8bcd9e0e6c Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:49 -0700 Subject: device property: Adding DMA Attribute APIs for Generic Devices The function device_dma_is_coherent() does not sufficiently communicate device DMA attributes. Instead, this patch introduces device_get_dma_attr(), which returns enum dev_dma_attr. It replaces the acpi_check_dma(), which will be removed in subsequent patch. This also provides a convenient function, device_dma_supported(), to check DMA support of the specified device. 
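A plausible consumer of the two new helpers (only device_dma_supported() and device_get_dma_attr() come from this patch; the setup call is made up):

	enum dev_dma_attr attr;

	if (!device_dma_supported(dev))
		return -ENODEV;

	attr = device_get_dma_attr(dev);
	my_setup_dma_ops(dev, attr == DEV_DMA_COHERENT);

The tri-state enum lets bus code distinguish "no DMA at all" from "DMA, but not cache-coherent", which the old boolean device_dma_is_coherent() could not.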
Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 29 +++++++++++++++++++++++++++++ include/linux/property.h | 4 ++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/property.c b/drivers/base/property.c index de40623bbd8a..05d57a2afa05 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -611,6 +611,35 @@ bool device_dma_is_coherent(struct device *dev) } EXPORT_SYMBOL_GPL(device_dma_is_coherent); +bool device_dma_supported(struct device *dev) +{ + /* For DT, this is always supported. + * For ACPI, this depends on CCA, which + * is determined by the acpi_dma_supported(). + */ + if (IS_ENABLED(CONFIG_OF) && dev->of_node) + return true; + + return acpi_dma_supported(ACPI_COMPANION(dev)); +} +EXPORT_SYMBOL_GPL(device_dma_supported); + +enum dev_dma_attr device_get_dma_attr(struct device *dev) +{ + enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED; + + if (IS_ENABLED(CONFIG_OF) && dev->of_node) { + if (of_dma_is_coherent(dev->of_node)) + attr = DEV_DMA_COHERENT; + else + attr = DEV_DMA_NON_COHERENT; + } else + attr = acpi_get_dma_attr(ACPI_COMPANION(dev)); + + return attr; +} +EXPORT_SYMBOL_GPL(device_get_dma_attr); + /** * device_get_phy_mode - Get phy mode for given device * @dev: Pointer to the given device diff --git a/include/linux/property.h b/include/linux/property.h index 8eecf200bae5..7200490b7e6f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -176,6 +176,10 @@ void device_add_property_set(struct device *dev, struct property_set *pset); bool device_dma_is_coherent(struct device *dev); +bool device_dma_supported(struct device *dev); + +enum dev_dma_attr device_get_dma_attr(struct device *dev); + int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); -- cgit v1.2.3 From ab3d527329f01dd63dc852041006d1a24895d116 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:51 -0700 Subject: device property: ACPI: Remove unused DMA APIs These DMA APIs are replaced with the newer versions, which return the enum dev_dma_attr. So, we can safely remove them. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 13 ------------- include/acpi/acpi_bus.h | 34 ---------------------------------- include/linux/acpi.h | 5 ----- include/linux/property.h | 2 -- 4 files changed, 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/property.c b/drivers/base/property.c index 05d57a2afa05..1325ff225cc4 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -598,19 +598,6 @@ unsigned int device_get_child_node_count(struct device *dev) } EXPORT_SYMBOL_GPL(device_get_child_node_count); -bool device_dma_is_coherent(struct device *dev) -{ - bool coherent = false; - - if (IS_ENABLED(CONFIG_OF) && dev->of_node) - coherent = of_dma_is_coherent(dev->of_node); - else - acpi_check_dma(ACPI_COMPANION(dev), &coherent); - - return coherent; -} -EXPORT_SYMBOL_GPL(device_dma_is_coherent); - bool device_dma_supported(struct device *dev) { /* For DT, this is always supported. 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e56e6520edce..e45d58d6b0a7 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -390,40 +390,6 @@ struct acpi_data_node { struct completion kobj_done; }; -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - bool ret = false; - - if (!adev) - return ret; - - /** - * Currently, we only support _CCA=1 (i.e. coherent_dma=1) - * This should be equivalent to specifyig dma-coherent for - * a device in OF. - * - * For the case when _CCA=0 (i.e. coherent_dma=0 && cca_seen=1), - * There are two cases: - * case 1. Do not support and disable DMA. - * case 2. Support but rely on arch-specific cache maintenance for - * non-coherence DMA operations. - * Currently, we implement case 2 above. - * - * For the case when _CCA is missing (i.e. cca_seen=0) and - * platform specifies ACPI_CCA_REQUIRED, we do not support DMA, - * and fallback to arch-specific default handling. - * - * See acpi_init_coherency() for more info. - */ - if (adev->flags.coherent_dma || - (adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) { - ret = true; - if (coherent) - *coherent = adev->flags.coherent_dma; - } - return ret; -} - static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return fwnode && (fwnode->type == FWNODE_ACPI diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 292af3b69ede..b5868300df75 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -574,11 +574,6 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - return false; -} - static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; diff --git a/include/linux/property.h b/include/linux/property.h index 7200490b7e6f..0a3705a7c9f2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -174,8 +174,6 @@ struct property_set { void device_add_property_set(struct device *dev, struct property_set *pset); -bool device_dma_is_coherent(struct device *dev); - bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); -- cgit v1.2.3 From 50230713b63941f4b6b562eea0834f751aa0801e Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:53 -0700 Subject: PCI: OF: Move of_pci_dma_configure() to pci_dma_configure() This patch moves of_pci_dma_configure() to a more generic pci_dma_configure(), which can be extended by non-OF code (e.g. ACPI). This has no functional change. Signed-off-by: Suravee Suthikulpanit Acked-by: Rob Herring Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/of/of_pci.c | 19 ------------------- drivers/pci/probe.c | 22 +++++++++++++++++++++- include/linux/of_pci.h | 3 --- 3 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c index a2f510cc683b..b66ee4ebf650 100644 --- a/drivers/of/of_pci.c +++ b/drivers/of/of_pci.c @@ -117,25 +117,6 @@ int of_get_pci_domain_nr(struct device_node *node) } EXPORT_SYMBOL_GPL(of_get_pci_domain_nr); -/** - * of_pci_dma_configure - Setup DMA configuration - * @dev: ptr to pci_dev struct of the PCI device - * - * Function to update PCI devices's DMA configuration using the same - * info from the OF node of host bridge's parent (if any).
- */ -void of_pci_dma_configure(struct pci_dev *pci_dev) -{ - struct device *dev = &pci_dev->dev; - struct device *bridge = pci_get_host_bridge_device(pci_dev); - - if (bridge->parent) - of_dma_configure(dev, bridge->parent->of_node); - - pci_put_host_bridge_device(bridge); -} -EXPORT_SYMBOL_GPL(of_pci_dma_configure); - #if defined(CONFIG_OF_ADDRESS) /** * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8361d27e5eca..d35f83d80b15 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1633,6 +1634,25 @@ static void pci_set_msi_domain(struct pci_dev *dev) dev_get_msi_domain(&dev->bus->dev)); } +/** + * pci_dma_configure - Setup DMA configuration + * @dev: ptr to pci_dev struct of the PCI device + * + * Function to update PCI devices's DMA configuration using the same + * info from the OF node of host bridge's parent (if any). + */ +static void pci_dma_configure(struct pci_dev *dev) +{ + struct device *bridge = pci_get_host_bridge_device(dev); + + if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) { + if (bridge->parent) + of_dma_configure(&dev->dev, bridge->parent->of_node); + } + + pci_put_host_bridge_device(bridge); +} + void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) { int ret; @@ -1646,7 +1666,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; - of_pci_dma_configure(dev); + pci_dma_configure(dev); pci_set_dma_max_seg_size(dev, 65536); pci_set_dma_seg_boundary(dev, 0xffffffff);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 29fd3fe1c035..ce0e5abeb454 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -16,7 +16,6 @@ int of_pci_get_devfn(struct device_node *np); int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); -void of_pci_dma_configure(struct pci_dev *pci_dev); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -51,8 +50,6 @@ of_get_pci_domain_nr(struct device_node *node) { return -1; } - -static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } #endif #if defined(CONFIG_OF_ADDRESS)
-- cgit v1.2.3
From e2b19197ff9dc46f3e3888f273c4395f9e5a9856 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 6 Nov 2015 16:28:09 -0800
Subject: mm, page_alloc: remove unnecessary parameter from zone_watermark_ok_safe

Overall, the intent of this series is to remove the zonelist cache which was introduced to avoid high overhead in the page allocator. Once this is done, it is necessary to reduce the cost of watermark checks.

The series starts with minor micro-optimisations. Next it notes that GFP flags that affect watermark checks are abused. The absence of __GFP_WAIT historically identified callers that could not sleep and could access reserves. This was later abused by callers that simply prefer to avoid sleeping and have other options. A patch distinguishes between atomic callers, high-priority callers and those that simply wish to avoid sleep.

The zonelist cache has been around for a long time but it is of dubious merit with a lot of complexity and some issues that are explained.
The most important issue is that a failed THP allocation can cause a zone to be treated as "full". This potentially causes unnecessary stalls, reclaim activity or remote fallbacks. The issues could be fixed, but it is not worth the complexity.

The series places a small number of other micro-optimisations on top before examining the watermark checks.

High-order watermark enforcement can cause high-order allocations to fail even though pages are free. The watermark checks both protect high-order atomic allocations and make kswapd aware of high-order pages, but both needs can be met better using migrate types. This series uses page grouping by mobility to reserve pageblocks for high-order allocations, with the size of the reservation depending on demand. kswapd awareness is maintained by examining the free lists. By patch 12 in this series, the high-order watermark checks are gone while the properties that motivated their introduction are preserved.

This patch (of 10):

No user of zone_watermark_ok_safe() specifies alloc_flags. This patch removes the unnecessary parameter.

Signed-off-by: Mel Gorman Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Michal Hocko Reviewed-by: Christoph Lameter Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds
---
 include/linux/mmzone.h | 2 +-
 mm/page_alloc.c | 5 +++--
 mm/vmscan.c | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)
(limited to 'include/linux')
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2d7e660cdefe..e326843c995a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -817,7 +817,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, int alloc_flags); + unsigned long mark, int classzone_idx); enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 446bb36ee59d..d73c346d91b3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2249,6 +2249,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order, min -= min / 2; if (alloc_flags & ALLOC_HARDER) min -= min / 4; + #ifdef CONFIG_CMA /* If allocation can't use CMA areas don't use free CMA pages */ if (!(alloc_flags & ALLOC_CMA)) @@ -2278,14 +2279,14 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, int alloc_flags) + unsigned long mark, int classzone_idx) { long free_pages = zone_page_state(z, NR_FREE_PAGES); if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); - return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, + return __zone_watermark_ok(z, order, mark, classzone_idx, 0, free_pages); }
diff --git a/mm/vmscan.c b/mm/vmscan.c index 55721b619aee..e0cd7eed4e38 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2477,7 +2477,7 @@ static inline bool compaction_ready(struct zone *zone, int order) balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); watermark = high_wmark_pages(zone) + balance_gap + (2UL << order); - watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); + watermark_ok =
zone_watermark_ok_safe(zone, 0, watermark, 0); /* * If compaction is deferred, reclaim up to a point where @@ -2960,7 +2960,7 @@ static bool zone_balanced(struct zone *zone, int order, unsigned long balance_gap, int classzone_idx) { if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) + - balance_gap, classzone_idx, 0)) + balance_gap, classzone_idx)) return false; if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone, -- cgit v1.2.3 From 46e700abc44ce215acb4341d9702ce3972eda571 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:15 -0800 Subject: mm, page_alloc: remove unnecessary taking of a seqlock when cpusets are disabled There is a seqcounter that protects against spurious allocation failures when a task is changing the allowed nodes in a cpuset. There is no need to check the seqcounter until a cpuset exists. Signed-off-by: Mel Gorman Acked-by: Christoph Lameter Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 5a1311942358..85a868ccb493 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -104,6 +104,9 @@ extern void cpuset_print_current_mems_allowed(void); */ static inline unsigned int read_mems_allowed_begin(void) { + if (!cpusets_enabled()) + return 0; + return read_seqcount_begin(¤t->mems_allowed_seq); } @@ -115,6 +118,9 @@ static inline unsigned int read_mems_allowed_begin(void) */ static inline bool read_mems_allowed_retry(unsigned int seq) { + if (!cpusets_enabled()) + return false; + return read_seqcount_retry(¤t->mems_allowed_seq, seq); } -- cgit v1.2.3 From 016c13daa5c9e4827eca703e2f0621c131f2cca3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:18 -0800 Subject: mm, page_alloc: use masks and shifts when converting GFP flags to migrate types This patch redefines which GFP bits are used for specifying mobility and the order of the migrate types. Once redefined it's possible to convert GFP flags to a migrate type with a simple mask and shift. The only downside is that readers of OOM kill messages and allocation failures may have been used to the existing values but scripts/gfp-translate will help. 
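To make the new conversion concrete, here is a minimal standalone sketch (plain userspace C, not kernel code; the constants and the helper body are copied from this patch purely for illustration):

    #include <stdio.h>

    /* Bit layout and migrate-type order as established by this patch. */
    #define ___GFP_MOVABLE     0x08u
    #define ___GFP_RECLAIMABLE 0x10u
    #define GFP_MOVABLE_MASK   (___GFP_RECLAIMABLE | ___GFP_MOVABLE)
    #define GFP_MOVABLE_SHIFT  3

    enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE };

    /* The conversion is now a single mask and shift rather than two bit tests. */
    static int gfpflags_to_migratetype(unsigned int gfp_flags)
    {
        return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               gfpflags_to_migratetype(0),                   /* 0: MIGRATE_UNMOVABLE */
               gfpflags_to_migratetype(___GFP_MOVABLE),      /* 1: MIGRATE_MOVABLE */
               gfpflags_to_migratetype(___GFP_RECLAIMABLE)); /* 2: MIGRATE_RECLAIMABLE */
        return 0;
    }

This only works because ___GFP_MOVABLE and ___GFP_RECLAIMABLE are adjacent bits and the enum order matches the bit order, which is exactly what the two BUILD_BUG_ON() checks in the patch verify at compile time.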
Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds
---
 include/linux/gfp.h | 12 +++++++-----
 include/linux/mmzone.h | 2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)
(limited to 'include/linux')
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f92cbd2f4450..440fca3e7e5d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -14,7 +14,7 @@ struct vm_area_struct; #define ___GFP_HIGHMEM 0x02u #define ___GFP_DMA32 0x04u #define ___GFP_MOVABLE 0x08u -#define ___GFP_WAIT 0x10u +#define ___GFP_RECLAIMABLE 0x10u #define ___GFP_HIGH 0x20u #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u @@ -29,7 +29,7 @@ struct vm_area_struct; #define ___GFP_NOMEMALLOC 0x10000u #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u -#define ___GFP_RECLAIMABLE 0x80000u +#define ___GFP_WAIT 0x80000u #define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_NO_KSWAPD 0x400000u @@ -126,6 +126,7 @@ struct vm_area_struct; /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) +#define GFP_MOVABLE_SHIFT 3 /* Control page allocator reclaim behavior */ #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ @@ -152,14 +153,15 @@ struct vm_area_struct; /* Convert GFP flags to their corresponding migrate type */ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) { - WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); + BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; /* Group based on mobility */ - return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | - ((gfp_flags & __GFP_RECLAIMABLE) != 0); + return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e326843c995a..38bed71758ab 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,8 +37,8 @@ enum { MIGRATE_UNMOVABLE, - MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, + MIGRATE_RECLAIMABLE, MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ MIGRATE_RESERVE = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA
-- cgit v1.2.3
From d0164adc89f6bb374d304ffcc375c6d2652fe67d Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Fri, 6 Nov 2015 16:28:21 -0800
Subject: mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd

The absence of __GFP_WAIT has been used to identify atomic context in callers that hold spinlocks or are in interrupts. They are expected to be high priority and have access to one of two watermarks lower than "min", which can be referred to as the "atomic reserve". __GFP_HIGH users get access to the first lower watermark and can be called the "high priority reserve".

Over time, callers had a requirement to not block when fallback options were available. Some have abused __GFP_WAIT, leading to a situation where an optimistic allocation with a fallback option can access atomic reserves.

This patch uses __GFP_ATOMIC to identify callers that are truly atomic, cannot sleep and have no alternative. High priority users continue to use __GFP_HIGH; the sketch below illustrates how the resulting flag combinations behave.
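As a standalone sketch of what the new masks mean in practice (plain C; the flag values are copied from this patch's gfp.h changes for illustration only, and the helper mirrors the gfpflags_allow_blocking() introduced below):

    #include <stdio.h>
    #include <stdbool.h>

    /* Flag values as defined by this patch (illustrative copy, not kernel headers). */
    #define ___GFP_HIGH           0x20u
    #define ___GFP_ATOMIC         0x80000u
    #define ___GFP_DIRECT_RECLAIM 0x400000u
    #define ___GFP_KSWAPD_RECLAIM 0x2000000u

    #define GFP_ATOMIC (___GFP_HIGH | ___GFP_ATOMIC | ___GFP_KSWAPD_RECLAIM)
    #define GFP_NOWAIT (___GFP_KSWAPD_RECLAIM)
    #define __GFP_WAIT (___GFP_DIRECT_RECLAIM | ___GFP_KSWAPD_RECLAIM)

    /* Mirrors the new gfpflags_allow_blocking() helper. */
    static bool allow_blocking(unsigned int gfp)
    {
        return gfp & ___GFP_DIRECT_RECLAIM;
    }

    int main(void)
    {
        unsigned int masks[] = { GFP_ATOMIC, GFP_NOWAIT, __GFP_WAIT };
        const char *names[] = { "GFP_ATOMIC", "GFP_NOWAIT", "__GFP_WAIT" };

        for (int i = 0; i < 3; i++)
            printf("%-10s may-block=%d atomic-reserves=%d wakes-kswapd=%d\n",
                   names[i], allow_blocking(masks[i]),
                   !!(masks[i] & ___GFP_ATOMIC),
                   !!(masks[i] & ___GFP_KSWAPD_RECLAIM));
        return 0;
    }

Note how GFP_ATOMIC now carries an explicit __GFP_ATOMIC bit rather than being inferred from the absence of __GFP_WAIT, and how GFP_NOWAIT still wakes kswapd even though it never blocks.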
__GFP_DIRECT_RECLAIM identifies callers that can sleep and are willing to enter direct reclaim. __GFP_KSWAPD_RECLAIM identifies callers that want to wake kswapd for background reclaim. __GFP_WAIT is redefined as a caller that is willing to enter direct reclaim and wake kswapd for background reclaim.

This patch then converts a number of sites:

o __GFP_ATOMIC is used by callers that are high priority and have memory pools for those requests. GFP_ATOMIC uses this flag.

o Callers that have a limited mempool to guarantee forward progress clear __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall into this category where kswapd will still be woken but atomic reserves are not used as there is a one-entry mempool to guarantee progress.

o Callers that are checking if they are non-blocking should use the helper gfpflags_allow_blocking() where possible. This is because checking for __GFP_WAIT as was done historically can now trigger false positives. Some exceptions like dm-crypt.c exist where the code intent is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to flag manipulations.

o Callers that built their own GFP flags instead of starting with GFP_KERNEL and friends now also need to specify __GFP_KSWAPD_RECLAIM.

The first key hazard to watch out for is callers that removed __GFP_WAIT and were depending on access to atomic reserves for inconspicuous reasons. In some cases it may be appropriate for them to use __GFP_HIGH.

The second key hazard is callers that assembled their own combination of GFP flags instead of starting with something like GFP_KERNEL. They may now wish to specify __GFP_KSWAPD_RECLAIM. In most cases it is almost certainly harmless if this is missed, as other activity will wake kswapd.

Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds
---
 Documentation/vm/balance | 14 ++++---
 arch/arm/mm/dma-mapping.c | 6 +--
 arch/arm/xen/mm.c | 2 +-
 arch/arm64/mm/dma-mapping.c | 4 +-
 arch/x86/kernel/pci-dma.c | 2 +-
 block/bio.c | 26 ++++++------
 block/blk-core.c | 16 ++++----
 block/blk-ioc.c | 2 +-
 block/blk-mq-tag.c | 2 +-
 block/blk-mq.c | 6 +--
 drivers/block/drbd/drbd_receiver.c | 3 +-
 drivers/block/osdblk.c | 2 +-
 drivers/connector/connector.c | 3 +-
 drivers/firewire/core-cdev.c | 2 +-
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 drivers/infiniband/core/sa_query.c | 2 +-
 drivers/iommu/amd_iommu.c | 2 +-
 drivers/iommu/intel-iommu.c | 2 +-
 drivers/md/dm-crypt.c | 6 +--
 drivers/md/dm-kcopyd.c | 2 +-
 drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c | 2 +-
 drivers/media/pci/solo6x10/solo6x10-v4l2.c | 2 +-
 drivers/media/pci/tw68/tw68-video.c | 2 +-
 drivers/mtd/mtdcore.c | 3 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +-
 drivers/staging/android/ion/ion_system_heap.c | 2 +-
 .../lustre/include/linux/libcfs/libcfs_private.h | 2 +-
 drivers/usb/host/u132-hcd.c | 2 +-
 drivers/video/fbdev/vermilion/vermilion.c | 2 +-
 fs/btrfs/disk-io.c | 2 +-
 fs/btrfs/extent_io.c | 14 +++----
 fs/btrfs/volumes.c | 4 +-
 fs/ext4/super.c | 2 +-
 fs/fscache/cookie.c | 2 +-
 fs/fscache/page.c | 6 +--
 fs/jbd2/transaction.c | 4 +-
 fs/nfs/file.c | 6 +--
 fs/xfs/xfs_qm.c | 2 +-
 include/linux/gfp.h | 46 ++++++++++++++++------
 include/linux/skbuff.h | 6 +--
 include/net/sock.h | 2 +-
 include/trace/events/gfpflags.h | 5 ++-
 kernel/audit.c | 6 +--
 kernel/cgroup.c | 2 +-
 kernel/locking/lockdep.c | 2 +-
 kernel/power/snapshot.c | 2
+- kernel/smp.c | 2 +- lib/idr.c | 4 +- lib/radix-tree.c | 10 ++--- mm/backing-dev.c | 2 +- mm/dmapool.c | 2 +- mm/memcontrol.c | 6 +-- mm/mempool.c | 10 ++--- mm/migrate.c | 2 +- mm/page_alloc.c | 43 ++++++++++++-------- mm/slab.c | 18 ++++----- mm/slub.c | 10 ++--- mm/vmalloc.c | 2 +- mm/vmscan.c | 4 +- mm/zswap.c | 5 ++- net/core/skbuff.c | 8 ++-- net/core/sock.c | 6 ++- net/netlink/af_netlink.c | 2 +- net/rds/ib_recv.c | 4 +- net/rxrpc/ar-connection.c | 2 +- net/sctp/associola.c | 2 +- 66 files changed, 210 insertions(+), 172 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/balance b/Documentation/vm/balance index c46e68cf9344..964595481af6 100644 --- a/Documentation/vm/balance +++ b/Documentation/vm/balance @@ -1,12 +1,14 @@ Started Jan 2000 by Kanoj Sarcar -Memory balancing is needed for non __GFP_WAIT as well as for non -__GFP_IO allocations. +Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as +well as for non __GFP_IO allocations. -There are two reasons to be requesting non __GFP_WAIT allocations: -the caller can not sleep (typically intr context), or does not want -to incur cost overheads of page stealing and possible swap io for -whatever reasons. +The first reason why a caller may avoid reclaim is that the caller can not +sleep due to holding a spinlock or is in interrupt context. The second may +be that the caller is willing to fail the allocation without incurring the +overhead of page reclaim. This may happen for opportunistic high-order +allocation requests that have order-0 fallback options. In such cases, +the caller may also wish to avoid waking kswapd. __GFP_IO allocation requests are made to prevent file system deadlocks. diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ad4eb2d26e16..e62400e5fb99 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -651,12 +651,12 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT)) + else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM)) addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr); else if (is_coherent) addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (!(gfp & __GFP_WAIT)) + else if (!gfpflags_allow_blocking(gfp)) addr = __alloc_from_pool(size, &page); else addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, @@ -1363,7 +1363,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - if (!(gfp & __GFP_WAIT)) + if (!gfpflags_allow_blocking(gfp)) return __iommu_alloc_atomic(dev, size, handle); /* diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 7c34f7126b04..c5f9a9e3d1f3 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -25,7 +25,7 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order) { struct memblock_region *reg; - gfp_t flags = __GFP_NOWARN; + gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM; for_each_memblock(memory, reg) { if (reg->base < (phys_addr_t)0xffffffff) { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6320361d8d4c..bb4bf6a06ad6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; - if (dev_get_cma_area(dev) 
&& (flags & __GFP_WAIT)) { + if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) { struct page *page; void *addr; @@ -148,7 +148,7 @@ static void *__dma_alloc(struct device *dev, size_t size, size = PAGE_ALIGN(size); - if (!coherent && !(flags & __GFP_WAIT)) { + if (!coherent && !gfpflags_allow_blocking(flags)) { struct page *page = NULL; void *addr = __alloc_from_pool(size, &page, flags); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cd99433b8ba1..6ba014c61d62 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -90,7 +90,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, again: page = NULL; /* CMA can be used only in the context which permits sleeping */ - if (flag & __GFP_WAIT) { + if (gfpflags_allow_blocking(flag)) { page = dma_alloc_from_contiguous(dev, count, get_order(size)); if (page && page_to_phys(page) + size > dma_mask) { dma_release_from_contiguous(dev, page, count); diff --git a/block/bio.c b/block/bio.c index ad3f276d74bc..4f184d938942 100644 --- a/block/bio.c +++ b/block/bio.c @@ -211,7 +211,7 @@ fallback: bvl = mempool_alloc(pool, gfp_mask); } else { struct biovec_slab *bvs = bvec_slabs + *idx; - gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); + gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO); /* * Make this allocation restricted and don't dump info on @@ -221,11 +221,11 @@ fallback: __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; /* - * Try a slab allocation. If this fails and __GFP_WAIT + * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM * is set, retry with the 1-entry mempool */ bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); - if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) { + if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { *idx = BIOVEC_MAX_IDX; goto fallback; } @@ -395,12 +395,12 @@ static void punt_bios_to_rescuer(struct bio_set *bs) * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is * backed by the @bs's mempool. * - * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be - * able to allocate a bio. This is due to the mempool guarantees. To make this - * work, callers must never allocate more than 1 bio at a time from this pool. - * Callers that need to allocate more than 1 bio must always submit the - * previously allocated bio for IO before attempting to allocate a new one. - * Failure to do so can cause deadlocks under memory pressure. + * When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will + * always be able to allocate a bio. This is due to the mempool guarantees. + * To make this work, callers must never allocate more than 1 bio at a time + * from this pool. Callers that need to allocate more than 1 bio must always + * submit the previously allocated bio for IO before attempting to allocate + * a new one. Failure to do so can cause deadlocks under memory pressure. * * Note that when running under generic_make_request() (i.e. any block * driver), bios are not submitted until after you return - see the code in @@ -459,13 +459,13 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * We solve this, and guarantee forward progress, with a rescuer * workqueue per bio_set. If we go to allocate and there are * bios on current->bio_list, we first try the allocation - * without __GFP_WAIT; if that fails, we punt those bios we - * would be blocking to the rescuer workqueue before we retry - * with the original gfp_flags. 
+ * without __GFP_DIRECT_RECLAIM; if that fails, we punt those + * bios we would be blocking to the rescuer workqueue before + * we retry with the original gfp_flags. */ if (current->bio_list && !bio_list_empty(current->bio_list)) - gfp_mask &= ~__GFP_WAIT; + gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); if (!p && gfp_mask != saved_gfp) { diff --git a/block/blk-core.c b/block/blk-core.c index 89eec7965870..9e32f0868e36 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1206,8 +1206,8 @@ rq_starved: * @bio: bio to allocate request for (can be %NULL) * @gfp_mask: allocation mask * - * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this - * function keeps retrying under memory pressure and fails iff @q is dead. + * Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask, + * this function keeps retrying under memory pressure and fails iff @q is dead. * * Must be called with @q->queue_lock held and, * Returns ERR_PTR on failure, with @q->queue_lock held. @@ -1227,7 +1227,7 @@ retry: if (!IS_ERR(rq)) return rq; - if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { + if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) { blk_put_rl(rl); return rq; } @@ -1305,11 +1305,11 @@ EXPORT_SYMBOL(blk_get_request); * BUG. * * WARNING: When allocating/cloning a bio-chain, careful consideration should be - * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for - * anything but the first bio in the chain. Otherwise you risk waiting for IO - * completion of a bio that hasn't been submitted yet, thus resulting in a - * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead - * of bio_alloc(), as that avoids the mempool deadlock. + * given to how you allocate bios. In particular, you cannot use + * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise + * you risk waiting for IO completion of a bio that hasn't been submitted yet, + * thus resulting in a deadlock. Alternatively bios should be allocated using + * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock. * If possible a big IO should be split into smaller parts when allocation * fails. Partial allocation should not be an error, or you risk a live-lock. 
*/ diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 1a27f45ec776..381cb50a673c 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -289,7 +289,7 @@ struct io_context *get_task_io_context(struct task_struct *task, { struct io_context *ioc; - might_sleep_if(gfp_flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp_flags)); do { task_lock(task); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 60ac684c8b8c..a07ca3488d96 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data, if (tag != -1) return tag; - if (!(data->gfp & __GFP_WAIT)) + if (!gfpflags_allow_blocking(data->gfp)) return -1; bs = bt_wait_ptr(bt, hctx); diff --git a/block/blk-mq.c b/block/blk-mq.c index 1c27b3eaef64..68c0a3416b34 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -244,11 +244,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT, + blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM, reserved, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); - if (!rq && (gfp & __GFP_WAIT)) { + if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); @@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, - __GFP_WAIT|GFP_ATOMIC, false, ctx, hctx); + __GFP_WAIT|__GFP_HIGH, false, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); ctx = alloc_data.ctx; hctx = alloc_data.hctx; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c097909c589c..b4b5680ac6ad 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -357,7 +357,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto } if (has_payload && data_size) { - page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT)); + page = drbd_alloc_pages(peer_device, nr_pages, + gfpflags_allow_blocking(gfp_mask)); if (!page) goto fail; } diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index e22942596207..1b709a4e3b5e 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -271,7 +271,7 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) goto err_out; tmp->bi_bdev = NULL; - gfpmask &= ~__GFP_WAIT; + gfpmask &= ~__GFP_DIRECT_RECLAIM; tmp->bi_next = NULL; if (!new_chain) diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 30f522848c73..d7373ca69c99 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -124,7 +124,8 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group, if (group) return netlink_broadcast(dev->nls, skb, portid, group, gfp_mask); - return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT)); + return netlink_unicast(dev->nls, skb, portid, + !gfpflags_allow_blocking(gfp_mask)); } EXPORT_SYMBOL_GPL(cn_netlink_send_mult); diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 2a3973a7c441..36a7c2d89a01 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -486,7 +486,7 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg) static int add_client_resource(struct client *client, struct client_resource 
*resource, gfp_t gfp_mask) { - bool preload = !!(gfp_mask & __GFP_WAIT); + bool preload = gfpflags_allow_blocking(gfp_mask); unsigned long flags; int ret; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4d631a946481..d58cb9e034fe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2215,7 +2215,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) */ mapping = file_inode(obj->base.filp)->i_mapping; gfp = mapping_gfp_mask(mapping); - gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; + gfp |= __GFP_NORETRY | __GFP_NOWARN; gfp &= ~(__GFP_IO | __GFP_WAIT); sg = st->sgl; st->nents = 0; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c014b33d8e0..59ab264c99c4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1083,7 +1083,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { - bool preload = !!(gfp_mask & __GFP_WAIT); + bool preload = gfpflags_allow_blocking(gfp_mask); unsigned long flags; int ret, id; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 0d533bba4ad1..8b2be1e7714f 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2668,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size, page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); if (!page) { - if (!(flag & __GFP_WAIT)) + if (!gfpflags_allow_blocking(flag)) return NULL; page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7cf80c1a8a16..f1042daef9ad 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3647,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, flags |= GFP_DMA32; } - if (flags & __GFP_WAIT) { + if (gfpflags_allow_blocking(flags)) { unsigned int count = size >> PAGE_SHIFT; page = dma_alloc_from_contiguous(dev, count, order); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3729b394432c..917d47e290ae 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -994,7 +994,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) struct bio_vec *bvec; retry: - if (unlikely(gfp_mask & __GFP_WAIT)) + if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) mutex_lock(&cc->bio_alloc_lock); clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); @@ -1010,7 +1010,7 @@ retry: if (!page) { crypt_free_buffer_pages(cc, clone); bio_put(clone); - gfp_mask |= __GFP_WAIT; + gfp_mask |= __GFP_DIRECT_RECLAIM; goto retry; } @@ -1027,7 +1027,7 @@ retry: } return_clone: - if (unlikely(gfp_mask & __GFP_WAIT)) + if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) mutex_unlock(&cc->bio_alloc_lock); return clone; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 3a7cade5e27d..1452ed9aacb4 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -244,7 +244,7 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc, *pages = NULL; do { - pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY); + pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM); if (unlikely(!pl)) { /* Use reserved pages */ pl = kc->pages; diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c index 1bd2fd47421f..4432fd69b7cb 100644 --- a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c +++ 
b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c @@ -1297,7 +1297,7 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev, solo_enc->vidq.ops = &solo_enc_video_qops; solo_enc->vidq.mem_ops = &vb2_dma_sg_memops; solo_enc->vidq.drv_priv = solo_enc; - solo_enc->vidq.gfp_flags = __GFP_DMA32; + solo_enc->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM; solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); solo_enc->vidq.lock = &solo_enc->lock; diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2.c b/drivers/media/pci/solo6x10/solo6x10-v4l2.c index 26df903585d7..f7ce493b1fee 100644 --- a/drivers/media/pci/solo6x10/solo6x10-v4l2.c +++ b/drivers/media/pci/solo6x10/solo6x10-v4l2.c @@ -678,7 +678,7 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr) solo_dev->vidq.mem_ops = &vb2_dma_contig_memops; solo_dev->vidq.drv_priv = solo_dev; solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; - solo_dev->vidq.gfp_flags = __GFP_DMA32; + solo_dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM; solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); solo_dev->vidq.lock = &solo_dev->lock; ret = vb2_queue_init(&solo_dev->vidq); diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c index 4c3293dcddbc..46642ef9151b 100644 --- a/drivers/media/pci/tw68/tw68-video.c +++ b/drivers/media/pci/tw68/tw68-video.c @@ -979,7 +979,7 @@ int tw68_video_init2(struct tw68_dev *dev, int video_nr) dev->vidq.ops = &tw68_video_qops; dev->vidq.mem_ops = &vb2_dma_sg_memops; dev->vidq.drv_priv = dev; - dev->vidq.gfp_flags = __GFP_DMA32; + dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM; dev->vidq.buf_struct_size = sizeof(struct tw68_buf); dev->vidq.lock = &dev->lock; dev->vidq.min_buffers_needed = 2; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 8bbbb751bf45..2dfb291a47c6 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1188,8 +1188,7 @@ EXPORT_SYMBOL_GPL(mtd_writev); */ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) { - gfp_t flags = __GFP_NOWARN | __GFP_WAIT | - __GFP_NORETRY | __GFP_NO_KSWAPD; + gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY; size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); void *kbuf; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 44173be5cbf0..f8d7a2f06950 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -691,7 +691,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask) { if (fp->rx_frag_size) { /* GFP_KERNEL allocations are used only during initialization */ - if (unlikely(gfp_mask & __GFP_WAIT)) + if (unlikely(gfpflags_allow_blocking(gfp_mask))) return (void *)__get_free_page(gfp_mask); return netdev_alloc_frag(fp->rx_frag_size); diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c index ada724aab3d5..d4c3e5512dd5 100644 --- a/drivers/staging/android/ion/ion_system_heap.c +++ b/drivers/staging/android/ion/ion_system_heap.c @@ -27,7 +27,7 @@ #include "ion_priv.h" static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN | - __GFP_NORETRY) & ~__GFP_WAIT; + __GFP_NORETRY) & ~__GFP_DIRECT_RECLAIM; static gfp_t low_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN); static const unsigned int orders[] = {8, 4, 
0}; static const int num_orders = ARRAY_SIZE(orders); diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h index 6af733de69ca..f0b0423a716b 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h @@ -95,7 +95,7 @@ do { \ do { \ LASSERT(!in_interrupt() || \ ((size) <= LIBCFS_VMALLOC_SIZE && \ - ((mask) & __GFP_WAIT) == 0)); \ + !gfpflags_allow_blocking(mask))); \ } while (0) #define LIBCFS_ALLOC_POST(ptr, size) \ diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c index 0a94895a358d..692ccc69345e 100644 --- a/drivers/usb/host/u132-hcd.c +++ b/drivers/usb/host/u132-hcd.c @@ -2244,7 +2244,7 @@ static int u132_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, { struct u132 *u132 = hcd_to_u132(hcd); if (irqs_disabled()) { - if (__GFP_WAIT & mem_flags) { + if (gfpflags_allow_blocking(mem_flags)) { printk(KERN_ERR "invalid context for function that might sleep\n"); return -EINVAL; } diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c index 6b70d7f62b2f..1c1e95a0b8fa 100644 --- a/drivers/video/fbdev/vermilion/vermilion.c +++ b/drivers/video/fbdev/vermilion/vermilion.c @@ -99,7 +99,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order, * below the first 16MB. */ - flags = __GFP_DMA | __GFP_HIGH; + flags = __GFP_DMA | __GFP_HIGH | __GFP_KSWAPD_RECLAIM; va->logical = __get_free_pages(flags, --max_order); } while (va->logical == 0 && max_order > min_order); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1e60d00d4ea7..c339d561e596 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2572,7 +2572,7 @@ int open_ctree(struct super_block *sb, fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ /* readahead state */ - INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); spin_lock_init(&fs_info->reada_lock); fs_info->thread_pool_size = min_t(unsigned long, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3915c9473e94..032abfbebe76 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -594,7 +594,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) clear = 1; again: - if (!prealloc && (mask & __GFP_WAIT)) { + if (!prealloc && gfpflags_allow_blocking(mask)) { /* * Don't care for allocation failure here because we might end * up not needing the pre-allocated extent state at all, which @@ -718,7 +718,7 @@ search_again: if (start > end) goto out; spin_unlock(&tree->lock); - if (mask & __GFP_WAIT) + if (gfpflags_allow_blocking(mask)) cond_resched(); goto again; } @@ -850,7 +850,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, bits |= EXTENT_FIRST_DELALLOC; again: - if (!prealloc && (mask & __GFP_WAIT)) { + if (!prealloc && gfpflags_allow_blocking(mask)) { prealloc = alloc_extent_state(mask); BUG_ON(!prealloc); } @@ -1028,7 +1028,7 @@ search_again: if (start > end) goto out; spin_unlock(&tree->lock); - if (mask & __GFP_WAIT) + if (gfpflags_allow_blocking(mask)) cond_resched(); goto again; } @@ -1076,7 +1076,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, btrfs_debug_check_extent_io_range(tree, start, end); again: - if (!prealloc && (mask & 
__GFP_WAIT)) { + if (!prealloc && gfpflags_allow_blocking(mask)) { /* * Best effort, don't worry if extent state allocation fails * here for the first iteration. We might have a cached state @@ -1253,7 +1253,7 @@ search_again: if (start > end) goto out; spin_unlock(&tree->lock); - if (mask & __GFP_WAIT) + if (gfpflags_allow_blocking(mask)) cond_resched(); first_iteration = false; goto again; @@ -4319,7 +4319,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; - if ((mask & __GFP_WAIT) && + if (gfpflags_allow_blocking(mask) && page->mapping->host->i_size > 16 * 1024 * 1024) { u64 len; while (start <= end) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6fc735869c18..e023919b4470 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -156,8 +156,8 @@ static struct btrfs_device *__alloc_device(void) spin_lock_init(&dev->reada_lock); atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); - INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); return dev; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a63c7b0a10cf..49f6c78ee3af 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1058,7 +1058,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, return 0; if (journal) return jbd2_journal_try_to_free_buffers(journal, page, - wait & ~__GFP_WAIT); + wait & ~__GFP_DIRECT_RECLAIM); return try_to_free_buffers(page); } diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index d403c69bee08..4304072161aa 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -111,7 +111,7 @@ struct fscache_cookie *__fscache_acquire_cookie( /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ - INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); switch (cookie->def->type) { case FSCACHE_COOKIE_TYPE_INDEX: diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 483bbc613bf0..79483b3d8c6f 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -58,7 +58,7 @@ bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page) /* * decide whether a page can be released, possibly by cancelling a store to it - * - we're allowed to sleep if __GFP_WAIT is flagged + * - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged */ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, struct page *page, @@ -122,7 +122,7 @@ page_busy: * allocator as the work threads writing to the cache may all end up * sleeping on memory allocation, so we may need to impose a timeout * too. 
*/ - if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) { + if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) { fscache_stat(&fscache_n_store_vmscan_busy); return false; } @@ -132,7 +132,7 @@ page_busy: _debug("fscache writeout timeout page: %p{%lx}", page, page->index); - gfp &= ~__GFP_WAIT; + gfp &= ~__GFP_DIRECT_RECLAIM; goto try_again; } EXPORT_SYMBOL(__fscache_maybe_release_page); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6b8338ec2464..89463eee6791 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1937,8 +1937,8 @@ out: * @journal: journal for operation * @page: to try and free * @gfp_mask: we use the mask to detect how hard should we try to release - * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to - * release the buffers. + * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit + * code to release the buffers. * * * For all the buffers on this page, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 37f639d50af5..93e236429c5d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -473,8 +473,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); /* Always try to initiate a 'commit' if relevant, but only - * wait for it if __GFP_WAIT is set. Even then, only wait 1 - * second and only if the 'bdi' is not congested. + * wait for it if the caller allows blocking. Even then, + * only wait 1 second and only if the 'bdi' is not congested. * Waiting indefinitely can cause deadlocks when the NFS * server is on this machine, when a new TCP connection is * needed and in other rare cases. There is no particular @@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp) if (mapping) { struct nfs_server *nfss = NFS_SERVER(mapping->host); nfs_commit_inode(mapping->host, 0); - if ((gfp & __GFP_WAIT) && + if (gfpflags_allow_blocking(gfp) && !bdi_write_congested(&nfss->backing_dev_info)) { wait_on_page_bit_killable_timeout(page, PG_private, HZ); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index eac9549efd52..587174fd4f2c 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -525,7 +525,7 @@ xfs_qm_shrink_scan( unsigned long freed; int error; - if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) + if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM)) return 0; INIT_LIST_HEAD(&isol.buffers); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 440fca3e7e5d..b56e811b6f7c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -29,12 +29,13 @@ struct vm_area_struct; #define ___GFP_NOMEMALLOC 0x10000u #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u -#define ___GFP_WAIT 0x80000u +#define ___GFP_ATOMIC 0x80000u #define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u -#define ___GFP_NO_KSWAPD 0x400000u +#define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_OTHER_NODE 0x800000u #define ___GFP_WRITE 0x1000000u +#define ___GFP_KSWAPD_RECLAIM 0x2000000u /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* @@ -71,7 +72,7 @@ struct vm_area_struct; * __GFP_MOVABLE: Flag that this page will be movable by the page migration * mechanism or reclaimed */ -#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */ +#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) /* Caller cannot wait or reschedule */ #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? 
*/ #define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ #define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ @@ -94,23 +95,37 @@ struct vm_area_struct; #define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ -#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ +/* + * A caller that is willing to wait may enter direct reclaim and will + * wake kswapd to reclaim pages in the background until the high + * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to + * avoid unnecessary delays when a fallback option is available but + * still allow kswapd to reclaim in the background. The kswapd flag + * can be cleared when the reclaiming of pages would cause unnecessary + * disruption. + */ +#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ + /* * This may seem redundant, but it's a way of annotating false positives vs. * allocations that simply cannot be supported (e.g. page tables). */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) -/* This equals 0, but use constants in case they ever change */ -#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) -/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */ -#define GFP_ATOMIC (__GFP_HIGH) +/* + * GFP_ATOMIC callers can not sleep, need the allocation to succeed. 
+ * A lower watermark is applied to allow access to "atomic reserves" + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) @@ -119,10 +134,10 @@ struct vm_area_struct; #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) -#define GFP_IOFS (__GFP_IO | __GFP_FS) -#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ - __GFP_NO_KSWAPD) +#define GFP_IOFS (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM) +#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ + ~__GFP_KSWAPD_RECLAIM) /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -164,6 +179,11 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } +static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) +{ + return gfp_flags & __GFP_DIRECT_RECLAIM; +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 24f4dfd94c51..4355129fff91 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb) static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); @@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb) */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, pri); @@ -1344,7 +1344,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) static inline struct sk_buff *skb_unshare(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); diff --git a/include/net/sock.h b/include/net/sock.h index f570e75e3da9..bbf7c2cf15b4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2041,7 +2041,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (sk->sk_allocation & __GFP_WAIT) + if (gfpflags_allow_blocking(sk->sk_allocation)) return ¤t->task_frag; return &sk->sk_frag; diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index d6fd8e5b14b7..dde6bf092c8a 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -20,7 +20,7 @@ {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ - {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \ + {(unsigned long)__GFP_ATOMIC, "GFP_ATOMIC"}, \ {(unsigned long)__GFP_IO, "GFP_IO"}, \ {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ @@ -36,7 +36,8 @@ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ {(unsigned 
long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ - {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ + {(unsigned long)__GFP_DIRECT_RECLAIM, "GFP_DIRECT_RECLAIM"}, \ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "GFP_KSWAPD_RECLAIM"}, \ {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ ) : "GFP_NOWAIT" diff --git a/kernel/audit.c b/kernel/audit.c index 8a056a32ded7..5ffcbd354a52 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1371,16 +1371,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(audit_filter_type(type))) return NULL; - if (gfp_mask & __GFP_WAIT) { + if (gfp_mask & __GFP_DIRECT_RECLAIM) { if (audit_pid && audit_pid == current->pid) - gfp_mask &= ~__GFP_WAIT; + gfp_mask &= ~__GFP_DIRECT_RECLAIM; else reserve = 0; } while (audit_backlog_limit && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { - if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) { + if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) { long sleep_time; sleep_time = timeout_start + audit_backlog_wait_time - jiffies; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b9d0cce3f9ce..f1603c153890 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -299,7 +299,7 @@ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end, idr_preload(gfp_mask); spin_lock_bh(&cgroup_idr_lock); - ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT); + ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM); spin_unlock_bh(&cgroup_idr_lock); idr_preload_end(); return ret; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4e49cc4c9952..deae3907ac1e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2738,7 +2738,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) return; /* no reclaim without waiting on it */ - if (!(gfp_mask & __GFP_WAIT)) + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) return; /* this guy won't enter reclaim */ diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5235dd4e1e2f..3a970604308f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1779,7 +1779,7 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) while (to_alloc-- > 0) { struct page *page; - page = alloc_image_page(__GFP_HIGHMEM); + page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM); memory_bm_set_bit(bm, page_to_pfn(page)); } return nr_highmem; diff --git a/kernel/smp.c b/kernel/smp.c index 07854477c164..d903c02223af 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -669,7 +669,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), cpumask_var_t cpus; int cpu, ret; - might_sleep_if(gfp_flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp_flags)); if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) { preempt_disable(); diff --git a/lib/idr.c b/lib/idr.c index 5335c43adf46..6098336df267 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -399,7 +399,7 @@ void idr_preload(gfp_t gfp_mask) * allocation guarantee. Disallow usage from those contexts. 
*/ WARN_ON_ONCE(in_interrupt()); - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); preempt_disable(); @@ -453,7 +453,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) struct idr_layer *pa[MAX_IDR_LEVEL + 1]; int id; - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); /* sanity checks */ if (WARN_ON_ONCE(start < 0)) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f9ebe1c82060..fcf5d98574ce 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -188,7 +188,7 @@ radix_tree_node_alloc(struct radix_tree_root *root) * preloading in the interrupt anyway as all the allocations have to * be atomic. So just do normal allocation when in interrupt. */ - if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) { + if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; /* @@ -249,7 +249,7 @@ radix_tree_node_free(struct radix_tree_node *node) * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without - * __GFP_WAIT being passed to INIT_RADIX_TREE(). + * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ static int __radix_tree_preload(gfp_t gfp_mask) { @@ -286,12 +286,12 @@ out: * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without - * __GFP_WAIT being passed to INIT_RADIX_TREE(). + * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { /* Warn on non-sensical use... */ - WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT)); + WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); return __radix_tree_preload(gfp_mask); } EXPORT_SYMBOL(radix_tree_preload); @@ -303,7 +303,7 @@ EXPORT_SYMBOL(radix_tree_preload); */ int radix_tree_maybe_preload(gfp_t gfp_mask) { - if (gfp_mask & __GFP_WAIT) + if (gfpflags_allow_blocking(gfp_mask)) return __radix_tree_preload(gfp_mask); /* Preloading doesn't help anything with this gfp mask, skip it */ preempt_disable(); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 619984fc07ec..8ed2ffd963c5 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -637,7 +637,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, { struct bdi_writeback *wb; - might_sleep_if(gfp & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (!memcg_css->parent) return &bdi->wb; diff --git a/mm/dmapool.c b/mm/dmapool.c index 312a716fa14c..57312b5d6e12 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -326,7 +326,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, size_t offset; void *retval; - might_sleep_if(mem_flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(mem_flags)); spin_lock_irqsave(&pool->lock, flags); list_for_each_entry(page, &pool->page_list, page_list) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bc502e590366..05374f09339c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2046,7 +2046,7 @@ retry: if (unlikely(task_in_memcg_oom(current))) goto nomem; - if (!(gfp_mask & __GFP_WAIT)) + if (!gfpflags_allow_blocking(gfp_mask)) goto nomem; mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); @@ -4364,8 +4364,8 @@ static int mem_cgroup_do_precharge(unsigned long count) { int ret; - /* Try a single bulk charge without reclaim first */ - ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); + /* Try a single bulk charge without reclaim first, kswapd may wake */ + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count); 
if (!ret) { mc.precharge += count; return ret; diff --git a/mm/mempool.c b/mm/mempool.c index 4c533bc51d73..004d42b1dfaf 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -320,13 +320,13 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) gfp_t gfp_temp; VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ gfp_mask |= __GFP_NOWARN; /* failures are OK */ - gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); + gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO); repeat_alloc: @@ -349,7 +349,7 @@ repeat_alloc: } /* - * We use gfp mask w/o __GFP_WAIT or IO for the first round. If + * We use gfp mask w/o direct reclaim or IO for the first round. If * alloc failed with that and @pool was empty, retry immediately. */ if (gfp_temp != gfp_mask) { @@ -358,8 +358,8 @@ repeat_alloc: goto repeat_alloc; } - /* We must not sleep if !__GFP_WAIT */ - if (!(gfp_mask & __GFP_WAIT)) { + /* We must not sleep if !__GFP_DIRECT_RECLAIM */ + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { spin_unlock_irqrestore(&pool->lock, flags); return NULL; } diff --git a/mm/migrate.c b/mm/migrate.c index 2834faba719a..e60379eb23f8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1578,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & - ~GFP_IOFS, 0); + ~(__GFP_IO | __GFP_FS), 0); return newpage; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 67390988881a..70461f3e3378 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -169,12 +169,12 @@ void pm_restrict_gfp_mask(void) WARN_ON(!mutex_is_locked(&pm_mutex)); WARN_ON(saved_gfp_mask); saved_gfp_mask = gfp_allowed_mask; - gfp_allowed_mask &= ~GFP_IOFS; + gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS); } bool pm_suspended_storage(void) { - if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS) + if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) return false; return true; } @@ -2183,7 +2183,7 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) return false; if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM)) return false; - if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT)) + if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; return should_fail(&fail_page_alloc.attr, 1 << order); @@ -2685,7 +2685,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) if (test_thread_flag(TIF_MEMDIE) || (current->flags & (PF_MEMALLOC | PF_EXITING))) filter &= ~SHOW_MEM_FILTER_NODES; - if (in_interrupt() || !(gfp_mask & __GFP_WAIT)) + if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM)) filter &= ~SHOW_MEM_FILTER_NODES; if (fmt) { @@ -2945,7 +2945,6 @@ static inline int gfp_to_alloc_flags(gfp_t gfp_mask) { int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; - const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD)); /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); @@ -2954,11 +2953,11 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * The caller may dip into page reserves a bit more if the caller * cannot run direct reclaim, or if the caller has realtime scheduling * policy or is asking for __GFP_HIGH memory. 
GFP_ATOMIC requests will - * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH). + * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH). */ alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); - if (atomic) { + if (gfp_mask & __GFP_ATOMIC) { /* * Not worth trying to allocate harder for __GFP_NOMEMALLOC even * if it can't schedule. @@ -2995,11 +2994,16 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS); } +static inline bool is_thp_gfp_mask(gfp_t gfp_mask) +{ + return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE; +} + static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { - const gfp_t wait = gfp_mask & __GFP_WAIT; + bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; struct page *page = NULL; int alloc_flags; unsigned long pages_reclaimed = 0; @@ -3019,16 +3023,24 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, return NULL; } + /* + * We also sanity check to catch abuse of atomic reserves being used by + * callers that are not in atomic context. + */ + if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) == + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) + gfp_mask &= ~__GFP_ATOMIC; + /* * If this allocation cannot block and it is for a specific node, then * fail early. There's no need to wakeup kswapd or retry for a * speculative node-specific allocation. */ - if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait) + if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim) goto nopage; retry: - if (!(gfp_mask & __GFP_NO_KSWAPD)) + if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, ac); /* @@ -3071,8 +3083,8 @@ retry: } } - /* Atomic allocations - we can't balance anything */ - if (!wait) { + /* Caller is not willing to reclaim, we can't balance anything */ + if (!can_direct_reclaim) { /* * All existing users of the deprecated __GFP_NOFAIL are * blockable, so warn of any new users that actually allow this @@ -3102,7 +3114,7 @@ retry: goto got_pg; /* Checks for THP-specific high-order allocations */ - if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) { + if (is_thp_gfp_mask(gfp_mask)) { /* * If compaction is deferred for high-order allocations, it is * because sync compaction recently failed. If this is the case @@ -3137,8 +3149,7 @@ retry: * fault, so use asynchronous memory compaction for THP unless it is * khugepaged trying to collapse. */ - if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE || - (current->flags & PF_KTHREAD)) + if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD)) migration_mode = MIGRATE_SYNC_LIGHT; /* Try direct reclaim and then allocating */ @@ -3209,7 +3220,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, lockdep_trace_alloc(gfp_mask); - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); if (should_fail_alloc_page(gfp_mask, order)) return NULL; diff --git a/mm/slab.c b/mm/slab.c index 272e809404d5..a9ef77d19a9a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1031,12 +1031,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) } /* - * Construct gfp mask to allocate from a specific node but do not invoke reclaim - * or warn about failures. + * Construct gfp mask to allocate from a specific node but do not direct reclaim + * or warn about failures. kswapd may still wake to reclaim in the background. 
*/ static inline gfp_t gfp_exact_node(gfp_t flags) { - return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT; + return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM; } #endif @@ -2633,7 +2633,7 @@ static int cache_grow(struct kmem_cache *cachep, offset *= cachep->colour_off; - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_enable(); /* @@ -2663,7 +2663,7 @@ static int cache_grow(struct kmem_cache *cachep, cache_init_objs(cachep, page); - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); check_irq_off(); spin_lock(&n->list_lock); @@ -2677,7 +2677,7 @@ static int cache_grow(struct kmem_cache *cachep, opps1: kmem_freepages(cachep, page); failed: - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); return 0; } @@ -2869,7 +2869,7 @@ force_grow: static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) { - might_sleep_if(flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(flags)); #if DEBUG kmem_flagcheck(cachep, flags); #endif @@ -3057,11 +3057,11 @@ retry: */ struct page *page; - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_enable(); kmem_flagcheck(cache, flags); page = kmem_getpages(cache, local_flags, numa_mem_id()); - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); if (page) { /* diff --git a/mm/slub.c b/mm/slub.c index 75a5fa92ac2a..97695622a858 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1265,7 +1265,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, { flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); - might_sleep_if(flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(flags)); if (should_failslab(s->object_size, flags, s->flags)) return NULL; @@ -1353,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) flags &= gfp_allowed_mask; - if (flags & __GFP_WAIT) + if (gfpflags_allow_blocking(flags)) local_irq_enable(); flags |= s->allocflags; @@ -1363,8 +1363,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) * so we fall-back to the minimum order allocation. */ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; - if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min)) - alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT; + if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) + alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM; page = alloc_slab_page(s, alloc_gfp, node, oo); if (unlikely(!page)) { @@ -1424,7 +1424,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) page->frozen = 1; out: - if (flags & __GFP_WAIT) + if (gfpflags_allow_blocking(flags)) local_irq_disable(); if (!page) return NULL; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9db9ef5e8481..7ee94dc10000 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1617,7 +1617,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, goto fail; } area->pages[i] = page; - if (gfp_mask & __GFP_WAIT) + if (gfpflags_allow_blocking(gfp_mask)) cond_resched(); } diff --git a/mm/vmscan.c b/mm/vmscan.c index e0cd7eed4e38..2aec4241b42a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1476,7 +1476,7 @@ static int too_many_isolated(struct zone *zone, int file, * won't get blocked by normal direct-reclaimers, forming a circular * deadlock. 
*/ - if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS) + if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) inactive >>= 3; return isolated > inactive; @@ -3791,7 +3791,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) /* * Do not scan if the allocation should not be delayed. */ - if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC)) + if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) return ZONE_RECLAIM_NOSCAN; /* diff --git a/mm/zswap.c b/mm/zswap.c index 4043df7c672f..e54166d3732e 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -571,7 +571,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { struct zswap_pool *pool; - gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; + gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) { @@ -1011,7 +1011,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, /* store */ len = dlen + sizeof(struct zswap_header); ret = zpool_malloc(entry->pool->zpool, len, - __GFP_NORETRY | __GFP_NOWARN, &handle); + __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM, + &handle); if (ret == -ENOSPC) { zswap_reject_compress_poor++; goto put_dstmem; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fab4599ba8b2..aa41e6dd6429 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, len += NET_SKB_PAD; if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || - (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; @@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, len += NET_SKB_PAD + NET_IP_ALIGN; if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || - (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; @@ -4452,7 +4452,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, return NULL; gfp_head = gfp_mask; - if (gfp_head & __GFP_WAIT) + if (gfp_head & __GFP_DIRECT_RECLAIM) gfp_head |= __GFP_REPEAT; *errcode = -ENOBUFS; @@ -4467,7 +4467,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, while (order) { if (npages >= 1 << order) { - page = alloc_pages((gfp_mask & ~__GFP_WAIT) | + page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, diff --git a/net/core/sock.c b/net/core/sock.c index 7529eb9463be..1e4dd54bfb5a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1944,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER) { - pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | - __GFP_NOWARN | __GFP_NORETRY, + /* Avoid direct reclaim but allow kswapd to wake */ + pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | + __GFP_COMP | __GFP_NOWARN | + __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fafe33bdb619..59651af8cc27 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct 
sk_buff *skb, u32 portid consume_skb(info.skb2); if (info.delivered) { - if (info.congested && (allocation & __GFP_WAIT)) + if (info.congested && gfpflags_allow_blocking(allocation)) yield(); return 0; } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 96744b75db93..977fb86065b7 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, gfp_t slab_mask = GFP_NOWAIT; gfp_t page_mask = GFP_NOWAIT; - if (gfp & __GFP_WAIT) { + if (gfp & __GFP_DIRECT_RECLAIM) { slab_mask = GFP_KERNEL; page_mask = GFP_HIGHUSER; } @@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) struct ib_recv_wr *failed_wr; unsigned int posted = 0; int ret = 0; - bool can_wait = !!(gfp & __GFP_WAIT); + bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); u32 pos; /* the goal here is to just make sure that someone, somewhere diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 692b3e67fb54..6c71ed1caf16 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, if (bundle->num_conns >= 20) { _debug("too many conns"); - if (!(gfp & __GFP_WAIT)) { + if (!gfpflags_allow_blocking(gfp)) { _leave(" = -EAGAIN"); return -EAGAIN; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b00f1f9611d6..559afd0ee7de 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1590,7 +1590,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - bool preload = !!(gfp & __GFP_WAIT); + bool preload = gfpflags_allow_blocking(gfp); int ret; /* If the id is already assigned, keep it. */ -- cgit v1.2.3 From 40113370836e8e79befa585277296ed42781ef31 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:25 -0800 Subject: mm: page_alloc: remove GFP_IOFS GFP_IOFS was intended to be shorthand for clearing two flags, not a set of allocation flags. There is only one user of this flag combination now and there appears to be no reason why Lustre had to be protected from reclaim stalls. As none of the sites appear to be atomic, this patch simply deletes GFP_IOFS and converts Lustre to using GFP_KERNEL, GFP_NOFS or GFP_NOIO as appropriate. 
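The conversion pattern itself is mechanical; as a rough sketch of the three replacement classes (hypothetical call sites for illustration, not the Lustre hunks below):

	/* plain process context with no locking constraints */
	buf = kmalloc(size, GFP_KERNEL);

	/* on a filesystem path: must not recurse into filesystem code */
	ext = kmem_cache_alloc(cachep, GFP_NOFS | __GFP_ZERO);

	/* on the block I/O path: must not recurse into the I/O layer */
	page = alloc_page(GFP_NOIO);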
Signed-off-by: Mel Gorman Cc: Oleg Drokin Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/lustre/lnet/lnet/router.c | 2 +- drivers/staging/lustre/lnet/selftest/conrpc.c | 2 +- drivers/staging/lustre/lnet/selftest/rpc.c | 2 +- drivers/staging/lustre/lustre/libcfs/module.c | 2 +- drivers/staging/lustre/lustre/libcfs/tracefile.c | 2 +- drivers/staging/lustre/lustre/llite/remote_perm.c | 2 +- drivers/staging/lustre/lustre/mgc/mgc_request.c | 8 ++++---- drivers/staging/lustre/lustre/obdecho/echo_client.c | 2 +- drivers/staging/lustre/lustre/osc/osc_cache.c | 2 +- include/linux/gfp.h | 1 - 10 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index fe49f1b87652..4ea651c6db3a 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c @@ -1245,7 +1245,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) for (i = 0; i < npages; i++) { page = alloc_pages_node( cfs_cpt_spread_node(lnet_cpt_table(), cpt), - __GFP_ZERO | GFP_IOFS, 0); + GFP_KERNEL | __GFP_ZERO, 0); if (page == NULL) { while (--i >= 0) __free_page(rb->rb_kiov[i].kiov_page); diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index 0060ff64f88e..64a0335934f3 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -860,7 +860,7 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, bulk->bk_iovs[i].kiov_offset = 0; bulk->bk_iovs[i].kiov_len = len; bulk->bk_iovs[i].kiov_page = - alloc_page(GFP_IOFS); + alloc_page(GFP_KERNEL); if (bulk->bk_iovs[i].kiov_page == NULL) { lstcon_rpc_put(*crpc); diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index 162f9d330496..7005002c15da 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -146,7 +146,7 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink) int nob; pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt), - GFP_IOFS, 0); + GFP_KERNEL, 0); if (pg == NULL) { CERROR("Can't allocate page %d of %d\n", i, bulk_npg); srpc_free_bulk(bk); diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c index 50e8fd23fa17..07a68594c279 100644 --- a/drivers/staging/lustre/lustre/libcfs/module.c +++ b/drivers/staging/lustre/lustre/libcfs/module.c @@ -319,7 +319,7 @@ static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *a struct libcfs_ioctl_data *data; int err = 0; - LIBCFS_ALLOC_GFP(buf, 1024, GFP_IOFS); + LIBCFS_ALLOC_GFP(buf, 1024, GFP_KERNEL); if (buf == NULL) return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.c b/drivers/staging/lustre/lustre/libcfs/tracefile.c index 973c7c209dfc..f2d018d7823c 100644 --- a/drivers/staging/lustre/lustre/libcfs/tracefile.c +++ b/drivers/staging/lustre/lustre/libcfs/tracefile.c @@ -810,7 +810,7 @@ int cfs_trace_allocate_string_buffer(char **str, int nob) if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */ return -EINVAL; - *str = kmalloc(nob, GFP_IOFS | __GFP_ZERO); + *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO); if (*str == NULL) return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c index 
c902133dfc97..fe4a72268e3a 100644 --- a/drivers/staging/lustre/lustre/llite/remote_perm.c +++ b/drivers/staging/lustre/lustre/llite/remote_perm.c @@ -82,7 +82,7 @@ static struct hlist_head *alloc_rmtperm_hash(void) struct hlist_head *hash; int i; - hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_IOFS | __GFP_ZERO); + hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_NOFS | __GFP_ZERO); if (!hash) return NULL; diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c index b81efcd997ae..5f53f3b7ceff 100644 --- a/drivers/staging/lustre/lustre/mgc/mgc_request.c +++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c @@ -1112,7 +1112,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc, LASSERT(cfg->cfg_instance != NULL); LASSERT(cfg->cfg_sb == cfg->cfg_instance); - inst = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + inst = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); if (!inst) return -ENOMEM; @@ -1308,14 +1308,14 @@ static int mgc_process_recover_log(struct obd_device *obd, if (cfg->cfg_last_idx == 0) /* the first time */ nrpages = CONFIG_READ_NRPAGES_INIT; - pages = kcalloc(nrpages, sizeof(*pages), GFP_NOFS); + pages = kcalloc(nrpages, sizeof(*pages), GFP_KERNEL); if (pages == NULL) { rc = -ENOMEM; goto out; } for (i = 0; i < nrpages; i++) { - pages[i] = alloc_page(GFP_IOFS); + pages[i] = alloc_page(GFP_KERNEL); if (pages[i] == NULL) { rc = -ENOMEM; goto out; @@ -1466,7 +1466,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc, if (cld->cld_cfg.cfg_sb) lsi = s2lsi(cld->cld_cfg.cfg_sb); - env = kzalloc(sizeof(*env), GFP_NOFS); + env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index b6f000bb8c82..f61ef669644c 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -1562,7 +1562,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, (oa->o_valid & OBD_MD_FLFLAGS) != 0 && (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0); - gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_IOFS : GFP_HIGHUSER; + gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? 
GFP_KERNEL : GFP_HIGHUSER; LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); LASSERT(lsm != NULL); diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index cfb83bcfcb17..b1d1a87f05e3 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -346,7 +346,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj) { struct osc_extent *ext; - ext = kmem_cache_alloc(osc_extent_kmem, GFP_IOFS | __GFP_ZERO); + ext = kmem_cache_alloc(osc_extent_kmem, GFP_NOFS | __GFP_ZERO); if (ext == NULL) return NULL; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b56e811b6f7c..86f9f7da86ea 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -134,7 +134,6 @@ struct vm_area_struct; #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) -#define GFP_IOFS (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ ~__GFP_KSWAPD_RECLAIM) -- cgit v1.2.3 From 71baba4b92dc1fa1bc461742c6ab1942ec6034e9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:28 -0800 Subject: mm, page_alloc: rename __GFP_WAIT to __GFP_RECLAIM The absence of __GFP_WAIT was used to signal that the caller was in atomic context and could not sleep. Now it is possible to distinguish between true atomic context and callers that are not willing to sleep. The latter should clear __GFP_DIRECT_RECLAIM so kswapd will still wake. As clearing __GFP_WAIT behaves differently, there is a risk that people will clear the wrong flags. This patch renames __GFP_WAIT to __GFP_RECLAIM to clearly indicate what it does -- setting it allows all reclaim activity, clearing it prevents it.
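As a minimal sketch of the three resulting combinations (illustrative call sites, not hunks from this patch):

	/* can sleep: direct reclaim and kswapd wakeup are both allowed */
	page = alloc_page(GFP_KERNEL);	/* GFP_KERNEL includes __GFP_RECLAIM */

	/* not willing to sleep: drop only direct reclaim, kswapd still wakes */
	page = alloc_page(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM);

	/* no reclaim of any kind: clearing __GFP_RECLAIM clears both bits */
	page = alloc_page(GFP_KERNEL & ~__GFP_RECLAIM);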
[akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Cc: Christoph Lameter Acked-by: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/blk-core.c | 4 ++-- block/blk-mq.c | 2 +- block/scsi_ioctl.c | 6 +++--- drivers/block/drbd/drbd_bitmap.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/paride/pd.c | 2 +- drivers/block/pktcdvd.c | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/ide/ide-atapi.c | 2 +- drivers/ide/ide-cd.c | 2 +- drivers/ide/ide-cd_ioctl.c | 2 +- drivers/ide/ide-devsets.c | 2 +- drivers/ide/ide-disk.c | 2 +- drivers/ide/ide-ioctls.c | 4 ++-- drivers/ide/ide-park.c | 2 +- drivers/ide/ide-pm.c | 4 ++-- drivers/ide/ide-tape.c | 4 ++-- drivers/ide/ide-taskfile.c | 4 ++-- drivers/infiniband/hw/qib/qib_init.c | 2 +- drivers/misc/vmw_balloon.c | 2 +- drivers/nvme/host/pci.c | 6 ++++-- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 4 ++-- drivers/staging/rdma/hfi1/init.c | 2 +- drivers/staging/rdma/ipath/ipath_file_ops.c | 2 +- fs/cachefiles/internal.h | 2 +- fs/direct-io.c | 2 +- fs/nilfs2/mdt.h | 2 +- include/linux/gfp.h | 16 ++++++++-------- kernel/power/swap.c | 16 ++++++++-------- lib/percpu_ida.c | 2 +- mm/failslab.c | 8 ++++---- mm/filemap.c | 2 +- mm/huge_memory.c | 2 +- mm/memcontrol.c | 2 +- mm/migrate.c | 2 +- mm/page_alloc.c | 9 +++++---- security/integrity/ima/ima_crypto.c | 2 +- 38 files changed, 71 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9e32f0868e36..590cca21c24a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp) if (percpu_ref_tryget_live(&q->q_usage_counter)) return 0; - if (!(gfp & __GFP_WAIT)) + if (!gfpflags_allow_blocking(gfp)) return -EBUSY; ret = wait_event_interruptible(q->mq_freeze_wq, @@ -2038,7 +2038,7 @@ void generic_make_request(struct bio *bio) do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) { + if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) { q->make_request_fn(q, bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index 68c0a3416b34..694f8703f83c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, - __GFP_WAIT|__GFP_HIGH, false, ctx, hctx); + __GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); ctx = alloc_data.ctx; hctx = alloc_data.hctx; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index dda653ce7b24..0774799942e0 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -444,7 +444,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } - rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); + rq = blk_get_request(q, in_len ? 
WRITE : READ, __GFP_RECLAIM); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto error_free_buffer; @@ -495,7 +495,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, break; } - if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) { + if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) { err = DRIVER_ERROR << 24; goto error; } @@ -536,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, struct request *rq; int err; - rq = blk_get_request(q, WRITE, __GFP_WAIT); + rq = blk_get_request(q, WRITE, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); blk_rq_set_block_pc(rq); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e5e0f19ceda0..3dc53a16ed3a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1007,7 +1007,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); + page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM); copy_highpage(page, b->bm_pages[page_nr]); bm_store_page_idx(page, page_nr); } else diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index f504232c1ee7..a28a562f7b7f 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; - rq = blk_mq_alloc_request(dd->queue, 0, __GFP_WAIT, true); + rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true); return blk_mq_rq_to_pdu(rq); } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index b9242d78283d..562b5a4ca7b7 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -723,7 +723,7 @@ static int pd_special_command(struct pd_unit *disk, struct request *rq; int err = 0; - rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); + rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7be2375db7f2..5959c2981cc7 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,14 +704,14 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * int ret = 0; rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? 
- WRITE : READ, __GFP_WAIT); + WRITE : READ, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); blk_rq_set_block_pc(rq); if (cgc->buflen) { ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, - __GFP_WAIT); + __GFP_RECLAIM); if (ret) goto out; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d58cb9e034fe..7e505d4be7c0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2216,7 +2216,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) mapping = file_inode(obj->base.filp)->i_mapping; gfp = mapping_gfp_mask(mapping); gfp |= __GFP_NORETRY | __GFP_NOWARN; - gfp &= ~(__GFP_IO | __GFP_WAIT); + gfp &= ~(__GFP_IO | __GFP_RECLAIM); sg = st->sgl; st->nents = 0; for (i = 0; i < page_count; i++) { diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 1362ad80a76c..05352f490d60 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, struct request *rq; int error; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->special = (char *)pc; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 64a6b827b3dd..ef907fd5ba98 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -441,7 +441,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, struct request *rq; int error; - rq = blk_get_request(drive->queue, write, __GFP_WAIT); + rq = blk_get_request(drive->queue, write, __GFP_RECLAIM); memcpy(rq->cmd, cmd, BLK_MAX_CDB); rq->cmd_type = REQ_TYPE_ATA_PC; diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 066e39036518..474173eb31bb 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -303,7 +303,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) struct request *rq; int ret; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_flags = REQ_QUIET; ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index b05a74d78ef5..0dd43b4fcec6 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -165,7 +165,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, if (!(setting->flags & DS_SYNC)) return setting->set(drive, arg); - rq = blk_get_request(q, READ, __GFP_WAIT); + rq = blk_get_request(q, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_len = 5; rq->cmd[0] = REQ_DEVSET_EXEC; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 56b9708894a5..37a8a907febe 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -477,7 +477,7 @@ static int set_multcount(ide_drive_t *drive, int arg) if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) return -EBUSY; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; drive->mult_req = arg; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index aa2e9b77b20d..d05db2469209 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) if (NULL == (void *) arg) { struct request *rq; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = 
blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; err = blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); @@ -221,7 +221,7 @@ static int generic_drive_reset(ide_drive_t *drive) struct request *rq; int ret = 0; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index c80868520488..2d7dca56dd24 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) } spin_unlock_irq(&hwif->lock); - rq = blk_get_request(q, READ, __GFP_WAIT); + rq = blk_get_request(q, READ, __GFP_RECLAIM); rq->cmd[0] = REQ_PARK_HEADS; rq->cmd_len = 1; rq->cmd_type = REQ_TYPE_DRV_PRIV; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 081e43458d50..e34af488693a 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -18,7 +18,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; @@ -88,7 +88,7 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; rq->cmd_flags |= REQ_PREEMPT; rq->special = &rqpm; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index f5d51d1d09ee..12fa04997dcc 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -852,7 +852,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); BUG_ON(size < 0 || size % tape->blk_size); - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd[13] = cmd; rq->rq_disk = tape->disk; @@ -860,7 +860,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) if (size) { ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, - __GFP_WAIT); + __GFP_RECLAIM); if (ret) goto out_put; } diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 0979e126fff1..a716693417a3 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -430,7 +430,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, int error; int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? 
READ : WRITE; - rq = blk_get_request(drive->queue, rw, __GFP_WAIT); + rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; /* @@ -441,7 +441,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, */ if (nsect) { error = blk_rq_map_kern(drive->queue, rq, buf, - nsect * SECTOR_SIZE, __GFP_WAIT); + nsect * SECTOR_SIZE, __GFP_RECLAIM); if (error) goto put_req; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 7e00470adc30..4ff340fe904f 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; egrcnt = rcd->rcvegrcnt; egroff = rcd->rcvegr_tid_base; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 89300870fefb..1e688bfec567 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -75,7 +75,7 @@ MODULE_LICENSE("GPL"); /* * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't - * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use + * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use * __GFP_NOWARN, to suppress page allocation failure warnings. */ #define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e878590e71b6..6c195554d94a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1025,11 +1025,13 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, req->special = (void *)0; if (buffer && bufflen) { - ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); + ret = blk_rq_map_kern(q, req, buffer, bufflen, + __GFP_DIRECT_RECLAIM); if (ret) goto out; } else if (ubuffer && bufflen) { - ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT); + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, + __GFP_DIRECT_RECLAIM); if (ret) goto out; bio = req->bio; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 66a96cd98b97..984ddcb4786d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1970,7 +1970,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) struct request *req; /* - * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a + * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a * request becomes available */ req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 126a48c6431e..dd8ad2a44510 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -222,13 +222,13 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int write = (data_direction == DMA_TO_DEVICE); int ret = DRIVER_ERROR << 24; - req = blk_get_request(sdev->request_queue, write, __GFP_WAIT); + req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM); if (IS_ERR(req)) return ret; blk_rq_set_block_pc(req); if (bufflen && blk_rq_map_kern(sdev->request_queue, req, - buffer, bufflen, __GFP_WAIT)) + buffer, bufflen, __GFP_RECLAIM)) goto out; req->cmd_len = COMMAND_SIZE(cmd[0]); diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 47a1202fcbdf..8666f3ad24e9 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ 
b/drivers/staging/rdma/hfi1/init.c @@ -1560,7 +1560,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; /* * The minimum size of the eager buffers is a groups of MTU-sized diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c index 5d9b9dbd8fc4..13c3cd11ab92 100644 --- a/drivers/staging/rdma/ipath/ipath_file_ops.c +++ b/drivers/staging/rdma/ipath/ipath_file_ops.c @@ -905,7 +905,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; egrcnt = dd->ipath_rcvegrcnt; /* TID number offset for this port */ diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index aecd0859eacb..9c4b737a54df 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -30,7 +30,7 @@ extern unsigned cachefiles_debug; #define CACHEFILES_DEBUG_KLEAVE 2 #define CACHEFILES_DEBUG_KDEBUG 4 -#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) +#define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC) /* * node records diff --git a/fs/direct-io.c b/fs/direct-io.c index 3ae0e0427191..18e7554cf94c 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -361,7 +361,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, /* * bio_alloc() is guaranteed to return a bio when called with - * __GFP_WAIT and we request a valid number of vectors. + * __GFP_RECLAIM and we request a valid number of vectors. */ bio = bio_alloc(GFP_KERNEL, nr_vecs); diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index fe529a87a208..03246cac3338 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -72,7 +72,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) } /* Default GFP flags using highmem */ -#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) +#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM) int nilfs_mdt_get_block(struct inode *, unsigned long, int, void (*init_block)(struct inode *, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 86f9f7da86ea..369227202ac2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -107,7 +107,7 @@ struct vm_area_struct; * can be cleared when the reclaiming of pages would cause unnecessary * disruption. 
*/ -#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ @@ -126,12 +126,12 @@ struct vm_area_struct; */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -#define GFP_NOIO (__GFP_WAIT) -#define GFP_NOFS (__GFP_WAIT | __GFP_IO) -#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ __GFP_RECLAIMABLE) -#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ @@ -143,12 +143,12 @@ struct vm_area_struct; #define GFP_MOVABLE_SHIFT 3 /* Control page allocator reclaim behavior */ -#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ +#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) /* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)) +#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b2066fb5b10f..12cd989dadf6 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -257,7 +257,7 @@ static int hib_submit_io(int rw, pgoff_t page_off, void *addr, struct bio *bio; int error = 0; - bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1); bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); bio->bi_bdev = hib_resume_bdev; @@ -356,7 +356,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) return -ENOSPC; if (hb) { - src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY); if (src) { copy_page(src, buf); @@ -364,7 +364,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) ret = hib_wait_io(hb); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_WAIT | + src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY); if (src) { @@ -672,7 +672,7 @@ static int save_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -975,7 +975,7 @@ static int get_swap_reader(struct swap_map_handle *handle, last = tmp; tmp->map = (struct swap_map_page *) - __get_free_page(__GFP_WAIT | __GFP_HIGH); + __get_free_page(__GFP_RECLAIM | __GFP_HIGH); if (!tmp->map) { release_swap_reader(handle); return 
-ENOMEM; @@ -1242,9 +1242,9 @@ static int load_image_lzo(struct swap_map_handle *handle, for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? - __GFP_WAIT | __GFP_HIGH : - __GFP_WAIT | __GFP_NOWARN | - __GFP_NORETRY); + __GFP_RECLAIM | __GFP_HIGH : + __GFP_RECLAIM | __GFP_NOWARN | + __GFP_NORETRY); if (!page[i]) { if (i < LZO_CMP_PAGES) { diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index f75715131f20..6d40944960de 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -135,7 +135,7 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not - * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep + * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * diff --git a/mm/failslab.c b/mm/failslab.c index 98fb490311eb..79171b4a5826 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -3,11 +3,11 @@ static struct { struct fault_attr attr; - bool ignore_gfp_wait; + bool ignore_gfp_reclaim; bool cache_filter; } failslab = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = true, + .ignore_gfp_reclaim = true, .cache_filter = false, }; @@ -16,7 +16,7 @@ bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) if (gfpflags & __GFP_NOFAIL) return false; - if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) + if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) return false; if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) @@ -42,7 +42,7 @@ static int __init failslab_debugfs_init(void) return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, - &failslab.ignore_gfp_wait)) + &failslab.ignore_gfp_reclaim)) goto fail; if (!debugfs_create_bool("cache-filter", mode, dir, &failslab.cache_filter)) diff --git a/mm/filemap.c b/mm/filemap.c index 58e04e26f996..6ef3674c0763 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2713,7 +2713,7 @@ EXPORT_SYMBOL(generic_file_write_iter); * page is known to the local caching routines. * * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). + * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). * */ int try_to_release_page(struct page *page, gfp_t gfp_mask) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f5c08b46fef8..9812d4618651 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -786,7 +786,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) { - return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp; + return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp; } /* Caller must hold page table lock. */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 05374f09339c..a5470674a477 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2120,7 +2120,7 @@ done_restock: /* * If the hierarchy is above the normal consumption range, schedule * reclaim on returning to userland. We can perform reclaim here - * if __GFP_WAIT but let's always punt for simplicity and so that + * if __GFP_RECLAIM but let's always punt for simplicity and so that * GFP_KERNEL can consistently be used during reclaim. 
@memcg is not recorded as it most likely matches current's and won't change in the meantime. As high limit is checked again before diff --git a/mm/migrate.c b/mm/migrate.c index e60379eb23f8..7890d0bb5e23 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1752,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, goto out_dropref; new_page = alloc_pages_node(node, - (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM, HPAGE_PMD_ORDER); if (!new_page) goto out_fail; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 70461f3e3378..1b373096b990 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2160,11 +2160,11 @@ static struct { struct fault_attr attr; bool ignore_gfp_highmem; - bool ignore_gfp_wait; + bool ignore_gfp_reclaim; u32 min_order; } fail_page_alloc = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = true, + .ignore_gfp_reclaim = true, .ignore_gfp_highmem = true, .min_order = 1, }; @@ -2183,7 +2183,8 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) return false; if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM)) return false; - if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM)) + if (fail_page_alloc.ignore_gfp_reclaim && + (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; return should_fail(&fail_page_alloc.attr, 1 << order); @@ -2202,7 +2203,7 @@ static int __init fail_page_alloc_debugfs(void) return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, - &fail_page_alloc.ignore_gfp_wait)) + &fail_page_alloc.ignore_gfp_reclaim)) goto fail; if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir, &fail_page_alloc.ignore_gfp_highmem)) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index e24121afb2f2..6eb62936c672 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -126,7 +126,7 @@ static void *ima_alloc_pages(loff_t max_size, size_t *allocated_size, { void *ptr; int order = ima_maxorder; - gfp_t gfp_mask = __GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY; + gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY; if (order) order = min(get_order(max_size), order); -- cgit v1.2.3 From f77cf4e4cc9d40310a7224a1a67c733aeec78836 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:31 -0800 Subject: mm, page_alloc: delete the zonelist_cache

The zonelist cache (zlc) was introduced to skip over zones that were recently known to be full. This avoided expensive operations such as the cpuset checks, watermark calculations and zone_reclaim. The situation today is different and the complexity of zlc is harder to justify.

1) The cpuset checks are no-ops unless a cpuset is active and in
   general are a lot cheaper.

2) zone_reclaim is now disabled by default and I suspect that was a
   large source of the cost that zlc wanted to avoid. When it is
   enabled, it's known to be a major source of stalling when nodes
   fill up and it's unwise to hit every other user with the overhead.

3) Watermark checks are expensive to calculate for high-order
   allocation requests. Later patches in this series will reduce the
   cost of the watermark checking.

4) The most important issue is that in the current implementation it
   is possible for a failed THP allocation to mark a zone full for
   order-0 allocations and cause a fallback to remote nodes; a sketch
   of this failure mode follows below.

The last issue could be addressed with additional complexity but as the benefit of zlc is questionable, it is better to remove it.
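To make issue 4) concrete, the failure mode in get_page_from_freelist() was roughly the following (an abridged sketch of the code being deleted, not the literal hunks):

	/* a failed order-9 THP attempt marks the zone full for up to a second */
	if (!zone_watermark_ok(zone, order, mark, classzone_idx, alloc_flags)) {
		zlc_mark_zone_full(zonelist, z);
		continue;
	}

	/* a later order-0 attempt then skips the same zone during its scan */
	if (zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes))
		continue;	/* possibly falling back to a remote node */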
If stalls due to zone_reclaim are ever reported then an alternative would be to introduce deferring logic based on a timeout inside zone_reclaim itself and leave the page allocator fast paths alone.

The impact on page-allocator microbenchmarks is negligible as they don't hit the paths where the zlc comes into play. Most page-reclaim related workloads showed no noticeable difference as a result of the removal. The impact was noticeable in a workload called "stutter". One part uses a lot of anonymous memory, a second measures mmap latency and a third copies a large file. In an ideal world the latency application would not notice the mmap latency. On a 2-node machine the results of this patch are:

stutter
                         4.3.0-rc1             4.3.0-rc1
                          baseline              nozlc-v4
Min          mmap      20.9243 (  0.00%)      20.7716 (  0.73%)
1st-qrtle    mmap      22.0612 (  0.00%)      22.0680 ( -0.03%)
2nd-qrtle    mmap      22.3291 (  0.00%)      22.3809 ( -0.23%)
3rd-qrtle    mmap      25.2244 (  0.00%)      25.2396 ( -0.06%)
Max-90%      mmap      48.0995 (  0.00%)      28.3713 ( 41.02%)
Max-93%      mmap      52.5557 (  0.00%)      36.0170 ( 31.47%)
Max-95%      mmap      55.8173 (  0.00%)      47.3163 ( 15.23%)
Max-99%      mmap      67.3781 (  0.00%)      70.1140 ( -4.06%)
Max          mmap   24447.6375 (  0.00%)   12915.1356 ( 47.17%)
Mean         mmap      33.7883 (  0.00%)      27.7944 ( 17.74%)
Best99%Mean  mmap      27.7825 (  0.00%)      25.2767 (  9.02%)
Best95%Mean  mmap      26.3912 (  0.00%)      23.7994 (  9.82%)
Best90%Mean  mmap      24.9886 (  0.00%)      23.2251 (  7.06%)
Best50%Mean  mmap      22.0157 (  0.00%)      22.0261 ( -0.05%)
Best10%Mean  mmap      21.6705 (  0.00%)      21.6083 (  0.29%)
Best5%Mean   mmap      21.5581 (  0.00%)      21.4611 (  0.45%)
Best1%Mean   mmap      21.3079 (  0.00%)      21.1631 (  0.68%)

Note that the maximum stall latency went from 24 seconds to 12 which is still bad but an improvement. The mileage varies considerably: a 2-node machine on an earlier test went from 494 seconds to 47 seconds and a 4-node machine that tested an earlier version of this patch went from a worst case stall time of 6 seconds to 67ms. The nature of the benchmark is inherently unpredictable as it is hammering the system and the mileage will vary between machines.

There is a secondary impact with potentially more direct reclaim because zones are now being considered instead of being skipped by zlc. In this particular test run it did not occur so will not be described. However, in at least one test the following was observed:

1. Direct reclaim rates were higher. This was likely due to direct
   reclaim being entered instead of the zlc disabling a zone and busy
   looping. Busy looping may have the effect of allowing kswapd to make
   more progress and in some cases may be better overall. If this is
   found then the correct action is to put direct reclaimers to sleep
   on a waitqueue and allow kswapd to make forward progress. Busy
   looping on the zlc is even worse than when the allocator used to
   blindly call congestion_wait().

2. There was higher swap activity as direct reclaim was active.

3. Direct reclaim efficiency was lower. This is related to 1 as more
   scanning activity also encountered more pages that could not be
   immediately reclaimed.

In that case, the direct page scan and reclaim rates are noticeable but it is not considered a problem for a few reasons:

1. The test is primarily concerned with latency. The mmap attempts are
   also faulted which means there are THP allocation requests. The ZLC
   could cause zones to be disabled causing the process to busy loop
   instead of reclaiming. This looks like elevated direct reclaim
   activity but it's the correct action to take based on what processes
   requested.

2. The test hammers reclaim and compaction heavily.
   The number of successful THP faults is highly variable but affects
   the reclaim stats. It's not a realistic or reasonable measure of
   page reclaim activity.

3. No other page-reclaim intensive workload that was tested showed
   a problem.

4. If a workload is identified that benefitted from the busy looping
   then it should be fixed by having direct reclaimers sleep on a wait
   queue until woken by kswapd instead of busy looping. We had this
   class of problem before when congestion_wait() with a fixed timeout
   was a brain damaged decision but happened to benefit some workloads.

If a workload is identified that relied on the zlc to busy loop then it should be fixed correctly and have a direct reclaimer sleep on a waitqueue until woken by kswapd.

Signed-off-by: Mel Gorman Acked-by: David Rientjes Acked-by: Christoph Lameter Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 74 ----------------- mm/page_alloc.c | 212 ------------------------------------------------- 2 files changed, 286 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 38bed71758ab..1e88aae329ff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -589,75 +589,8 @@ static inline bool zone_is_empty(struct zone *zone) * [1] : No fallback (__GFP_THISNODE) */ #define MAX_ZONELISTS 2 - - -/* - * We cache key information from each zonelist for smaller cache - * footprint when scanning for free pages in get_page_from_freelist(). - * - * 1) The BITMAP fullzones tracks which zones in a zonelist have come - * up short of free memory since the last time (last_fullzone_zap) - * we zero'd fullzones. - * 2) The array z_to_n[] maps each zone in the zonelist to its node - * id, so that we can efficiently evaluate whether that node is - * set in the current tasks mems_allowed. - * - * Both fullzones and z_to_n[] are one-to-one with the zonelist, - * indexed by a zones offset in the zonelist zones[] array. - * - * The get_page_from_freelist() routine does two scans. During the - * first scan, we skip zones whose corresponding bit in 'fullzones' - * is set or whose corresponding node in current->mems_allowed (which - * comes from cpusets) is not set. During the second scan, we bypass - * this zonelist_cache, to ensure we look methodically at each zone. - * - * Once per second, we zero out (zap) fullzones, forcing us to - * reconsider nodes that might have regained more free memory. - * The field last_full_zap is the time we last zapped fullzones. - * - * This mechanism reduces the amount of time we waste repeatedly - * reexaming zones for free memory when they just came up low on - * memory momentarilly ago. - * - * The zonelist_cache struct members logically belong in struct - * zonelist. However, the mempolicy zonelists constructed for - * MPOL_BIND are intentionally variable length (and usually much - * shorter). A general purpose mechanism for handling structs with - * multiple variable length members is more mechanism than we want - * here. We resort to some special case hackery instead. - * - * The MPOL_BIND zonelists don't need this zonelist_cache (in good - * part because they are shorter), so we put the fixed length stuff - * at the front of the zonelist struct, ending in a variable length - * zones[], as is needed by MPOL_BIND. - * - * Then we put the optional zonelist cache on the end of the zonelist - * struct.
This optional stuff is found by a 'zlcache_ptr' pointer in - * the fixed length portion at the front of the struct. This pointer - * both enables us to find the zonelist cache, and in the case of - * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL) - * to know that the zonelist cache is not there. - * - * The end result is that struct zonelists come in two flavors: - * 1) The full, fixed length version, shown below, and - * 2) The custom zonelists for MPOL_BIND. - * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache. - * - * Even though there may be multiple CPU cores on a node modifying - * fullzones or last_full_zap in the same zonelist_cache at the same - * time, we don't lock it. This is just hint data - if it is wrong now - * and then, the allocator will still function, perhaps a bit slower. - */ - - -struct zonelist_cache { - unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */ - DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */ - unsigned long last_full_zap; /* when last zap'd (jiffies) */ -}; #else #define MAX_ZONELISTS 1 -struct zonelist_cache; #endif /* @@ -675,9 +608,6 @@ struct zoneref { * allocation, the other zones are fallback zones, in decreasing * priority. * - * If zlcache_ptr is not NULL, then it is just the address of zlcache, - * as explained above. If zlcache_ptr is NULL, there is no zlcache. - * * * To speed the reading of the zonelist, the zonerefs contain the zone index * of the entry being read. Helper functions to access information given * a struct zoneref are @@ -687,11 +617,7 @@ struct zoneref { * zonelist_node_idx() - Return the index of the node for an entry */ struct zonelist { - struct zonelist_cache *zlcache_ptr; // NULL or &zlcache struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; -#ifdef CONFIG_NUMA - struct zonelist_cache zlcache; // optional ... -#endif }; #ifndef CONFIG_DISCONTIGMEM diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1b373096b990..8dc6e3cd40f0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2292,122 +2292,6 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, } #ifdef CONFIG_NUMA -/* - * zlc_setup - Setup for "zonelist cache". Uses cached zone data to - * skip over zones that are not allowed by the cpuset, or that have - * been recently (in last second) found to be nearly full. See further - * comments in mmzone.h. Reduces cache footprint of zonelist scans - * that have to skip over a lot of full or unallowed zones. - * - * If the zonelist cache is present in the passed zonelist, then - * returns a pointer to the allowed node mask (either the current - * tasks mems_allowed, or node_states[N_MEMORY].) - * - * If the zonelist cache is not available for this zonelist, does - * nothing and returns NULL. - * - * If the fullzones BITMAP in the zonelist cache is stale (more than - * a second since last zap'd) then we zap it out (clear its bits.) - * - * We hold off even calling zlc_setup, until after we've checked the - * first zone in the zonelist, on the theory that most allocations will - * be satisfied from that first zone, so best to examine that zone as - * quickly as we can. 
- */ -static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - nodemask_t *allowednodes; /* zonelist_cache approximation */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return NULL; - - if (time_after(jiffies, zlc->last_full_zap + HZ)) { - bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); - zlc->last_full_zap = jiffies; - } - - allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? - &cpuset_current_mems_allowed : - &node_states[N_MEMORY]; - return allowednodes; -} - -/* - * Given 'z' scanning a zonelist, run a couple of quick checks to see - * if it is worth looking at further for free memory: - * 1) Check that the zone isn't thought to be full (doesn't have its - * bit set in the zonelist_cache fullzones BITMAP). - * 2) Check that the zones node (obtained from the zonelist_cache - * z_to_n[] mapping) is allowed in the passed in allowednodes mask. - * Return true (non-zero) if zone is worth looking at further, or - * else return false (zero) if it is not. - * - * This check -ignores- the distinction between various watermarks, - * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ... If a zone is - * found to be full for any variation of these watermarks, it will - * be considered full for up to one second by all requests, unless - * we are so low on memory on all allowed nodes that we are forced - * into the second scan of the zonelist. - * - * In the second scan we ignore this zonelist cache and exactly - * apply the watermarks to all zones, even it is slower to do so. - * We are low on memory in the second scan, and should leave no stone - * unturned looking for a free page. - */ -static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z, - nodemask_t *allowednodes) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - int i; /* index of *z in zonelist zones */ - int n; /* node that zone *z is on */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return 1; - - i = z - zonelist->_zonerefs; - n = zlc->z_to_n[i]; - - /* This zone is worth trying if it is allowed but not full */ - return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones); -} - -/* - * Given 'z' scanning a zonelist, set the corresponding bit in - * zlc->fullzones, so that subsequent attempts to allocate a page - * from that zone don't waste time re-examining it. 
- */ -static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - int i; /* index of *z in zonelist zones */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return; - - i = z - zonelist->_zonerefs; - - set_bit(i, zlc->fullzones); -} - -/* - * clear all zones full, called after direct reclaim makes progress so that - * a zone that was recently full is not skipped over for up to a second - */ -static void zlc_clear_zones_full(struct zonelist *zonelist) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return; - - bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); -} - static bool zone_local(struct zone *local_zone, struct zone *zone) { return local_zone->node == zone->node; @@ -2418,28 +2302,7 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < RECLAIM_DISTANCE; } - #else /* CONFIG_NUMA */ - -static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) -{ - return NULL; -} - -static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z, - nodemask_t *allowednodes) -{ - return 1; -} - -static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z) -{ -} - -static void zlc_clear_zones_full(struct zonelist *zonelist) -{ -} - static bool zone_local(struct zone *local_zone, struct zone *zone) { return true; @@ -2449,7 +2312,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { return true; } - #endif /* CONFIG_NUMA */ static void reset_alloc_batches(struct zone *preferred_zone) @@ -2476,9 +2338,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, struct zoneref *z; struct page *page = NULL; struct zone *zone; - nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ - int zlc_active = 0; /* set if using zonelist_cache */ - int did_zlc_setup = 0; /* just call zlc_setup() one time */ int nr_fair_skipped = 0; bool zonelist_rescan; @@ -2493,9 +2352,6 @@ zonelist_scan: ac->nodemask) { unsigned long mark; - if (IS_ENABLED(CONFIG_NUMA) && zlc_active && - !zlc_zone_worth_trying(zonelist, z, allowednodes)) - continue; if (cpusets_enabled() && (alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed(zone, gfp_mask)) @@ -2553,28 +2409,8 @@ zonelist_scan: if (alloc_flags & ALLOC_NO_WATERMARKS) goto try_this_zone; - if (IS_ENABLED(CONFIG_NUMA) && - !did_zlc_setup && nr_online_nodes > 1) { - /* - * we do zlc_setup if there are multiple nodes - * and before considering the first zone allowed - * by the cpuset. - */ - allowednodes = zlc_setup(zonelist, alloc_flags); - zlc_active = 1; - did_zlc_setup = 1; - } - if (zone_reclaim_mode == 0 || !zone_allows_reclaim(ac->preferred_zone, zone)) - goto this_zone_full; - - /* - * As we may have just activated ZLC, check if the first - * eligible zone has failed zone_reclaim recently. - */ - if (IS_ENABLED(CONFIG_NUMA) && zlc_active && - !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; ret = zone_reclaim(zone, gfp_mask, order); @@ -2591,19 +2427,6 @@ zonelist_scan: ac->classzone_idx, alloc_flags)) goto try_this_zone; - /* - * Failed to reclaim enough to meet watermark. 
- * Only mark the zone full if checking the min - * watermark or if we failed to reclaim just - * 1<zonelist); - retry: page = get_page_from_freelist(gfp_mask, order, alloc_flags & ~ALLOC_NO_WATERMARKS, ac); @@ -4228,20 +4038,6 @@ static void build_zonelists(pg_data_t *pgdat) build_thisnode_zonelists(pgdat); } -/* Construct the zonelist performance cache - see further mmzone.h */ -static void build_zonelist_cache(pg_data_t *pgdat) -{ - struct zonelist *zonelist; - struct zonelist_cache *zlc; - struct zoneref *z; - - zonelist = &pgdat->node_zonelists[0]; - zonelist->zlcache_ptr = zlc = &zonelist->zlcache; - bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); - for (z = zonelist->_zonerefs; z->zone; z++) - zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z); -} - #ifdef CONFIG_HAVE_MEMORYLESS_NODES /* * Return node id of node used for "local" allocations. @@ -4302,12 +4098,6 @@ static void build_zonelists(pg_data_t *pgdat) zonelist->_zonerefs[j].zone_idx = 0; } -/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ -static void build_zonelist_cache(pg_data_t *pgdat) -{ - pgdat->node_zonelists[0].zlcache_ptr = NULL; -} - #endif /* CONFIG_NUMA */ /* @@ -4348,14 +4138,12 @@ static int __build_all_zonelists(void *data) if (self && !node_online(self->node_id)) { build_zonelists(self); - build_zonelist_cache(self); } for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); build_zonelists(pgdat); - build_zonelist_cache(pgdat); } /* -- cgit v1.2.3 From 974a786e63c96a2401a78ddba926f34c128474f1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:34 -0800 Subject: mm, page_alloc: remove MIGRATE_RESERVE MIGRATE_RESERVE preserves an old property of the buddy allocator that existed prior to fragmentation avoidance -- min_free_kbytes worth of pages tended to remain contiguous until the only alternative was to fail the allocation. At the time it was discovered that high-order atomic allocations relied on this property so MIGRATE_RESERVE was introduced. A later patch will introduce MIGRATE_HIGHATOMIC as an alternative, so this patch first deletes MIGRATE_RESERVE and its supporting code to make the replacement easier to review. Note that this patch in isolation may look like a false regression if someone was bisecting high-order atomic allocation failures. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 +--- mm/huge_memory.c | 2 +- mm/page_alloc.c | 148 +++---------------------------------------------- mm/vmstat.c | 1 - 4 files changed, 11 insertions(+), 150 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1e88aae329ff..b86cfa3313cf 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -39,8 +39,6 @@ enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, - MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ - MIGRATE_RESERVE = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way @@ -63,6 +61,8 @@ enum { MIGRATE_TYPES }; +#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) + #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else @@ -429,12 +429,6 @@ struct zone { const char *name; - /* - * Number of MIGRATE_RESERVE page block. To maintain for just - * optimization. Protected by zone->lock.
- */ - int nr_migrate_reserve_block; - #ifdef CONFIG_MEMORY_ISOLATION /* * Number of isolated pageblock. It is used to solve incorrect diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9812d4618651..dabd247df535 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void) for_each_populated_zone(zone) nr_zones++; - /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */ + /* Ensure 2 pageblocks are free to assist fragmentation avoidance */ recommended_min = pageblock_nr_pages * nr_zones * 2; /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8dc6e3cd40f0..588812614377 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -817,7 +817,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, if (unlikely(has_isolate_pageblock(zone))) mt = get_pageblock_migratetype(page); - /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); } while (--to_free && --batch_free && !list_empty(list)); @@ -1417,15 +1416,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * the free lists for the desirable migrate type are depleted */ static int fallbacks[MIGRATE_TYPES][4] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES }, #ifdef CONFIG_CMA - [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */ #endif - [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ #ifdef CONFIG_MEMORY_ISOLATION - [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */ #endif }; @@ -1598,7 +1596,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, *can_steal = false; for (i = 0;; i++) { fallback_mt = fallbacks[migratetype][i]; - if (fallback_mt == MIGRATE_RESERVE) + if (fallback_mt == MIGRATE_TYPES) break; if (list_empty(&area->free_list[fallback_mt])) @@ -1676,25 +1674,13 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order, { struct page *page; -retry_reserve: page = __rmqueue_smallest(zone, order, migratetype); - - if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { + if (unlikely(!page)) { if (migratetype == MIGRATE_MOVABLE) page = __rmqueue_cma_fallback(zone, order); if (!page) page = __rmqueue_fallback(zone, order, migratetype); - - /* - * Use MIGRATE_RESERVE rather than fail an allocation. 
goto - * is used because __rmqueue_smallest is an inline function - * and we want just one call site - */ - if (!page) { - migratetype = MIGRATE_RESERVE; - goto retry_reserve; - } } trace_mm_page_alloc_zone_locked(page, order, migratetype); @@ -3492,7 +3478,6 @@ static void show_migration_types(unsigned char type) [MIGRATE_UNMOVABLE] = 'U', [MIGRATE_RECLAIMABLE] = 'E', [MIGRATE_MOVABLE] = 'M', - [MIGRATE_RESERVE] = 'R', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif @@ -4302,120 +4287,6 @@ static inline unsigned long wait_table_bits(unsigned long size) return ffz(~size); } -/* - * Check if a pageblock contains reserved pages - */ -static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long pfn; - - for (pfn = start_pfn; pfn < end_pfn; pfn++) { - if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn))) - return 1; - } - return 0; -} - -/* - * Mark a number of pageblocks as MIGRATE_RESERVE. The number - * of blocks reserved is based on min_wmark_pages(zone). The memory within - * the reserve will tend to store contiguous free pages. Setting min_free_kbytes - * higher will lead to a bigger reserve which will get freed as contiguous - * blocks as reclaim kicks in - */ -static void setup_zone_migrate_reserve(struct zone *zone) -{ - unsigned long start_pfn, pfn, end_pfn, block_end_pfn; - struct page *page; - unsigned long block_migratetype; - int reserve; - int old_reserve; - - /* - * Get the start pfn, end pfn and the number of blocks to reserve - * We have to be careful to be aligned to pageblock_nr_pages to - * make sure that we always check pfn_valid for the first page in - * the block. - */ - start_pfn = zone->zone_start_pfn; - end_pfn = zone_end_pfn(zone); - start_pfn = roundup(start_pfn, pageblock_nr_pages); - reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> - pageblock_order; - - /* - * Reserve blocks are generally in place to help high-order atomic - * allocations that are short-lived. A min_free_kbytes value that - * would result in more than 2 reserve blocks for atomic allocations - * is assumed to be in place to help anti-fragmentation for the - * future allocation of hugepages at runtime. - */ - reserve = min(2, reserve); - old_reserve = zone->nr_migrate_reserve_block; - - /* When memory hot-add, we almost always need to do nothing */ - if (reserve == old_reserve) - return; - zone->nr_migrate_reserve_block = reserve; - - for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { - if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone))) - return; - - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - - /* Watch out for overlapping nodes */ - if (page_to_nid(page) != zone_to_nid(zone)) - continue; - - block_migratetype = get_pageblock_migratetype(page); - - /* Only test what is necessary when the reserves are not met */ - if (reserve > 0) { - /* - * Blocks with reserved pages will never free, skip - * them. 
- */ - block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); - if (pageblock_is_reserved(pfn, block_end_pfn)) - continue; - - /* If this block is reserved, account for it */ - if (block_migratetype == MIGRATE_RESERVE) { - reserve--; - continue; - } - - /* Suitable for reserving if this block is movable */ - if (block_migratetype == MIGRATE_MOVABLE) { - set_pageblock_migratetype(page, - MIGRATE_RESERVE); - move_freepages_block(zone, page, - MIGRATE_RESERVE); - reserve--; - continue; - } - } else if (!old_reserve) { - /* - * At boot time we don't need to scan the whole zone - * for turning off MIGRATE_RESERVE. - */ - break; - } - - /* - * If the reserve is met and this is a previous reserved block, - * take it back - */ - if (block_migratetype == MIGRATE_RESERVE) { - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(zone, page, MIGRATE_MOVABLE); - } - } -} - /* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is @@ -4455,9 +4326,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * movable at startup. This will force kernel allocations * to reserve their blocks rather than leaking throughout * the address space during boot when many long-lived - * kernel allocations are made. Later some blocks near - * the start are marked MIGRATE_RESERVE by - * setup_zone_migrate_reserve() + * kernel allocations are made. * * bitmap is created for zone's valid pfn range. but memmap * can be created for invalid pages (for alignment) @@ -6018,7 +5887,6 @@ static void __setup_per_zone_wmarks(void) high_wmark_pages(zone) - low_wmark_pages(zone) - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } diff --git a/mm/vmstat.c b/mm/vmstat.c index ffcb4f58bf3e..5b289dcdcccf 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -923,7 +923,6 @@ static char * const migratetype_names[MIGRATE_TYPES] = { "Unmovable", "Reclaimable", "Movable", - "Reserve", #ifdef CONFIG_CMA "CMA", #endif -- cgit v1.2.3 From 0aaa29a56e4fb0fc9e24edb649e2733a672ca099 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:37 -0800 Subject: mm, page_alloc: reserve pageblocks for high-order atomic allocations on demand High-order watermark checking exists for two reasons -- kswapd high-order awareness and protection for high-order atomic requests. Historically the kernel depended on MIGRATE_RESERVE to preserve min_free_kbytes as high-order free pages for as long as possible. This patch introduces MIGRATE_HIGHATOMIC that reserves pageblocks for high-order atomic allocations on demand and avoids using those blocks for order-0 allocations. This is more flexible and reliable than MIGRATE_RESERVE was. A MIGRATE_HIGHATOMIC pageblock is created when an atomic high-order allocation request steals a pageblock, and the total number of reserved pageblocks is limited to roughly 1% of the zone. Callers that speculatively abuse atomic allocations for long-lived high-order allocations to access the reserve will quickly fail. Note that SLUB is currently not such an abuser as it reclaims at least once. It is possible that the pageblock stolen has few suitable high-order pages and will need to steal again in the near future but there would need to be strong justification to search all pageblocks for an ideal candidate. The pageblocks are unreserved if an allocation fails after a direct reclaim attempt.
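A minimal sketch of the intended life cycle, assuming only the helper names introduced by the diff below (reserve_highatomic_pageblock() and unreserve_highatomic_pageblock()) and eliding locking, zone iteration and page preparation:

	/* Simplified fragment, not the literal patched code */
	page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
	if (page && order && (alloc_flags & ALLOC_HARDER))
		/* high-order atomic request succeeded: grow the reserve */
		reserve_highatomic_pageblock(page, zone, order);

	/* ... on the failure path, after direct reclaim returned no page ... */
	if (!page && !drained) {
		/* hand one reserved pageblock back before retrying */
		unreserve_highatomic_pageblock(ac);
		drain_all_pages(NULL);
		drained = true;
		goto retry;
	}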
The watermark checks account for the reserved pageblocks when the allocation request is not a high-order atomic allocation. The reserved pageblocks cannot be used for order-0 allocations. This may allow temporary wastage until a failed reclaim reassigns the pageblock. This is deliberate as the intent of the reservation is to satisfy a limited number of atomic high-order short-lived requests if the system requires them. The stutter benchmark was used to evaluate this but, while it was running, a systemtap script randomly allocated between one high-order page and 12.5% of memory's worth of order-3 pages using GFP_ATOMIC. This is much larger than the potential reserve and it does not attempt to be realistic. It is intended to stress random high-order allocations from an unknown source and to show that there is a reduction in failures without introducing an anomaly where atomic allocations are more reliable than regular allocations. The amount of memory reserved varied throughout the workload as reserves were created and reclaimed under memory pressure. The allocation failures once the workload warmed up were as follows:

4.2-rc5-vanilla          70%
4.2-rc5-atomic-reserve   56%

The failure rate was also measured while building multiple kernels: it was 14% without the patch and 6% with it applied. Overall, this is a small reduction but the reserves are small relative to the number of allocation requests. In early versions of the patch, the failure rate reduced by a much larger amount but that required much larger reserves and perversely made atomic allocations seem more reliable than regular allocations. [yalin.wang2010@gmail.com: fix redundant check and a memory leak] Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: yalin wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++- mm/page_alloc.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++--- mm/vmstat.c | 1 + 3 files changed, 135 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b86cfa3313cf..d3bafe4ff32b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -39,6 +39,8 @@ enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, + MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ + MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way @@ -61,8 +63,6 @@ enum { MIGRATE_TYPES }; -#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) - #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else @@ -334,6 +334,8 @@ struct zone { /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; + unsigned long nr_reserved_highatomic; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 588812614377..55e9c56dfe54 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1615,6 +1615,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, return -1; } +/* + * Reserve a pageblock for exclusive use of high-order atomic allocations if + * there are no empty page blocks that contain a page with a suitable order + */ +static void
reserve_highatomic_pageblock(struct page *page, struct zone *zone, + unsigned int alloc_order) +{ + int mt; + unsigned long max_managed, flags; + + /* + * Limit the number reserved to 1 pageblock or roughly 1% of a zone. + * Check is race-prone but harmless. + */ + max_managed = (zone->managed_pages / 100) + pageblock_nr_pages; + if (zone->nr_reserved_highatomic >= max_managed) + return; + + spin_lock_irqsave(&zone->lock, flags); + + /* Recheck the nr_reserved_highatomic limit under the lock */ + if (zone->nr_reserved_highatomic >= max_managed) + goto out_unlock; + + /* Yoink! */ + mt = get_pageblock_migratetype(page); + if (mt != MIGRATE_HIGHATOMIC && + !is_migrate_isolate(mt) && !is_migrate_cma(mt)) { + zone->nr_reserved_highatomic += pageblock_nr_pages; + set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); + move_freepages_block(zone, page, MIGRATE_HIGHATOMIC); + } + +out_unlock: + spin_unlock_irqrestore(&zone->lock, flags); +} + +/* + * Used when an allocation is about to fail under memory pressure. This + * potentially hurts the reliability of high-order allocations when under + * intense memory pressure but failed atomic allocations should be easier + * to recover from than an OOM. + */ +static void unreserve_highatomic_pageblock(const struct alloc_context *ac) +{ + struct zonelist *zonelist = ac->zonelist; + unsigned long flags; + struct zoneref *z; + struct zone *zone; + struct page *page; + int order; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx, + ac->nodemask) { + /* Preserve at least one pageblock */ + if (zone->nr_reserved_highatomic <= pageblock_nr_pages) + continue; + + spin_lock_irqsave(&zone->lock, flags); + for (order = 0; order < MAX_ORDER; order++) { + struct free_area *area = &(zone->free_area[order]); + + if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + continue; + + page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next, + struct page, lru); + + /* + * It should never happen but changes to locking could + * inadvertently allow a per-cpu drain to add pages + * to MIGRATE_HIGHATOMIC while unreserving so be safe + * and watch for underflows. + */ + zone->nr_reserved_highatomic -= min(pageblock_nr_pages, + zone->nr_reserved_highatomic); + + /* + * Convert to ac->migratetype and avoid the normal + * pageblock stealing heuristics. Minimally, the caller + * is doing the work and needs the pages. More + * importantly, if the block was always converted to + * MIGRATE_UNMOVABLE or another type then the number + * of pageblocks that cannot be completely freed + * may increase. + */ + set_pageblock_migratetype(page, ac->migratetype); + move_freepages_block(zone, page, ac->migratetype); + spin_unlock_irqrestore(&zone->lock, flags); + return; + } + spin_unlock_irqrestore(&zone->lock, flags); + } +} + /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) @@ -1670,7 +1765,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) * Call me with the zone->lock already held. 
*/ static struct page *__rmqueue(struct zone *zone, unsigned int order, - int migratetype) + int migratetype, gfp_t gfp_flags) { struct page *page; @@ -1700,7 +1795,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, spin_lock(&zone->lock); for (i = 0; i < count; ++i) { - struct page *page = __rmqueue(zone, order, migratetype); + struct page *page = __rmqueue(zone, order, migratetype, 0); if (unlikely(page == NULL)) break; @@ -2072,7 +2167,7 @@ int split_free_page(struct page *page) static inline struct page *buffered_rmqueue(struct zone *preferred_zone, struct zone *zone, unsigned int order, - gfp_t gfp_flags, int migratetype) + gfp_t gfp_flags, int alloc_flags, int migratetype) { unsigned long flags; struct page *page; @@ -2115,7 +2210,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, WARN_ON_ONCE(order > 1); } spin_lock_irqsave(&zone->lock, flags); - page = __rmqueue(zone, order, migratetype); + + page = NULL; + if (alloc_flags & ALLOC_HARDER) { + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); + } + if (!page) + page = __rmqueue(zone, order, migratetype, gfp_flags); spin_unlock(&zone->lock); if (!page) goto failed; @@ -2226,15 +2329,24 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, int alloc_flags, long free_pages) { - /* free_pages may go negative - that's OK */ long min = mark; int o; long free_cma = 0; + /* free_pages may go negative - that's OK */ free_pages -= (1 << order) - 1; + if (alloc_flags & ALLOC_HIGH) min -= min / 2; - if (alloc_flags & ALLOC_HARDER) + + /* + * If the caller does not have rights to ALLOC_HARDER then subtract + * the high-atomic reserves. This will over-estimate the size of the + * atomic reserve but it avoids a search. + */ + if (likely(!(alloc_flags & ALLOC_HARDER))) + free_pages -= z->nr_reserved_highatomic; + else min -= min / 4; #ifdef CONFIG_CMA @@ -2419,10 +2531,18 @@ zonelist_scan: try_this_zone: page = buffered_rmqueue(ac->preferred_zone, zone, order, - gfp_mask, ac->migratetype); + gfp_mask, alloc_flags, ac->migratetype); if (page) { if (prep_new_page(page, order, gfp_mask, alloc_flags)) goto try_this_zone; + + /* + * If this is a high-order atomic allocation then check + * if the pageblock should be reserved for the future + */ + if (unlikely(order && (alloc_flags & ALLOC_HARDER))) + reserve_highatomic_pageblock(page, zone, order); + return page; } } @@ -2695,9 +2815,11 @@ retry: /* * If an allocation failed after direct reclaim, it could be because - * pages are pinned on the per-cpu lists. Drain them and try again + * pages are pinned on the per-cpu lists or in high alloc reserves. 
+ * Shrink them them and try again */ if (!page && !drained) { + unreserve_highatomic_pageblock(ac); drain_all_pages(NULL); drained = true; goto retry; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 5b289dcdcccf..879a2be23325 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -923,6 +923,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = { "Unmovable", "Reclaimable", "Movable", + "HighAtomic", #ifdef CONFIG_CMA "CMA", #endif -- cgit v1.2.3 From dd56b046426760aa0c852ad6e4b6b07891222d65 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:43 -0800 Subject: mm: page_alloc: hide some GFP internals and document the bits and flag combinations Andrew stated the following:

  We have quite a history of remote parts of the kernel using
  weird/wrong/inexplicable combinations of __GFP_ flags. I tend
  to think that this is because we didn't adequately explain the
  interface.

  And I don't think that gfp.h really improved much in this area as
  a result of this patchset. Could you go through it some time and
  decide if we've adequately documented all this stuff?

This patch first moves some GFP flag combinations that are part of the MM internals to mm/internal.h. The rest of the patch documents the __GFP_FOO bits under various headings and then documents the flag combinations. It will not help callers that are brain damaged but the clarity might motivate some fixes and avoid future mistakes. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Rik van Riel Cc: Vlastimil Babka Cc: David Rientjes Cc: Joonsoo Kim Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 251 +++++++++++++++++++++++++++++++++++----------------- mm/internal.h | 19 ++++ mm/shmem.c | 2 + mm/vmalloc.c | 2 + 4 files changed, 194 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 369227202ac2..6523109e136d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,9 +39,7 @@ struct vm_area_struct; /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* - * GFP bitmasks.. - * - * Zone modifiers (see linux/mmzone.h - low three bits) + * Physical address zone modifiers (see linux/mmzone.h - low four bits) * * Do not put any conditional on these. If necessary modify the definitions * without the underscores and use them consistently. The definitions here may @@ -51,120 +49,211 @@ struct vm_area_struct; #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + /* - * Action modifiers - doesn't change the zoning + * Page mobility and placement hints * - * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt - * _might_ fail. This depends upon the particular VM implementation. + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. * - * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures.
New users should be evaluated carefully - * (and the flag should be used only when there is no reasonable failure policy) - * but it is definitely preferable to use the flag rather than opencode endless - * loop around allocator. + * __GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. * - * __GFP_NORETRY: The VM implementation must not retry indefinitely and will - * return NULL when direct reclaim and memory compaction have failed to allow - * the allocation to succeed. The OOM killer is not called with the current - * implementation. + * __GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * __GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). * - * __GFP_MOVABLE: Flag that this page will be movable by the page migration - * mechanism or reclaimed + * __GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * __GFP_THISNODE forces the allocation to be satisified from the requested + * node with no fallbacks or placement policy enforcements. */ -#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) /* Caller cannot wait or reschedule */ -#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ -#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ -#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ -#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */ -#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */ -#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */ -#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */ -#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */ -#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */ -#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */ -#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */ -#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves. - * This takes precedence over the - * __GFP_MEMALLOC flag if both are - * set - */ -#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */ -#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */ -#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ -#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */ -#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ - -#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ -#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) /* - * A caller that is willing to wait may enter direct reclaim and will - * wake kswapd to reclaim pages in the background until the high - * watermark is met. 
A caller may wish to clear __GFP_DIRECT_RECLAIM to - * avoid unnecessary delays when a fallback option is available but - * still allow kswapd to reclaim in the background. The kswapd flag - * can be cleared when the reclaiming of pages would cause unnecessary - * disruption. + * Watermark modifiers -- controls access to emergency reserves + * + * __GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example, creating an IO context to clean pages. + * + * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is + * high priority. Users are typically interrupt handlers. This may be + * used in conjunction with __GFP_HIGH + * + * __GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * + * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the __GFP_MEMALLOC flag if both are set. + * + * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement. */ -#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) +#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) + +/* + * Reclaim modifiers + * + * __GFP_IO can start physical IO. + * + * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt + * _might_ fail. This depends upon the particular VM implementation. + * + * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. New users should be evaluated carefully + * (and the flag should be used only when there is no reasonable failure + * policy) but it is definitely preferable to use the flag rather than + * opencode endless loop around allocator. + * + * __GFP_NORETRY: The VM implementation must not retry indefinitely and will + * return NULL when direct reclaim and memory compaction have failed to allow + * the allocation to succeed. The OOM killer is not called with the current + * implementation. 
+ */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* - * This may seem redundant, but it's a way of annotating false positives vs. - * allocations that simply cannot be supported (e.g. page tables). + * Action modifiers + * + * __GFP_COLD indicates that the caller does not expect to be used in the near + * future. Where possible, a cache-cold page will be returned. + * + * __GFP_NOWARN suppresses allocation failure reports. + * + * __GFP_COMP address compound page metadata. + * + * __GFP_ZERO returns a zeroed page on success. + * + * __GFP_NOTRACK avoids tracking with kmemcheck. + * + * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of + * distinguishing in the source between false positives and allocations that + * cannot be supported (e.g. page tables). + * + * __GFP_OTHER_NODE is for allocations that are on a remote node but that + * should not be accounted for as a remote allocation in vmstat. A + * typical user would be khugepaged collapsing a huge page on a remote + * node. */ +#define __GFP_COLD ((__force gfp_t)___GFP_COLD) +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) +#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) -#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */ +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 26 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* - * GFP_ATOMIC callers can not sleep, need the allocation to succeed. - * A lower watermark is applied to allow access to "atomic reserves" + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * __GFP_FOO flags as necessary. + * + * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves" + * + * GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. + * + * GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * + * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * + * GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (ZONE_DMA or 16M on x86-64). 
Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit + * address. + * + * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE. + * + * GFP_TRANSHUGE is used for THP allocations. They are compound allocations + * that will fail quickly if memory is not available and will not wake + * kswapd on failure. */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) -#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ __GFP_RECLAIMABLE) #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ ~__GFP_KSWAPD_RECLAIM) -/* This mask makes up all the page movable related flags */ +/* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) #define GFP_MOVABLE_SHIFT 3 -/* Control page allocator reclaim behavior */ -#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ - __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ - __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) - -/* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) - -/* Control allocation constraints */ -#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) - -/* Do not use these with a slab allocator */ -#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) - -/* Flag - indicates that the buffer will be suitable for DMA. 
Ignored on some - platforms, used as appropriate on others */ - -#define GFP_DMA __GFP_DMA - -/* 4GB DMA on some platforms */ -#define GFP_DMA32 __GFP_DMA32 - -/* Convert GFP flags to their corresponding migrate type */ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) { VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); @@ -177,6 +266,8 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) /* Group based on mobility */ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } +#undef GFP_MOVABLE_MASK +#undef GFP_MOVABLE_SHIFT static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { diff --git a/mm/internal.h b/mm/internal.h index ff0f1ada0f67..5b7841f6fa27 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -14,6 +14,25 @@ #include #include +/* + * The set of flags that only affect watermark checking and reclaim + * behaviour. This is used by the MM to obey the caller constraints + * about IO, FS and watermark checking while ignoring placement + * hints such as HIGHMEM usage. + */ +#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ + __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ + __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) + +/* The GFP flags allowed during early boot */ +#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) + +/* Control allocation cpuset and node placement constraints */ +#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) + +/* Do not use these with a slab allocator */ +#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) + void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); diff --git a/mm/shmem.c b/mm/shmem.c index 3b8b73928398..9187eee4128b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -73,6 +73,8 @@ static struct vfsmount *shm_mnt; #include #include +#include "internal.h" + #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7ee94dc10000..d04563480c94 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -35,6 +35,8 @@ #include #include +#include "internal.h" + struct vfree_deferred { struct llist_head list; struct work_struct wq; -- cgit v1.2.3 From 89903327607232de32f05100cf03f9390b858e0b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 6 Nov 2015 16:28:46 -0800 Subject: include/linux/mmzone.h: reflow comment Someone has an 86 column display. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d3bafe4ff32b..e23a9e704536 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -337,12 +337,13 @@ struct zone { unsigned long nr_reserved_highatomic; /* - * We don't know if the memory that we're going to allocate will be freeable - * or/and it will be released eventually, so to avoid totally wasting several - * GB of ram we must reserve some of the lower zone memory (otherwise we risk - * to run OOM on the lower zones despite there's tons of freeable ram - * on the higher zones). This array is recalculated at runtime if the - * sysctl_lowmem_reserve_ratio sysctl changes. 
+ * We don't know if the memory that we're going to allocate will be + * freeable or/and it will be released eventually, so to avoid totally + * wasting several GB of ram we must reserve some of the lower zone + * memory (otherwise we risk to run OOM on the lower zones despite + * there being tons of freeable ram on the higher zones). This array is + * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl + * changes. */ long lowmem_reserve[MAX_NR_ZONES]; -- cgit v1.2.3 From c62d25556be6c965dc14288e796a576e8e39a7e9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 6 Nov 2015 16:28:49 -0800 Subject: mm, fs: introduce mapping_gfp_constraint() There are many places which use mapping_gfp_mask to restrict a more generic gfp mask which would be used for allocations which are not directly related to the page cache but are performed in the same context. Let's introduce a helper function which makes the restriction explicit and easier to track. This patch doesn't introduce any functional changes. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 3 +-- fs/btrfs/compression.c | 7 +++---- fs/btrfs/ctree.h | 2 +- fs/btrfs/free-space-cache.c | 4 ++-- fs/buffer.c | 2 +- fs/ceph/addr.c | 7 ++++--- fs/cifs/file.c | 2 +- fs/ext4/inode.c | 2 +- fs/ext4/readpage.c | 2 +- fs/logfs/segment.c | 2 +- fs/mpage.c | 4 ++-- fs/namei.c | 2 +- fs/nilfs2/inode.c | 4 ++-- fs/ntfs/file.c | 4 ++-- fs/splice.c | 2 +- include/linux/pagemap.h | 7 +++++++ mm/filemap.c | 4 ++-- mm/readahead.c | 4 ++-- 19 files changed, 36 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 3c2d4abd71c5..1d47d2e9487c 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -491,7 +491,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping) * so shmem can relocate pages during swapin if required.
*/ - BUG_ON((mapping_gfp_mask(mapping) & __GFP_DMA32) && + BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) && (page_to_pfn(p) >= 0x00100000UL)); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7e505d4be7c0..399aab265db3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2214,9 +2214,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) * Fail silently without starting the shrinker */ mapping = file_inode(obj->base.filp)->i_mapping; - gfp = mapping_gfp_mask(mapping); + gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); gfp |= __GFP_NORETRY | __GFP_NOWARN; - gfp &= ~(__GFP_IO | __GFP_RECLAIM); sg = st->sgl; st->nents = 0; for (i = 0; i < page_count; i++) { diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 57ee8ca29b06..36dfeff2c1f4 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -482,13 +482,12 @@ static noinline int add_ra_bio_pages(struct inode *inode, goto next; } - page = __page_cache_alloc(mapping_gfp_mask(mapping) & - ~__GFP_FS); + page = __page_cache_alloc(mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) break; - if (add_to_page_cache_lru(page, mapping, pg_index, - GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) { page_cache_release(page); goto next; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe33be80..eb90f0f1a124 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3316,7 +3316,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) { - return mapping_gfp_mask(mapping) & ~__GFP_FS; + return mapping_gfp_constraint(mapping, ~__GFP_FS); } /* extent-tree.c */ diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index abe3a66bd3ba..ed05da1b977e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -85,8 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, } mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & - ~(__GFP_FS | __GFP_HIGHMEM)); + mapping_gfp_constraint(inode->i_mapping, + ~(__GFP_FS | __GFP_HIGHMEM))); return inode; } diff --git a/fs/buffer.c b/fs/buffer.c index 82283abb2795..51aff0296ce2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -999,7 +999,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, int ret = 0; /* Will call free_more_memory() */ gfp_t gfp_mask; - gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; + gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; /* * XXX: __getblk_slow() can not really deal with failure and diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9d23e788d1df..b7d218a168fb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1283,8 +1283,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int ret1; struct address_space *mapping = inode->i_mapping; struct page *page = find_or_create_page(mapping, 0, - mapping_gfp_mask(mapping) & - ~__GFP_FS); + mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) { ret = VM_FAULT_OOM; goto out; @@ -1428,7 +1428,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, if (i_size_read(inode) == 0) return; page = find_or_create_page(mapping, 0, - mapping_gfp_mask(mapping) & ~__GFP_FS); + mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) return; if (PageUptodate(page)) { diff --git a/fs/cifs/file.c 
b/fs/cifs/file.c index 47c5c97e2dd3..0068e82217c3 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3380,7 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, struct page *page, *tpage; unsigned int expected_index; int rc; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); INIT_LIST_HEAD(tmplist); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 612fbcf76b5c..60aaecd5598b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3344,7 +3344,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, int err = 0; page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, - mapping_gfp_mask(mapping) & ~__GFP_FS); + mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return -ENOMEM; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 560af0437704..1061611ae14d 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -166,7 +166,7 @@ int ext4_mpage_readpages(struct address_space *mapping, page = list_entry(pages->prev, struct page, lru); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) + mapping_gfp_constraint(mapping, GFP_KERNEL))) goto next_page; } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 7f9b096d8d57..6de0fbfc6c00 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -57,7 +57,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, filler_t *filler = super->s_devops->readpage; struct page *page; - BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); + BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS)); if (use_filler) page = read_cache_page(mapping, index, filler, sb); else { diff --git a/fs/mpage.c b/fs/mpage.c index 09abba7653aa..1480d3a18037 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -361,7 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, sector_t last_block_in_bio = 0; struct buffer_head map_bh; unsigned long first_logical_block = 0; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); map_bh.b_state = 0; map_bh.b_size = 0; @@ -397,7 +397,7 @@ int mpage_readpage(struct page *page, get_block_t get_block) sector_t last_block_in_bio = 0; struct buffer_head map_bh; unsigned long first_logical_block = 0; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping); + gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); map_bh.b_state = 0; map_bh.b_size = 0; diff --git a/fs/namei.c b/fs/namei.c index 0d3340b32e14..3c18970a8899 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4604,7 +4604,7 @@ EXPORT_SYMBOL(__page_symlink); int page_symlink(struct inode *inode, const char *symname, int len) { return __page_symlink(inode, symname, len, - !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); + !mapping_gfp_constraint(inode->i_mapping, __GFP_FS)); } EXPORT_SYMBOL(page_symlink); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4a73d6dffabf..ac2f64943ff4 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -356,7 +356,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) goto failed; mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); @@ -522,7 +522,7 @@ static int __nilfs_read_inode(struct super_block *sb, up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); 
mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); return 0; failed_unmap: diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 262561fea923..9d383e5eff0e 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -525,8 +525,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, } } err = add_to_page_cache_lru(*cached_page, mapping, - index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + index, + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (unlikely(err)) { if (err == -EEXIST) continue; diff --git a/fs/splice.c b/fs/splice.c index 5fc1e50a7f30..801c21cd77fe 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a6c78e00ea96..26eabf5ec718 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -69,6 +69,13 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping) return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; } +/* Restricts the given gfp_mask to what the mapping allows. */ +static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, + gfp_t gfp_mask) +{ + return mapping_gfp_mask(mapping) & gfp_mask; +} + /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... diff --git a/mm/filemap.c b/mm/filemap.c index 6ef3674c0763..1bb007624b53 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1722,7 +1722,7 @@ no_cached_page: goto out; } error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error) { page_cache_release(page); if (error == -EEXIST) { @@ -1824,7 +1824,7 @@ static int page_cache_read(struct file *file, pgoff_t offset) return -ENOMEM; ret = add_to_page_cache_lru(page, mapping, offset, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) diff --git a/mm/readahead.c b/mm/readahead.c index 998ad592f408..ba22d7fe0afb 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -90,7 +90,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, page = list_to_page(pages); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) { + mapping_gfp_constraint(mapping, GFP_KERNEL))) { read_cache_pages_invalidate_page(mapping, page); continue; } @@ -128,7 +128,7 @@ static int read_pages(struct address_space *mapping, struct file *filp, struct page *page = list_to_page(pages); list_del(&page->lru); if (!add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) { + mapping_gfp_constraint(mapping, GFP_KERNEL))) { mapping->a_ops->readpage(filp, page); } page_cache_release(page); -- cgit v1.2.3 From 3d9c637f4ae74b45d95bb6cbd793fbffad0a709c Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 6 Nov 2015 16:29:12 -0800 Subject: module: export param_free_charp() Change the param_free_charp() function from static to exported. It is used by zswap in the next patch ("zswap: use charp for zswap param strings"). 
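For context, a rough sketch of the kind of caller this export enables: a module that wraps the stock charp handlers in custom kernel_param_ops, validating values in .set while still letting the core allocate and free the string copy. All names here (my_param and friends) are hypothetical illustrations, not code from the zswap patch:

    #include <linux/moduleparam.h>
    #include <linux/string.h>
    #include <linux/errno.h>

    static char *my_param = "default";

    /* Hypothetical validating setter; param_set_charp() kmalloc's the copy. */
    static int my_param_set(const char *val, const struct kernel_param *kp)
    {
            if (strlen(val) > 32)           /* arbitrary example limit */
                    return -EINVAL;
            return param_set_charp(val, kp);
    }

    static const struct kernel_param_ops my_param_ops = {
            .set  = my_param_set,
            .get  = param_get_charp,
            .free = param_free_charp,       /* usable outside kernel/params.c after this patch */
    };
    module_param_cb(my_param, &my_param_ops, &my_param, 0644);

Without an exported param_free_charp(), such a module would have to re-implement the free logic for the kmalloc'ed copy released at module unload.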
Signed-off-by: Dan Streetman Acked-by: Rusty Russell Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/moduleparam.h | 1 + kernel/params.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index c12f2147c350..52666d90ca94 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -386,6 +386,7 @@ extern int param_get_ullong(char *buffer, const struct kernel_param *kp); extern const struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); +extern void param_free_charp(void *arg); #define param_check_charp(name, p) __param_check(name, p, char *) /* We used to allow int as well as bool. We're taking that away! */ diff --git a/kernel/params.c b/kernel/params.c index b6554aa71094..93a380a2345d 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -325,10 +325,11 @@ int param_get_charp(char *buffer, const struct kernel_param *kp) } EXPORT_SYMBOL(param_get_charp); -static void param_free_charp(void *arg) +void param_free_charp(void *arg) { maybe_kfree_parameter(*((char **)arg)); } +EXPORT_SYMBOL(param_free_charp); const struct kernel_param_ops param_ops_charp = { .set = param_set_charp, -- cgit v1.2.3 From 69e18f4dbedfbf208452e9da9979c92da30d2442 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 6 Nov 2015 16:29:18 -0800 Subject: zpool: remove redundant zpool->type string, const-ify zpool_get_type Make the return type of zpool_get_type const; the string belongs to the zpool driver and should not be modified. Remove the redundant type field in the struct zpool; it is private to zpool.c and isn't needed since ->driver->type can be used directly. Add comments indicating strings must be null-terminated. Signed-off-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Seth Jennings Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 2 +- mm/zpool.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 42f8ec992452..1f405bee3cd5 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -41,7 +41,7 @@ bool zpool_has_pool(char *type); struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, const struct zpool_ops *ops); -char *zpool_get_type(struct zpool *pool); +const char *zpool_get_type(struct zpool *pool); void zpool_destroy_pool(struct zpool *pool); diff --git a/mm/zpool.c b/mm/zpool.c index 8f670d3e8706..13f524dcf215 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -18,8 +18,6 @@ #include struct zpool { - char *type; - struct zpool_driver *driver; void *pool; const struct zpool_ops *ops; @@ -73,6 +71,7 @@ int zpool_unregister_driver(struct zpool_driver *driver) } EXPORT_SYMBOL(zpool_unregister_driver); +/* this assumes @type is null-terminated. */ static struct zpool_driver *zpool_get_driver(char *type) { struct zpool_driver *driver; @@ -113,6 +112,8 @@ static void zpool_put_driver(struct zpool_driver *driver) * not be loaded, and calling @zpool_create_pool() with the pool type will * fail. * + * The @type string must be null-terminated. 
+ * + * Returns: true if @type pool is available, false if not */ bool zpool_has_pool(char *type) @@ -145,6 +146,8 @@ EXPORT_SYMBOL(zpool_has_pool); * * Implementations must guarantee this to be thread-safe. * + * The @type and @name strings must be null-terminated. + * + * Returns: New zpool on success, NULL on failure. */ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, @@ -174,7 +177,6 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, return NULL; } - zpool->type = driver->type; zpool->driver = driver; zpool->pool = driver->create(name, gfp, ops, zpool); zpool->ops = ops; @@ -208,7 +210,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, */ void zpool_destroy_pool(struct zpool *zpool) { - pr_debug("destroying pool type %s\n", zpool->type); + pr_debug("destroying pool type %s\n", zpool->driver->type); spin_lock(&pools_lock); list_del(&zpool->list); @@ -228,9 +230,9 @@ void zpool_destroy_pool(struct zpool *zpool) * * Returns: The type of zpool. */ -char *zpool_get_type(struct zpool *zpool) +const char *zpool_get_type(struct zpool *zpool) { - return zpool->type; + return zpool->driver->type; } /** -- cgit v1.2.3 From 6f3526d6db7cbe8b53e42d6bf0cad2072afcf3fe Mon Sep 17 00:00:00 2001 From: Sergey SENOZHATSKY Date: Fri, 6 Nov 2015 16:29:21 -0800 Subject: mm: zsmalloc: constify struct zs_pool name Constify `struct zs_pool' ->name. [akpm@linux-foundation.org: constify zpool_create_pool()'s `type' arg also] Signed-off-by: Sergey Senozhatsky Acked-by: Dan Streetman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 6 ++++-- include/linux/zsmalloc.h | 2 +- mm/zbud.c | 2 +- mm/zpool.c | 4 ++-- mm/zsmalloc.c | 10 +++++----- 5 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 1f405bee3cd5..2e97b7707dff 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -38,7 +38,7 @@ enum zpool_mapmode { bool zpool_has_pool(char *type); -struct zpool *zpool_create_pool(char *type, char *name, +struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, const struct zpool_ops *ops); const char *zpool_get_type(struct zpool *pool); @@ -83,7 +83,9 @@ struct zpool_driver { atomic_t refcount; struct list_head list; - void *(*create)(char *name, gfp_t gfp, const struct zpool_ops *ops, + void *(*create)(const char *name, + gfp_t gfp, + const struct zpool_ops *ops, struct zpool *zpool); void (*destroy)(void *pool); diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 6398dfae53f1..34eb16098a33 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -41,7 +41,7 @@ struct zs_pool_stats { struct zs_pool; -struct zs_pool *zs_create_pool(char *name, gfp_t flags); +struct zs_pool *zs_create_pool(const char *name, gfp_t flags); void zs_destroy_pool(struct zs_pool *pool); unsigned long zs_malloc(struct zs_pool *pool, size_t size); diff --git a/mm/zbud.c b/mm/zbud.c index fa48bcdff9d5..d8a181fd779b 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -137,7 +137,7 @@ static const struct zbud_ops zbud_zpool_ops = { .evict = zbud_zpool_evict }; -static void *zbud_zpool_create(char *name, gfp_t gfp, +static void *zbud_zpool_create(const char *name, gfp_t gfp, const struct zpool_ops *zpool_ops, struct zpool *zpool) { diff --git a/mm/zpool.c b/mm/zpool.c index 13f524dcf215..fd3ff719c32c 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -72,7 +72,7 @@ int zpool_unregister_driver(struct
zpool_driver *driver) EXPORT_SYMBOL(zpool_unregister_driver); /* this assumes @type is null-terminated. */ -static struct zpool_driver *zpool_get_driver(char *type) +static struct zpool_driver *zpool_get_driver(const char *type) { struct zpool_driver *driver; @@ -150,7 +150,7 @@ EXPORT_SYMBOL(zpool_has_pool); * * Returns: New zpool on success, NULL on failure. */ -struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, +struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, const struct zpool_ops *ops) { struct zpool_driver *driver; diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index f135b1b6fcdc..8b8e0dac0a2a 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -237,7 +237,7 @@ struct link_free { }; struct zs_pool { - char *name; + const char *name; struct size_class **size_class; struct kmem_cache *handle_cachep; @@ -311,7 +311,7 @@ static void record_obj(unsigned long handle, unsigned long obj) #ifdef CONFIG_ZPOOL -static void *zs_zpool_create(char *name, gfp_t gfp, +static void *zs_zpool_create(const char *name, gfp_t gfp, const struct zpool_ops *zpool_ops, struct zpool *zpool) { @@ -548,7 +548,7 @@ static const struct file_operations zs_stat_size_ops = { .release = single_release, }; -static int zs_pool_stat_create(char *name, struct zs_pool *pool) +static int zs_pool_stat_create(const char *name, struct zs_pool *pool) { struct dentry *entry; @@ -588,7 +588,7 @@ static void __exit zs_stat_exit(void) { } -static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) +static inline int zs_pool_stat_create(const char *name, struct zs_pool *pool) { return 0; } @@ -1866,7 +1866,7 @@ static int zs_register_shrinker(struct zs_pool *pool) * On success, a pointer to the newly created pool is returned, * otherwise NULL. */ -struct zs_pool *zs_create_pool(char *name, gfp_t flags) +struct zs_pool *zs_create_pool(const char *name, gfp_t flags) { int i; struct zs_pool *pool; -- cgit v1.2.3 From 474e4eeaf26b6c3298ca3ae9d0a705b0853efb2a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:40 -0800 Subject: mm: drop page->slab_page Since 8456a648cf44 ("slab: use struct page for slab management") nobody uses the slab_page field in struct page. Let's drop it. Signed-off-by: Kirill A. Shutemov Acked-by: Christoph Lameter Acked-by: David Rientjes Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: Joonsoo Kim Cc: Andi Kleen Cc: "Paul E. McKenney" Cc: Aneesh Kumar K.V Cc: Hugh Dickins Cc: Michal Hocko Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0a85da25a822..c0ec46df6c13 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -131,7 +131,6 @@ struct page { #endif }; - struct slab *slab_page; /* slab fields */ struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ -- cgit v1.2.3 From f1e61557f0230d51a3df8d825f2c156e75563bff Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:50 -0800 Subject: mm: pack compound_dtor and compound_order into one word in struct page The patch halves the space occupied by compound_dtor and compound_order in struct page. For compound_order, it's a trivial long -> short conversion. For get_compound_page_dtor(), we now use a hardcoded table for destructor lookup and store its index in struct page instead of a direct pointer to the destructor.
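To make the lookup scheme concrete, here is a small self-contained user-space sketch of the same pattern; the one-field struct page and the printf bodies are stand-ins, but the enum/table shape mirrors the hunks below:

    #include <stdio.h>

    struct page { unsigned short compound_dtor; };  /* stand-in, not the real struct page */

    typedef void compound_page_dtor(struct page *);

    static void free_compound_page(struct page *p) { printf("compound page dtor\n"); }
    static void free_huge_page(struct page *p)     { printf("hugetlb page dtor\n"); }

    /* Keep the enum in sync with the table, as the patch's comment instructs. */
    enum compound_dtor_id {
            NULL_COMPOUND_DTOR,
            COMPOUND_PAGE_DTOR,
            HUGETLB_PAGE_DTOR,
            NR_COMPOUND_DTORS,
    };

    static compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {
            NULL,
            free_compound_page,
            free_huge_page,
    };

    int main(void)
    {
            struct page first_tail = { .compound_dtor = HUGETLB_PAGE_DTOR };

            /* get_compound_page_dtor() analogue: a small index selects the destructor */
            compound_page_dtors[first_tail.compound_dtor](&first_tail);
            return 0;
    }

The index fits in a short, which is what lets the following hunks shrink the field and pack it next to compound_order.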
It shouldn't be much trouble to maintain the table: we currently have only two destructors and NULL. This patch frees up one word in tail pages for reuse. This is preparation for the next patch. Signed-off-by: Kirill A. Shutemov Reviewed-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 +++++++++++++++++++----- include/linux/mm_types.h | 6 ++---- mm/hugetlb.c | 8 ++++---- mm/page_alloc.c | 11 ++++++++++- 4 files changed, 35 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 906c46a05707..6581c21320cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -568,18 +568,32 @@ int split_free_page(struct page *page); /* * Compound pages have a destructor function. Provide a * prototype for that function and accessor functions. - * These are _only_ valid on the head of a PG_compound page. + * These are _only_ valid on the head of a compound page. */ +typedef void compound_page_dtor(struct page *); + +/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */ +enum compound_dtor_id { + NULL_COMPOUND_DTOR, + COMPOUND_PAGE_DTOR, +#ifdef CONFIG_HUGETLB_PAGE + HUGETLB_PAGE_DTOR, +#endif + NR_COMPOUND_DTORS, +}; +extern compound_page_dtor * const compound_page_dtors[]; static inline void set_compound_page_dtor(struct page *page, - compound_page_dtor *dtor) + enum compound_dtor_id compound_dtor) { - page[1].compound_dtor = dtor; + VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page); + page[1].compound_dtor = compound_dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) { - return page[1].compound_dtor; + VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); + return compound_page_dtors[page[1].compound_dtor]; } static inline int compound_order(struct page *page) @@ -589,7 +603,7 @@ static inline int compound_order(struct page *page) return page[1].compound_order; } -static inline void set_compound_order(struct page *page, unsigned long order) +static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c0ec46df6c13..e334ef79cb43 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -28,8 +28,6 @@ struct mem_cgroup; IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) -typedef void compound_page_dtor(struct page *); - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -136,8 +134,8 @@ struct page { */ /* First tail page of compound page */ struct { - compound_page_dtor *compound_dtor; - unsigned long compound_order; + unsigned short int compound_dtor; + unsigned short int compound_order; }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 74ef0c6a25dd..e90a29024c5c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1146,7 +1146,7 @@ static void update_and_free_page(struct hstate *h, struct page *page) 1 << PG_writeback); } VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); - set_compound_page_dtor(page, NULL); + set_compound_page_dtor(page,
NULL_COMPOUND_DTOR); set_page_refcounted(page); if (hstate_is_gigantic(h)) { destroy_compound_gigantic_page(page, huge_page_order(h)); @@ -1242,7 +1242,7 @@ void free_huge_page(struct page *page) static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { INIT_LIST_HEAD(&page->lru); - set_compound_page_dtor(page, free_huge_page); + set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); spin_lock(&hugetlb_lock); set_hugetlb_cgroup(page, NULL); h->nr_huge_pages++; @@ -1294,7 +1294,7 @@ int PageHuge(struct page *page) return 0; page = compound_head(page); - return get_compound_page_dtor(page) == free_huge_page; + return page[1].compound_dtor == HUGETLB_PAGE_DTOR; } EXPORT_SYMBOL_GPL(PageHuge); @@ -1568,7 +1568,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h, if (page) { INIT_LIST_HEAD(&page->lru); r_nid = page_to_nid(page); - set_compound_page_dtor(page, free_huge_page); + set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); set_hugetlb_cgroup(page, NULL); /* * We incremented the global counters already diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b8d560afe266..fae1bd6f9f37 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -229,6 +229,15 @@ static char * const zone_names[MAX_NR_ZONES] = { #endif }; +static void free_compound_page(struct page *page); +compound_page_dtor * const compound_page_dtors[] = { + NULL, + free_compound_page, +#ifdef CONFIG_HUGETLB_PAGE + free_huge_page, +#endif +}; + int min_free_kbytes = 1024; int user_min_free_kbytes = -1; @@ -458,7 +467,7 @@ void prep_compound_page(struct page *page, unsigned long order) int i; int nr_pages = 1 << order; - set_compound_page_dtor(page, free_compound_page); + set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); set_compound_order(page, order); __SetPageHead(page); for (i = 1; i < nr_pages; i++) { -- cgit v1.2.3 From 1d798ca3f16437c71ff63e36597ff07f9c12e4d6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:54 -0800 Subject: mm: make compound_head() robust Hugh has pointed out that a compound_head() call can be unsafe in some contexts. Here is one example:

    CPU0                                    CPU1

    isolate_migratepages_block()
      page_count()
        compound_head()
          !!PageTail() == true
                                            put_page()
                                              tail->first_page = NULL
          head = tail->first_page
                                            alloc_pages(__GFP_COMP)
                                              prep_compound_page()
                                                tail->first_page = head
                                                __SetPageTail(p);
          !!PageTail() == true

The race is purely theoretical. I don't think it's possible to trigger it in practice. But who knows. We can fix the race by changing how we encode PageTail() and compound_head() within struct page, so that they can be updated in one shot. The patch introduces page->compound_head into the third double word block, in front of compound_dtor and compound_order. Bit 0 encodes PageTail(), and the remaining bits are a pointer to the head page if bit zero is set. The patch moves page->pmd_huge_pte out of this word, just in case an architecture defines pgtable_t as something that can have bit 0 set. hugetlb_cgroup uses page->lru.next in the second tail page to store a pointer to struct hugetlb_cgroup. The patch switches it to use page->private in the second tail page instead. The space is free since ->first_page is removed from the union. The patch also opens the possibility of removing the HUGETLB_CGROUP_MIN_ORDER limitation, since there's now space in the first tail page to store a struct hugetlb_cgroup pointer. But that's out of scope for this patch.
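Before the storage-sharing discussion below, a self-contained user-space sketch of the new encoding may help; the single-word struct page is a stand-in, and the real accessors in the page-flags.h hunk additionally use READ_ONCE()/WRITE_ONCE() so flag and pointer are read in one shot:

    #include <assert.h>
    #include <stdio.h>

    struct page { unsigned long compound_head; };  /* stand-in: only the relevant word */

    static void set_compound_head(struct page *tail, struct page *head)
    {
            tail->compound_head = (unsigned long)head + 1;  /* bit 0 marks a tail page */
    }

    static int PageTail(const struct page *page)
    {
            return page->compound_head & 1;
    }

    static struct page *compound_head(struct page *page)
    {
            unsigned long head = page->compound_head;  /* one word: flag and pointer together */

            if (head & 1)
                    return (struct page *)(head - 1);
            return page;
    }

    int main(void)
    {
            struct page head = { 0 }, tail = { 0 };

            set_compound_head(&tail, &head);
            assert(PageTail(&tail) && compound_head(&tail) == &head);
            assert(!PageTail(&head) && compound_head(&head) == &head);
            printf("tail flag and head pointer live in one word\n");
            return 0;
    }

Because both pieces of state live in one word, there is no window in which PageTail() is true while the head pointer is stale, which is exactly the race shown above.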
That means page->compound_head shares storage space with:

 - page->lru.next;
 - page->next;
 - page->rcu_head.next.

That's too long a list to be absolutely sure, but it looks like nobody uses bit 0 of the word. page->rcu_head.next is guaranteed[1] to have bit 0 clear as long as we use call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu(). But a future call_rcu_lazy() is not allowed, as it makes use of the bit and we could get a false-positive PageTail(). [1] http://lkml.kernel.org/g/20150827163634.GD4029@linux.vnet.ibm.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: Hugh Dickins Cc: David Rientjes Cc: Vlastimil Babka Acked-by: Paul E. McKenney Cc: Aneesh Kumar K.V Cc: Andi Kleen Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/split_page_table_lock | 4 +- arch/xtensa/configs/iss_defconfig | 1 - include/linux/hugetlb_cgroup.h | 4 +- include/linux/mm.h | 53 ++-------------------- include/linux/mm_types.h | 22 ++++++++-- include/linux/page-flags.h | 80 ++++++++-------------------- mm/Kconfig | 12 ----- mm/debug.c | 5 --- mm/huge_memory.c | 3 +- mm/hugetlb.c | 8 +--- mm/hugetlb_cgroup.c | 2 +- mm/internal.h | 4 +- mm/memory-failure.c | 7 --- mm/page_alloc.c | 48 ++++++++++++-------- mm/swap.c | 4 +- 15 files changed, 82 insertions(+), 175 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock index 6dea4fd5c961..62842a857dab 100644 --- a/Documentation/vm/split_page_table_lock +++ b/Documentation/vm/split_page_table_lock @@ -54,8 +54,8 @@ everything required is done by pgtable_page_ctor() and pgtable_page_dtor(), which must be called on PTE table allocation / freeing. Make sure the architecture doesn't use slab allocator for page table -allocation: slab uses page->slab_cache and page->first_page for its pages. -These fields share storage with page->ptl. +allocation: slab uses page->slab_cache for its pages. +This field shares storage with page->ptl. PMD split lock only makes sense if you have more than two page table levels.
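The encoding is only sound because bit 0 of a valid struct page address is always clear: struct page is word-aligned, so an odd value can never be a real head pointer and is unambiguously a tail marker. A tiny user-space check of that invariant, with a stand-in struct:

    #include <assert.h>
    #include <stdalign.h>

    struct page { unsigned long compound_head; unsigned long flags; };  /* stand-in */

    int main(void)
    {
            /* alignment > 1 means bit 0 of any element address is free for PageTail() */
            static_assert(alignof(struct page) > 1, "bit 0 must be available");

            struct page pages[2];
            assert(((unsigned long)&pages[0] & 1UL) == 0);
            assert(((unsigned long)&pages[1] & 1UL) == 0);
            return 0;
    }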
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index f3dfe0d921c2..44c6764d9146 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -169,7 +169,6 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 7edd30515298..24154c26d469 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -32,7 +32,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - return (struct hugetlb_cgroup *)page[2].lru.next; + return (struct hugetlb_cgroup *)page[2].private; } static inline @@ -42,7 +42,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; - page[2].lru.next = (void *)h_cg; + page[2].private = (unsigned long)h_cg; return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6581c21320cb..9671b6f23eda 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -430,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page, #endif } -static inline struct page *compound_head_by_tail(struct page *tail) -{ - struct page *head = tail->first_page; - - /* - * page->first_page may be a dangling pointer to an old - * compound page, so recheck that it is still a tail - * page before returning. - */ - smp_rmb(); - if (likely(PageTail(tail))) - return head; - return tail; -} - -/* - * Since either compound page could be dismantled asynchronously in THP - * or we access asynchronously arbitrary positioned struct page, there - * would be tail flag race. To handle this race, we should call - * smp_rmb() before checking tail flag. compound_head_by_tail() did it. - */ -static inline struct page *compound_head(struct page *page) -{ - if (unlikely(PageTail(page))) - return compound_head_by_tail(page); - return page; -} - -/* - * If we access compound page synchronously such as access to - * allocated page, there is no need to handle tail flag race, so we can - * check tail flag directly without any synchronization primitive. - */ -static inline struct page *compound_head_fast(struct page *page) -{ - if (unlikely(PageTail(page))) - return page->first_page; - return page; -} - /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test @@ -518,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page) VM_BUG_ON_PAGE(!PageTail(page), page); VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); - if (compound_tail_refcounted(page->first_page)) + if (compound_tail_refcounted(compound_head(page))) atomic_inc(&page->_mapcount); } @@ -541,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); - /* - * We don't need to worry about synchronization of tail flag - * when we call virt_to_head_page() since it is only called for - * already allocated page and this page won't be freed until - * this virt_to_head_page() is finished. So use _fast variant. 
- */ - return compound_head_fast(page); + return compound_head(page); } /* @@ -1586,8 +1540,7 @@ static inline bool ptlock_init(struct page *page) * with 0. Make sure nobody took it in use in between. * * It can happen if arch try to use slab for page table allocation: - * slab code uses page->slab_cache and page->first_page (for tail - * pages), which share storage with page->ptl. + * slab code uses page->slab_cache, which share storage with page->ptl. */ VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); if (!ptlock_alloc(page)) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e334ef79cb43..bb91658c603f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -111,7 +111,13 @@ struct page { }; }; - /* Third double word block */ + /* + * Third double word block + * + * WARNING: bit 0 of the first word encode PageTail(). That means + * the rest users of the storage space MUST NOT use the bit to + * avoid collision and false-positive PageTail(). + */ union { struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! @@ -132,14 +138,23 @@ struct page { struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ - /* First tail page of compound page */ + /* Tail pages of compound page */ struct { + unsigned long compound_head; /* If bit zero is set */ + + /* First tail page only */ unsigned short int compound_dtor; unsigned short int compound_order; }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ + struct { + unsigned long __pad; /* do not overlay pmd_huge_pte + * with compound_head to avoid + * possible bit 0 collision. + */ + pgtable_t pmd_huge_pte; /* protected by page->ptl */ + }; #endif }; @@ -160,7 +175,6 @@ struct page { #endif #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ - struct page *first_page; /* Compound tail pages */ }; #ifdef CONFIG_MEMCG diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a525e5067484..bb53c7b86315 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -86,12 +86,7 @@ enum pageflags { PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_writeback, /* Page is under writeback */ -#ifdef CONFIG_PAGEFLAGS_EXTENDED PG_head, /* A head page */ - PG_tail, /* A tail page */ -#else - PG_compound, /* A compound page */ -#endif PG_swapcache, /* Swap page: swp_entry_t in private */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ @@ -398,85 +393,46 @@ static inline void set_page_writeback_keepwrite(struct page *page) test_set_page_writeback_keepwrite(page); } -#ifdef CONFIG_PAGEFLAGS_EXTENDED -/* - * System with lots of page flags available. This allows separate - * flags for PageHead() and PageTail() checks of compound pages so that bit - * tests can be used in performance sensitive paths. PageCompound is - * generally not used in hot code paths except arch/powerpc/mm/init_64.c - * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages - * and avoid handling those in real mode. 
- */ __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) -__PAGEFLAG(Tail, tail) -static inline int PageCompound(struct page *page) -{ - return page->flags & ((1L << PG_head) | (1L << PG_tail)); - -} -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void ClearPageCompound(struct page *page) +static inline int PageTail(struct page *page) { - BUG_ON(!PageHead(page)); - ClearPageHead(page); + return READ_ONCE(page->compound_head) & 1; } -#endif - -#define PG_head_mask ((1L << PG_head)) -#else -/* - * Reduce page flag use as much as possible by overlapping - * compound page flags with the flags used for page cache pages. Possible - * because PageCompound is always set for compound pages and not for - * pages on the LRU and/or pagecache. - */ -TESTPAGEFLAG(Compound, compound) -__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound) - -/* - * PG_reclaim is used in combination with PG_compound to mark the - * head and tail of a compound page. This saves one page flag - * but makes it impossible to use compound pages for the page cache. - * The PG_reclaim bit would have to be used for reclaim or readahead - * if compound pages enter the page cache. - * - * PG_compound & PG_reclaim => Tail page - * PG_compound & ~PG_reclaim => Head page - */ -#define PG_head_mask ((1L << PG_compound)) -#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) - -static inline int PageHead(struct page *page) +static inline void set_compound_head(struct page *page, struct page *head) { - return ((page->flags & PG_head_tail_mask) == PG_head_mask); + WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } -static inline int PageTail(struct page *page) +static inline void clear_compound_head(struct page *page) { - return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); + WRITE_ONCE(page->compound_head, 0); } -static inline void __SetPageTail(struct page *page) +static inline struct page *compound_head(struct page *page) { - page->flags |= PG_head_tail_mask; + unsigned long head = READ_ONCE(page->compound_head); + + if (unlikely(head & 1)) + return (struct page *) (head - 1); + return page; } -static inline void __ClearPageTail(struct page *page) +static inline int PageCompound(struct page *page) { - page->flags &= ~PG_head_tail_mask; -} + return PageHead(page) || PageTail(page); +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { - BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound)); - clear_bit(PG_compound, &page->flags); + BUG_ON(!PageHead(page)); + ClearPageHead(page); } #endif -#endif /* !PAGEFLAGS_EXTENDED */ +#define PG_head_mask ((1L << PG_head)) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); diff --git a/mm/Kconfig b/mm/Kconfig index 0d9fdcd01e47..97a4e06b15c0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -200,18 +200,6 @@ config MEMORY_HOTREMOVE depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION -# -# If we have space for more page flags then we can enable additional -# optimizations and functionality. -# -# Regular Sparsemem takes page flag bits for the sectionid if it does not -# use a virtual memmap. Disable extended page flags for 32 bit platforms -# that require the use of a sectionid in the page flags. 
-# -config PAGEFLAGS_EXTENDED - def_bool y - depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM - # Heavily threaded applications may benefit from splitting the mm-wide # page_table_lock, so that faults on different parts of the user address # space can be handled with less contention: split it at this NR_CPUS. diff --git a/mm/debug.c b/mm/debug.c index e784110fb51d..668aa35191ca 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -25,12 +25,7 @@ static const struct trace_print_flags pageflag_names[] = { {1UL << PG_private, "private" }, {1UL << PG_private_2, "private_2" }, {1UL << PG_writeback, "writeback" }, -#ifdef CONFIG_PAGEFLAGS_EXTENDED {1UL << PG_head, "head" }, - {1UL << PG_tail, "tail" }, -#else - {1UL << PG_compound, "compound" }, -#endif {1UL << PG_swapcache, "swapcache" }, {1UL << PG_mappedtodisk, "mappedtodisk" }, {1UL << PG_reclaim, "reclaim" }, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 73266ee7274c..e1ccc83f73d3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1755,8 +1755,7 @@ static void __split_huge_page_refcount(struct page *page, (1L << PG_unevictable))); page_tail->flags |= (1L << PG_dirty); - /* clear PageTail before overwriting first_page */ - smp_wmb(); + clear_compound_head(page_tail); if (page_is_young(page)) set_page_young(page_tail); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e90a29024c5c..4eb0f0964883 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1001,9 +1001,8 @@ static void destroy_compound_gigantic_page(struct page *page, struct page *p = page + 1; for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { - __ClearPageTail(p); + clear_compound_head(p); set_page_refcounted(p); - p->first_page = NULL; } set_compound_order(page, 0); @@ -1276,10 +1275,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) */ __ClearPageReserved(p); set_page_count(p, 0); - p->first_page = page; - /* Make sure p->first_page is always valid for PageTail() */ - smp_wmb(); - __SetPageTail(p); + set_compound_head(p, page); } } diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 33d59abe91f1..d8fb10de0f14 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -385,7 +385,7 @@ void __init hugetlb_cgroup_file_init(void) /* * Add cgroup control files only if the huge page consists * of more than two normal pages. This is because we use - * page[2].lru.next for storing cgroup details. + * page[2].private for storing cgroup details. */ if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) __hugetlb_cgroup_file_init(hstate_index(h)); diff --git a/mm/internal.h b/mm/internal.h index 5b7841f6fa27..a7f5670fea23 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -80,9 +80,9 @@ static inline void __get_page_tail_foll(struct page *page, * speculative page access (like in * page_cache_get_speculative()) on tail pages. 
*/ - VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page); + VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page); if (get_page_head) - atomic_inc(&page->first_page->_count); + atomic_inc(&compound_head(page)->_count); get_huge_page_tail(page); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 16a0ec385320..8424b64711ac 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -776,8 +776,6 @@ static int me_huge_page(struct page *p, unsigned long pfn) #define lru (1UL << PG_lru) #define swapbacked (1UL << PG_swapbacked) #define head (1UL << PG_head) -#define tail (1UL << PG_tail) -#define compound (1UL << PG_compound) #define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) @@ -800,12 +798,7 @@ static struct page_state { */ { slab, slab, MF_MSG_SLAB, me_kernel }, -#ifdef CONFIG_PAGEFLAGS_EXTENDED { head, head, MF_MSG_HUGE, me_huge_page }, - { tail, tail, MF_MSG_HUGE, me_huge_page }, -#else - { compound, compound, MF_MSG_HUGE, me_huge_page }, -#endif { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty }, { sc|dirty, sc, MF_MSG_CLEAN_SWAPCACHE, me_swapcache_clean }, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fae1bd6f9f37..e361001519d3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -445,15 +445,15 @@ out: /* * Higher-order pages are called "compound pages". They are structured thusly: * - * The first PAGE_SIZE page is called the "head page". + * The first PAGE_SIZE page is called the "head page" and have PG_head set. * - * The remaining PAGE_SIZE pages are called "tail pages". + * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded + * in bit 0 of page->compound_head. The rest of bits is pointer to head page. * - * All pages have PG_compound set. All tail pages have their ->first_page - * pointing at the head page. + * The first tail page's ->compound_dtor holds the offset in array of compound + * page destructors. See compound_page_dtors. * - * The first tail page's ->lru.next holds the address of the compound page's - * put_page() function. Its ->lru.prev holds the order of allocation. + * The first tail page's ->compound_order holds the order of allocation. * This usage means that zero-order pages may not be compound. */ @@ -473,10 +473,7 @@ void prep_compound_page(struct page *page, unsigned long order) for (i = 1; i < nr_pages; i++) { struct page *p = page + i; set_page_count(p, 0); - p->first_page = page; - /* Make sure p->first_page is always valid for PageTail() */ - smp_wmb(); - __SetPageTail(p); + set_compound_head(p, page); } } @@ -854,17 +851,30 @@ static void free_one_page(struct zone *zone, static int free_tail_pages_check(struct page *head_page, struct page *page) { - if (!IS_ENABLED(CONFIG_DEBUG_VM)) - return 0; + int ret = 1; + + /* + * We rely page->lru.next never has bit 0 set, unless the page + * is PageTail(). Let's make sure that's true even for poisoned ->lru. 
+ */ + BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1); + + if (!IS_ENABLED(CONFIG_DEBUG_VM)) { + ret = 0; + goto out; + } if (unlikely(!PageTail(page))) { bad_page(page, "PageTail not set", 0); - return 1; + goto out; } - if (unlikely(page->first_page != head_page)) { - bad_page(page, "first_page not consistent", 0); - return 1; + if (unlikely(compound_head(page) != head_page)) { + bad_page(page, "compound_head not consistent", 0); + goto out; } - return 0; + ret = 0; +out: + clear_compound_head(page); + return ret; } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -931,6 +941,10 @@ void __meminit reserve_bootmem_region(unsigned long start, unsigned long end) struct page *page = pfn_to_page(start_pfn); init_reserved_page(start_pfn); + + /* Avoid false-positive PageTail() */ + INIT_LIST_HEAD(&page->lru); + SetPageReserved(page); } } diff --git a/mm/swap.c b/mm/swap.c index 983f692a47fd..39395fb549c0 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -201,7 +201,7 @@ out_put_single: __put_single_page(page); return; } - VM_BUG_ON_PAGE(page_head != page->first_page, page); + VM_BUG_ON_PAGE(page_head != compound_head(page), page); /* * We can release the refcount taken by * get_page_unless_zero() now that @@ -262,7 +262,7 @@ static void put_compound_page(struct page *page) * Case 3 is possible, as we may race with * __split_huge_page_refcount tearing down a THP page. */ - page_head = compound_head_by_tail(page); + page_head = compound_head(page); if (!__compound_tail_refcounted(page_head)) put_unrefcounted_compound_page(page_head, page); else -- cgit v1.2.3 From d00181b96eb86c914cb327d1de974a1b71366e1b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:57 -0800 Subject: mm: use 'unsigned int' for page order Let's try to be consistent about data type of page order. [sfr@canb.auug.org.au: fix build (type of pageblock_order)] [hughd@google.com: some configs end up with MAX_ORDER and pageblock_order having different types] Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. 
McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Stephen Rothwell Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++-- include/linux/pageblock-flags.h | 2 +- mm/hugetlb.c | 19 ++++++++++--------- mm/internal.h | 4 ++-- mm/page_alloc.c | 29 ++++++++++++++++------------- 5 files changed, 32 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9671b6f23eda..00bad7793788 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -550,7 +550,7 @@ static inline compound_page_dtor *get_compound_page_dtor(struct page *page) return compound_page_dtors[page[1].compound_dtor]; } -static inline int compound_order(struct page *page) +static inline unsigned int compound_order(struct page *page) { if (!PageHead(page)) return 0; @@ -1810,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern __printf(3, 4) -void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); +void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, + const char *fmt, ...); extern void setup_per_cpu_pageset(void); diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 2baeee12f48e..e942558b3585 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -44,7 +44,7 @@ enum pageblock_bits { #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE /* Huge page sizes are variable */ -extern int pageblock_order; +extern unsigned int pageblock_order; #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4eb0f0964883..7ce07d681265 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -994,7 +994,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) #if defined(CONFIG_CMA) && defined(CONFIG_X86_64) static void destroy_compound_gigantic_page(struct page *page, - unsigned long order) + unsigned int order) { int i; int nr_pages = 1 << order; @@ -1009,7 +1009,7 @@ static void destroy_compound_gigantic_page(struct page *page, __ClearPageHead(page); } -static void free_gigantic_page(struct page *page, unsigned order) +static void free_gigantic_page(struct page *page, unsigned int order) { free_contig_range(page_to_pfn(page), 1 << order); } @@ -1053,7 +1053,7 @@ static bool zone_spans_last_pfn(const struct zone *zone, return zone_spans_pfn(zone, last_pfn); } -static struct page *alloc_gigantic_page(int nid, unsigned order) +static struct page *alloc_gigantic_page(int nid, unsigned int order) { unsigned long nr_pages = 1 << order; unsigned long ret, pfn, flags; @@ -1089,7 +1089,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order) } static void prep_new_huge_page(struct hstate *h, struct page *page, int nid); -static void prep_compound_gigantic_page(struct page *page, unsigned long order); +static void prep_compound_gigantic_page(struct page *page, unsigned int order); static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid) { @@ -1122,9 +1122,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h, static inline bool gigantic_page_supported(void) { return true; } #else static inline bool gigantic_page_supported(void) { return false; } -static inline void free_gigantic_page(struct page *page, unsigned order) { } +static inline void free_gigantic_page(struct page *page, unsigned int order) { } static 
inline void destroy_compound_gigantic_page(struct page *page, - unsigned long order) { } + unsigned int order) { } static inline int alloc_fresh_gigantic_page(struct hstate *h, nodemask_t *nodes_allowed) { return 0; } #endif @@ -1250,7 +1250,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) put_page(page); /* free it into the hugepage allocator */ } -static void prep_compound_gigantic_page(struct page *page, unsigned long order) +static void prep_compound_gigantic_page(struct page *page, unsigned int order) { int i; int nr_pages = 1 << order; @@ -1968,7 +1968,8 @@ found: return 1; } -static void __init prep_compound_huge_page(struct page *page, int order) +static void __init prep_compound_huge_page(struct page *page, + unsigned int order) { if (unlikely(order > (MAX_ORDER - 1))) prep_compound_gigantic_page(page, order); @@ -2679,7 +2680,7 @@ static int __init hugetlb_init(void) module_init(hugetlb_init); /* Should be called on processing a hugepagesz=... option */ -void __init hugetlb_add_hstate(unsigned order) +void __init hugetlb_add_hstate(unsigned int order) { struct hstate *h; unsigned long i; diff --git a/mm/internal.h b/mm/internal.h index a7f5670fea23..38e24b89e4c4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -177,7 +177,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) extern int __isolate_free_page(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned long pfn, unsigned int order); -extern void prep_compound_page(struct page *page, unsigned long order); +extern void prep_compound_page(struct page *page, unsigned int order); #ifdef CONFIG_MEMORY_FAILURE extern bool is_free_buddy_page(struct page *page); #endif @@ -235,7 +235,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, * page cannot be allocated or merged in parallel. Alternatively, it must * handle invalid values gracefully, and use page_order_unsafe() below. */ -static inline unsigned long page_order(struct page *page) +static inline unsigned int page_order(struct page *page) { /* PageBuddy() must be checked by the caller */ return page_private(page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e361001519d3..208e4c7e771b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -181,7 +181,7 @@ bool pm_suspended_storage(void) #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE -int pageblock_order __read_mostly; +unsigned int pageblock_order __read_mostly; #endif static void __free_pages_ok(struct page *page, unsigned int order); @@ -462,7 +462,7 @@ static void free_compound_page(struct page *page) __free_pages_ok(page, compound_order(page)); } -void prep_compound_page(struct page *page, unsigned long order) +void prep_compound_page(struct page *page, unsigned int order) { int i; int nr_pages = 1 << order; @@ -662,7 +662,7 @@ static inline void __free_one_page(struct page *page, unsigned long combined_idx; unsigned long uninitialized_var(buddy_idx); struct page *buddy; - int max_order = MAX_ORDER; + unsigned int max_order = MAX_ORDER; VM_BUG_ON(!zone_is_initialized(zone)); VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); @@ -675,7 +675,7 @@ static inline void __free_one_page(struct page *page, * pageblock. Without this, pageblock isolation * could cause incorrect freepage accounting. 
*/ - max_order = min(MAX_ORDER, pageblock_order + 1); + max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); } else { __mod_zone_freepage_state(zone, 1 << order, migratetype); } @@ -1471,7 +1471,7 @@ int move_freepages(struct zone *zone, int migratetype) { struct page *page; - unsigned long order; + unsigned int order; int pages_moved = 0; #ifndef CONFIG_HOLES_IN_ZONE @@ -1584,7 +1584,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt) static void steal_suitable_fallback(struct zone *zone, struct page *page, int start_type) { - int current_order = page_order(page); + unsigned int current_order = page_order(page); int pages; /* Take ownership for orders >= pageblock_order */ @@ -2637,7 +2637,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); -void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) +void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...) { unsigned int filter = SHOW_MEM_FILTER_NODES; @@ -2671,7 +2671,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) va_end(args); } - pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n", + pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n", current->comm, order, gfp_mask); dump_stack(); @@ -3449,7 +3449,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order) } } -static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +static void *make_alloc_exact(unsigned long addr, unsigned int order, + size_t size) { if (addr) { unsigned long alloc_end = addr + (PAGE_SIZE << order); @@ -3499,7 +3500,7 @@ EXPORT_SYMBOL(alloc_pages_exact); */ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) { - unsigned order = get_order(size); + unsigned int order = get_order(size); struct page *p = alloc_pages_node(nid, gfp_mask, order); if (!p) return NULL; @@ -3800,7 +3801,8 @@ void show_free_areas(unsigned int filter) } for_each_populated_zone(zone) { - unsigned long nr[MAX_ORDER], flags, order, total = 0; + unsigned int order; + unsigned long nr[MAX_ORDER], flags, total = 0; unsigned char types[MAX_ORDER]; if (skip_free_areas_node(filter, zone_to_nid(zone))) @@ -4149,7 +4151,7 @@ static void build_zonelists(pg_data_t *pgdat) nodemask_t used_mask; int local_node, prev_node; struct zonelist *zonelist; - int order = current_zonelist_order; + unsigned int order = current_zonelist_order; /* initialize zonelists */ for (i = 0; i < MAX_ZONELISTS; i++) { @@ -6678,7 +6680,8 @@ int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype) { unsigned long outer_start, outer_end; - int ret = 0, order; + unsigned int order; + int ret = 0; struct compact_control cc = { .nr_migratepages = 0, -- cgit v1.2.3 From 1965c8b7ac7dd147663faf77a66a693ac3ddcb85 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:30:00 -0800 Subject: mm: use 'unsigned int' for compound_dtor/compound_order on 64BIT On 64 bit systems we have enough space in struct page to encode compound_dtor and compound_order with unsigned int. On x86-64 it leads to slightly smaller code size due to usage of a plain MOV instead of MOVZX (zero-extended move) or similar effect. allyesconfig:

    text       data      bss       dec        hex       filename
    159520446  48146736  72196096  279863278  10ae5fee  vmlinux.pre
    159520382  48146736  72196096  279863214  10ae5fae  vmlinux.post

On other architectures without native support of 16-bit data types the Signed-off-by: Kirill A.
Shutemov Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Sergey Senozhatsky Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bb91658c603f..f8d1492a114f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -143,8 +143,19 @@ struct page { unsigned long compound_head; /* If bit zero is set */ /* First tail page only */ +#ifdef CONFIG_64BIT + /* + * On 64 bit system we have enough space in struct page + * to encode compound_dtor and compound_order with + * unsigned int. It can help compiler generate better or + * smaller code on some archtectures. + */ + unsigned int compound_dtor; + unsigned int compound_order; +#else unsigned short int compound_dtor; unsigned short int compound_order; +#endif }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS -- cgit v1.2.3 From 9add850c211a39d5ab1a091d48795e21599a73d0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 6 Nov 2015 16:30:09 -0800 Subject: include/linux/compiler-gcc.h: improve __visible documentation Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0e3110a0b771..22ab246feed3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -205,7 +205,10 @@ #if GCC_VERSION >= 40600 /* - * Tell the optimizer that something else uses this function or variable. + * When used with Link Time Optimization, gcc can optimize away C functions or + * variables which are referenced only from assembly code. __visible tells the + * optimizer that something else uses this function or variable, thus preventing + * this. */ #define __visible __attribute__((externally_visible)) #endif -- cgit v1.2.3 From e2eb53aa96754b97d158eff884dde88abbad925e Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Fri, 6 Nov 2015 16:30:58 -0800 Subject: bitops.h: improve sign_extend32()'s documentation It is often overlooked that sign_extend32(), despite its name, is safe to use for 16 and 8 bit types as well. This should help prevent sign extension being done manually some other way. Signed-off-by: Martin Kepplinger Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: George Spelvin Cc: Rasmus Villemoes Cc: Maxime Coquelin Cc: Denys Vlasenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index e63553386ae7..5629923a8701 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -164,6 +164,8 @@ static inline __u8 ror8(__u8 word, unsigned int shift) * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<32) to sign bit + * + * This is safe to use for 16- and 8-bit types as well. 
*/ static inline __s32 sign_extend32(__u32 value, int index) { -- cgit v1.2.3 From 48e203e21b29cd4b2c58403fe8bca68e2e854895 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Fri, 6 Nov 2015 16:31:02 -0800 Subject: bitops.h: add sign_extend64() Months back, this was discussed; see https://lkml.org/lkml/2015/1/18/289 The result was that the 64-bit version was deemed "likely fine", "valuable" and "correct". The discussion petered out, but since there are possible users, let's add it. Signed-off-by: Martin Kepplinger Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: George Spelvin Cc: Rasmus Villemoes Cc: Maxime Coquelin Cc: Denys Vlasenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5629923a8701..2b8ed123ad36 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -173,6 +173,17 @@ static inline __s32 sign_extend32(__u32 value, int index) return (__s32)(value << shift) >> shift; } +/** + * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit + * @value: value to sign extend + * @index: 0 based bit index (0<=index<64) to sign bit + */ +static inline __s64 sign_extend64(__u64 value, int index) +{ + __u8 shift = 63 - index; + return (__s64)(value << shift) >> shift; +} + static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) -- cgit v1.2.3 From 0a9df786a6ae2f898114bdd242b64920dedf53bd Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:31:20 -0800 Subject: lib/kasprintf.c: introduce kvasprintf_const This adds kvasprintf_const which tries to use kstrdup_const if possible: If the format string contains no % characters, or if the format string is exactly "%s", we delegate to kstrdup_const. Otherwise, we fall back to kvasprintf. Just as for kstrdup_const, the main motivation is to save memory by reusing .rodata when possible. The return value should be freed by kfree_const, just like for kstrdup_const. There is deliberately no kasprintf_const: In the vast majority of cases, the format string argument is a literal, so one can determine statically whether one could instead use kstrdup_const directly (which would also require one to change all corresponding kfree calls to kfree_const). Signed-off-by: Rasmus Villemoes Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ lib/kasprintf.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5582410727cb..2c13f747ac2e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -413,6 +413,8 @@ extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); extern __printf(2, 0) char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +extern __printf(2, 0) +const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); extern __scanf(2, 3) int sscanf(const char *, const char *, ...); diff --git a/lib/kasprintf.c b/lib/kasprintf.c index 32f12150fc4f..f194e6e593e1 100644 --- a/lib/kasprintf.c +++ b/lib/kasprintf.c @@ -31,6 +31,22 @@ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) } EXPORT_SYMBOL(kvasprintf); +/* + * If fmt contains no % (or is exactly %s), use kstrdup_const.
If fmt + * (or the sole vararg) points to rodata, we will then save a memory + * allocation and string copy. In any case, the return value should be + * freed using kfree_const(). + */ +const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap) +{ + if (!strchr(fmt, '%')) + return kstrdup_const(fmt, gfp); + if (!strcmp(fmt, "%s")) + return kstrdup_const(va_arg(ap, const char*), gfp); + return kvasprintf(gfp, fmt, ap); +} +EXPORT_SYMBOL(kvasprintf_const); + char *kasprintf(gfp_t gfp, const char *fmt, ...) { va_list ap; -- cgit v1.2.3 From 8de1ee7ebfb4979c6444e81273e12e7a972c367d Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 6 Nov 2015 16:31:28 -0800 Subject: rbtree: clarify documentation of rbtree_postorder_for_each_entry_safe() I noticed that commit a20135ffbc44 ("writeback: don't drain bdi_writeback_congested on bdi destruction") added a usage of rbtree_postorder_for_each_entry_safe() in mm/backing-dev.c which appears to try to rb_erase() elements from an rbtree while iterating over it using rbtree_postorder_for_each_entry_safe(). Doing this will cause random nodes to be missed by the iteration because rb_erase() may rebalance the tree, changing the ordering that we're trying to iterate over. The previous documentation for rbtree_postorder_for_each_entry_safe() wasn't clear that this wasn't allowed; it was taken from the docs for list_for_each_entry_safe(), where erasing isn't a problem due to list_del() not reordering. Explicitly warn developers about this potential pitfall. Note that I haven't fixed the actual issue that (it appears) the commit referenced above introduced (not familiar enough with that code). In general (and in this case), the patterns to follow are: - switch to rb_first() + rb_erase(), don't use rbtree_postorder_for_each_entry_safe(). - keep the postorder iteration and don't rb_erase() at all. Instead just clear the fields of rb_node & cgwb_congested_tree as required by other users of those structures. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Cody P Schafer Cc: John de la Garza Cc: Michel Lespinasse Cc: Peter Zijlstra Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 830c4992088d..a5aa7ae671f4 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -101,13 +101,21 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent }) /** - * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of - * given type safe against removal of rb_node entry + * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of + * given type allowing the backing memory of @pos to be invalidated * * @pos: the 'type *' to use as a loop cursor. * @n: another 'type *' to use as temporary storage * @root: 'rb_root *' of the rbtree. * @field: the name of the rb_node field within 'type'. + * + * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as + * list_for_each_entry_safe() and allows the iteration to continue independent + * of changes to @pos by the body of the loop. + * + * Note, however, that it cannot handle other modifications that re-order the + * rbtree it is iterating over. This includes calling rb_erase() on @pos, as + * rb_erase() may rebalance the tree, causing us to miss some nodes. 
*/ #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ -- cgit v1.2.3 From 2e01fabe67ccaff1d59bda01e60a61f5fb0aa7b6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:19 -0800 Subject: signals: kill block_all_signals() and unblock_all_signals() It is hardly possible to enumerate all problems with block_all_signals() and unblock_all_signals(). Just for example, 1. block_all_signals(SIGSTOP/etc) simply can't help if the caller is multithreaded. Another thread can dequeue the signal and force the group stop. 2. Even if the caller is single-threaded, it will "stop" anyway. It will not sleep, but it will spin in kernel space until SIGCONT or SIGKILL. And a lot more. In short, this interface doesn't work at all, at least for the last 10+ years. Daniel said: Yeah the only times I played around with the DRM_LOCK stuff was when old drivers accidentally deadlocked - my impression is that the entire DRM_LOCK thing was never really tested properly ;-) Hence I'm all for purging where this leaks out of the drm subsystem. Signed-off-by: Oleg Nesterov Acked-by: Daniel Vetter Acked-by: Dave Airlie Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_lock.c | 41 ------------------------------------- include/drm/drmP.h | 1 - include/linux/sched.h | 7 +------ kernel/signal.c | 51 +--------------------------------------------- 4 files changed, 2 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 4924d381b664..daa2ff12101b 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -38,8 +38,6 @@ #include "drm_legacy.h" #include "drm_internal.h" -static int drm_notifier(void *priv); - static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); /** @@ -118,14 +116,8 @@ int drm_legacy_lock(struct drm_device *dev, void *data, * really probably not the correct answer but lets us debug xkb * xserver for now */ if (!file_priv->is_master) { - sigemptyset(&dev->sigmask); - sigaddset(&dev->sigmask, SIGSTOP); - sigaddset(&dev->sigmask, SIGTSTP); - sigaddset(&dev->sigmask, SIGTTIN); - sigaddset(&dev->sigmask, SIGTTOU); dev->sigdata.context = lock->context; dev->sigdata.lock = master->lock.hw_lock; - block_all_signals(drm_notifier, dev, &dev->sigmask); } if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) @@ -169,7 +161,6 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_ /* FIXME: Should really bail out here. */ } - unblock_all_signals(); return 0; } @@ -287,38 +278,6 @@ int drm_legacy_lock_free(struct drm_lock_data *lock_data, unsigned int context) return 0; } -/** - * If we get here, it means that the process has called DRM_IOCTL_LOCK - * without calling DRM_IOCTL_UNLOCK. - * - * If the lock is not held, then let the signal proceed as usual. If the lock - * is held, then set the contended flag and keep the signal blocked. - * - * \param priv pointer to a drm_device structure. - * \return one if the signal should be delivered normally, or zero if the - * signal should be blocked. 
- */ -static int drm_notifier(void *priv) -{ - struct drm_device *dev = priv; - struct drm_hw_lock *lock = dev->sigdata.lock; - unsigned int old, new, prev; - - /* Allow signal delivery if lock isn't held */ - if (!lock || !_DRM_LOCK_IS_HELD(lock->lock) - || _DRM_LOCKING_CONTEXT(lock->lock) != dev->sigdata.context) - return 1; - - /* Otherwise, set flag to force call to - drmUnlock */ - do { - old = lock->lock; - new = old | _DRM_LOCK_CONT; - prev = cmpxchg(&lock->lock, old, new); - } while (prev != old); - return 0; -} - /** * This function returns immediately and takes the hw lock * with the kernel context if it is free, otherwise it gets the highest priority when and if diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 8b5ce7c5d9bb..f56cdcecc1c9 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -822,7 +822,6 @@ struct drm_device { struct drm_sg_mem *sg; /**< Scatter gather memory */ unsigned int num_crtcs; /**< Number of CRTCs on this device */ - sigset_t sigmask; struct { int context; diff --git a/include/linux/sched.h b/include/linux/sched.h index eeb5066a44fb..923ec1a9b2b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1570,9 +1570,7 @@ struct task_struct { unsigned long sas_ss_sp; size_t sas_ss_size; - int (*notifier)(void *priv); - void *notifier_data; - sigset_t *notifier_mask; + struct callback_head *task_works; struct audit_context *audit_context; @@ -2476,9 +2474,6 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s return ret; } -extern void block_all_signals(int (*notifier)(void *priv), void *priv, - sigset_t *mask); -extern void unblock_all_signals(void); extern void release_task(struct task_struct * p); extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); diff --git a/kernel/signal.c b/kernel/signal.c index 0f6bbbe77b46..f2cbd4ed5cd4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -503,41 +503,6 @@ int unhandled_signal(struct task_struct *tsk, int sig) return !tsk->ptrace; } -/* - * Notify the system that a driver wants to block all signals for this - * process, and wants to be notified if any signals at all were to be - * sent/acted upon. If the notifier routine returns non-zero, then the - * signal will be acted upon after all. If the notifier routine returns 0, - * then then signal will be blocked. Only one block per process is - * allowed. priv is a pointer to private data that the notifier routine - * can use to determine if the signal should be blocked or not. - */ -void -block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) -{ - unsigned long flags; - - spin_lock_irqsave(&current->sighand->siglock, flags); - current->notifier_mask = mask; - current->notifier_data = priv; - current->notifier = notifier; - spin_unlock_irqrestore(&current->sighand->siglock, flags); -} - -/* Notify the system that blocking has ended. 
*/ - -void -unblock_all_signals(void) -{ - unsigned long flags; - - spin_lock_irqsave(&current->sighand->siglock, flags); - current->notifier = NULL; - current->notifier_data = NULL; - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); -} - static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) { struct sigqueue *q, *first = NULL; @@ -580,19 +545,8 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, { int sig = next_signal(pending, mask); - if (sig) { - if (current->notifier) { - if (sigismember(current->notifier_mask, sig)) { - if (!(current->notifier)(current->notifier_data)) { - clear_thread_flag(TIF_SIGPENDING); - return 0; - } - } - } - + if (sig) collect_signal(sig, pending, info); - } - return sig; } @@ -2483,9 +2437,6 @@ EXPORT_SYMBOL(force_sig); EXPORT_SYMBOL(send_sig); EXPORT_SYMBOL(send_sig_info); EXPORT_SYMBOL(sigprocmask); -EXPORT_SYMBOL(block_all_signals); -EXPORT_SYMBOL(unblock_all_signals); - /* * System call entry points. -- cgit v1.2.3 From be0e6f290f78b84a3b21b8c8c46819c4514fe632 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:22 -0800 Subject: signal: turn dequeue_signal_lock() into kernel_dequeue_signal() 1. Rename dequeue_signal_lock() to kernel_dequeue_signal(). This matches another "for kthreads only" kernel_sigaction() helper. 2. Remove the "tsk" and "mask" arguments; they are always current and current->blocked. And it is simply wrong if tsk != current. 3. We could also remove the 3rd "siginfo_t *info" arg but it looks potentially useful. However, we can simplify the callers if we change kernel_dequeue_signal() to accept info => NULL. 4. Remove _irqsave; it is never called from atomic context. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo Cc: David Woodhouse Cc: Felipe Balbi Cc: Markus Pargmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 15 ++++----------- drivers/usb/gadget/function/f_mass_storage.c | 4 +--- fs/jffs2/background.c | 3 +-- include/linux/sched.h | 11 ++++++----- 4 files changed, 12 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1b87623381e2..93b3f99b6865 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -444,9 +444,7 @@ static int nbd_thread_recv(struct nbd_device *nbd) spin_unlock_irqrestore(&nbd->tasks_lock, flags); if (signal_pending(current)) { - siginfo_t info; - - ret = dequeue_signal_lock(current, &current->blocked, &info); + ret = kernel_dequeue_signal(NULL); dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", task_pid_nr(current), current->comm, ret); mutex_lock(&nbd->tx_lock); @@ -560,11 +558,8 @@ static int nbd_thread_send(void *data) !list_empty(&nbd->waiting_queue)); if (signal_pending(current)) { - siginfo_t info; - int ret; + int ret = kernel_dequeue_signal(NULL); - ret = dequeue_signal_lock(current, &current->blocked, - &info); dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", task_pid_nr(current), current->comm, ret); mutex_lock(&nbd->tx_lock); @@ -592,10 +587,8 @@ static int nbd_thread_send(void *data) spin_unlock_irqrestore(&nbd->tasks_lock, flags); /* Clear maybe pending signals */ - if (signal_pending(current)) { - siginfo_t info; - dequeue_signal_lock(current, &current->blocked, &info); - } + if (signal_pending(current)) + kernel_dequeue_signal(NULL); return 0; } diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index cd54e72a6c50..5ec533826621 100644 --- 
a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2345,7 +2345,6 @@ static void fsg_disable(struct usb_function *f) static void handle_exception(struct fsg_common *common) { - siginfo_t info; int i; struct fsg_buffhd *bh; enum fsg_state old_state; @@ -2357,8 +2356,7 @@ static void handle_exception(struct fsg_common *common) * into a high-priority EXIT exception. */ for (;;) { - int sig = - dequeue_signal_lock(current, &current->blocked, &info); + int sig = kernel_dequeue_signal(NULL); if (!sig) break; if (sig != SIGUSR1) { diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index bb9cebc9ca8a..f3145fd86d86 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -121,13 +121,12 @@ static int jffs2_garbage_collect_thread(void *_c) /* Put_super will send a SIGKILL and then wait on the sem. */ while (signal_pending(current) || freezing(current)) { - siginfo_t info; unsigned long signr; if (try_to_freeze()) goto again; - signr = dequeue_signal_lock(current, &current->blocked, &info); + signr = kernel_dequeue_signal(NULL); switch(signr) { case SIGSTOP: diff --git a/include/linux/sched.h b/include/linux/sched.h index 923ec1a9b2b4..3d54924b4b86 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2462,14 +2462,15 @@ extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); -static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) +static inline int kernel_dequeue_signal(siginfo_t *info) { - unsigned long flags; + struct task_struct *tsk = current; + siginfo_t __info; int ret; - spin_lock_irqsave(&tsk->sighand->siglock, flags); - ret = dequeue_signal(tsk, mask, info); - spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); return ret; } -- cgit v1.2.3 From 9a13049e83f346cb1cbd60c64e520a73c396af16 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:25 -0800 Subject: signal: introduce kernel_signal_stop() to fix jffs2_garbage_collect_thread() jffs2_garbage_collect_thread() can race with SIGCONT and sleep in TASK_STOPPED state even after SIGCONT was already delivered. Add the new helper, kernel_signal_stop(), which does this correctly: it re-checks JOBCTL_STOP_DEQUEUED under siglock before setting TASK_STOPPED. 
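For illustration, a minimal sketch of the race (an assumed caller, not part of the patch): the open-coded stop sleeps unconditionally, so a SIGCONT that arrives after the stop signal was dequeued is silently lost, while the new helper re-checks job-control state under ->siglock:

	/* Racy open-coded stop: nothing re-checks after SIGCONT. */
	set_current_state(TASK_STOPPED);
	schedule();		/* may stop although SIGCONT already arrived */

	/*
	 * Safe: TASK_STOPPED is only set while ->siglock is held and
	 * JOBCTL_STOP_DEQUEUED is still set, i.e. the stop is still valid.
	 */
	kernel_signal_stop();
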
Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo Cc: David Woodhouse Cc: Felipe Balbi Cc: Markus Pargmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jffs2/background.c | 3 +-- include/linux/sched.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index f3145fd86d86..53cc7350af33 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -132,8 +132,7 @@ static int jffs2_garbage_collect_thread(void *_c) case SIGSTOP: jffs2_dbg(1, "%s(): SIGSTOP received\n", __func__); - set_current_state(TASK_STOPPED); - schedule(); + kernel_signal_stop(); break; case SIGKILL: diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d54924b4b86..4069febaa34a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2475,6 +2475,16 @@ static inline int kernel_dequeue_signal(siginfo_t *info) return ret; } +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(&current->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(&current->sighand->siglock); + + schedule(); +} + extern void release_task(struct task_struct * p); extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); -- cgit v1.2.3 From 002edb6f6f2a79bea50de11260ddc9572e6db731 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 6 Nov 2015 16:32:51 -0800 Subject: dma-mapping: tidy up dma_parms default handling Many DMA controllers and other devices set max_segment_size to indicate their scatter-gather capability, but have no interest in segment_boundary_mask. However, the existence of a dma_parms structure precludes the use of any default value, leaving them as zeros (assuming a properly kzalloc'ed structure). If a well-behaved IOMMU (or SWIOTLB) then tries to respect this by ensuring a mapped segment does not cross a zero-byte boundary, hilarity ensues. Since zero is a nonsensical value for either parameter, treat it as an indicator for "default", as might be expected. In the process, clean up a bit by replacing the bare constants with slightly more meaningful macros and removing the superfluous "else" statements. [akpm@linux-foundation.org: dma-mapping.h needs sizes.h for SZ_64K] Signed-off-by: Robin Murphy Reviewed-by: Sumit Semwal Acked-by: Marek Szyprowski Cc: Arnd Bergmann Cc: Sakari Ailus Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ac07ff090919..2e551e2d2d03 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -1,6 +1,7 @@ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H +#include <linux/sizes.h> #include #include #include @@ -145,7 +146,9 @@ static inline void arch_teardown_dma_ops(struct device *dev) { } static inline unsigned int dma_get_max_seg_size(struct device *dev) { - return dev->dma_parms ? 
dev->dma_parms->max_segment_size : 65536; + if (dev->dma_parms && dev->dma_parms->max_segment_size) + return dev->dma_parms->max_segment_size; + return SZ_64K; } static inline unsigned int dma_set_max_seg_size(struct device *dev, @@ -154,14 +157,15 @@ static inline unsigned int dma_set_max_seg_size(struct device *dev, if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; return 0; - } else - return -EIO; + } + return -EIO; } static inline unsigned long dma_get_seg_boundary(struct device *dev) { - return dev->dma_parms ? - dev->dma_parms->segment_boundary_mask : 0xffffffff; + if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) + return dev->dma_parms->segment_boundary_mask; + return DMA_BIT_MASK(32); } static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) @@ -169,8 +173,8 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) if (dev->dma_parms) { dev->dma_parms->segment_boundary_mask = mask; return 0; - } else - return -EIO; + } + return -EIO; } #ifndef dma_max_pfn -- cgit v1.2.3 From cb7ae262e230064ba282094b7e1f60a092448b72 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Fri, 6 Nov 2015 16:33:01 -0800 Subject: include/linux/zutil.h: fix usage example of zlib_adler32() adler32 was renamed to zlib_adler32 some time before 2.6.11. Signed-off-by: Anish Bhatt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zutil.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zutil.h b/include/linux/zutil.h index 6adfa9a6ffe9..663689521759 100644 --- a/include/linux/zutil.h +++ b/include/linux/zutil.h @@ -68,10 +68,10 @@ typedef uLong (*check_func) (uLong check, const Byte *buf, An Adler-32 checksum is almost as reliable as a CRC32 but can be computed much faster. Usage example: - uLong adler = adler32(0L, NULL, 0); + uLong adler = zlib_adler32(0L, NULL, 0); while (read_buffer(buffer, length) != EOF) { - adler = adler32(adler, buffer, length); + adler = zlib_adler32(adler, buffer, length); } if (adler != original_adler) error(); */ -- cgit v1.2.3 From 95ad1f4a9358dff1dcf84bf5c9cc84caa9215f7f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Nov 2015 11:21:47 +0100 Subject: netfilter: ipset: Fix extension alignment The data extensions in ipset lacked the proper memory alignment and thus could lead to a kernel crash on several architectures. Therefore the structures have been reorganized and alignment attributes added where needed. The patch was tested on armv7h by Gerhard Wiesinger and on x86_64 and sparc64 by Jozsef Kadlecsik. 
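The key pattern, as a minimal sketch (a hypothetical set type; the real structures follow in the diff below): the separately allocated extensions pointer is replaced by a u64-aligned flexible array member at the tail of the map, covered by one allocation whose per-element stride comes from ip_set_elem_len(), which now rounds the length up to the requested alignment:

	struct example_map {
		void *members;			/* the set members */
		size_t memsize;			/* members size */
		unsigned char extensions[0]	/* data extensions */
			__aligned(__alignof__(u64));
	};

	/* one allocation for the map and all extension data */
	set->dsize = ip_set_elem_len(set, tb, 0, 0);
	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);

	/* extension area of element id, always properly aligned */
	#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
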
Reported-by: Gerhard Wiesinger Tested-by: Gerhard Wiesinger Tested-by: Jozsef Kadlecsik Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 17 +++----- net/netfilter/ipset/ip_set_bitmap_ip.c | 14 ++----- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 64 ++++++++++++++----------------- net/netfilter/ipset/ip_set_bitmap_port.c | 18 ++++----- net/netfilter/ipset/ip_set_core.c | 14 ++++--- net/netfilter/ipset/ip_set_hash_gen.h | 11 ++++-- net/netfilter/ipset/ip_set_list_set.c | 5 ++- 8 files changed, 65 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 48bb01edcf30..0e1f433cc4b7 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -421,7 +421,7 @@ extern void ip_set_free(void *members); extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], - size_t len); + size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index d05e759ed0fa..b0bc475f641e 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -33,7 +33,7 @@ #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE -#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) +#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) static void mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) @@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (set->dsize) { - if (set->extensions & IPSET_EXT_DESTROY) - mtype_ext_cleanup(set); - ip_set_free(map->extensions); - } - kfree(map); + if (set->dsize && set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); + ip_set_free(map); set->data = NULL; } @@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct mtype *map = set->data; struct nlattr *nested; + size_t memsize = sizeof(*map) + map->memsize; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + - map->memsize + - set->dsize * map->elements))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 64a564334418..4783efff0bde 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip"); /* Type structure */ struct bitmap_ip { void *members; /* the set members */ - void *extensions; /* data extensions */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -49,6 +48,8 @@ struct bitmap_ip { size_t memsize; /* members size */ u8 netmask; /* subnet netmask */ struct timer_list gc; /* garbage collection */ + unsigned char extensions[0] /* data 
extensions */ + __aligned(__alignof__(u64)); }; /* ADT structure for generic function args */ @@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (set->dsize) { - map->extensions = ip_set_alloc(set->dsize * elements); - if (!map->extensions) { - kfree(map->members); - return false; - } - } map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; @@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], pr_debug("hosts %u, elements %llu\n", hosts, (unsigned long long)elements); - map = kzalloc(sizeof(*map), GFP_KERNEL); + set->dsize = ip_set_elem_len(set, tb, 0, 0); + map = ip_set_alloc(sizeof(*map) + elements * set->dsize); if (!map) return -ENOMEM; map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ip; - set->dsize = ip_set_elem_len(set, tb, 0); if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 1430535118fb..29dde208381d 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -47,24 +47,26 @@ enum { /* Type structure */ struct bitmap_ipmac { void *members; /* the set members */ - void *extensions; /* MAC + data extensions */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collector */ + unsigned char extensions[0] /* MAC + data extensions */ + __aligned(__alignof__(u64)); }; /* ADT structure for generic function args */ struct bitmap_ipmac_adt_elem { + unsigned char ether[ETH_ALEN] __aligned(2); u16 id; - unsigned char *ether; + u16 add_mac; }; struct bitmap_ipmac_elem { unsigned char ether[ETH_ALEN]; unsigned char filled; -} __attribute__ ((aligned)); +} __aligned(__alignof__(u64)); static inline u32 ip_to_id(const struct bitmap_ipmac *m, u32 ip) @@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip) return ip - m->first_ip; } -static inline struct bitmap_ipmac_elem * -get_elem(void *extensions, u16 id, size_t dsize) -{ - return (struct bitmap_ipmac_elem *)(extensions + id * dsize); -} +#define get_elem(extensions, id, dsize) \ + (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize)) + +#define get_const_elem(extensions, id, dsize) \ + (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize)) /* Common functions */ @@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, if (!test_bit(e->id, map->members)) return 0; - elem = get_elem(map->extensions, e->id, dsize); - if (elem->filled == MAC_FILLED) - return !e->ether || - ether_addr_equal(e->ether, elem->ether); + elem = get_const_elem(map->extensions, e->id, dsize); + if (e->add_mac && elem->filled == MAC_FILLED) + return ether_addr_equal(e->ether, elem->ether); /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; } @@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) if (!test_bit(id, map->members)) return 0; - elem = get_elem(map->extensions, id, dsize); + elem = get_const_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } @@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, * and we can reuse it later 
when MAC is filled out, * possibly by the kernel */ - if (e->ether) + if (e->add_mac) ip_set_timeout_set(timeout, t); else *timeout = t; @@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, elem = get_elem(map->extensions, e->id, dsize); if (test_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { - if (e->ether && + if (e->add_mac && (flags & IPSET_FLAG_EXIST) && !ether_addr_equal(e->ether, elem->ether)) { /* memcpy isn't atomic */ @@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, ether_addr_copy(elem->ether, e->ether); } return IPSET_ADD_FAILED; - } else if (!e->ether) + } else if (!e->add_mac) /* Already added without ethernet address */ return IPSET_ADD_FAILED; /* Fill the MAC address and trigger the timer activation */ @@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; - } else if (e->ether) { + } else if (e->add_mac) { /* We can store MAC too */ ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; @@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, dsize); + get_const_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || @@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, { struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct bitmap_ipmac_adt_elem e = { .id = 0 }; + struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; @@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, return -EINVAL; e.id = ip_to_id(map, ip); - e.ether = eth_hdr(skb)->h_source; + memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } @@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; e.id = ip_to_id(map, ip); - if (tb[IPSET_ATTR_ETHER]) - e.ether = nla_data(tb[IPSET_ATTR_ETHER]); - else - e.ether = NULL; - + if (tb[IPSET_ATTR_ETHER]) { + memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + e.add_mac = 1; + } ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; @@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (set->dsize) { - map->extensions = ip_set_alloc(set->dsize * elements); - if (!map->extensions) { - kfree(map->members); - return false; - } - } map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; @@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; - map = kzalloc(sizeof(*map), GFP_KERNEL); + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct bitmap_ipmac_elem), + __alignof__(struct bitmap_ipmac_elem)); + map = ip_set_alloc(sizeof(*map) + elements * set->dsize); if (!map) return -ENOMEM; map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ipmac; - set->dsize = ip_set_elem_len(set, tb, - sizeof(struct bitmap_ipmac_elem)); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 5338ccd5da46..7f0c733358a4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port"); /* Type structure */ struct bitmap_port { void *members; /* the set members */ - void *extensions; /* data extensions */ u16 first_port; /* host byte order, included in range */ u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collection */ + unsigned char extensions[0] /* data extensions */ + __aligned(__alignof__(u64)); }; /* ADT structure for generic function args */ @@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (set->dsize) { - map->extensions = ip_set_alloc(set->dsize * map->elements); - if (!map->extensions) { - kfree(map->members); - return false; - } - } map->first_port = first_port; map->last_port = last_port; set->timeout = IPSET_NO_TIMEOUT; @@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], { struct bitmap_port *map; u16 first_port, last_port; + u32 elements; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], last_port = tmp; } - map = kzalloc(sizeof(*map), GFP_KERNEL); + elements = last_port - first_port + 1; + set->dsize = ip_set_elem_len(set, tb, 0, 0); + map = ip_set_alloc(sizeof(*map) + elements * set->dsize); if (!map) return -ENOMEM; - map->elements = last_port - first_port + 1; + map->elements = elements; map->memsize = bitmap_bytes(0, map->elements); set->variant = &bitmap_port; - set->dsize = ip_set_elem_len(set, tb, 0); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 69ab9c2634e1..54f3d7cb23e6 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) } size_t -ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) +ip_set_elem_len(struct ip_set *set, 
struct nlattr *tb[], size_t len, + size_t align) { enum ip_set_ext_id id; - size_t offset = len; u32 cadt_flags = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) set->flags |= IPSET_CREATE_FLAG_FORCEADD; + if (!align) + align = 1; for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; - offset = ALIGN(offset, ip_set_extensions[id].align); - set->offset[id] = offset; + len = ALIGN(len, ip_set_extensions[id].align); + set->offset[id] = len; set->extensions |= ip_set_extensions[id].type; - offset += ip_set_extensions[id].len; + len += ip_set_extensions[id].len; } - return offset; + return ALIGN(len, align); } EXPORT_SYMBOL_GPL(ip_set_elem_len); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 691b54fcaf2a..4ff22194ce55 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -72,8 +72,9 @@ struct hbucket { DECLARE_BITMAP(used, AHASH_MAX_TUNED); u8 size; /* size of the array */ u8 pos; /* position of the first free entry */ - unsigned char value[0]; /* the array of the values */ -} __attribute__ ((aligned)); + unsigned char value[0] /* the array of the values */ + __aligned(__alignof__(u64)); +}; /* The hash table: the table size stored here in order to make resizing easy */ struct htable { @@ -1323,12 +1324,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, #endif set->variant = &IPSET_TOKEN(HTYPE, 4_variant); set->dsize = ip_set_elem_len(set, tb, - sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); + sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)), + __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem))); #ifndef IP_SET_PROTO_UNDEF } else { set->variant = &IPSET_TOKEN(HTYPE, 6_variant); set->dsize = ip_set_elem_len(set, tb, - sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); + sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)), + __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem))); } #endif if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 5a30ce6e8c90..bbede95c9f68 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -31,7 +31,7 @@ struct set_elem { struct rcu_head rcu; struct list_head list; ip_set_id_t id; -}; +} __aligned(__alignof__(u64)); struct set_adt_elem { ip_set_id_t id; @@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], size = IP_SET_LIST_MIN_SIZE; set->variant = &set_variant; - set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); + set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem), + __alignof__(struct set_elem)); if (!init_list_set(net, set, size)) return -ENOMEM; if (tb[IPSET_ATTR_TIMEOUT]) { -- cgit v1.2.3 From dece16353ef47d8d33f5302bc158072a9d65e26f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:41:16 -0700 Subject: block: change ->make_request_fn() and users to return a queue cookie No functional changes in this patch, but it prepares us for returning a more useful cookie related to the IO that was queued up. 
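The shape of the conversion, as a minimal sketch (a hypothetical driver with an assumed mydrv_do_bio() helper; the per-driver hunks below are all of this form): a bio-based ->make_request_fn now returns a blk_qc_t cookie instead of void, with BLK_QC_T_NONE as the only value used so far:

	static blk_qc_t mydrv_make_request(struct request_queue *q,
					   struct bio *bio)
	{
		if (mydrv_do_bio(q->queuedata, bio))	/* hypothetical helper */
			bio_io_error(bio);	/* completes the bio with -EIO */
		else
			bio_endio(bio);		/* successful completion */

		return BLK_QC_T_NONE;	/* no useful cookie yet */
	}
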
Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- arch/m68k/emu/nfblock.c | 3 ++- arch/powerpc/sysdev/axonram.c | 5 +++-- arch/xtensa/platforms/iss/simdisk.c | 3 ++- block/blk-core.c | 26 ++++++++++++++++---------- block/blk-mq.c | 26 ++++++++++++++------------ drivers/block/brd.c | 5 +++-- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_req.c | 3 ++- drivers/block/null_blk.c | 3 ++- drivers/block/pktcdvd.c | 9 ++++----- drivers/block/ps3vram.c | 6 ++++-- drivers/block/rsxx/dev.c | 5 +++-- drivers/block/umem.c | 4 ++-- drivers/block/zram/zram_drv.c | 5 +++-- drivers/lightnvm/rrpc.c | 9 +++++---- drivers/md/bcache/request.c | 11 ++++++++--- drivers/md/dm.c | 6 +++--- drivers/md/md.c | 8 +++++--- drivers/nvdimm/blk.c | 3 ++- drivers/nvdimm/btt.c | 3 ++- drivers/nvdimm/pmem.c | 3 ++- drivers/s390/block/dcssblk.c | 8 +++++--- drivers/s390/block/xpram.c | 5 +++-- drivers/staging/lustre/lustre/llite/lloop.c | 5 +++-- include/linux/blk_types.h | 24 ++++++++++++++++++++++++ include/linux/blkdev.h | 4 ++-- include/linux/fs.h | 2 +- include/linux/lightnvm.h | 2 +- 28 files changed, 127 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index f2a00c591bf7..e9110b9b8bcd 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -59,7 +59,7 @@ struct nfhd_device { struct gendisk *disk; }; -static void nfhd_make_request(struct request_queue *queue, struct bio *bio) +static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio) { struct nfhd_device *dev = queue->queuedata; struct bio_vec bvec; @@ -77,6 +77,7 @@ static void nfhd_make_request(struct request_queue *queue, struct bio *bio) sec += len; } bio_endio(bio); + return BLK_QC_T_NONE; } static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index d2b79bc336c1..7a399b4d60a0 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -103,7 +103,7 @@ axon_ram_irq_handler(int irq, void *dev) * axon_ram_make_request - make_request() method for block device * @queue, @bio: see blk_queue_make_request() */ -static void +static blk_qc_t axon_ram_make_request(struct request_queue *queue, struct bio *bio) { struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; @@ -120,7 +120,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) bio_for_each_segment(vec, bio, iter) { if (unlikely(phys_mem + vec.bv_len > phys_end)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } user_mem = page_address(vec.bv_page) + vec.bv_offset; @@ -133,6 +133,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) transfered += vec.bv_len; } bio_endio(bio); + return BLK_QC_T_NONE; } /** diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index fa84ca990caa..3c3ace2c46b6 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -101,7 +101,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector, spin_unlock(&dev->lock); } -static void simdisk_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t simdisk_make_request(struct request_queue *q, struct bio *bio) { struct simdisk *dev = q->queuedata; struct bio_vec bvec; @@ -119,6 +119,7 @@ static void simdisk_make_request(struct request_queue *q, struct bio *bio) } bio_endio(bio); + return BLK_QC_T_NONE; } static int 
simdisk_open(struct block_device *bdev, fmode_t mode) diff --git a/block/blk-core.c b/block/blk-core.c index 89eec7965870..e93df6d386a0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -809,7 +809,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) } EXPORT_SYMBOL(blk_init_queue_node); -static void blk_queue_bio(struct request_queue *q, struct bio *bio); +static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); struct request_queue * blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, @@ -1678,7 +1678,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -static void blk_queue_bio(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1698,7 +1698,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio->bi_error = -EIO; bio_endio(bio); - return; + return BLK_QC_T_NONE; } if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { @@ -1713,7 +1713,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) */ if (!blk_queue_nomerges(q)) { if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return; + return BLK_QC_T_NONE; } else request_count = blk_plug_queued_count(q); @@ -1791,6 +1791,8 @@ get_rq: out_unlock: spin_unlock_irq(q->queue_lock); } + + return BLK_QC_T_NONE; } /* @@ -1996,12 +1998,13 @@ end_io: * a lower device by calling into generic_make_request recursively, which * means the bio should NOT be touched after the call to ->make_request_fn. */ -void generic_make_request(struct bio *bio) +blk_qc_t generic_make_request(struct bio *bio) { struct bio_list bio_list_on_stack; + blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) - return; + goto out; /* * We only want one ->make_request_fn to be active at a time, else @@ -2015,7 +2018,7 @@ void generic_make_request(struct bio *bio) */ if (current->bio_list) { bio_list_add(current->bio_list, bio); - return; + goto out; } /* following loop may be a bit non-obvious, and so deserves some @@ -2040,7 +2043,7 @@ void generic_make_request(struct bio *bio) if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) { - q->make_request_fn(q, bio); + ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -2053,6 +2056,9 @@ void generic_make_request(struct bio *bio) } } while (bio); current->bio_list = NULL; /* deactivate */ + +out: + return ret; } EXPORT_SYMBOL(generic_make_request); @@ -2066,7 +2072,7 @@ EXPORT_SYMBOL(generic_make_request); * interfaces; @bio must be presetup and ready for I/O. * */ -void submit_bio(int rw, struct bio *bio) +blk_qc_t submit_bio(int rw, struct bio *bio) { bio->bi_rw |= rw; @@ -2100,7 +2106,7 @@ void submit_bio(int rw, struct bio *bio) } } - generic_make_request(bio); + return generic_make_request(bio); } EXPORT_SYMBOL(submit_bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index 1c27b3eaef64..65f43bd696a0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1235,7 +1235,7 @@ static int blk_mq_direct_issue_request(struct request *rq) * but will attempt to bypass the hctx queueing if we can go straight to * hardware for SYNC IO. 
*/ -static void blk_mq_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); @@ -1249,7 +1249,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } blk_queue_split(q, &bio, q->bio_split); @@ -1257,13 +1257,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!is_flush_fua && !blk_queue_nomerges(q)) { if (blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) - return; + return BLK_QC_T_NONE; } else request_count = blk_plug_queued_count(q); rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) - return; + return BLK_QC_T_NONE; if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1302,11 +1302,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) old_rq = rq; blk_mq_put_ctx(data.ctx); if (!old_rq) - return; + return BLK_QC_T_NONE; if (!blk_mq_direct_issue_request(old_rq)) - return; + return BLK_QC_T_NONE; blk_mq_insert_request(old_rq, false, true, true); - return; + return BLK_QC_T_NONE; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1320,13 +1320,14 @@ run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } blk_mq_put_ctx(data.ctx); + return BLK_QC_T_NONE; } /* * Single hardware queue variant. This will attempt to use any per-process * plug for merging and IO deferral. */ -static void blk_sq_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); @@ -1339,18 +1340,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } blk_queue_split(q, &bio, q->bio_split); if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return; + return BLK_QC_T_NONE; rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) - return; + return BLK_QC_T_NONE; if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1374,7 +1375,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) } list_add_tail(&rq->queuelist, &plug->mq_list); blk_mq_put_ctx(data.ctx); - return; + return BLK_QC_T_NONE; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1389,6 +1390,7 @@ run_queue: } blk_mq_put_ctx(data.ctx); + return BLK_QC_T_NONE; } /* diff --git a/drivers/block/brd.c b/drivers/block/brd.c index b9794aeeb878..c9f9c30d6467 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -323,7 +323,7 @@ out: return err; } -static void brd_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; @@ -358,9 +358,10 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio); - return; + return BLK_QC_T_NONE; io_error: bio_io_error(bio); + return BLK_QC_T_NONE; } static int brd_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/block/drbd/drbd_int.h 
b/drivers/block/drbd/drbd_int.h index 015c6e91b756..e66d453a5f2b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1448,7 +1448,7 @@ extern int proc_details; /* drbd_req */ extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long); -extern void drbd_make_request(struct request_queue *q, struct bio *bio); +extern blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); extern int is_valid_ar_handle(struct drbd_request *, sector_t); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 211592682169..3ae2c0086563 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1494,7 +1494,7 @@ void do_submit(struct work_struct *ws) } } -void drbd_make_request(struct request_queue *q, struct bio *bio) +blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio) { struct drbd_device *device = (struct drbd_device *) q->queuedata; unsigned long start_jif; @@ -1510,6 +1510,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) inc_ap_bio(device); __drbd_make_request(device, bio, start_jif); + return BLK_QC_T_NONE; } void request_timer_fn(unsigned long data) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 1c9e4fe5aa44..6255d1c4bba4 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -321,7 +321,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb) return &nullb->queues[index]; } -static void null_queue_bio(struct request_queue *q, struct bio *bio) +static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) { struct nullb *nullb = q->queuedata; struct nullb_queue *nq = nullb_to_queue(nullb); @@ -331,6 +331,7 @@ static void null_queue_bio(struct request_queue *q, struct bio *bio) cmd->bio = bio; null_handle_cmd(cmd); + return BLK_QC_T_NONE; } static int null_rq_prep_fn(struct request_queue *q, struct request *req) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7be2375db7f2..a7f4abcedee1 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2441,7 +2441,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) } } -static void pkt_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2467,7 +2467,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) */ if (bio_data_dir(bio) == READ) { pkt_make_request_read(pd, bio); - return; + return BLK_QC_T_NONE; } if (!test_bit(PACKET_WRITABLE, &pd->flags)) { @@ -2499,13 +2499,12 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) pkt_make_request_write(q, split); } while (split != bio); - return; + return BLK_QC_T_NONE; end_io: bio_io_error(bio); + return BLK_QC_T_NONE; } - - static void pkt_init_queue(struct pktcdvd_device *pd) { struct request_queue *q = pd->disk->queue; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index d89fcac59515..56847fcda086 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -598,7 +598,7 @@ out: return next; } -static void ps3vram_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio) { struct ps3_system_bus_device *dev = q->queuedata; struct 
ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -614,11 +614,13 @@ static void ps3vram_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irq(&priv->lock); if (busy) - return; + return BLK_QC_T_NONE; do { bio = ps3vram_do_bio(dev, bio); } while (bio); + + return BLK_QC_T_NONE; } static int ps3vram_probe(struct ps3_system_bus_device *dev) diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 3163e4cdc2cc..e1b8b7061d2f 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -145,7 +145,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, } } -static void rsxx_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) { struct rsxx_cardinfo *card = q->queuedata; struct rsxx_bio_meta *bio_meta; @@ -199,7 +199,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) if (st) goto queue_err; - return; + return BLK_QC_T_NONE; queue_err: kmem_cache_free(bio_meta_pool, bio_meta); @@ -207,6 +207,7 @@ req_err: if (st) bio->bi_error = st; bio_endio(bio); + return BLK_QC_T_NONE; } /*----------------- Device Setup -------------------*/ diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 04d65790a886..7939b9f87441 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -524,7 +524,7 @@ static int mm_check_plugged(struct cardinfo *card) return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb)); } -static void mm_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; pr_debug("mm_make_request %llu %u\n", @@ -541,7 +541,7 @@ static void mm_make_request(struct request_queue *q, struct bio *bio) activate(card); spin_unlock_irq(&card->lock); - return; + return BLK_QC_T_NONE; } static irqreturn_t mm_interrupt(int irq, void *__card) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9fa15bb9d118..4c99b6ba8681 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -894,7 +894,7 @@ out: /* * Handler function for all zram I/O requests. 
*/ -static void zram_make_request(struct request_queue *queue, struct bio *bio) +static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; @@ -911,11 +911,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) __zram_make_request(zram, bio); zram_meta_put(zram); - return; + return BLK_QC_T_NONE; put_zram: zram_meta_put(zram); error: bio_io_error(bio); + return BLK_QC_T_NONE; } static void zram_slot_free_notify(struct block_device *bdev, diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 64a888a5e9b3..7ba64c87ba1c 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -803,7 +803,7 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, return NVM_IO_OK; } -static void rrpc_make_rq(struct request_queue *q, struct bio *bio) +static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) { struct rrpc *rrpc = q->queuedata; struct nvm_rq *rqd; @@ -811,21 +811,21 @@ static void rrpc_make_rq(struct request_queue *q, struct bio *bio) if (bio->bi_rw & REQ_DISCARD) { rrpc_discard(rrpc, bio); - return; + return BLK_QC_T_NONE; } rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); if (!rqd) { pr_err_ratelimited("rrpc: not able to queue bio."); bio_io_error(bio); - return; + return BLK_QC_T_NONE; } memset(rqd, 0, sizeof(struct nvm_rq)); err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE); switch (err) { case NVM_IO_OK: - return; + return BLK_QC_T_NONE; case NVM_IO_ERR: bio_io_error(bio); break; @@ -841,6 +841,7 @@ static void rrpc_make_rq(struct request_queue *q, struct bio *bio) } mempool_free(rqd, rrpc->rq_pool); + return BLK_QC_T_NONE; } static void rrpc_requeue(struct work_struct *work) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 8e9877b04637..25fa8445bb24 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -958,7 +958,8 @@ static void cached_dev_nodata(struct closure *cl) /* Cached devices - read & write stuff */ -static void cached_dev_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t cached_dev_make_request(struct request_queue *q, + struct bio *bio) { struct search *s; struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; @@ -997,6 +998,8 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) else generic_make_request(bio); } + + return BLK_QC_T_NONE; } static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, @@ -1070,7 +1073,8 @@ static void flash_dev_nodata(struct closure *cl) continue_at(cl, search_free, NULL); } -static void flash_dev_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t flash_dev_make_request(struct request_queue *q, + struct bio *bio) { struct search *s; struct closure *cl; @@ -1093,7 +1097,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) continue_at_nobarrier(&s->cl, flash_dev_nodata, bcache_wq); - return; + return BLK_QC_T_NONE; } else if (rw) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), @@ -1109,6 +1113,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) } continue_at(cl, search_free, NULL); + return BLK_QC_T_NONE; } static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 32440ad5f684..6e15f3565892 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1755,7 +1755,7 @@ static void __split_and_process_bio(struct 
mapped_device *md, * The request function that just remaps the bio built up by * dm_merge_bvec. */ -static void dm_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) { int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; @@ -1774,12 +1774,12 @@ static void dm_make_request(struct request_queue *q, struct bio *bio) queue_io(md, bio); else bio_io_error(bio); - return; + return BLK_QC_T_NONE; } __split_and_process_bio(md, map, bio); dm_put_live_table(md, srcu_idx); - return; + return BLK_QC_T_NONE; } int dm_request_based(struct mapped_device *md) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3f9a514b5b9d..807095f4c793 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -250,7 +250,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); * call has finished, the bio has been linked into some internal structure * and so is visible to ->quiesce(), so we don't need the refcount any more. */ -static void md_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); struct mddev *mddev = q->queuedata; @@ -262,13 +262,13 @@ static void md_make_request(struct request_queue *q, struct bio *bio) if (mddev == NULL || mddev->pers == NULL || !mddev->ready) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) bio->bi_error = -EROFS; bio_endio(bio); - return; + return BLK_QC_T_NONE; } smp_rmb(); /* Ensure implications of 'active' are visible */ rcu_read_lock(); @@ -302,6 +302,8 @@ static void md_make_request(struct request_queue *q, struct bio *bio) if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) wake_up(&mddev->sb_wait); + + return BLK_QC_T_NONE; } /* mddev_suspend makes sure no new requests are submitted diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 0df77cb07df6..91a336ea8c4f 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -161,7 +161,7 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, return err; } -static void nd_blk_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct gendisk *disk = bdev->bd_disk; @@ -208,6 +208,7 @@ static void nd_blk_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio); + return BLK_QC_T_NONE; } static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index eae93ab8ffcd..efb2c1ceef98 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1150,7 +1150,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, return ret; } -static void btt_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct btt *btt = q->queuedata; @@ -1198,6 +1198,7 @@ static void btt_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio); + return BLK_QC_T_NONE; } static int btt_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0ba6a978f227..3963b7533b65 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -64,7 +64,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, 
kunmap_atomic(mem); } -static void pmem_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { bool do_acct; unsigned long start; @@ -84,6 +84,7 @@ static void pmem_make_request(struct request_queue *q, struct bio *bio) wmb_pmem(); bio_endio(bio); + return BLK_QC_T_NONE; } static int pmem_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 5ed44fe21380..94a8f4ab57bc 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -27,7 +27,8 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); -static void dcssblk_make_request(struct request_queue *q, struct bio *bio); +static blk_qc_t dcssblk_make_request(struct request_queue *q, + struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, void __pmem **kaddr, unsigned long *pfn); @@ -815,7 +816,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode) up_write(&dcssblk_devices_sem); } -static void +static blk_qc_t dcssblk_make_request(struct request_queue *q, struct bio *bio) { struct dcssblk_dev_info *dev_info; @@ -874,9 +875,10 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) bytes_done += bvec.bv_len; } bio_endio(bio); - return; + return BLK_QC_T_NONE; fail: bio_io_error(bio); + return BLK_QC_T_NONE; } static long diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 02871f1db562..288f59a4147b 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -181,7 +181,7 @@ static unsigned long xpram_highest_page_index(void) /* * Block device make request function. 
*/ -static void xpram_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio) { xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data; struct bio_vec bvec; @@ -223,9 +223,10 @@ static void xpram_make_request(struct request_queue *q, struct bio *bio) } } bio_endio(bio); - return; + return BLK_QC_T_NONE; fail: bio_io_error(bio); + return BLK_QC_T_NONE; } static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index e6974c36276d..fed50d538a41 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -333,7 +333,7 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) return count; } -static void loop_make_request(struct request_queue *q, struct bio *old_bio) +static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio) { struct lloop_device *lo = q->queuedata; int rw = bio_rw(old_bio); @@ -364,9 +364,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio) goto err; } loop_add_bio(lo, old_bio); - return; + return BLK_QC_T_NONE; err: bio_io_error(old_bio); + return BLK_QC_T_NONE; } static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e8130138f29d..641e5a3ed58c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -244,4 +244,28 @@ enum rq_flag_bits { #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U +#define BLK_QC_T_SHIFT 16 + +static inline bool blk_qc_t_valid(blk_qc_t cookie) +{ + return cookie != BLK_QC_T_NONE; +} + +static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) +{ + return tag | (queue_num << BLK_QC_T_SHIFT); +} + +static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) +{ + return cookie >> BLK_QC_T_SHIFT; +} + +static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) +{ + return cookie & 0xffff; +} + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d045ca8487af..5ee0f5243025 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -209,7 +209,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef void (request_fn_proc) (struct request_queue *q); -typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); +typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); @@ -761,7 +761,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -extern void generic_make_request(struct bio *bio); +extern blk_qc_t generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 72d8a844c692..bcca36e4bc1e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2625,7 +2625,7 @@ static inline 
void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern void submit_bio(int, struct bio *); +extern blk_qc_t submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5ebd70d12f35..69c9057e1ab8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -426,7 +426,7 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, return ppa; } -typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); +typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int); typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int); -- cgit v1.2.3 From 05229beeddf7e75e2e616ddaad4b70e7fca9528d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:44:55 -0700 Subject: block: add block polling support Add basic support for polling for specific IO to complete. This uses the cookie that blk-mq passes back, which enables the block layer to pass this cookie to the driver to spin for a specific request. This will be combined with request latency tracking, so we can make qualified decisions about when to poll and when not to. For now, for benchmark purposes, we add a sysfs file that controls whether polling is enabled or not. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- block/blk-core.c | 41 +++++++++++++++++++++++++++++++++++++++++ block/blk-mq-sysfs.c | 10 ++++++++++ block/blk-sysfs.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/blk-mq.h | 10 ++++++++++ include/linux/blkdev.h | 3 +++ 5 files changed, 99 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index e93df6d386a0..fa36b4ff7d63 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3312,6 +3312,47 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); +bool blk_poll(struct request_queue *q, blk_qc_t cookie) +{ + struct blk_plug *plug; + long state; + + if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return false; + + plug = current->plug; + if (plug) + blk_flush_plug_list(plug, false); + + state = current->state; + while (!need_resched()) { + unsigned int queue_num = blk_qc_t_to_queue_num(cookie); + struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[queue_num]; + int ret; + + hctx->poll_invoked++; + + ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie)); + if (ret > 0) { + hctx->poll_success++; + set_current_state(TASK_RUNNING); + return true; + } + + if (signal_pending_state(state, current)) + set_current_state(TASK_RUNNING); + + if (current->state == TASK_RUNNING) + return true; + if (ret < 0) + break; + cpu_relax(); + } + + return false; +} + #ifdef CONFIG_PM /** * blk_pm_runtime_init - Block layer runtime PM initialization routine diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 6f57a110289c..1cf18784c5cf 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -174,6 +174,11 @@ static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) return ret; } +static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page) +{ + return sprintf(page, "invoked=%lu, success=%lu\n", hctx->poll_invoked, hctx->poll_success); +} + static 
ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx, char *page) { @@ -295,6 +300,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { .attr = {.name = "cpu_list", .mode = S_IRUGO }, .show = blk_mq_hw_sysfs_cpus_show, }; +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { + .attr = {.name = "io_poll", .mode = S_IRUGO }, + .show = blk_mq_hw_sysfs_poll_show, +}; static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_queued.attr, @@ -304,6 +313,7 @@ static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_tags.attr, &blk_mq_hw_sysfs_cpus.attr, &blk_mq_hw_sysfs_active.attr, + &blk_mq_hw_sysfs_poll.attr, NULL, }; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 31849e328b45..565b8dac5782 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -317,6 +317,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } +static ssize_t queue_poll_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page); +} + +static ssize_t queue_poll_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long poll_on; + ssize_t ret; + + if (!q->mq_ops || !q->mq_ops->poll) + return -EINVAL; + + ret = queue_var_store(&poll_on, page, count); + if (ret < 0) + return ret; + + spin_lock_irq(q->queue_lock); + if (poll_on) + queue_flag_set(QUEUE_FLAG_POLL, q); + else + queue_flag_clear(QUEUE_FLAG_POLL, q); + spin_unlock_irq(q->queue_lock); + + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -442,6 +470,12 @@ static struct queue_sysfs_entry queue_random_entry = { .store = queue_store_random, }; +static struct queue_sysfs_entry queue_poll_entry = { + .attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR }, + .show = queue_poll_show, + .store = queue_poll_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -466,6 +500,7 @@ static struct attribute *default_attrs[] = { &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, &queue_random_entry.attr, + &queue_poll_entry.attr, NULL, }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 83cc9d4e5455..daf17d70aeca 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -59,6 +59,9 @@ struct blk_mq_hw_ctx { struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; + + unsigned long poll_invoked; + unsigned long poll_success; }; struct blk_mq_tag_set { @@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef void (busy_tag_iter_fn)(struct request *, void *, bool); +typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); + struct blk_mq_ops { /* @@ -114,6 +119,11 @@ struct blk_mq_ops { */ timeout_fn *timeout; + /* + * Called to poll for completion of a specific tag. 
+ */ + poll_fn *poll; + softirq_done_fn *complete; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5ee0f5243025..3fe27f8d91f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -487,6 +487,7 @@ struct request_queue { #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +bool blk_poll(struct request_queue *q, blk_qc_t cookie); + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; /* this is never NULL */ -- cgit v1.2.3 From 5037835c1f3eabf4f22163fc0278dd87165f8957 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 5 Oct 2015 16:33:36 -0600 Subject: coredump: add DAX filtering for ELF coredumps Add two new flags to the existing coredump mechanism for ELF files to allow us to explicitly filter DAX mappings. This is desirable because DAX mappings, like hugetlb mappings, have the potential to be very large. Update the coredump_filter documentation in Documentation/filesystems/proc.txt so that it addresses the new DAX coredump flags. Also update the documented default value of coredump_filter to be consistent with the core(5) man page. The documentation being updated talks about bit 4, Dump ELF headers, which is enabled if CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is turned on in the kernel config. This kernel config option defaults to "y" if both ELF binaries and coredump are enabled. Signed-off-by: Ross Zwisler Acked-by: Jeff Moyer Signed-off-by: Dan Williams --- Documentation/filesystems/proc.txt | 22 ++++++++++++---------- fs/binfmt_elf.c | 10 ++++++++++ include/linux/sched.h | 4 +++- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d411ca63c8b6..6f887cf873a1 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1598,16 +1598,16 @@ Documentation/accounting. --------------------------------------------------------------- When a process is dumped, all anonymous memory is written to a core file as long as the size of the core file isn't limited. But sometimes we don't want -to dump some memory segments, for example, huge shared memory. Conversely, -sometimes we want to save file-backed memory segments into a core file, not -only the individual files. +to dump some memory segments, for example, huge shared memory or DAX. +Conversely, sometimes we want to save file-backed memory segments into a core +file, not only the individual files. /proc/<pid>/coredump_filter allows you to customize which memory segments will be dumped when the <pid> process is dumped. coredump_filter is a bitmask of memory types. If a bit of the bitmask is set, memory segments of the corresponding memory type are dumped, otherwise they are not dumped. 
-The following 7 memory types are supported: +The following 9 memory types are supported: - (bit 0) anonymous private memory - (bit 1) anonymous shared memory - (bit 2) file-backed private memory @@ -1616,20 +1616,22 @@ The following 7 memory types are supported: effective only if the bit 2 is cleared) - (bit 5) hugetlb private memory - (bit 6) hugetlb shared memory + - (bit 7) DAX private memory + - (bit 8) DAX shared memory Note that MMIO pages such as frame buffer are never dumped and vDSO pages are always dumped regardless of the bitmask status. - Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only - effected by bit 5-6. + Note that bits 0-4 don't affect hugetlb or DAX memory. hugetlb memory is + only affected by bit 5-6, and DAX is only affected by bits 7-8. -Default value of coredump_filter is 0x23; this means all anonymous memory -segments and hugetlb private memory are dumped. +The default value of coredump_filter is 0x33; this means all anonymous memory +segments, ELF header pages and hugetlb private memory are dumped. If you don't want to dump all shared memory segments attached to pid 1234, -write 0x21 to the process's proc file. +write 0x31 to the process's proc file. - $ echo 0x21 > /proc/1234/coredump_filter + $ echo 0x31 > /proc/1234/coredump_filter When a new process is created, the process inherits the bitmask status from its parent. It is useful to set up coredump_filter before the program runs. diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6b659967898e..5f399ea1d20a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -35,6 +35,7 @@ #include #include #include +#include <linux/dax.h> #include #include #include @@ -1236,6 +1237,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (vma->vm_flags & VM_DONTDUMP) return 0; + /* support for DAX */ + if (vma_is_dax(vma)) { + if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) + goto whole; + if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) + goto whole; + return 0; + } + /* Hugetlb memory check */ if (vma->vm_flags & VM_HUGETLB) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..3c02d92ed23b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -483,9 +483,11 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_HUGETLB_PRIVATE 7 #define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 7 +#define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ -- cgit v1.2.3 From c8299cb605b27dd5a49f7a69e48fd23e5a206298 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 9 Nov 2015 14:58:10 -0800 Subject: kernel.h: make abs() work with 64-bit types For 64-bit arguments, the abs() macro casts them to an int, which leads to lost precision and may cause incorrect results. To deal with 64-bit types the abs64() macro was introduced, but there are still places where abs() is used incorrectly. To deal with the problem, expand the abs() macro so that it operates on s64 when dealing with 64-bit types while still returning long for smaller types. 
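As an illustration (this sketch is not part of the patch, and the casts merely mimic the two macro expansions), on a 32-bit platform the old abs() truncated a 64-bit argument to int before stripping the sign, while the new macro selects an s64 branch:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int64_t err = -5000000000LL;	/* does not fit in 32 bits */

		/* old abs() on 32-bit: the argument is truncated to int first */
		int old_abs = (int)err < 0 ? -(int)err : (int)err;
		/* new abs(): sizeof(err) == sizeof(s64), so the s64 branch is taken */
		int64_t new_abs = err < 0 ? -err : err;

		printf("old=%d new=%lld\n", old_abs, (long long)new_abs);
		/* prints: old=705032704 new=5000000000 */
		return 0;
	}
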
This fixes one known bug (per John): The internal clocksteering done for fine-grained error correction uses a : logarithmic approximation, so any time adjtimex() adjusts the clock : steering, timekeeping_freqadjust() quickly approximates the correct clock : frequency over a series of ticks. : : Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit : dc491596f639438 (Rework frequency adjustments to work better w/ nohz), : used the abs() function with an s64 error value to calculate the size of : the approximated adjustment to be made. : : Per include/linux/kernel.h: "abs() should not be used for 64-bit types : (s64, u64, long long) - use abs64()". : : Thus on 32-bit platforms, this resulted in the clocksteering taking a : quite dampened random walk trying to converge on the proper frequency, : which caused the adjustments to be made much slower than intended (most : easily observed when large adjustments are made). Signed-off-by: Michal Nazarewicz Reported-by: John Stultz Tested-by: John Stultz Cc: Ingo Molnar Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2c13f747ac2e..05ce782d53ab 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -200,28 +200,31 @@ extern int _cond_resched(void); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -/* - * abs() handles unsigned and signed longs, ints, shorts and chars. For all - * input types abs() returns a signed long. - * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64() - * for those. +/** + * abs - return absolute value of an argument + * @x: the value. If it is unsigned type, it is converted to signed type first + * (s64, long or int depending on its size). + * + * Return: an absolute value of x. If x is 64-bit, macro's return type is s64, + * otherwise it is signed long. */ -#define abs(x) ({ \ - long ret; \ - if (sizeof(x) == sizeof(long)) { \ - long __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } else { \ - int __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } \ - ret; \ - }) - -#define abs64(x) ({ \ - s64 __x = (x); \ - (__x < 0) ? -__x : __x; \ - }) +#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({ \ + s64 __x = (x); \ + (__x < 0) ? -__x : __x; \ + }), ({ \ + long ret; \ + if (sizeof(x) == sizeof(long)) { \ + long __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } else { \ + int __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } \ + ret; \ + })) + +/* Deprecated, use abs instead. */ +#define abs64(x) abs((s64)(x)) /** * reciprocal_scale - "scale" a value into range [0, ep_ro) -- cgit v1.2.3 From 79211c8ed19c055ca105502c8733800d442a0ae6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 9 Nov 2015 14:58:13 -0800 Subject: remove abs64() Switch everything to the new and more capable implementation of abs(). Mainly to give the new abs() a bit of a workout. 
Cc: Michal Nazarewicz Cc: John Stultz Cc: Ingo Molnar Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_irq.c | 4 ++-- drivers/gpu/drm/tegra/sor.c | 4 ++-- drivers/input/joystick/walkera0701.c | 4 ++-- drivers/media/i2c/ov9650.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/thermal/power_allocator.c | 2 +- fs/ext4/mballoc.c | 4 ++-- fs/gfs2/lock_dlm.c | 2 +- include/linux/kernel.h | 3 --- kernel/time/clocksource.c | 2 +- kernel/time/timekeeping.c | 2 +- lib/div64.c | 2 +- net/sctp/transport.c | 2 +- 13 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 22d207e211e7..22d8b78d537e 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -261,7 +261,7 @@ static void vblank_disable_and_save(struct drm_device *dev, unsigned int pipe) * available. In that case we can't account for this and just * hope for the best. */ - if (vblrc && (abs64(diff_ns) > 1000000)) + if (vblrc && (abs(diff_ns) > 1000000)) store_vblank(dev, pipe, 1, &tvblank); spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); @@ -1772,7 +1772,7 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe) * e.g., due to spurious vblank interrupts. We need to * ignore those for accounting. */ - if (abs64(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) + if (abs(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) store_vblank(dev, pipe, 1, &tvblank); else DRM_DEBUG("crtc %u: Redundant vblirq ignored. diff_ns = %d\n", diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index da1715ebdd71..3eff7cf75d25 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -555,11 +555,11 @@ static int tegra_sor_compute_params(struct tegra_sor *sor, error = div_s64(active_sym - approx, tu_size); error *= params->num_clocks; - if (error <= 0 && abs64(error) < params->error) { + if (error <= 0 && abs(error) < params->error) { params->active_count = div_u64(active_count, f); params->active_polarity = active_polarity; params->active_frac = active_frac; - params->error = abs64(error); + params->error = abs(error); params->tu_size = tu_size; if (error == 0) diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c index d88f5dd3c9d9..9c07fe911075 100644 --- a/drivers/input/joystick/walkera0701.c +++ b/drivers/input/joystick/walkera0701.c @@ -150,7 +150,7 @@ static void walkera0701_irq_handler(void *handler_data) if (w->counter == 24) { /* full frame */ walkera0701_parse_frame(w); w->counter = NO_SYNC; - if (abs64(pulse_time - SYNC_PULSE) < RESERVE) /* new frame sync */ + if (abs(pulse_time - SYNC_PULSE) < RESERVE) /* new frame sync */ w->counter = 0; } else { if ((pulse_time > (ANALOG_MIN_PULSE - RESERVE) @@ -161,7 +161,7 @@ static void walkera0701_irq_handler(void *handler_data) } else w->counter = NO_SYNC; } - } else if (abs64(pulse_time - SYNC_PULSE - BIN0_PULSE) < + } else if (abs(pulse_time - SYNC_PULSE - BIN0_PULSE) < RESERVE + BIN1_PULSE - BIN0_PULSE) /* frame sync .. 
*/ w->counter = 0; diff --git a/drivers/media/i2c/ov9650.c b/drivers/media/i2c/ov9650.c index e691bba1945b..1ee6a5527c38 100644 --- a/drivers/media/i2c/ov9650.c +++ b/drivers/media/i2c/ov9650.c @@ -1133,7 +1133,7 @@ static int __ov965x_set_frame_interval(struct ov965x *ov965x, if (mbus_fmt->width != iv->size.width || mbus_fmt->height != iv->size.height) continue; - err = abs64((u64)(iv->interval.numerator * 10000) / + err = abs((u64)(iv->interval.numerator * 10000) / iv->interval.denominator - req_int); if (err < min_err) { fiv = iv; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index ee46f4647fbc..c00a7daaa4bc 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -787,7 +787,7 @@ static void mac80211_hwsim_set_tsf(struct ieee80211_hw *hw, struct mac80211_hwsim_data *data = hw->priv; u64 now = mac80211_hwsim_get_tsf(hw, vif); u32 bcn_int = data->beacon_int; - u64 delta = abs64(tsf - now); + u64 delta = abs(tsf - now); /* adjust after beaconing with new timestamp at old TBTT */ if (tsf > now) { diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index e570ff084add..f0fbea386869 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -228,7 +228,7 @@ static u32 pid_controller(struct thermal_zone_device *tz, if (err < int_to_frac(tz->tzp->integral_cutoff)) { s64 i_next = i + mul_frac(tz->tzp->k_i, err); - if (abs64(i_next) < max_power_frac) { + if (abs(i_next) < max_power_frac) { i = i_next; params->err_integral += err; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b4b3c1f91814..61eaf74dca37 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3333,8 +3333,8 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, atomic_inc(&pa->pa_count); return pa; } - cur_distance = abs64(goal_block - cpa->pa_pstart); - new_distance = abs64(goal_block - pa->pa_pstart); + cur_distance = abs(goal_block - cpa->pa_pstart); + new_distance = abs(goal_block - pa->pa_pstart); if (cur_distance <= new_distance) return cpa; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 284c1542783e..8b907c5cc913 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -50,7 +50,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, s64 delta = sample - s->stats[index]; s->stats[index] += (delta >> 3); index++; - s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2); + s->stats[index] += ((abs(delta) - s->stats[index]) >> 2); } /** diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 05ce782d53ab..350dfb08aee3 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -223,9 +223,6 @@ extern int _cond_resched(void); ret; \ })) -/* Deprecated, use abs instead. */ -#define abs64(x) abs((s64)(x)) - /** * reciprocal_scale - "scale" a value into range [0, ep_ro) * @val: value diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 0d8fe8b8f727..1347882d131e 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data) continue; /* Check the deviation from the watchdog clocksource. 
*/ - if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { + if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", cs->name); pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b1356b7ae570..d563c1960302 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs64(tick_error); + tick_error = abs(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; diff --git a/lib/div64.c b/lib/div64.c index 19ea7ed4b948..62a698a432bc 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -162,7 +162,7 @@ s64 div64_s64(s64 dividend, s64 divisor) { s64 quot, t; - quot = div64_u64(abs64(dividend), abs64(divisor)); + quot = div64_u64(abs(dividend), abs(divisor)); t = (dividend ^ divisor) >> 63; return (quot ^ t) - t; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a0a431824f63..aab9e3f29755 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -331,7 +331,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * 1/8, rto_alpha would be expressed as 3. */ tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) - + (((__u32)abs64((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); + + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + (rtt >> net->sctp.rto_alpha); } else { -- cgit v1.2.3 From 77c5b5da02f0a30d61144a546c4ef3657e3b817d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 9 Nov 2015 14:58:23 -0800 Subject: kmap_atomic_to_page() has no users, remove it Removal started in commit 5bbeed12bdc3 ("sparc32: drop unused kmap_atomic_to_page"). Let's do it across the whole tree. 
Signed-off-by: Nicolas Pitre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/highmem.h | 1 - arch/arm/mm/highmem.c | 10 ---------- arch/frv/include/asm/highmem.h | 2 -- arch/frv/mm/highmem.c | 5 ----- arch/metag/include/asm/highmem.h | 1 - arch/metag/mm/highmem.c | 14 -------------- arch/microblaze/include/asm/highmem.h | 13 ------------- arch/mips/include/asm/highmem.h | 1 - arch/mips/mm/highmem.c | 13 ------------- arch/parisc/include/asm/cacheflush.h | 1 - arch/powerpc/include/asm/highmem.h | 13 ------------- arch/tile/include/asm/highmem.h | 1 - arch/tile/mm/highmem.c | 12 ------------ arch/x86/include/asm/highmem.h | 1 - arch/x86/mm/highmem_32.c | 14 -------------- include/linux/highmem.h | 1 - 16 files changed, 103 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 535579511ed0..0a0e2d1784c0 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -68,7 +68,6 @@ extern void kunmap(struct page *page); extern void *kmap_atomic(struct page *page); extern void __kunmap_atomic(void *kvaddr); extern void *kmap_atomic_pfn(unsigned long pfn); -extern struct page *kmap_atomic_to_page(const void *ptr); #endif #endif diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 9df5f09585ca..d02f8187b1cc 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -147,13 +147,3 @@ void *kmap_atomic_pfn(unsigned long pfn) return (void *)vaddr; } - -struct page *kmap_atomic_to_page(const void *ptr) -{ - unsigned long vaddr = (unsigned long)ptr; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - return pte_page(get_fixmap_pte(vaddr)); -} diff --git a/arch/frv/include/asm/highmem.h b/arch/frv/include/asm/highmem.h index b3adc93611f3..1f58938703ab 100644 --- a/arch/frv/include/asm/highmem.h +++ b/arch/frv/include/asm/highmem.h @@ -62,8 +62,6 @@ extern void kunmap_high(struct page *page); extern void *kmap(struct page *page); extern void kunmap(struct page *page); -extern struct page *kmap_atomic_to_page(void *ptr); - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index 785344bbdc07..45750fb65c49 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -32,11 +32,6 @@ void kunmap(struct page *page) EXPORT_SYMBOL(kunmap); -struct page *kmap_atomic_to_page(void *ptr) -{ - return virt_to_page(ptr); -} - void *kmap_atomic(struct page *page) { unsigned long paddr; diff --git a/arch/metag/include/asm/highmem.h b/arch/metag/include/asm/highmem.h index 6646a15c73dd..9b1d172cd884 100644 --- a/arch/metag/include/asm/highmem.h +++ b/arch/metag/include/asm/highmem.h @@ -56,7 +56,6 @@ extern void kunmap(struct page *page); extern void *kmap_atomic(struct page *page); extern void __kunmap_atomic(void *kvaddr); extern void *kmap_atomic_pfn(unsigned long pfn); -extern struct page *kmap_atomic_to_page(void *ptr); #endif #endif diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c index 807f1b1c4e65..f19a87f2c1ec 100644 --- a/arch/metag/mm/highmem.c +++ b/arch/metag/mm/highmem.c @@ -111,20 +111,6 @@ void *kmap_atomic_pfn(unsigned long pfn) return (void *)vaddr; } -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long vaddr = (unsigned long)ptr; - int idx; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} - void __init kmap_init(void) { unsigned long kmap_vstart; diff --git 
a/arch/microblaze/include/asm/highmem.h b/arch/microblaze/include/asm/highmem.h index d04638932438..67925ef18cfa 100644 --- a/arch/microblaze/include/asm/highmem.h +++ b/arch/microblaze/include/asm/highmem.h @@ -76,19 +76,6 @@ static inline void *kmap_atomic(struct page *page) return kmap_atomic_prot(page, kmap_prot); } -static inline struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long) ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} - #define flush_cache_kmaps() { flush_icache(); flush_dcache(); } #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h index 572e63ec2a38..01880b34a209 100644 --- a/arch/mips/include/asm/highmem.h +++ b/arch/mips/include/asm/highmem.h @@ -49,7 +49,6 @@ extern void kunmap(struct page *page); extern void *kmap_atomic(struct page *page); extern void __kunmap_atomic(void *kvaddr); extern void *kmap_atomic_pfn(unsigned long pfn); -extern struct page *kmap_atomic_to_page(void *ptr); #define flush_cache_kmaps() flush_cache_all() diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 11661cbc11a8..d7258a103439 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -118,19 +118,6 @@ void *kmap_atomic_pfn(unsigned long pfn) return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} - void __init kmap_init(void) { unsigned long kmap_vstart; diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index ec2df4bab302..845272ce9cc5 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -156,7 +156,6 @@ static inline void __kunmap_atomic(void *addr) #define kmap_atomic_prot(page, prot) kmap_atomic(page) #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) -#define kmap_atomic_to_page(ptr) virt_to_page(ptr) #endif /* _PARISC_CACHEFLUSH_H */ diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index caaf6e00630d..01c2c23b307e 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -84,19 +84,6 @@ static inline void *kmap_atomic(struct page *page) return kmap_atomic_prot(page, kmap_prot); } -static inline struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long) ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} - #define flush_cache_kmaps() flush_cache_all() diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h index fc8429a31c85..979579b38e57 100644 --- a/arch/tile/include/asm/highmem.h +++ b/arch/tile/include/asm/highmem.h @@ -63,7 +63,6 @@ void *kmap_atomic(struct page *page); void __kunmap_atomic(void *kvaddr); void *kmap_atomic_pfn(unsigned long pfn); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); -struct page *kmap_atomic_to_page(void *ptr); void *kmap_atomic_prot(struct page *page, pgprot_t prot); void kmap_atomic_fix_kpte(struct page *page, int finished); diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index fcd545014e79..eca28551b22d 100644 
--- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -275,15 +275,3 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { return kmap_atomic_prot(pfn_to_page(pfn), prot); } - -struct page *kmap_atomic_to_page(void *ptr) -{ - pte_t *pte; - unsigned long vaddr = (unsigned long)ptr; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - pte = kmap_get_pte(vaddr); - return pte_page(*pte); -} diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 04e9d023168f..1c0b43724ce3 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -68,7 +68,6 @@ void *kmap_atomic(struct page *page); void __kunmap_atomic(void *kvaddr); void *kmap_atomic_pfn(unsigned long pfn); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); -struct page *kmap_atomic_to_page(void *ptr); #define flush_cache_kmaps() do { } while (0) diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index eecb207a2037..a6d739258137 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -104,20 +104,6 @@ void __kunmap_atomic(void *kvaddr) } EXPORT_SYMBOL(__kunmap_atomic); -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} -EXPORT_SYMBOL(kmap_atomic_to_page); - void __init set_highmem_pages_init(void) { struct zone *zone; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 6aefcd0031a6..bb3f3297062a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -78,7 +78,6 @@ static inline void __kunmap_atomic(void *addr) } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) -#define kmap_atomic_to_page(ptr) virt_to_page(ptr) #define kmap_flush_unused() do {} while(0) #endif -- cgit v1.2.3 From 7bc4f1d281bc1f807fd0c9aaa2f2d333b6508790 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 9 Nov 2015 14:58:26 -0800 Subject: include/linux/kdev_t.h: remove unused huge_valid_dev() There's no user of huge_valid_dev() any more, so remove it. No functional change. Signed-off-by: Yaowei Bai Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index c838abe3ee0a..a546d206c7f3 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -54,11 +54,6 @@ static inline dev_t new_decode_dev(u32 dev) return MKDEV(major, minor); } -static inline int huge_valid_dev(dev_t dev) -{ - return 1; -} - static inline u64 huge_encode_dev(dev_t dev) { return new_encode_dev(dev); -- cgit v1.2.3 From 8b9758b9c6f65f55c94370636c04e976edc93e1a Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 9 Nov 2015 14:58:28 -0800 Subject: include/linux/kdev_t.h: old/new_valid_dev() can return bool Make old/new_valid_dev return bool due to these two particular functions only using either one or zero as their return value. No functional change. 
Signed-off-by: Yaowei Bai Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index a546d206c7f3..052c7b32cc91 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -20,7 +20,7 @@ }) /* acceptable for old filesystems */ -static inline int old_valid_dev(dev_t dev) +static inline bool old_valid_dev(dev_t dev) { return MAJOR(dev) < 256 && MINOR(dev) < 256; } @@ -35,7 +35,7 @@ static inline dev_t old_decode_dev(u16 val) return MKDEV((val >> 8) & 255, val & 255); } -static inline int new_valid_dev(dev_t dev) +static inline bool new_valid_dev(dev_t dev) { return 1; } -- cgit v1.2.3 From 35181e86df97e4223f4a28fb33e2bcf3b73de141 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 20 Oct 2015 15:39:03 +0800 Subject: KVM: x86: Add a common TSC scaling function VMX and SVM calculate the TSC scaling ratio in a similar logic, so this patch generalizes it to a common TSC scaling function. Signed-off-by: Haozhong Zhang [Inline the multiplication and shift steps into mul_u64_u64_shr. Remove BUG_ON. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 48 ++++---------------------------------- arch/x86/kvm/x86.c | 40 +++++++++++++++++++++++++++++++- include/linux/kvm_host.h | 1 + include/linux/math64.h | 51 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f3354bd92364..52d1419968eb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1238,6 +1238,8 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); + unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9c92e6f429d0..65f4f1947a62 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -212,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); -static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -892,21 +891,7 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_FFXSR); if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - u64 max; - kvm_has_tsc_control = true; - - /* - * Make sure the user can only configure tsc_khz values that - * fit into a signed integer. - * A min value is not calculated needed because it will always - * be 1 on all machines and a value of 0 is used to disable - * tsc-scaling for the vcpu. 
- */ - max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); - - kvm_max_guest_tsc_khz = max; - kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; kvm_tsc_scaling_ratio_frac_bits = 32; } @@ -972,31 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static u64 __scale_tsc(u64 ratio, u64 tsc) -{ - u64 mult, frac, _tsc; - - mult = ratio >> 32; - frac = ratio & ((1ULL << 32) - 1); - - _tsc = tsc; - _tsc *= mult; - _tsc += (tsc >> 32) * frac; - _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; - - return _tsc; -} - -static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) -{ - u64 _tsc = tsc; - - if (vcpu->arch.tsc_scaling_ratio != TSC_RATIO_DEFAULT) - _tsc = __scale_tsc(vcpu->arch.tsc_scaling_ratio, tsc); - - return _tsc; -} - static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) { u64 ratio; @@ -1065,7 +1025,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho if (host) { if (vcpu->arch.tsc_scaling_ratio != TSC_RATIO_DEFAULT) WARN_ON(adjustment < 0); - adjustment = svm_scale_tsc(vcpu, (u64)adjustment); + adjustment = kvm_scale_tsc(vcpu, (u64)adjustment); } svm->vmcb->control.tsc_offset += adjustment; @@ -1083,7 +1043,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { u64 tsc; - tsc = svm_scale_tsc(vcpu, rdtsc()); + tsc = kvm_scale_tsc(vcpu, rdtsc()); return target_tsc - tsc; } @@ -3075,7 +3035,7 @@ static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); return vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, host_tsc); + kvm_scale_tsc(vcpu, host_tsc); } static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -3085,7 +3045,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_IA32_TSC: { msr_info->data = svm->vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, rdtsc()); + kvm_scale_tsc(vcpu, rdtsc()); break; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef5b9d66cd71..1473e64cb744 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1329,6 +1329,33 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } +/* + * Multiply tsc by a fixed point number represented by ratio. + * + * The most significant 64-N bits (mult) of ratio represent the + * integral part of the fixed point number; the remaining N bits + * (frac) represent the fractional part, ie. ratio represents a fixed + * point number (mult + frac * 2^(-N)). + * + * N equals to kvm_tsc_scaling_ratio_frac_bits. + */ +static inline u64 __scale_tsc(u64 ratio, u64 tsc) +{ + return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); +} + +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + u64 _tsc = tsc; + u64 ratio = vcpu->arch.tsc_scaling_ratio; + + if (ratio != kvm_default_tsc_scaling_ratio) + _tsc = __scale_tsc(ratio, tsc); + + return _tsc; +} +EXPORT_SYMBOL_GPL(kvm_scale_tsc); + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -7371,8 +7398,19 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; - if (kvm_has_tsc_control) + if (kvm_has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines. 
+ */ + u64 max = min(0x7fffffffULL, + __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); + kvm_max_guest_tsc_khz = max; + kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; + } kvm_init_msr_list(); return 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 242a6d2b53ff..5706a2108f0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ + #endif diff --git a/include/linux/math64.h b/include/linux/math64.h index c45c089bfdac..44282ec7b682 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * mul) >> shift); +} +#endif /* mul_u64_u64_shr */ + #else #ifndef mul_u64_u32_shr @@ -161,6 +168,50 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } rl, rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rl.ll = (u64)a0.l.low * b0.l.low; + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + /* + * Each of these lines computes a 64-bit intermediate result into "c", + * starting at bits 32-95. The low 32-bits go into the result of the + * multiplication, the high 32-bits are carried into the next step. + */ + rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low; + rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + /* + * The 128-bit result of the multiplication is in rl.ll and rh.ll, + * shift it right and throw away the high part of the result. + */ + if (shift == 0) + return rl.ll; + if (shift < 64) + return (rl.ll >> shift) | (rh.ll << (64 - shift)); + return rh.ll >> (shift & 63); +} +#endif /* mul_u64_u64_shr */ + #endif #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From 381d585c80e34988269bd7901ad910981e900be1 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 20 Oct 2015 15:39:04 +0800 Subject: KVM: x86: Replace call-back set_tsc_khz() with a common function Both VMX and SVM propagate virtual_tsc_khz in the same way, so this patch removes the call-back set_tsc_khz() and replaces it with a common function. 
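For reference, a minimal userspace sketch of the shared fixed-point scheme — not from the patch, and it assumes a compiler with unsigned __int128, as the math64.h fast path above does. The ratio encodes user_tsc_khz/tsc_khz with kvm_tsc_scaling_ratio_frac_bits fractional bits; scaling a TSC value is then a 64x64->128 multiply followed by a right shift:

	#include <stdio.h>
	#include <stdint.h>

	#define FRAC_BITS 48	/* VMX uses 48 fractional bits, SVM uses 32 */

	/* same math as the mul_u64_u64_shr() fast path above */
	static uint64_t scale_tsc(uint64_t tsc, uint64_t ratio)
	{
		return (uint64_t)(((unsigned __int128)tsc * ratio) >> FRAC_BITS);
	}

	int main(void)
	{
		uint64_t tsc_khz = 2800000;		/* host TSC at 2.8 GHz */
		uint64_t user_tsc_khz = 1400000;	/* guest asks for 1.4 GHz */

		/* equivalent to mul_u64_u32_div(1ULL << FRAC_BITS,
		   user_tsc_khz, tsc_khz); here exactly 0.5 in fixed point */
		uint64_t ratio = (uint64_t)(((unsigned __int128)user_tsc_khz
					     << FRAC_BITS) / tsc_khz);

		/* a host TSC delta of 1000000 cycles is scaled to 500000 */
		printf("%llu\n", (unsigned long long)scale_tsc(1000000, ratio));
		return 0;
	}

A ratio whose integer part would overflow the encoding cannot be represented; accordingly, the common set_tsc_khz() below rejects requests whose computed ratio is zero or would reach kvm_max_tsc_scaling_ratio.
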
Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 36 -------------------------------- arch/x86/kvm/vmx.c | 17 --------------- arch/x86/kvm/x86.c | 46 ++++++++++++++++++++++++++++++++++++----- include/linux/math64.h | 29 ++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 52d1419968eb..c5a3f3d66e90 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -853,7 +853,6 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 65f4f1947a62..f6e49a6c9ab0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -957,41 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) -{ - u64 ratio; - u64 khz; - - /* Guest TSC same frequency as host TSC? */ - if (!scale) { - vcpu->arch.tsc_scaling_ratio = TSC_RATIO_DEFAULT; - return; - } - - /* TSC scaling supported? */ - if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - if (user_tsc_khz > tsc_khz) { - vcpu->arch.tsc_catchup = 1; - vcpu->arch.tsc_always_catchup = 1; - } else - WARN(1, "user requested TSC rate below hardware speed\n"); - return; - } - - khz = user_tsc_khz; - - /* TSC scaling required - calculate ratio */ - ratio = khz << 32; - do_div(ratio, tsc_khz); - - if (ratio == 0 || ratio & TSC_RATIO_RSVD) { - WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", - user_tsc_khz); - return; - } - vcpu->arch.tsc_scaling_ratio = ratio; -} - static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4402,7 +4367,6 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, - .set_tsc_khz = svm_set_tsc_khz, .read_tsc_offset = svm_read_tsc_offset, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a26ed285931b..baee46893899 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2382,22 +2382,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) return host_tsc + tsc_offset; } -/* - * Engage any workarounds for mis-matched TSC rates. Currently limited to - * software catchup for faster rates on slower CPUs. 
- */ -static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) -{ - if (!scale) - return; - - if (user_tsc_khz > tsc_khz) { - vcpu->arch.tsc_catchup = 1; - vcpu->arch.tsc_always_catchup = 1; - } else - WARN(1, "user requested TSC rate below hardware speed\n"); -} - static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu) { return vmcs_read64(TSC_OFFSET); @@ -10826,7 +10810,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - .set_tsc_khz = vmx_set_tsc_khz, .read_tsc_offset = vmx_read_tsc_offset, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1473e64cb744..c314e8d22a67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1253,7 +1253,43 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm) return v; } -static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) +static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) +{ + u64 ratio; + + /* Guest TSC same frequency as host TSC? */ + if (!scale) { + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return 0; + } + + /* TSC scaling supported? */ + if (!kvm_has_tsc_control) { + if (user_tsc_khz > tsc_khz) { + vcpu->arch.tsc_catchup = 1; + vcpu->arch.tsc_always_catchup = 1; + return 0; + } else { + WARN(1, "user requested TSC rate below hardware speed\n"); + return -1; + } + } + + /* TSC scaling required - calculate ratio */ + ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, + user_tsc_khz, tsc_khz); + + if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { + WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return -1; + } + + vcpu->arch.tsc_scaling_ratio = ratio; + return 0; +} + +static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { u32 thresh_lo, thresh_hi; int use_scaling = 0; @@ -1262,7 +1298,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) if (this_tsc_khz == 0) { /* set tsc_scaling_ratio to a safe value */ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; - return; + return -1; } /* Compute a scale to convert nanoseconds in TSC cycles */ @@ -1283,7 +1319,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } - kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); + return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) @@ -3353,9 +3389,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (user_tsc_khz == 0) user_tsc_khz = tsc_khz; - kvm_set_tsc_khz(vcpu, user_tsc_khz); + if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) + r = 0; - r = 0; goto out; } case KVM_GET_TSC_KHZ: { diff --git a/include/linux/math64.h b/include/linux/math64.h index 44282ec7b682..6e8b5b270ffe 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -214,4 +214,33 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) #endif +#ifndef mul_u64_u32_div +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } u, rl, rh; + + u.ll = a; + rl.ll = (u64)u.l.low * mul; + rh.ll = (u64)u.l.high * mul + rl.l.high; + + /* Bits 32-63 of the result will be in rh.l.low. 
*/ + rl.l.high = do_div(rh.ll, divisor); + + /* Bits 0-31 of the result will be in rl.l.low. */ + do_div(rl.ll, divisor); + + rl.l.high = rh.l.low; + return rl.ll; +} +#endif /* mul_u64_u32_div */ + #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From f70cd6b07e629f367bb9b1ac9d0e3e669eb325c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:55 +0100 Subject: context_tracking: remove duplicate enabled check All calls to context_tracking_enter and context_tracking_exit are already checking context_tracking_is_enabled, except the context_tracking_user_enter and context_tracking_user_exit functions left in for the benefit of assembly calls. Pull the check up to those functions, by making them simple wrappers around the user_enter and user_exit inline functions. Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Acked-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 4 ++-- kernel/context_tracking.c | 16 ++-------------- 2 files changed, 4 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 008fc67d0d96..6ef136ff0897 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -18,13 +18,13 @@ extern void context_tracking_user_exit(void); static inline void user_enter(void) { if (context_tracking_is_enabled()) - context_tracking_user_enter(); + context_tracking_enter(CONTEXT_USER); } static inline void user_exit(void) { if (context_tracking_is_enabled()) - context_tracking_user_exit(); + context_tracking_exit(CONTEXT_USER); } static inline enum ctx_state exception_enter(void) diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 0a495ab35bc7..6d4c6ce21275 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -62,15 +62,6 @@ void context_tracking_enter(enum ctx_state state) { unsigned long flags; - /* - * Repeat the user_enter() check here because some archs may be calling - * this from asm and if no CPU needs context tracking, they shouldn't - * go further. Repeat the check here until they support the inline static - * key check. - */ - if (!context_tracking_is_enabled()) - return; - /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: @@ -128,7 +119,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter); void context_tracking_user_enter(void) { - context_tracking_enter(CONTEXT_USER); + user_enter(); } NOKPROBE_SYMBOL(context_tracking_user_enter); @@ -148,9 +139,6 @@ void context_tracking_exit(enum ctx_state state) { unsigned long flags; - if (!context_tracking_is_enabled()) - return; - if (in_interrupt()) return; @@ -181,7 +169,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit); void context_tracking_user_exit(void) { - context_tracking_exit(CONTEXT_USER); + user_exit(); } NOKPROBE_SYMBOL(context_tracking_user_exit); -- cgit v1.2.3 From d0e536d89395ecd8ab78fe999dc4d6f5d140ce46 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:56 +0100 Subject: context_tracking: avoid irq_save/irq_restore on guest entry and exit guest_enter and guest_exit must be called with interrupts disabled, since they take the vtime_seqlock with write_seq{lock,unlock}. Therefore, it is not necessary to check for exceptions, nor to save/restore the IRQ state, when context tracking functions are called by guest_enter and guest_exit. 
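As a rough sketch of the resulting call pattern (the surrounding function is hypothetical; guest_enter()/guest_exit() and the __-prefixed helpers are the real ones introduced below):

static void run_guest_once(void)
{
	local_irq_disable();
	guest_enter();	/* -> __context_tracking_enter(CONTEXT_GUEST) */
	/* ... hardware vmentry/vmexit takes place here ... */
	guest_exit();	/* -> __context_tracking_exit(CONTEXT_GUEST) */
	local_irq_enable();
}

Because interrupts are already off across the whole guest run, the __-prefixed entry points can drop the local_irq_save()/local_irq_restore() pair entirely.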
Split the body of context_tracking_enter and context_tracking_exit out to __-prefixed functions, and use them from KVM. Rik van Riel has measured this to speed up a tight vmentry/vmexit loop by about 2%. Cc: Andy Lutomirski Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 8 +++-- kernel/context_tracking.c | 64 ++++++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6ef136ff0897..68b575afe5f5 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,10 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +/* Called with interrupts disabled. */ +extern void __context_tracking_enter(enum ctx_state state); +extern void __context_tracking_exit(enum ctx_state state); + extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); @@ -88,13 +92,13 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; if (context_tracking_is_enabled()) - context_tracking_enter(CONTEXT_GUEST); + __context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { if (context_tracking_is_enabled()) - context_tracking_exit(CONTEXT_GUEST); + __context_tracking_exit(CONTEXT_GUEST); if (vtime_accounting_enabled()) vtime_guest_exit(current); diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 6d4c6ce21275..d8560ee3bab7 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -58,27 +58,13 @@ static void context_tracking_recursion_exit(void) * instructions to execute won't use any RCU read side critical section * because this function sets RCU in extended quiescent state. */ -void context_tracking_enter(enum ctx_state state) +void __context_tracking_enter(enum ctx_state state) { - unsigned long flags; - - /* - * Some contexts may involve an exception occuring in an irq, - * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() - * This would mess up the dyntick_nesting count though. And rcu_irq_*() - * helpers are enough to protect RCU uses inside the exception. So - * just return immediately if we detect we are in an IRQ. - */ - if (in_interrupt()) - return; - /* Kernel threads aren't supposed to go to userspace */ WARN_ON_ONCE(!current->mm); - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if ( __this_cpu_read(context_tracking.state) != state) { if (__this_cpu_read(context_tracking.active)) { @@ -111,7 +97,27 @@ void context_tracking_enter(enum ctx_state state) __this_cpu_write(context_tracking.state, state); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_enter); +EXPORT_SYMBOL_GPL(__context_tracking_enter); + +void context_tracking_enter(enum ctx_state state) +{ + unsigned long flags; + + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. 
+ */ + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_enter(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_enter); @@ -135,16 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter); * This call supports re-entrancy. This way it can be called from any exception * handler without needing to know if we came from userspace or not. */ -void context_tracking_exit(enum ctx_state state) +void __context_tracking_exit(enum ctx_state state) { - unsigned long flags; - - if (in_interrupt()) - return; - - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.active)) { @@ -161,7 +161,19 @@ void context_tracking_exit(enum ctx_state state) __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_exit); +EXPORT_SYMBOL_GPL(__context_tracking_exit); + +void context_tracking_exit(enum ctx_state state) +{ + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_exit(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_exit); -- cgit v1.2.3 From d1cd21427747f15920cd726f5f67a07880e7dee4 Mon Sep 17 00:00:00 2001 From: Jonathan Richardson Date: Fri, 16 Oct 2015 17:40:58 -0700 Subject: pwm: Set enable state properly on failed call to enable The pwm_enable() function didn't clear the enabled bit if a call to the driver's ->enable() callback returned an error. The result was that the state of the PWM core was wrong. Clearing the bit when enable returns an error ensures the state is properly set. Tested-by: Jonathan Richardson Reviewed-by: Dmitry Torokhov Signed-off-by: Jonathan Richardson [thierry.reding@gmail.com: add missing kerneldoc for the lock] Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 33 ++++++++++++++++++++++++++------- include/linux/pwm.h | 3 +++ 2 files changed, 29 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 3f9df3ea3350..b8f6c309c160 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -269,6 +269,7 @@ int pwmchip_add_with_polarity(struct pwm_chip *chip, pwm->pwm = chip->base + i; pwm->hwpwm = i; pwm->polarity = polarity; + mutex_init(&pwm->lock); radix_tree_insert(&pwm_tree, pwm->pwm, pwm); } @@ -473,16 +474,22 @@ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity) if (!pwm->chip->ops->set_polarity) return -ENOSYS; - if (pwm_is_enabled(pwm)) - return -EBUSY; + mutex_lock(&pwm->lock); + + if (pwm_is_enabled(pwm)) { + err = -EBUSY; + goto unlock; + } err = pwm->chip->ops->set_polarity(pwm->chip, pwm, polarity); if (err) - return err; + goto unlock; pwm->polarity = polarity; - return 0; +unlock: + mutex_unlock(&pwm->lock); + return err; } EXPORT_SYMBOL_GPL(pwm_set_polarity); @@ -494,10 +501,22 @@ EXPORT_SYMBOL_GPL(pwm_set_polarity); */ int pwm_enable(struct pwm_device *pwm) { - if (pwm && !test_and_set_bit(PWMF_ENABLED, &pwm->flags)) - return pwm->chip->ops->enable(pwm->chip, pwm); + int err = 0; + + if (!pwm) + return -EINVAL; + + mutex_lock(&pwm->lock); + + if (!test_and_set_bit(PWMF_ENABLED, &pwm->flags)) { + err = pwm->chip->ops->enable(pwm->chip, pwm); + if (err) + clear_bit(PWMF_ENABLED, &pwm->flags); + } + + mutex_unlock(&pwm->lock); - return pwm ? 
0 : -EINVAL; + return err; } EXPORT_SYMBOL_GPL(pwm_enable); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d681f6875aef..cfc3ed46cad2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -2,6 +2,7 @@ #define __LINUX_PWM_H #include +#include #include struct pwm_device; @@ -87,6 +88,7 @@ enum { * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device + * @lock: used to serialize accesses to the PWM device where necessary * @period: period of the PWM signal (in nanoseconds) * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) * @polarity: polarity of the PWM signal @@ -98,6 +100,7 @@ struct pwm_device { unsigned int pwm; struct pwm_chip *chip; void *chip_data; + struct mutex lock; unsigned int period; unsigned int duty_cycle; -- cgit v1.2.3 From aabc92bbe3cfe4c545f8ccdaaeeea012a46f0abf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 10 Nov 2015 14:31:18 +0100 Subject: net: add __netdev_alloc_pcpu_stats() to indicate gfp flags nf_tables may create percpu counters from the packet path through its dynamic set instantiation infrastructure, so we need a way to allocate this through GFP_ATOMIC. Signed-off-by: Pablo Neira Ayuso Acked-by: David S. Miller --- include/linux/netdevice.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c00772bd136..e9d0c8a75380 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2068,20 +2068,23 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; -#define netdev_alloc_pcpu_stats(type) \ -({ \ - typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ - if (pcpu_stats) { \ - int __cpu; \ - for_each_possible_cpu(__cpu) { \ - typeof(type) *stat; \ - stat = per_cpu_ptr(pcpu_stats, __cpu); \ - u64_stats_init(&stat->syncp); \ - } \ - } \ - pcpu_stats; \ +#define __netdev_alloc_pcpu_stats(type, gfp) \ +({ \ + typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ }) +#define netdev_alloc_pcpu_stats(type) \ + __netdev_alloc_pcpu_stats(type, GFP_KERNEL); + #include /* netdevice notifier chain. Please remember to update the rtnetlink -- cgit v1.2.3 From b1d06b60e90cd5016798b9984f8e420e753f4846 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 6 Nov 2015 19:28:22 -0800 Subject: of: Provide static inline function for of_translate_address if needed If OF_ADDRESS is not configured, builds can fail with errors such as drivers/net/ethernet/hisilicon/hns_mdio.c: In function 'hns_mdio_bus_name': drivers/net/ethernet/hisilicon/hns_mdio.c:411:3: error: implicit declaration of function 'of_translate_address' as currently seen when building sparc:allmodconfig. Introduce a static inline function if OF_ADDRESS is not configured to fix the build failure. Return OF_BAD_ADDR in this case. For this to work, the definition of OF_BAD_ADDR has to be moved outside CONFIG_OF conditional code. 
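A hedged sketch of a typical caller (the surrounding driver code is hypothetical; of_get_address() and of_translate_address() are the real APIs): with the stub in place this compiles regardless of CONFIG_OF_ADDRESS, and when the stub is used the OF_BAD_ADDR check simply takes the error path at runtime instead of the build failing.

/* Hypothetical probe-path snippet. When CONFIG_OF_ADDRESS is unset,
 * the new static inline returns OF_BAD_ADDR, so we fall into the
 * error path below rather than hitting an implicit declaration. */
const __be32 *reg = of_get_address(np, 0, NULL, NULL);
u64 paddr;

if (!reg)
	return -EINVAL;
paddr = of_translate_address(np, reg);
if (paddr == OF_BAD_ADDR)
	return -EINVAL;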
Fixes: 876133d3161d ("net: hisilicon: add OF dependency") Cc: Arnd Bergmann Signed-off-by: Guenter Roeck Reviewed-by: Arnd Bergmann Reviewed-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of.h | 4 ++-- include/linux/of_address.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 2194b8ca41f9..dd10626a615f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -126,6 +126,8 @@ extern raw_spinlock_t devtree_lock; #define OF_POPULATED 3 /* device already created for the node */ #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ +#define OF_BAD_ADDR ((u64)-1) + #ifdef CONFIG_OF void of_core_init(void); @@ -229,8 +231,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) -#define OF_BAD_ADDR ((u64)-1) - static inline const char *of_node_full_name(const struct device_node *np) { return np ? np->full_name : ""; diff --git a/include/linux/of_address.h b/include/linux/of_address.h index d88e81be6368..507daad0bc8d 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -57,6 +57,13 @@ extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size); extern bool of_dma_is_coherent(struct device_node *np); #else /* CONFIG_OF_ADDRESS */ + +static inline u64 of_translate_address(struct device_node *np, + const __be32 *addr) +{ + return OF_BAD_ADDR; +} + static inline struct device_node *of_find_matching_node_by_address( struct device_node *from, const struct of_device_id *matches, -- cgit v1.2.3 From 5c50002963369c7c622b18ff751719eadbe225c5 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 13 Oct 2015 16:51:02 -0600 Subject: vfs: remove unused wrapper block_page_mkwrite() The function currently called "__block_page_mkwrite()" used to be called "block_page_mkwrite()" until a wrapper for this function was added by: commit 24da4fab5a61 ("vfs: Create __block_page_mkwrite() helper passing error values back") This wrapper, the current "block_page_mkwrite()", is currently unused. __block_page_mkwrite() is used directly by ext4, nilfs2 and xfs. Remove the unused wrapper, rename __block_page_mkwrite() back to block_page_mkwrite() and update the comment above block_page_mkwrite(). Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Jan Kara Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Al Viro --- fs/buffer.c | 24 ++---------------------- fs/ext4/inode.c | 4 ++-- fs/nilfs2/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/buffer_head.h | 2 -- 5 files changed, 6 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 51aff0296ce2..4f4cd959da7c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2420,9 +2420,9 @@ EXPORT_SYMBOL(block_commit_write); * unlock the page. * * Direct callers of this function should protect against filesystem freezing - * using sb_start_write() - sb_end_write() functions. + * using sb_start_pagefault() - sb_end_pagefault() functions. 
*/ -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct page *page = vmf->page; @@ -2459,26 +2459,6 @@ out_unlock: unlock_page(page); return ret; } -EXPORT_SYMBOL(__block_page_mkwrite); - -int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) -{ - int ret; - struct super_block *sb = file_inode(vma->vm_file)->i_sb; - - sb_start_pagefault(sb); - - /* - * Update file times before taking page lock. We may end up failing the - * fault so this update may be superfluous but who really cares... - */ - file_update_time(vma->vm_file); - - ret = __block_page_mkwrite(vma, vmf, get_block); - sb_end_pagefault(sb); - return block_page_mkwrite_return(ret); -} EXPORT_SYMBOL(block_page_mkwrite); /* diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7d1aad1d9313..ea433a7f4bca 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5283,7 +5283,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) !ext4_should_journal_data(inode) && !ext4_nonda_switch(inode->i_sb)) { do { - ret = __block_page_mkwrite(vma, vmf, + ret = block_page_mkwrite(vma, vmf, ext4_da_get_block_prep); } while (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)); @@ -5330,7 +5330,7 @@ retry_alloc: ret = VM_FAULT_SIGBUS; goto out; } - ret = __block_page_mkwrite(vma, vmf, get_block); + ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 54575e3cc1a2..088ba001c6ef 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -109,7 +109,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; file_update_time(vma->vm_file); - ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + ret = block_page_mkwrite(vma, vmf, nilfs_get_block); if (ret) { nilfs_transaction_abort(inode->i_sb); goto out; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e78feb400e22..f80e90f95ad8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1506,7 +1506,7 @@ xfs_filemap_page_mkwrite( ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, xfs_end_io_dax_write); } else { - ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks); + ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); ret = block_page_mkwrite_return(ret); } diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e6797ded700e..89d9aa9e79bf 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -227,8 +227,6 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); /* Convert errno to return value from ->page_mkwrite() call */ -- cgit v1.2.3 From c8fffa643583e00eb9a783abbca251b11bc0d163 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 8 Oct 2015 17:07:20 -0600 Subject: vfs: remove stale comment in inode_operations The big warning comment that is currently at the end of struct inode_operations was added as part of this commit: 
4aa7c6346be3 ("vfs: add i_op->dentry_open()") It was added to warn people not to use the newly added 'dentry_open' function pointer. This function pointer was removed as part of this commit: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") The comment was left behind and now refers to nothing, so remove it. Signed-off-by: Ross Zwisler Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a1cb8c605e0..f3bfbd7d3fa9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1665,8 +1665,6 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); - - /* WARNING: probably going away soon, do not use! */ } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, -- cgit v1.2.3 From c0a9f72c156baf1e88c33c6ba4450647af1b8804 Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Mon, 12 Oct 2015 10:40:43 +0100 Subject: irqchip: irq-mips-gic: Provide function to map GIC user section The GIC provides a "user-mode visible" section containing a mirror of the counter registers which can be mapped into user memory. This will be used by the VDSO time function implementations, so provide a function to map it in. When the GIC is not enabled in Kconfig a dummy inline version of this function is provided, along with "#define gic_present 0", so that we don't have to litter the VDSO code with ifdefs. [markos.chandras@imgtec.com: - Move mapping code to arch/mips/kernel/vdso.c and use a resource type to get the GIC usermode information - Avoid renaming function arguments and use __gic_base_addr to hold the base GIC address prior to ioremap.] [ralf@linux-mips.org: Fix up gic_get_usm_range() to compile and make inline again.] Signed-off-by: Alex Smith Signed-off-by: Markos Chandras Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Alex Smith Cc: Markos Chandras Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/11281/ Signed-off-by: Ralf Baechle --- drivers/irqchip/irq-mips-gic.c | 14 ++++++++++++++ include/linux/irqchip/mips-gic.h | 17 +++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index aeaa061f0dbf..9e17ef27a183 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -29,6 +29,7 @@ struct gic_pcpu_mask { DECLARE_BITMAP(pcpu_mask, GIC_MAX_INTRS); }; +static unsigned long __gic_base_addr; static void __iomem *gic_base; static struct gic_pcpu_mask pcpu_masks[NR_CPUS]; static DEFINE_SPINLOCK(gic_lock); @@ -301,6 +302,17 @@ int gic_get_c0_fdc_int(void) GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC)); } +int gic_get_usm_range(struct resource *gic_usm_res) +{ + if (!gic_present) + return -1; + + gic_usm_res->start = __gic_base_addr + USM_VISIBLE_SECTION_OFS; + gic_usm_res->end = gic_usm_res->start + (USM_VISIBLE_SECTION_SIZE - 1); + + return 0; +} + static void gic_handle_shared_int(bool chained) { unsigned int i, intr, virq, gic_reg_step = mips_cm_is64 ? 
8 : 4; @@ -798,6 +810,8 @@ static void __init __gic_init(unsigned long gic_base_addr, { unsigned int gicconfig; + __gic_base_addr = gic_base_addr; + gic_base = ioremap_nocache(gic_base_addr, gic_addrspace_size); gicconfig = gic_read(GIC_REG(SHARED, GIC_SH_CONFIG)); diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 4e6861605050..ce824db48d64 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -9,6 +9,7 @@ #define __LINUX_IRQCHIP_MIPS_GIC_H #include +#include #define GIC_MAX_INTRS 256 @@ -245,6 +246,8 @@ #define GIC_SHARED_TO_HWIRQ(x) (GIC_SHARED_HWIRQ_BASE + (x)) #define GIC_HWIRQ_TO_SHARED(x) ((x) - GIC_SHARED_HWIRQ_BASE) +#ifdef CONFIG_MIPS_GIC + extern unsigned int gic_present; extern void gic_init(unsigned long gic_base_addr, @@ -264,4 +267,18 @@ extern unsigned int plat_ipi_resched_int_xlate(unsigned int); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); +extern int gic_get_usm_range(struct resource *gic_usm_res); + +#else /* CONFIG_MIPS_GIC */ + +#define gic_present 0 + +static inline int gic_get_usm_range(struct resource *gic_usm_res) +{ + /* Shouldn't be called. */ + return -1; +} + +#endif /* CONFIG_MIPS_GIC */ + #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit v1.2.3 From e3a7a3bf362e2a8acc301e5eaec2631e740a8a95 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Nov 2015 09:37:34 -0700 Subject: block: don't hardcode blk_qc_t -> tag mask Use the shift/mask we use elsewhere. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 641e5a3ed58c..0fb65843ec1e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -265,7 +265,7 @@ static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) { - return cookie & 0xffff; + return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } #endif /* __LINUX_BLK_TYPES_H */ -- cgit v1.2.3 From e409de992e3ea3674393465f07cc71c948edd87a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Oct 2015 19:18:52 +0200 Subject: 9p: xattr simplifications Now that the xattr handler is passed to the xattr handler operations, we can use the same get and set operations for the user, trusted, and security xattr namespaces. In those namespaces, we can access the full attribute name by "reattaching" the name prefix the vfs has skipped for us. Add a xattr_full_name helper to make this obvious in the code. For the "system.posix_acl_access" and "system.posix_acl_default" attributes, handler->prefix is the full attribute name; the suffix is the empty string. 
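For illustration only (not part of the patch), the pointer arithmetic behind the xattr_full_name helper amounts to:

/* The vfs matched handler->prefix ("user.") on the full attribute
 * name and passed the handler only the suffix ("foo"). Prefix and
 * suffix live in the same buffer, so stepping back
 * strlen(handler->prefix) bytes recovers "user.foo" with no
 * allocation or copying. */
const char *suffix = name;                            /* "foo"      */
const char *full = suffix - strlen(handler->prefix);  /* "user.foo" */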
Signed-off-by: Andreas Gruenbacher Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: v9fs-developer@lists.sourceforge.net Signed-off-by: Al Viro --- fs/9p/Makefile | 5 +-- fs/9p/acl.c | 51 +++---------------------------- fs/9p/xattr.c | 42 ++++++++++++++++++++++++++ fs/9p/xattr.h | 3 -- fs/9p/xattr_security.c | 82 -------------------------------------------------- fs/9p/xattr_trusted.c | 82 -------------------------------------------------- fs/9p/xattr_user.c | 82 -------------------------------------------------- fs/xattr.c | 24 +++++++++++++++ include/linux/xattr.h | 18 ++++++----- 9 files changed, 83 insertions(+), 306 deletions(-) delete mode 100644 fs/9p/xattr_security.c delete mode 100644 fs/9p/xattr_trusted.c delete mode 100644 fs/9p/xattr_user.c (limited to 'include/linux') diff --git a/fs/9p/Makefile b/fs/9p/Makefile index ff7be98f84f2..9619ccadd2fc 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -10,10 +10,7 @@ obj-$(CONFIG_9P_FS) := 9p.o vfs_dentry.o \ v9fs.o \ fid.o \ - xattr.o \ - xattr_user.o \ - xattr_trusted.o + xattr.o 9p-$(CONFIG_9P_FSCACHE) += cache.o 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o -9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o diff --git a/fs/9p/acl.c b/fs/9p/acl.c index e6fe82462043..a7e28890f5ef 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -212,31 +212,12 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep, return 0; } -static int v9fs_remote_get_acl(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - char *full_name; - - switch (type) { - case ACL_TYPE_ACCESS: - full_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - full_name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - BUG(); - } - return v9fs_xattr_get(dentry, full_name, buffer, size); -} - static int v9fs_xattr_get_acl(const struct xattr_handler *handler, struct dentry *dentry, const char *name, void *buffer, size_t size) { struct v9fs_session_info *v9ses; struct posix_acl *acl; - int type = handler->flags; int error; if (strcmp(name, "") != 0) @@ -247,9 +228,9 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler, * We allow set/get/list of acl when access=client is not specified */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_remote_get_acl(dentry, name, buffer, size, type); + return v9fs_xattr_get(dentry, handler->prefix, buffer, size); - acl = v9fs_get_cached_acl(d_inode(dentry), type); + acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -260,26 +241,6 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler, return error; } -static int v9fs_remote_set_acl(struct dentry *dentry, const char *name, - const void *value, size_t size, - int flags, int type) -{ - char *full_name; - - switch (type) { - case ACL_TYPE_ACCESS: - full_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - full_name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - BUG(); - } - return v9fs_xattr_set(dentry, full_name, value, size, flags); -} - - static int v9fs_xattr_set_acl(const struct xattr_handler *handler, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) @@ -298,8 +259,8 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, * xattr value. 
We leave it to the server to validate */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_remote_set_acl(dentry, name, - value, size, flags, handler->flags); + return v9fs_xattr_set(dentry, handler->prefix, value, size, + flags); if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -320,7 +281,6 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, switch (handler->flags) { case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; if (acl) { umode_t mode = inode->i_mode; retval = posix_acl_equiv_mode(acl, &mode); @@ -351,7 +311,6 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, } break; case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; if (!S_ISDIR(inode->i_mode)) { retval = acl ? -EINVAL : 0; goto err_out; @@ -360,7 +319,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, default: BUG(); } - retval = v9fs_xattr_set(dentry, name, value, size, flags); + retval = v9fs_xattr_set(dentry, handler->prefix, value, size, flags); if (!retval) set_cached_acl(inode, handler->flags, acl); err_out: diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 0cf44b6cccd6..e3d026ac382e 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -137,6 +137,48 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) return v9fs_xattr_get(dentry, NULL, buffer, buffer_size); } +static int v9fs_xattr_handler_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + const char *full_name = xattr_full_name(handler, name); + + if (strcmp(name, "") == 0) + return -EINVAL; + return v9fs_xattr_get(dentry, full_name, buffer, size); +} + +static int v9fs_xattr_handler_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + const char *full_name = xattr_full_name(handler, name); + + if (strcmp(name, "") == 0) + return -EINVAL; + return v9fs_xattr_set(dentry, full_name, value, size, flags); +} + +static struct xattr_handler v9fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; + +static struct xattr_handler v9fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; + +#ifdef CONFIG_9P_FS_SECURITY +static struct xattr_handler v9fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; +#endif + const struct xattr_handler *v9fs_xattr_handlers[] = { &v9fs_xattr_user_handler, &v9fs_xattr_trusted_handler, diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index d3e2ea3840be..c63c3bea5de5 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h @@ -19,9 +19,6 @@ #include extern const struct xattr_handler *v9fs_xattr_handlers[]; -extern struct xattr_handler v9fs_xattr_user_handler; -extern struct xattr_handler v9fs_xattr_trusted_handler; -extern struct xattr_handler v9fs_xattr_security_handler; extern const struct xattr_handler v9fs_xattr_acl_access_handler; extern const struct xattr_handler v9fs_xattr_acl_default_handler; diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c deleted file mode 100644 index c0a470add13c..000000000000 --- a/fs/9p/xattr_security.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser 
General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ - - -#include -#include -#include -#include -#include "xattr.h" - -static int v9fs_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_security_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .get = v9fs_xattr_security_get, - .set = v9fs_xattr_security_set, -}; diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c deleted file mode 100644 index b888a4eecd1a..000000000000 --- a/fs/9p/xattr_trusted.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * - */ - - -#include -#include -#include -#include -#include "xattr.h" - -static int v9fs_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_trusted_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .get = v9fs_xattr_trusted_get, - .set = v9fs_xattr_trusted_set, -}; diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c deleted file mode 100644 index 06f136cbe264..000000000000 --- a/fs/9p/xattr_user.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * - */ - - -#include -#include -#include -#include -#include "xattr.h" - -static int v9fs_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_USER_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_USER_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_user_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_USER_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_USER_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .get = v9fs_xattr_user_get, - .set = v9fs_xattr_user_set, -}; diff --git a/fs/xattr.c b/fs/xattr.c index 44377b6f6001..9b932b95d74e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -790,6 +790,30 @@ EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); +/** + * xattr_full_name - Compute full attribute name from suffix + * + * @handler: handler of the xattr_handler operation + * @name: name passed to the xattr_handler operation + * + * The get and set xattr handler operations are called with the remainder of + * the attribute name after skipping the handler's prefix: for example, "foo" + * is passed to the get operation of a handler with prefix "user." to get + * attribute "user.foo". The full name is still "there" in the name though. + * + * Note: the list xattr handler operation when called from the vfs is passed a + * NULL name; some file systems use this operation internally, with varying + * semantics. + */ +const char *xattr_full_name(const struct xattr_handler *handler, + const char *name) +{ + size_t prefix_len = strlen(handler->prefix); + + return name - prefix_len; +} +EXPORT_SYMBOL(xattr_full_name); + /* * Allocate new xattr and copy in the value; but leave the name to callers. 
*/ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 91b0a68d38dc..89474b9d260c 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -21,15 +21,19 @@ struct dentry; struct xattr_handler { const char *prefix; - int flags; /* fs private flags passed back to the handlers */ - size_t (*list)(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags); - int (*get)(struct dentry *dentry, const char *name, void *buffer, - size_t size, int handler_flags); - int (*set)(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags); + int flags; /* fs private flags */ + size_t (*list)(const struct xattr_handler *, struct dentry *dentry, + char *list, size_t list_size, const char *name, + size_t name_len); + int (*get)(const struct xattr_handler *, struct dentry *dentry, + const char *name, void *buffer, size_t size); + int (*set)(const struct xattr_handler *, struct dentry *dentry, + const char *name, const void *buffer, size_t size, + int flags); }; +const char *xattr_full_name(const struct xattr_handler *, const char *); + struct xattr { const char *name; void *value; -- cgit v1.2.3 From 66189961e986e53ae39822898fc2ce88f44c61bb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 12 Nov 2015 19:35:26 +0200 Subject: net/mlx5e: Added self loopback prevention Prevent outgoing multicast frames from looping back to the RX queue, by introducing the new HW capability self_lb_en_modifiable, which indicates support for modifying the self_lb_en bit in the modify_tir command. When this capability is set we can prevent TIRs from sending loopback multicast traffic back to their own RQs, by "refreshing" the TIRs with the modify_tir command every time new channels (SQs/RQs) are created at device open. This is needed since TIRs are static and only allocated once on driver load, and the loopback decision is their responsibility. Fixes issues of the kind: "IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!" The issue is seen because the IPv6 solicitation multicast messages are looped back and the network stack thinks they are coming from another host. Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 48 +++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 24 +++++++----- 2 files changed, 62 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5fc4d2d78cdf..df001754bcd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) return err; } +static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, + u32 tirn) +{ + void *in; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + err = mlx5_core_modify_tir(mdev, tirn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) +{ + int err; + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, + priv->tirn[i]); + if (err) + return err; + } + + return 0; +} + static int mlx5e_set_dev_port_mtu(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -1376,6 +1412,13 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; } + err = mlx5e_refresh_tirs_self_loopback_enable(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", + __func__, err); + goto err_close_channels; + } + mlx5e_update_carrier(priv); mlx5e_redirect_rqts(priv); @@ -1383,6 +1426,8 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; +err_close_channels: + mlx5e_close_channels(priv); err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; @@ -1909,6 +1954,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) "Not creating net device, some required device capabilities are missing\n"); return -ENOTSUPP; } + if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) + mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); + return 0; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd2097455a2e..1565324eb620 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_0[0x6]; + u8 reserved_0[0x3]; + u8 self_lb_en_modifiable[0x1]; + u8 reserved_1[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_1[0x4]; + u8 reserved_2[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_2[0x3]; + u8 reserved_3[0x3]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_3[0x2]; + u8 reserved_4[0x2]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_4[0x20]; + u8 reserved_5[0x20]; - u8 reserved_5[0x10]; + u8 reserved_6[0x10]; u8 lro_min_mss_size[0x10]; - u8 reserved_6[0x120]; + u8 reserved_7[0x120]; u8 lro_timer_supported_periods[4][0x20]; - u8 reserved_7[0x600]; + u8 reserved_8[0x600]; }; struct mlx5_ifc_roce_cap_bits { @@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits { }; struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_1[0x1b]; + u8 self_lb_en[0x1]; + u8 reserved_2[0x3]; u8 lro[0x1]; }; -- cgit v1.2.3 From 
500404ebcbd074ca11aa0c3fd9a268aa4054fd8b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 3 Nov 2015 12:28:10 +0200 Subject: dmaengine: of_dma: Correct return code for of_dma_request_slave_channel in case !CONFIG_OF of_dma_request_slave_channel should return either a pointer to a valid dma_chan or an ERR_PTR() error code; NULL is not expected to be returned. Signed-off-by: Peter Ujfalusi Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 36112cdd665a..b90d8ec57c1f 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -80,7 +80,7 @@ static inline int of_dma_router_register(struct device_node *np, static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, -- cgit v1.2.3 From 28f9ee22bcdd84726dbf6267d0b58f254166b900 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 16 Nov 2015 15:43:45 -0500 Subject: vlan: Do not put vlan headers back on bridge and macvlan ports When a vlan is configured with REORDER_HEADER set to 0, the vlan header is put back into the packet and makes it appear that the vlan header is still there even after it's been processed. This poses a problem for bridge and macvlan ports. The packets passed to those devices may be forwarded, and at the time of the forward the vlan headers end up being unexpectedly present. With the patch, we make sure that we do not put the vlan header back (when REORDER_HEADER is 0) if a bridge or macvlan has been configured on top of the vlan device. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ net/8021q/vlan_core.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc221b967687..67bfac1abfc1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3857,6 +3857,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev) return dev->priv_flags & IFF_EBRIDGE; } +static inline bool netif_is_bridge_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_BRIDGE_PORT; +} + static inline bool netif_is_ovs_master(const struct net_device *dev) { return dev->priv_flags & IFF_OPENVSWITCH; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 496b27588493..e2ed69850489 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp) skb->pkt_type = PACKET_HOST; } - if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) && + !netif_is_macvlan_port(vlan_dev) && + !netif_is_bridge_port(vlan_dev)) { unsigned int offset = skb->data - skb_mac_header(skb); /* -- cgit v1.2.3 From 819ec8e1f349f73bdf65bf33a364538e59007a9a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 16 Nov 2015 23:34:41 +0100 Subject: phy: marvell: Add support for 88E1540 PHY The 88E1540 can be found embedded in the Marvell 88E6352 switch. It is compatible with the 88E1510, so add support for it, using the 88E1510 specific functions. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/marvell.c | 16 ++++++++++++++++ include/linux/marvell_phy.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 5de8d5827536..0240552b50f3 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1153,6 +1153,21 @@ static struct phy_driver marvell_drivers[] = { .suspend = &genphy_suspend, .driver = { .owner = THIS_MODULE }, }, + { + .phy_id = MARVELL_PHY_ID_88E1540, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .name = "Marvell 88E1540", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = &m88e1510_config_aneg, + .read_status = &marvell_read_status, + .ack_interrupt = &marvell_ack_interrupt, + .config_intr = &marvell_config_intr, + .did_interrupt = &m88e1121_did_interrupt, + .resume = &genphy_resume, + .suspend = &genphy_suspend, + .driver = { .owner = THIS_MODULE }, + }, { .phy_id = MARVELL_PHY_ID_88E3016, .phy_id_mask = MARVELL_PHY_ID_MASK, @@ -1186,6 +1201,7 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = { { MARVELL_PHY_ID_88E1318S, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1116R, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1510, MARVELL_PHY_ID_MASK }, + { MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK }, { } }; diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index e6982ac3200d..a57f0dfb6db7 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -16,6 +16,7 @@ #define MARVELL_PHY_ID_88E1318S 0x01410e90 #define MARVELL_PHY_ID_88E1116R 0x01410e40 #define MARVELL_PHY_ID_88E1510 0x01410dd0 +#define MARVELL_PHY_ID_88E1540 0x01410eb0 #define MARVELL_PHY_ID_88E3016 0x01410e60 /* struct phy_device dev_flags definitions */ -- cgit v1.2.3 From 2e6edc95382cc36423aff18a237173ad62d5ab52 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2015 13:29:28 -0800 Subject: block: protect rw_page against device teardown Fix use after free crashes like the following: general protection fault: 0000 [#1] SMP Call Trace: [] ? pmem_do_bvec.isra.12+0xa6/0xf0 [nd_pmem] [] pmem_rw_page+0x42/0x80 [nd_pmem] [] bdev_read_page+0x50/0x60 [] do_mpage_readpage+0x510/0x770 [] ? I_BDEV+0x20/0x20 [] ? lru_cache_add+0x1c/0x50 [] mpage_readpages+0x107/0x170 [] ? I_BDEV+0x20/0x20 [] ? I_BDEV+0x20/0x20 [] blkdev_readpages+0x1d/0x20 [] __do_page_cache_readahead+0x28f/0x310 [] ? __do_page_cache_readahead+0x169/0x310 [] ? 
pagecache_get_page+0x2d/0x1d0 [] filemap_fault+0x396/0x530 [] __do_fault+0x4e/0xf0 [] handle_mm_fault+0x11bd/0x1b50 Cc: Cc: Jens Axboe Cc: Alexander Viro Reported-by: kbuild test robot Acked-by: Matthew Wilcox [willy: symmetry fixups] Signed-off-by: Dan Williams --- block/blk.h | 2 -- fs/block_dev.c | 18 ++++++++++++++++-- include/linux/blkdev.h | 2 ++ 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/blk.h b/block/blk.h index da722eb786df..c43926d3d74d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -72,8 +72,6 @@ void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); -int blk_queue_enter(struct request_queue *q, gfp_t gfp); -void blk_queue_exit(struct request_queue *q); void blk_freeze_queue(struct request_queue *q); static inline void blk_queue_enter_live(struct request_queue *q) diff --git a/fs/block_dev.c b/fs/block_dev.c index bb0dfb1c7af1..c25639e907bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, struct page *page) { const struct block_device_operations *ops = bdev->bd_disk->fops; + int result = -EOPNOTSUPP; + if (!ops->rw_page || bdev_get_integrity(bdev)) - return -EOPNOTSUPP; - return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + return result; + + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + blk_queue_exit(bdev->bd_queue); + return result; } EXPORT_SYMBOL_GPL(bdev_read_page); @@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, int result; int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); if (result) end_page_writeback(page); else unlock_page(page); + blk_queue_exit(bdev->bd_queue); return result; } EXPORT_SYMBOL_GPL(bdev_write_page); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fe27f8d91f0..c0d2b7927c1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -794,6 +794,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); +extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3 From 94a58c360a45c066ab5472cfd2bf2a4ba63aa532 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 20 Nov 2015 15:56:48 -0800 Subject: slab.h: sprinkle __assume_aligned attributes The various allocators return aligned memory. Telling the compiler that allows it to generate better code in many cases, for example when the return value is immediately passed to memset(). 
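(As a rough sketch, assuming the __assume_aligned wrapper maps onto the gcc attribute of the same name on recent compilers, the annotation amounts to:

/* Sketch of what the macro expands to; the real wrapper lives in the
 * compiler headers. */
void *__kmalloc(size_t size, gfp_t flags)
	__attribute__((__assume_aligned__(ARCH_KMALLOC_MINALIGN)));

/* This lets the compiler assume, at every call site, that the
 * returned pointer is ARCH_KMALLOC_MINALIGN-byte aligned, so e.g.
 * memset(__kmalloc(64, GFP_KERNEL), 0, 64) can be emitted with
 * aligned stores. */

)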
Some code does become larger, but at least we win twice as much as we lose: $ scripts/bloat-o-meter /tmp/vmlinux vmlinux add/remove: 0/0 grow/shrink: 13/52 up/down: 995/-2140 (-1145) An example of the different (and smaller) code can be seen in mm_alloc(). Before: : 48 8d 78 08 lea 0x8(%rax),%rdi : 48 89 c1 mov %rax,%rcx : 48 89 c2 mov %rax,%rdx : 48 c7 00 00 00 00 00 movq $0x0,(%rax) : 48 c7 80 48 03 00 00 movq $0x0,0x348(%rax) : 00 00 00 00 : 31 c0 xor %eax,%eax : 48 83 e7 f8 and $0xfffffffffffffff8,%rdi : 48 29 f9 sub %rdi,%rcx : 81 c1 50 03 00 00 add $0x350,%ecx : c1 e9 03 shr $0x3,%ecx : f3 48 ab rep stos %rax,%es:(%rdi) After: : 48 89 c2 mov %rax,%rdx : b9 6a 00 00 00 mov $0x6a,%ecx : 31 c0 xor %eax,%eax : 48 89 d7 mov %rdx,%rdi : f3 48 ab rep stos %rax,%es:(%rdi) So gcc's strategy is to do two possibly (but not really, of course) unaligned stores to the first and last word, then do an aligned rep stos covering the middle part with a little overlap. Maybe arches which do not allow unaligned stores gain even more. I don't know if gcc can actually make use of alignments greater than 8 for anything, so one could probably drop the __assume_xyz_alignment macros and just use __assume_aligned(8). The increases in code size are mostly caused by gcc deciding to opencode strlen() using the check-four-bytes-at-a-time trick when it knows the buffer is sufficiently aligned (one function grew by 200 bytes). Now it turns out that many of these strlen() calls showing up were in fact redundant, and they're gone from -next. Applying the two patches to next-20151001 bloat-o-meter instead says add/remove: 0/0 grow/shrink: 6/52 up/down: 244/-2140 (-1896) Signed-off-by: Rasmus Villemoes Acked-by: Christoph Lameter Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7c82e3b307a3..96940772bb92 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -157,6 +157,24 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #endif +/* + * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. + * Intended for arches that get misalignment faults even for 64 bit integer + * aligned buffers. + */ +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + +/* + * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned + * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN + * aligned pointers. 
+ */ +#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) +#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) +#define __assume_page_alignment __assume_aligned(PAGE_SIZE) + /* * Kmalloc array related definitions */ @@ -286,8 +304,8 @@ static __always_inline int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags); -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags); +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment; +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment; void kmem_cache_free(struct kmem_cache *, void *); /* @@ -301,8 +319,8 @@ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node); -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { @@ -316,12 +334,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t); +extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment; #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size); + int node, size_t size) __assume_slab_alignment; #else static __always_inline void * kmem_cache_alloc_node_trace(struct kmem_cache *s, @@ -354,10 +372,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #else static __always_inline void * kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) @@ -482,15 +500,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) return __kmalloc_node(size, flags, node); } -/* - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. - * Intended for arches that get misalignment faults even for 64 bit integer - * aligned buffers. - */ -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - struct memcg_cache_array { struct rcu_head rcu; struct kmem_cache *entries[0]; -- cgit v1.2.3 From 5cf6a51e6062afe7cc507f32f1e5f7e6497ae844 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 20 Nov 2015 15:56:53 -0800 Subject: configfs: allow dynamic group creation This patchset introduces IIO software triggers, offers a way of configuring them via configfs and adds the IIO hrtimer based interrupt source to be used with software triggers. The architecture is now split in 3 parts, to remove all IIO trigger specific parts from IIO configfs core: (1) IIO configfs - creates the root of the IIO configfs subsys. (2) IIO software triggers - software trigger implementation, dynamically creating /config/iio/triggers group. 
(3) IIO hrtimer trigger - is the first interrupt source for software triggers (with sysfs to follow). Each trigger type can implement its own set of attributes. Lockdep seems to be happy with the locking in the configfs patch. This patch (of 5): We don't want to hardcode default groups at subsystem creation time. We export: * configfs_register_group * configfs_unregister_group to allow drivers to programmatically create/destroy groups later, after module init time. This is needed for IIO configfs support. (akpm: the other 4 patches to be merged via the IIO tree) Signed-off-by: Daniel Baluta Suggested-by: Lars-Peter Clausen Reviewed-by: Christoph Hellwig Acked-by: Joel Becker Cc: Hartmut Knaack Cc: Octavian Purdila Cc: Paul Bolle Cc: Adriana Reus Cc: Cristina Opriceana Cc: Peter Meerwald Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/configfs/dir.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/configfs.h | 10 +++++ 2 files changed, 120 insertions(+) (limited to 'include/linux') diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index c81ce7f200a6..a7a1b218f308 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = { .iterate = configfs_readdir, }; +/** + * configfs_register_group - creates a parent-child relation between two groups + * @parent_group: parent group + * @group: child group + * + * link groups, creates dentry for the child and attaches it to the + * parent dentry. + * + * Return: 0 on success, negative errno code on error + */ +int configfs_register_group(struct config_group *parent_group, + struct config_group *group) +{ + struct configfs_subsystem *subsys = parent_group->cg_subsys; + struct dentry *parent; + int ret; + + mutex_lock(&subsys->su_mutex); + link_group(parent_group, group); + mutex_unlock(&subsys->su_mutex); + + parent = parent_group->cg_item.ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + ret = create_default_group(parent_group, group); + if (!ret) { + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); + } + mutex_unlock(&d_inode(parent)->i_mutex); + return ret; +} +EXPORT_SYMBOL(configfs_register_group); + +/** + * configfs_unregister_group() - unregisters a child group from its parent + * @group: parent group to be unregistered + * + * Undoes configfs_register_group() + */ +void configfs_unregister_group(struct config_group *group) +{ + struct configfs_subsystem *subsys = group->cg_subsys; + struct dentry *dentry = group->cg_item.ci_dentry; + struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + spin_lock(&configfs_dirent_lock); + configfs_detach_prep(dentry, NULL); + spin_unlock(&configfs_dirent_lock); + + configfs_detach_group(&group->cg_item); + d_inode(dentry)->i_flags |= S_DEAD; + dont_mount(dentry); + d_delete(dentry); + mutex_unlock(&d_inode(parent)->i_mutex); + + dput(dentry); + + mutex_lock(&subsys->su_mutex); + unlink_group(group); + mutex_unlock(&subsys->su_mutex); +} +EXPORT_SYMBOL(configfs_unregister_group); + +/** + * configfs_register_default_group() - allocates and registers a child group + * @parent_group: parent group + * @name: child group name + * @item_type: child item type description + * + * boilerplate to allocate and register a child group with its parent.
We need + * kzalloc'ed memory because child's default_group is initially empty. + * + * Return: allocated config group or ERR_PTR() on error + */ +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type) +{ + int ret; + struct config_group *group; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + config_group_init_type_name(group, name, item_type); + + ret = configfs_register_group(parent_group, group); + if (ret) { + kfree(group); + return ERR_PTR(ret); + } + return group; +} +EXPORT_SYMBOL(configfs_register_default_group); + +/** + * configfs_unregister_default_group() - unregisters and frees a child group + * @group: the group to act on + */ +void configfs_unregister_default_group(struct config_group *group) +{ + configfs_unregister_group(group); + kfree(group); +} +EXPORT_SYMBOL(configfs_unregister_default_group); + int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index a8a335b7fce0..758a029011b1 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -197,6 +197,16 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro int configfs_register_subsystem(struct configfs_subsystem *subsys); void configfs_unregister_subsystem(struct configfs_subsystem *subsys); +int configfs_register_group(struct config_group *parent_group, + struct config_group *group); +void configfs_unregister_group(struct config_group *group); + +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type); +void configfs_unregister_default_group(struct config_group *group); + /* These functions can sleep and can alloc with GFP_KERNEL */ /* WARNING: These cannot be called underneath configfs callbacks!! */ int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); -- cgit v1.2.3 From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 20 Nov 2015 15:57:21 -0800 Subject: kernel/signal.c: unexport sigsuspend() sigsuspend() is nowhere used except in signal.c itself, so we can mark it static do not pollute the global namespace. But this patch is more than a boring cleanup patch, it fixes a real issue on UserModeLinux. UML has a special console driver to display ttys using xterm, or other terminal emulators, on the host side. Vegard reported that sometimes UML is unable to spawn a xterm and he's facing the following warning: WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0() It turned out that this warning makes absolutely no sense as the UML xterm code calls sigsuspend() on the host side, at least it tries. But as the kernel itself offers a sigsuspend() symbol the linker choose this one instead of the glibc wrapper. Interestingly this code used to work since ever but always blocked signals on the wrong side. Some recent kernel change made the WARN_ON() trigger and uncovered the bug. It is a wonderful example of how much works by chance on computers. 
Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()") Signed-off-by: Richard Weinberger Reported-by: Vegard Nossum Tested-by: Vegard Nossum Acked-by: Oleg Nesterov Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 1 - kernel/signal.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ab1e0392b5ac..92557bbce7e7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -extern int sigsuspend(sigset_t *); struct sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION diff --git a/kernel/signal.c b/kernel/signal.c index c0b01fe24bbd..f3f1f7a972fd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3503,7 +3503,7 @@ SYSCALL_DEFINE0(pause) #endif -int sigsuspend(sigset_t *set) +static int sigsuspend(sigset_t *set) { current->saved_sigmask = current->blocked; set_current_blocked(set); -- cgit v1.2.3 From 21fa8442799945beaca074cb5bcf7cfe24969d59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Nov 2015 15:57:32 -0800 Subject: mm: fix up sparse warning in gfpflags_allow_blocking sparse says: include/linux/gfp.h:274:26: warning: incorrect type in return expression (different base types) include/linux/gfp.h:274:26: expected bool include/linux/gfp.h:274:26: got restricted gfp_t ...add a forced cast to silence the warning. Signed-off-by: Jeff Layton Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6523109e136d..8942af0813e3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -271,7 +271,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { - return gfp_flags & __GFP_DIRECT_RECLAIM; + return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM -- cgit v1.2.3 From 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 08:52:31 -0500 Subject: tty: audit: Fix audit source The data to audit/record is in the 'from' buffer (i.e., the input read buffer).
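For context, here is the helper being fixed (lightly simplified from drivers/tty/n_tty.c), with the two buffers annotated:

static inline int tty_copy_to_user(struct tty_struct *tty,
				   void __user *to, const void *from,
				   unsigned long n)
{
	struct n_tty_data *ldata = tty->disc_data;

	/* 'from' is the kernel-side read buffer, 'to' the userspace
	 * destination; the audit trail must record the kernel data,
	 * so passing 'to' here handed a __user pointer to the audit
	 * code instead of the bytes being read. */
	tty_audit_add_data(tty, from, n, ldata->icanon);
	return copy_to_user(to, from, n);
}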
Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode") Cc: stable # 4.1+ Cc: Miloslav Trmač Signed-off-by: Peter Hurley Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 2 +- drivers/tty/tty_audit.c | 2 +- include/linux/tty.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 13844261cd5f..ed776149261e 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty, { struct n_tty_data *ldata = tty->disc_data; - tty_audit_add_data(tty, to, n, ldata->icanon); + tty_audit_add_data(tty, from, n, ldata->icanon); return copy_to_user(to, from, n); } diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 90ca082935f6..3d245cd3d8e6 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, * * Audit @data of @size from @tty, if necessary. */ -void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon) { struct tty_audit_buf *buf; diff --git a/include/linux/tty.h b/include/linux/tty.h index 5b04b0a5375b..5e31f1b99037 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -607,7 +607,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -615,8 +615,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) -- cgit v1.2.3 From 865762a8119e74b5f0e236d2d8eaaf8be9292a06 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:58 -0800 Subject: slab/slub: adjust kmem_cache_alloc_bulk API Adjust the kmem_cache_alloc_bulk API before we have any real users. Adjust the API to return type 'int' instead of the previous 'bool'. This is done to allow future extension of the bulk alloc API. A future extension could be to allow SLUB to stop at a page boundary, when specified by a flag, and then return the number of objects. The advantage of this approach is that it would make it easier to have bulk alloc run without local IRQs disabled, with cmpxchg "stealing" the entire c->freelist or page->freelist. To avoid overshooting we would stop processing at a slab-page boundary. Otherwise we would always end up returning some objects at the cost of another cmpxchg. To stay compatible with future users of this API (code linking against an older kernel while using the new flag), we need to return the number of allocated objects with this API change.
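A sketch of a hypothetical caller under the new contract (fill_batch and its parameters are invented for illustration):

/* Returns 0 when all 'nr' objects were allocated, -ENOMEM otherwise. */
static int fill_batch(struct kmem_cache *cache, void **objs, int nr)
{
	int allocated = kmem_cache_alloc_bulk(cache, GFP_KERNEL, nr, objs);

	if (allocated == nr)
		return 0;

	/* Today this means allocated == 0 (all-or-nothing); with the
	 * flag-based extension described above, a short batch would
	 * also land here and be released. */
	kmem_cache_free_bulk(cache, allocated, objs);
	return -ENOMEM;
}

A caller written this way keeps working if a later kernel, given the proposed flag, starts returning partial counts.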
Signed-off-by: Jesper Dangaard Brouer Cc: Vladimir Davydov Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- mm/slab.c | 2 +- mm/slab.h | 2 +- mm/slab_common.c | 6 +++--- mm/slob.c | 2 +- mm/slub.c | 8 ++++---- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 96940772bb92..2037a861e367 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -316,7 +316,7 @@ void kmem_cache_free(struct kmem_cache *, void *); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; diff --git a/mm/slab.c b/mm/slab.c index e0819fa96559..4765c97ce690 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3419,7 +3419,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slab.h b/mm/slab.h index 27492eb678f7..7b6087197997 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -170,7 +170,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, * may be allocated or freed using these operations. */ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_MEMCG_KMEM /* diff --git a/mm/slab_common.c b/mm/slab_common.c index d88e97c10a2e..3c6a86b4ec25 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -112,7 +112,7 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) kmem_cache_free(s, p[i]); } -bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, +int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void **p) { size_t i; @@ -121,10 +121,10 @@ bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void *x = p[i] = kmem_cache_alloc(s, flags); if (!x) { __kmem_cache_free_bulk(s, i, p); - return false; + return 0; } } - return true; + return i; } #ifdef CONFIG_MEMCG_KMEM diff --git a/mm/slob.c b/mm/slob.c index 0d7e5df74d1f..17e8f8cc7c53 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -617,7 +617,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slub.c b/mm/slub.c index 34847044dfe5..46997517406e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2909,8 +2909,8 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) EXPORT_SYMBOL(kmem_cache_free_bulk); /* Note that interrupts must be enabled when calling this function. 
*/ -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) { struct kmem_cache_cpu *c; int i; @@ -2959,12 +2959,12 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, /* memcg and kmem_cache debug support */ slab_post_alloc_hook(s, flags, size, p); - return true; + return i; error: local_irq_enable(); slab_post_alloc_hook(s, flags, i, p); __kmem_cache_free_bulk(s, i, p); - return false; + return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); -- cgit v1.2.3