From a7b8829d242b1a58107e9c02b09e93aec446d55c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 5 Jul 2017 20:30:01 +0200 Subject: iio: accel: st_accel: add SPI-3wire support Add SPI Serial Interface Mode (SIM) register information in st_sensor_settings look up table to support devices (like LSM303AGR accel sensor) that allow just SPI-3wire communication mode. SIM mode has to be configured before any other operation since it is not enabled by default and the driver is not able to read without that configuration. Whilst a fairly substantial patch, the actual logic is simple and it is better to have the generic fix than a band aid. Fixes: ddc05fa28606 (iio: st-accel: add support for lsm303agr accel) Signed-off-by: Lorenzo Bianconi Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 7 +++++++ include/linux/platform_data/st_sensors_pdata.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 497f2b3a5a62..97f1b465d04f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -105,6 +105,11 @@ struct st_sensor_fullscale { struct st_sensor_fullscale_avl fs_avl[ST_SENSORS_FULLSCALE_AVL_MAX]; }; +struct st_sensor_sim { + u8 addr; + u8 value; +}; + /** * struct st_sensor_bdu - ST sensor device block data update * @addr: address of the register. @@ -197,6 +202,7 @@ struct st_sensor_transfer_function { * @bdu: Block data update register. * @das: Data Alignment Selection register. * @drdy_irq: Data ready register of the sensor. + * @sim: SPI serial interface mode register of the sensor. * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read. * @bootime: samples to discard when sensor passing from power-down to power-up. */ @@ -213,6 +219,7 @@ struct st_sensor_settings { struct st_sensor_bdu bdu; struct st_sensor_das das; struct st_sensor_data_ready_irq drdy_irq; + struct st_sensor_sim sim; bool multi_read_bit; unsigned int bootime; }; diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index 79b0e4cdb814..f8274b0c6888 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -17,10 +17,12 @@ * Available only for accelerometer and pressure sensors. * Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet). * @open_drain: set the interrupt line to be open drain if possible. + * @spi_3wire: enable spi-3wire mode. */ struct st_sensors_platform_data { u8 drdy_int_pin; bool open_drain; + bool spi_3wire; }; #endif /* ST_SENSORS_PDATA_H */ -- cgit v1.2.3 From 7cfdfdc82a467c78af9132cb9c98e84415df34bc Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 10 Jul 2017 14:45:20 +0900 Subject: libata: Cleanup ata_read_log_page() The warning message "READ LOG DMA EXT failed, trying unqueued" in ata_read_log_page() as well as the macro name ATA_HORKAGE_NO_NCQ_LOG are confusing: the command READ LOG DMA EXT is not a queued NCQ command unless it is encapsulated in a RECEIVE FPDMA QUEUED command. From ACS-4 READ LOG DMA EXT description: "The device processes the READ LOG DMA EXT command in the NCQ feature set environment (see 4.13.6) if the READ LOG DMA EXT command is encapsulated in a RECEIVE FPDMA QUEUED command (see 7.30) with the inputs encapsulated as shown in 7.23.6."
To avoid confusion, fix the warning message to mention switching to PIO and not "unqueued" and rename the macro ATA_HORKAGE_NO_NCQ_LOG to ATA_HORKAGE_NO_DMA_LOG. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 6 +++--- include/linux/libata.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8453f9a4682f..fa7dd4394c02 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2083,7 +2083,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, retry: ata_tf_init(dev, &tf); if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) && - !(dev->horkage & ATA_HORKAGE_NO_NCQ_LOG)) { + !(dev->horkage & ATA_HORKAGE_NO_DMA_LOG)) { tf.command = ATA_CMD_READ_LOG_DMA_EXT; tf.protocol = ATA_PROT_DMA; dma = true; @@ -2102,8 +2102,8 @@ retry: buf, sectors * ATA_SECT_SIZE, 0); if (err_mask && dma) { - dev->horkage |= ATA_HORKAGE_NO_NCQ_LOG; - ata_dev_warn(dev, "READ LOG DMA EXT failed, trying unqueued\n"); + dev->horkage |= ATA_HORKAGE_NO_DMA_LOG; + ata_dev_warn(dev, "READ LOG DMA EXT failed, trying PIO\n"); goto retry; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 55de3da58b1c..931c32f1f18d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -435,7 +435,7 @@ enum { ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ - ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ + ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ -- cgit v1.2.3 From 36acbd9e8377c27570b887e2332a5e1f0b140e16 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Fri, 14 Jul 2017 18:16:44 +0530 Subject: mmc: host: omap_hsmmc: remove unused platform callbacks Remove unused callbacks in the omap_hsmmc_platform_data structure. Signed-off-by: Faiz Abbas Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap_hsmmc.c | 11 ----------- include/linux/platform_data/hsmmc-omap.h | 10 ---------- 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 7c12f3715676..04ff3c97a535 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -356,9 +356,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on, struct mmc_host *mmc = host->mmc; int ret = 0; - if (mmc_pdata(host)->set_power) - return mmc_pdata(host)->set_power(host->dev, power_on, vdd); - /* * If we don't see a Vcc regulator, assume it's a fixed * voltage always-on regulator.
@@ -366,9 +363,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on, if (IS_ERR(mmc->supply.vmmc)) return 0; - if (mmc_pdata(host)->before_set_reg) - mmc_pdata(host)->before_set_reg(host->dev, power_on, vdd); - ret = omap_hsmmc_set_pbias(host, false, 0); if (ret) return ret; @@ -400,9 +394,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on, return ret; } - if (mmc_pdata(host)->after_set_reg) - mmc_pdata(host)->after_set_reg(host->dev, power_on, vdd); - return 0; err_set_voltage: @@ -469,8 +460,6 @@ static int omap_hsmmc_reg_get(struct omap_hsmmc_host *host) int ret; struct mmc_host *mmc = host->mmc; - if (mmc_pdata(host)->set_power) - return 0; ret = mmc_regulator_get_supply(mmc); if (ret == -EPROBE_DEFER) diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h index 8e981be2e2c2..0ff1e0dba720 100644 --- a/include/linux/platform_data/hsmmc-omap.h +++ b/include/linux/platform_data/hsmmc-omap.h @@ -55,9 +55,6 @@ struct omap_hsmmc_platform_data { u32 caps; /* Used for the MMC driver on 2430 and later */ u32 pm_caps; /* PM capabilities of the mmc */ - /* use the internal clock */ - unsigned internal_clock:1; - /* nonremovable e.g. eMMC */ unsigned nonremovable:1; @@ -73,13 +70,6 @@ struct omap_hsmmc_platform_data { int gpio_cd; /* gpio (card detect) */ int gpio_cod; /* gpio (cover detect) */ int gpio_wp; /* gpio (write protect) */ - - int (*set_power)(struct device *dev, int power_on, int vdd); - void (*remux)(struct device *dev, int power_on); - /* Call back before enabling / disabling regulators */ - void (*before_set_reg)(struct device *dev, int power_on, int vdd); - /* Call back after enabling / disabling regulators */ - void (*after_set_reg)(struct device *dev, int power_on, int vdd); /* if we have special card, init it using this callback */ void (*init_card)(struct mmc_card *card); -- cgit v1.2.3 From c641e5b207ed7dfaa692820aeb5b6dde3de3e9b0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Jul 2017 17:55:29 +0200 Subject: ASoC: fix pcm-creation regression This reverts commit 99b04f4c4051 ("ASoC: add Component level pcm_new/pcm_free"), which started calling the pcm_new callback for every component in a *card* when creating a new pcm, something which does not seem to make any sense. This specifically led to memory leaks in systems with more than one platform component and where DMA memory is allocated in the platform-driver callback. For example, when both mcasp devices are being used on an am335x board, DMA memory would be allocated twice for every DAI link during probe. When CONFIG_SND_VERBOSE_PROCFS was set, this fortunately also led to warnings such as: WARNING: CPU: 0 PID: 565 at ../fs/proc/generic.c:346 proc_register+0x110/0x154 proc_dir_entry 'sub0/prealloc' already registered Since there seem to be no users of the new component callbacks, and the current implementation introduced a regression, let's revert the offending commit for now.
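As a sketch of the double allocation (illustrative only, not code from this patch): with the component-level callbacks, soc_new_pcm() walked every component registered on the card, so on a card with two platform components any pcm_new() implementation that preallocates DMA buffers ran twice per pcm:

	/* The per-card walk being reverted below: it invokes pcm_new() for
	 * BOTH platform components, so a pcm_new() that calls e.g.
	 * snd_pcm_lib_preallocate_pages_for_all() allocates DMA memory (and
	 * registers the "prealloc" procfs entries) twice for the same pcm.
	 */
	list_for_each_entry(component, &rtd->card->component_dev_list, card_list) {
		if (component->pcm_new)
			component->pcm_new(rtd);
	}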
Fixes: 99b04f4c4051 ("ASoC: add Component level pcm_new/pcm_free") Signed-off-by: Johan Hovold Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Mark Brown Cc: stable # 4.10 --- include/sound/soc.h | 6 ------ sound/soc/soc-core.c | 25 ------------------------- sound/soc/soc-pcm.c | 32 +++++++++----------------------- 3 files changed, 9 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 9c94b97c17f8..c4a8b1947566 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -795,10 +795,6 @@ struct snd_soc_component_driver { int (*suspend)(struct snd_soc_component *); int (*resume)(struct snd_soc_component *); - /* pcm creation and destruction */ - int (*pcm_new)(struct snd_soc_pcm_runtime *); - void (*pcm_free)(struct snd_pcm *); - /* DT */ int (*of_xlate_dai_name)(struct snd_soc_component *component, struct of_phandle_args *args, @@ -874,8 +870,6 @@ struct snd_soc_component { void (*remove)(struct snd_soc_component *); int (*suspend)(struct snd_soc_component *); int (*resume)(struct snd_soc_component *); - int (*pcm_new)(struct snd_soc_pcm_runtime *); - void (*pcm_free)(struct snd_pcm *); /* machine specific init */ int (*init)(struct snd_soc_component *component); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 921622a01944..c240e13ba057 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3171,8 +3171,6 @@ static int snd_soc_component_initialize(struct snd_soc_component *component, component->remove = component->driver->remove; component->suspend = component->driver->suspend; component->resume = component->driver->resume; - component->pcm_new = component->driver->pcm_new; - component->pcm_free = component->driver->pcm_free; dapm = &component->dapm; dapm->dev = dev; @@ -3360,25 +3358,6 @@ static void snd_soc_platform_drv_remove(struct snd_soc_component *component) platform->driver->remove(platform); } -static int snd_soc_platform_drv_pcm_new(struct snd_soc_pcm_runtime *rtd) -{ - struct snd_soc_platform *platform = rtd->platform; - - if (platform->driver->pcm_new) - return platform->driver->pcm_new(rtd); - else - return 0; -} - -static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm) -{ - struct snd_soc_pcm_runtime *rtd = pcm->private_data; - struct snd_soc_platform *platform = rtd->platform; - - if (platform->driver->pcm_free) - platform->driver->pcm_free(pcm); -} - /** * snd_soc_add_platform - Add a platform to the ASoC core * @dev: The parent device for the platform @@ -3402,10 +3381,6 @@ int snd_soc_add_platform(struct device *dev, struct snd_soc_platform *platform, platform->component.probe = snd_soc_platform_drv_probe; if (platform_drv->remove) platform->component.remove = snd_soc_platform_drv_remove; - if (platform_drv->pcm_new) - platform->component.pcm_new = snd_soc_platform_drv_pcm_new; - if (platform_drv->pcm_free) - platform->component.pcm_free = snd_soc_platform_drv_pcm_free; #ifdef CONFIG_DEBUG_FS platform->component.debugfs_prefix = "platform"; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index dcc5ece08668..553f7a76743c 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2628,25 +2628,12 @@ static int dpcm_fe_dai_close(struct snd_pcm_substream *fe_substream) return ret; } -static void soc_pcm_free(struct snd_pcm *pcm) -{ - struct snd_soc_pcm_runtime *rtd = pcm->private_data; - struct snd_soc_component *component; - - list_for_each_entry(component, &rtd->card->component_dev_list, - card_list) { - if (component->pcm_free) - 
component->pcm_free(pcm); - } -} - /* create a new pcm */ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num) { struct snd_soc_platform *platform = rtd->platform; struct snd_soc_dai *codec_dai; struct snd_soc_dai *cpu_dai = rtd->cpu_dai; - struct snd_soc_component *component; struct snd_pcm *pcm; char new_name[64]; int ret = 0, playback = 0, capture = 0; @@ -2756,18 +2743,17 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num) if (capture) snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &rtd->ops); - list_for_each_entry(component, &rtd->card->component_dev_list, card_list) { - if (component->pcm_new) { - ret = component->pcm_new(rtd); - if (ret < 0) { - dev_err(component->dev, - "ASoC: pcm constructor failed: %d\n", - ret); - return ret; - } + if (platform->driver->pcm_new) { + ret = platform->driver->pcm_new(rtd); + if (ret < 0) { + dev_err(platform->dev, + "ASoC: pcm constructor failed: %d\n", + ret); + return ret; } } - pcm->private_free = soc_pcm_free; + + pcm->private_free = platform->driver->pcm_free; out: dev_info(rtd->card->dev, "%s <-> %s mapping ok\n", (rtd->num_codecs > 1) ? "multicodec" : rtd->codec_dai->name, -- cgit v1.2.3 From 43fc509c3efb5c973991ee24c449ab2a0d71dd1e Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 20 Jul 2017 11:19:58 +0100 Subject: dma-coherent: introduce interface for default DMA pool Christoph noticed [1] that the default DMA pool in its current form overloads the DMA coherent infrastructure. In reply, Robin suggested [2] splitting the per-device vs. global pool interfaces, so that allocation/release from the default DMA pool is driven by the dma ops implementation. This patch implements Robin's idea and provides an interface to allocate/release/mmap the default (aka global) DMA pool. To make it clear that the existing *_from_coherent routines work on a per-device pool, rename them to *_from_dev_coherent.
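For illustration, a sketch of how an arch dma ops implementation might drive the split interfaces (hypothetical caller; only the helper signatures shown in the diff below are from this patch):

	static void *arch_dma_alloc(struct device *dev, size_t size,
				    dma_addr_t *dma_handle, gfp_t gfp,
				    unsigned long attrs)
	{
		void *ret;

		/* per-device coherent pool, as before */
		if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &ret))
			return ret;

		/* the default (global) pool is now an explicit, separate step */
		ret = dma_alloc_from_global_coherent(size, dma_handle);
		if (ret)
			return ret;

		/* ... otherwise fall back to allocating from generic memory ... */
		return NULL;
	}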
[1] https://lkml.org/lkml/2017/7/7/370 [2] https://lkml.org/lkml/2017/7/7/431 Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Suggested-by: Robin Murphy Tested-by: Andras Szemzo Reviewed-by: Robin Murphy Signed-off-by: Vladimir Murzin Signed-off-by: Christoph Hellwig --- arch/arc/mm/dma.c | 2 +- arch/arm/mm/dma-mapping.c | 2 +- arch/arm64/mm/dma-mapping.c | 4 +- arch/mips/mm/dma-default.c | 2 +- drivers/base/dma-coherent.c | 164 ++++++++++++++++++++++++++++---------------- drivers/base/dma-mapping.c | 2 +- include/linux/dma-mapping.h | 40 ++++++++--- 7 files changed, 144 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a07e6ecafbd..71d3efff99d3 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -117,7 +117,7 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e7380bafbfa6..fcf1473d6fed 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -851,7 +851,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_pfn(dev, dma_addr); unsigned long off = vma->vm_pgoff; - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e90cd1db42a8..f27d4dd04384 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -329,7 +329,7 @@ static int __swiotlb_mmap(struct device *dev, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; return __swiotlb_mmap_pfn(vma, pfn, size); @@ -706,7 +706,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index e08598c70b3e..8e78251eccc2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -232,7 +232,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma, else vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index 2ae24c28e70c..1c152aed6b82 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c @@ -25,7 +25,7 @@ static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *de { if (dev && dev->dma_mem) return dev->dma_mem; - return dma_coherent_default_memory; + return NULL; } static inline dma_addr_t dma_get_device_base(struct device *dev, 
@@ -165,34 +165,15 @@ void *dma_mark_declared_memory_occupied(struct device *dev, } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); -/** - * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area - * - * @dev: device from which we allocate memory - * @size: size of requested memory area - * @dma_handle: This will be filled with the correct dma handle - * @ret: This pointer will be filled with the virtual address - * to allocated area. - * - * This function should be only called from per-arch dma_alloc_coherent() - * to support allocation from per-device coherent memory pools. - * - * Returns 0 if dma_alloc_coherent should continue with allocating from - * generic memory areas, or !0 if dma_alloc_coherent should return @ret. - */ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) +static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, + ssize_t size, dma_addr_t *dma_handle) { - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); int order = get_order(size); unsigned long flags; int pageno; int dma_memory_map; + void *ret; - if (!mem) - return 0; - - *ret = NULL; spin_lock_irqsave(&mem->spinlock, flags); if (unlikely(size > (mem->size << PAGE_SHIFT))) @@ -203,21 +184,50 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size, goto err; /* - * Memory was found in the per-device area. + * Memory was found in the coherent area. */ - *dma_handle = dma_get_device_base(dev, mem) + (pageno << PAGE_SHIFT); - *ret = mem->virt_base + (pageno << PAGE_SHIFT); + *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); + ret = mem->virt_base + (pageno << PAGE_SHIFT); dma_memory_map = (mem->flags & DMA_MEMORY_MAP); spin_unlock_irqrestore(&mem->spinlock, flags); if (dma_memory_map) - memset(*ret, 0, size); + memset(ret, 0, size); else - memset_io(*ret, 0, size); + memset_io(ret, 0, size); - return 1; + return ret; err: spin_unlock_irqrestore(&mem->spinlock, flags); + return NULL; +} + +/** + * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool + * @dev: device from which we allocate memory + * @size: size of requested memory area + * @dma_handle: This will be filled with the correct dma handle + * @ret: This pointer will be filled with the virtual address + * to allocated area. + * + * This function should be only called from per-arch dma_alloc_coherent() + * to support allocation from per-device coherent memory pools. + * + * Returns 0 if dma_alloc_coherent should continue with allocating from + * generic memory areas, or !0 if dma_alloc_coherent should return @ret. 
+ */ +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + if (!mem) + return 0; + + *ret = __dma_alloc_from_coherent(mem, size, dma_handle); + if (*ret) + return 1; + /* * In the case where the allocation can not be satisfied from the * per-device area, try to fall back to generic memory if the @@ -225,25 +235,20 @@ err: */ return mem->flags & DMA_MEMORY_EXCLUSIVE; } -EXPORT_SYMBOL(dma_alloc_from_coherent); +EXPORT_SYMBOL(dma_alloc_from_dev_coherent); -/** - * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool - * @dev: device from which the memory was allocated - * @order: the order of pages allocated - * @vaddr: virtual address of allocated pages - * - * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, releases that memory. - * - * Returns 1 if we correctly released the memory, or 0 if - * dma_release_coherent() should proceed with releasing memory from - * generic pools. - */ -int dma_release_from_coherent(struct device *dev, int order, void *vaddr) +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle) { - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + if (!dma_coherent_default_memory) + return NULL; + + return __dma_alloc_from_coherent(dma_coherent_default_memory, size, + dma_handle); +} +static int __dma_release_from_coherent(struct dma_coherent_mem *mem, + int order, void *vaddr) +{ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; @@ -256,28 +261,39 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) } return 0; } -EXPORT_SYMBOL(dma_release_from_coherent); /** - * dma_mmap_from_coherent() - try to mmap the memory allocated from - * per-device coherent memory pool to userspace + * dma_release_from_dev_coherent() - free memory to device coherent memory pool * @dev: device from which the memory was allocated - * @vma: vm_area for the userspace memory - * @vaddr: cpu address returned by dma_alloc_from_coherent - * @size: size of the memory buffer allocated by dma_alloc_from_coherent - * @ret: result from remap_pfn_range() + * @order: the order of pages allocated + * @vaddr: virtual address of allocated pages * * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, maps that memory to the provided vma. + * coherent memory pool and if so, releases that memory. * - * Returns 1 if we correctly mapped the memory, or 0 if the caller should - * proceed with mapping memory from generic pools. + * Returns 1 if we correctly released the memory, or 0 if the caller should + * proceed with releasing memory from generic pools. 
*/ -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, - void *vaddr, size_t size, int *ret) +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr) { struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + return __dma_release_from_coherent(mem, order, vaddr); +} +EXPORT_SYMBOL(dma_release_from_dev_coherent); + +int dma_release_from_global_coherent(int order, void *vaddr) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_release_from_coherent(dma_coherent_default_memory, order, + vaddr); +} + +static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, + struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) +{ if (mem && vaddr >= mem->virt_base && vaddr + size <= (mem->virt_base + (mem->size << PAGE_SHIFT))) { unsigned long off = vma->vm_pgoff; @@ -296,7 +312,39 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, } return 0; } -EXPORT_SYMBOL(dma_mmap_from_coherent); + +/** + * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool + * @dev: device from which the memory was allocated + * @vma: vm_area for the userspace memory + * @vaddr: cpu address returned by dma_alloc_from_dev_coherent + * @size: size of the memory buffer allocated + * @ret: result from remap_pfn_range() + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, maps that memory to the provided vma. + * + * Returns 1 if we correctly mapped the memory, or 0 if the caller should + * proceed with mapping memory from generic pools. + */ +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, + void *vaddr, size_t size, int *ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); +} +EXPORT_SYMBOL(dma_mmap_from_dev_coherent); + +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, + size_t size, int *ret) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_mmap_from_coherent(dma_coherent_default_memory, vma, + vaddr, size, ret); +} /* * Support for reserved memory regions defined in device tree diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 5096755d185e..b555ff9dd8fc 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -235,7 +235,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 843ab866e0f4..03c0196a6f24 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -157,16 +157,40 @@ static inline int is_device_dma_capable(struct device *dev) * These three functions are only for dma allocator. * Don't use them in device drivers. 
*/ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); -int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, size_t size, int *ret); + +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle); +int dma_release_from_global_coherent(int order, void *vaddr); +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, + size_t size, int *ret); + #else -#define dma_alloc_from_coherent(dev, size, handle, ret) (0) -#define dma_release_from_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_coherent(dev, vma, vaddr, order, ret) (0) +#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) +#define dma_release_from_dev_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) + +static inline void *dma_alloc_from_global_coherent(ssize_t size, + dma_addr_t *dma_handle) +{ + return NULL; +} + +static inline int dma_release_from_global_coherent(int order, void *vaddr) +{ + return 0; +} + +static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, + void *cpu_addr, size_t size, + int *ret) +{ + return 0; +} #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ #ifdef CONFIG_HAS_DMA @@ -481,7 +505,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, BUG_ON(!ops); - if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; if (!arch_dma_alloc_attrs(&dev, &flag)) @@ -503,7 +527,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, BUG_ON(!ops); WARN_ON(irqs_disabled()); - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) return; if (!ops->free || !cpu_addr) -- cgit v1.2.3 From 832e4c83abc5ec25af77db6c8a0f36d78f1cf825 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 09:16:24 +0200 Subject: uuid: remove uuid_be Everything uses uuid_t now. 
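For any remaining out-of-tree users the conversion is mechanical; a hedged sketch (uuid_gen() and uuid_is_null() are the existing uuid_t helpers):

	uuid_t u;			/* was: uuid_be u; */

	uuid_gen(&u);			/* was: uuid_be_gen(&u); */
	if (uuid_is_null(&u))		/* was: !uuid_be_cmp(u, NULL_UUID_BE) */
		return -EINVAL;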
Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- include/linux/uuid.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 2251e1925ea4..33b0bdbb613c 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -84,26 +84,12 @@ int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); /* backwards compatibility, don't use in new code */ -typedef uuid_t uuid_be; -#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ - UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) -#define NULL_UUID_BE \ - UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) - #define uuid_le_gen(u) guid_gen(u) -#define uuid_be_gen(u) uuid_gen(u) #define uuid_le_to_bin(guid, u) guid_parse(guid, u) -#define uuid_be_to_bin(uuid, u) uuid_parse(uuid, u) static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) { return memcmp(&u1, &u2, sizeof(guid_t)); } -static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_t)); -} - #endif -- cgit v1.2.3 From a0c2d9c1de0f6be33fe817069b06663277153c20 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 21 Jul 2017 16:51:23 -0600 Subject: ACPI: NUMA: add missing include in acpi_numa.h Right now, if a file includes acpi_numa.h and doesn't happen to include linux/numa.h before it, it gets the following warning: ./include/acpi/acpi_numa.h:9:5: warning: "MAX_NUMNODES" is not defined [-Wundef] #if MAX_NUMNODES > 256 ^~~~~~~~~~~~ Signed-off-by: Ross Zwisler Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_numa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h index d4b72944ccda..1e3a74f94131 100644 --- a/include/acpi/acpi_numa.h +++ b/include/acpi/acpi_numa.h @@ -3,6 +3,7 @@ #ifdef CONFIG_ACPI_NUMA #include <linux/kernel.h> +#include <linux/numa.h> /* Proximity bitmap length */ #if MAX_NUMNODES > 256 -- cgit v1.2.3 From 6c423f5751b9f68bfe7c7545519d4c7159f93e1b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 24 Jul 2017 13:58:00 -0600 Subject: sched/wait: Clean up some documentation warnings A couple of kerneldoc comments in <linux/wait.h> had incorrect names for macro parameters, with this unsightly result: ./include/linux/wait.h:555: warning: No description found for parameter 'wq' ./include/linux/wait.h:555: warning: Excess function parameter 'wq_head' description in 'wait_event_interruptible_hrtimeout' ./include/linux/wait.h:759: warning: No description found for parameter 'wq_head' ./include/linux/wait.h:759: warning: Excess function parameter 'wq' description in 'wait_event_killable' Correct the comments and kill the warnings.
Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20170724135800.769c4042@lwn.net Signed-off-by: Ingo Molnar --- include/linux/wait.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index b289c96151ee..5b74e36c0ca8 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -529,13 +529,13 @@ do { \ /** * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses - * @wq_head: the waitqueue to wait on + * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, as a ktime_t * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq_head is woken up. + * The @condition is checked each time the waitqueue @wq is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -735,12 +735,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); /** * wait_event_killable - sleep until a condition gets true - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_KILLABLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. + * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. -- cgit v1.2.3 From d9f89b4e9290e46cd9b273e9ad0bff0f93e86fae Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sat, 1 Jul 2017 18:26:54 +0200 Subject: KVM: arm/arm64: PMU: Fix overflow interrupt injection kvm_pmu_overflow_set() is called from perf's interrupt handler, making the call of kvm_vgic_inject_irq() from it (introduced with "KVM: arm/arm64: PMU: remove request-less vcpu kick") a really bad idea, as it's quite easy to try and retake a lock that the interrupted context is already holding. The fix is to use a vcpu kick, leaving the interrupt injection to kvm_pmu_sync_hwstate(), like it was doing before the refactoring. We don't just revert, though, because before the refactoring the kick was request-less, leaving the vcpu exposed to the request-less vcpu kick race, and also because the kick was used unnecessarily from register access handlers.
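The resulting overflow handler (see the virt/kvm/arm/pmu.c hunks below) follows the standard pattern for signalling from interrupt context: record the state, then request and kick instead of injecting directly:

	/* perf IRQ context: must not take vgic locks via kvm_vgic_inject_irq() */
	vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
		kvm_vcpu_kick(vcpu);	/* injection happens in kvm_pmu_sync_hwstate() */
	}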
Reviewed-by: Christoffer Dall Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- include/kvm/arm_pmu.h | 2 -- virt/kvm/arm/pmu.c | 43 +++++++++++++++---------------------------- 3 files changed, 16 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 77862881ae86..2e070d3baf9f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - kvm_pmu_overflow_set(vcpu, p->regval & mask); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); else /* accessing PMOVSCLR_EL0 */ vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index f6e030617467..f87fe20fcb05 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -48,7 +48,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val); -void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu); bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu); @@ -86,7 +85,6 @@ static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {} -static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {} static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index fc8a723ff387..8a9c42366db7 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -203,11 +203,15 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) return reg; } -static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) +static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow = !!kvm_pmu_overflow_status(vcpu); + bool overflow; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return; + overflow = !!kvm_pmu_overflow_status(vcpu); if (pmu->irq_level == overflow) return; @@ -215,33 +219,11 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) if (likely(irqchip_in_kernel(vcpu->kvm))) { int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow, - &vcpu->arch.pmu); + pmu->irq_num, overflow, pmu); WARN_ON(ret); } } -/** - * kvm_pmu_overflow_set - set PMU overflow interrupt - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMOVSSET register - */ -void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) -{ - if (val == 0) - return; - - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; - kvm_pmu_check_overflow(vcpu); -} - -static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) -{ - if (!kvm_arm_pmu_v3_ready(vcpu)) - return; - kvm_pmu_check_overflow(vcpu); -} - bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; @@ -303,7 +285,7 @@ static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) } /** - * When perf event 
overflows, call kvm_pmu_overflow_set to set overflow status. + * When the perf event overflows, set the overflow status and inform the vcpu. */ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct perf_sample_data *data, @@ -313,7 +295,12 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; - kvm_pmu_overflow_set(vcpu, BIT(idx)); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + + if (kvm_pmu_overflow_status(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } } /** @@ -341,7 +328,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) reg = lower_32_bits(reg); vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; if (!reg) - kvm_pmu_overflow_set(vcpu, BIT(i)); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } -- cgit v1.2.3 From 2fd4167fadd1360ab015e4f0e88e51843e49556c Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 12 Jul 2017 10:58:19 -0600 Subject: nvme: fabrics commands should use the fctype field for data direction Fabrics commands with opcode 0x7F use the fctype field to indicate data direction. Signed-off-by: Jon Derrick Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Fixes: eb793e2c ("nvme.h: add NVMe over Fabrics definitions") --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bc74da018bdc..25d8225dbd04 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1006,7 +1006,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd) * Why can't we simply have a Fabrics In and Fabrics out command? */ if (unlikely(cmd->common.opcode == nvme_fabrics_command)) - return cmd->fabrics.opcode & 1; + return cmd->fabrics.fctype & 1; return cmd->common.opcode & 1; } -- cgit v1.2.3 From 9c5358e15ca12ed3dc3b1e51671dee5d155de8e0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 17 Jul 2017 13:59:39 -0700 Subject: nvme-fc: revise TRADDR parsing The FC-NVME spec hasn't locked down on the format string for TRADDR. Currently the spec is lobbying for "nn-<16hexdigits>:pn-<16hexdigits>" where the wwn's are hex values but not prefixed by 0x. Most implementations so far expect a string format of "nn-0x<16hexdigits>:pn-0x<16hexdigits>" to be used. The transport uses the match_u64 parser which requires a leading 0x prefix to set the base properly. If it's not there, a match will either fail or return a base 10 value. The resolution in T11 is pushing out. Therefore, to fix things now and to cover any eventuality and any implementations already in the field, this patch adds support for both formats. The change consists of replacing the token matching routine with a routine that validates the fixed string format, and then builds a local copy of the hex name with a 0x prefix before calling the system parser. Note: the same parser routine exists in both the initiator and target transports. Given this is about the only "shared" item, we chose to replicate rather than create an interdependency on some shared code.
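For illustration, the two traddr spellings the new parser accepts (the WWN values here are made up), matching the NVME_FC_TRADDR_* lengths defined in the nvme-fc.h hunk below:

	nn-0x20000090fae0b5f5:pn-0x10000090fae0b5f5	(43 characters, "max"/0x form)
	nn-20000090fae0b5f5:pn-10000090fae0b5f5 	(39 characters, "min" form)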
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 102 ++++++++++++++++++++++++----------------------- drivers/nvme/target/fc.c | 101 ++++++++++++++++++++++++---------------------- include/linux/nvme-fc.h | 19 +++++++++ 3 files changed, 125 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5630ca46c3b5..5c2a08ef08ba 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2805,66 +2805,70 @@ out_fail: return ERR_PTR(ret); } -enum { - FCT_TRADDR_ERR = 0, - FCT_TRADDR_WWNN = 1 << 0, - FCT_TRADDR_WWPN = 1 << 1, -}; struct nvmet_fc_traddr { u64 nn; u64 pn; }; -static const match_table_t traddr_opt_tokens = { - { FCT_TRADDR_WWNN, "nn-%s" }, - { FCT_TRADDR_WWPN, "pn-%s" }, - { FCT_TRADDR_ERR, NULL } -}; - static int -nvme_fc_parse_address(struct nvmet_fc_traddr *traddr, char *buf) +__nvme_fc_parse_u64(substring_t *sstr, u64 *val) { - substring_t args[MAX_OPT_ARGS]; - char *options, *o, *p; - int token, ret = 0; u64 token64; - options = o = kstrdup(buf, GFP_KERNEL); - if (!options) - return -ENOMEM; + if (match_u64(sstr, &token64)) + return -EINVAL; + *val = token64; - while ((p = strsep(&o, ":\n")) != NULL) { - if (!*p) - continue; + return 0; +} - token = match_token(p, traddr_opt_tokens, args); - switch (token) { - case FCT_TRADDR_WWNN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->nn = token64; - break; - case FCT_TRADDR_WWPN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->pn = token64; - break; - default: - pr_warn("unknown traddr token or missing value '%s'\n", - p); - ret = -EINVAL; - goto out; - } - } +/* + * This routine validates and extracts the WWN's from the TRADDR string. + * As kernel parsers need the 0x to determine number base, universally + * build string to parse with 0x prefix before parsing name strings. 
+ */ +static int +nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) +{ + char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; + substring_t wwn = { name, &name[sizeof(name)-1] }; + int nnoffset, pnoffset; + + /* validate it string one of the 2 allowed formats */ + if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && + !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], + "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { + nnoffset = NVME_FC_TRADDR_OXNNLEN; + pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + + NVME_FC_TRADDR_OXNNLEN; + } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && + !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], + "pn-", NVME_FC_TRADDR_NNLEN))) { + nnoffset = NVME_FC_TRADDR_NNLEN; + pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; + } else + goto out_einval; -out: - kfree(options); - return ret; + name[0] = '0'; + name[1] = 'x'; + name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; + + memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) + goto out_einval; + + memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) + goto out_einval; + + return 0; + +out_einval: + pr_warn("%s: bad traddr string\n", __func__); + return -EINVAL; } static struct nvme_ctrl * @@ -2878,11 +2882,11 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) unsigned long flags; int ret; - ret = nvme_fc_parse_address(&raddr, opts->traddr); + ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE); if (ret || !raddr.nn || !raddr.pn) return ERR_PTR(-EINVAL); - ret = nvme_fc_parse_address(&laddr, opts->host_traddr); + ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE); if (ret || !laddr.nn || !laddr.pn) return ERR_PTR(-EINVAL); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index d5801c150b1c..31ca55dfcb1d 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2293,66 +2293,70 @@ nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port, } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort); -enum { - FCT_TRADDR_ERR = 0, - FCT_TRADDR_WWNN = 1 << 0, - FCT_TRADDR_WWPN = 1 << 1, -}; struct nvmet_fc_traddr { u64 nn; u64 pn; }; -static const match_table_t traddr_opt_tokens = { - { FCT_TRADDR_WWNN, "nn-%s" }, - { FCT_TRADDR_WWPN, "pn-%s" }, - { FCT_TRADDR_ERR, NULL } -}; - static int -nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) +__nvme_fc_parse_u64(substring_t *sstr, u64 *val) { - substring_t args[MAX_OPT_ARGS]; - char *options, *o, *p; - int token, ret = 0; u64 token64; - options = o = kstrdup(buf, GFP_KERNEL); - if (!options) - return -ENOMEM; + if (match_u64(sstr, &token64)) + return -EINVAL; + *val = token64; - while ((p = strsep(&o, ":\n")) != NULL) { - if (!*p) - continue; + return 0; +} - token = match_token(p, traddr_opt_tokens, args); - switch (token) { - case FCT_TRADDR_WWNN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->nn = token64; - break; - case FCT_TRADDR_WWPN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->pn = token64; - break; - default: - pr_warn("unknown traddr token or missing value '%s'\n", - p); - ret = -EINVAL; - goto out; - } - } +/* + * This routine validates and extracts the WWN's from the TRADDR string. 
+ * As kernel parsers need the 0x to determine number base, universally + * build string to parse with 0x prefix before parsing name strings. + */ +static int +nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) +{ + char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; + substring_t wwn = { name, &name[sizeof(name)-1] }; + int nnoffset, pnoffset; + + /* validate it string one of the 2 allowed formats */ + if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && + !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], + "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { + nnoffset = NVME_FC_TRADDR_OXNNLEN; + pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + + NVME_FC_TRADDR_OXNNLEN; + } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && + !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], + "pn-", NVME_FC_TRADDR_NNLEN))) { + nnoffset = NVME_FC_TRADDR_NNLEN; + pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; + } else + goto out_einval; + + name[0] = '0'; + name[1] = 'x'; + name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; + + memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) + goto out_einval; + + memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) + goto out_einval; -out: - kfree(options); - return ret; + return 0; + +out_einval: + pr_warn("%s: bad traddr string\n", __func__); + return -EINVAL; } static int @@ -2370,7 +2374,8 @@ nvmet_fc_add_port(struct nvmet_port *port) /* map the traddr address info to a target port */ - ret = nvmet_fc_parse_traddr(&traddr, port->disc_addr.traddr); + ret = nvme_fc_parse_traddr(&traddr, port->disc_addr.traddr, + sizeof(port->disc_addr.traddr)); if (ret) return ret; diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 21c37e39e41a..36cca93a5ff2 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -334,5 +334,24 @@ struct fcnvme_ls_disconnect_acc { #define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */ #define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */ +/* + * TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>" + * the string is allowed to be specified with or without a "0x" prefix + * infront of the <16hexdigits>. Without is considered the "min" string + * and with is considered the "max" string. The hexdigits may be upper + * or lower case. + */ +#define NVME_FC_TRADDR_NNLEN 3 /* "?n-" */ +#define NVME_FC_TRADDR_OXNNLEN 5 /* "?n-0x" */ +#define NVME_FC_TRADDR_HEXNAMELEN 16 +#define NVME_FC_TRADDR_MINLENGTH \ + (2 * (NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1) +#define NVME_FC_TRADDR_MAXLENGTH \ + (2 * (NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1) +#define NVME_FC_TRADDR_MIN_PN_OFFSET \ + (NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1) +#define NVME_FC_TRADDR_MAX_PN_OFFSET \ + (NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1) + #endif /* _NVME_FC_H */ -- cgit v1.2.3 From dce4551cb2adb1ac9a30f8ab5299d614392b3cff Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 25 Jul 2017 17:57:47 +0200 Subject: udp: preserve head state for IP_CMSG_PASSSEC Paul Moore reported a SELinux/IP_PASSSEC regression caused by missing skb->sp at recvmsg() time. We need to preserve the skb head state to process the IP_CMSG_PASSSEC cmsg. 
With this commit we avoid releasing the skb head state in the BH even if a secpath is attached to the current skb, and store the skb status (with/without head states) in the scratch area, so that we can access it at skb deallocation time without incurring cache-miss penalties. This also avoids misusing the skb CB for ipv6 packets, as introduced by the commit 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing"). Also clean up the scratch area helpers implementation a bit, to reduce the code differences between 32- and 64-bit builds. Reported-by: Paul Moore Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue") Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing") Signed-off-by: Paolo Abeni Tested-by: Paul Moore Signed-off-by: David S. Miller --- include/net/udp.h | 33 ++++++++++++++++++++++----------- net/ipv4/udp.c | 38 ++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 972ce4baab6b..56ce2d2a612d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ -#if BITS_PER_LONG == 64 - -/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on - * cold cache lines at recvmsg time. - * skb->len can be stored on 16 bits since the udp header has been already - * validated and pulled. - */ struct udp_dev_scratch { - u32 truesize; + /* skb->truesize and the stateless bit are embedded in a single field; + * do not use a bitfield since the compiler emits better/smaller code + * this way + */ + u32 _tsize_state; + +#if BITS_PER_LONG == 64 + /* len and the bit needed to compute skb_csum_unnecessary + * will be on cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been + * already validated and pulled.
+ */ u16 len; bool is_linear; bool csum_unnecessary; +#endif }; +static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb) +{ + return (struct udp_dev_scratch *)&skb->dev_scratch; +} + +#if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; + return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; + return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; + return udp_skb_scratch(skb)->is_linear; } #else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b057653ceca9..d243772f6efc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1163,34 +1163,32 @@ out: return ret; } -#if BITS_PER_LONG == 64 +#define UDP_SKB_IS_STATELESS 0x80000000 + static void udp_set_dev_scratch(struct sk_buff *skb) { - struct udp_dev_scratch *scratch; + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); - scratch = (struct udp_dev_scratch *)&skb->dev_scratch; - scratch->truesize = skb->truesize; + scratch->_tsize_state = skb->truesize; +#if BITS_PER_LONG == 64 scratch->len = skb->len; scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); +#endif + if (likely(!skb->_skb_refdst)) + scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } static int udp_skb_truesize(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; -} -#else -static void udp_set_dev_scratch(struct sk_buff *skb) -{ - skb->dev_scratch = skb->truesize; + return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; } -static int udp_skb_truesize(struct sk_buff *skb) +static bool udp_skb_has_head_state(struct sk_buff *skb) { - return skb->dev_scratch; + return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS); } -#endif /* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial, @@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) unlock_sock_fast(sk, slow); } - /* we cleared the head states previously only if the skb lacks any IP - * options, see __udp_queue_rcv_skb(). + /* In the more common cases we cleared the head states previously, + * see __udp_queue_rcv_skb(). 
*/ - if (unlikely(IPCB(skb)->opt.optlen > 0)) + if (unlikely(udp_skb_has_head_state(skb))) skb_release_head_state(skb); consume_stateless_skb(skb); } @@ -1784,11 +1782,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* At recvmsg() time we need skb->dst to process IP options-related - * cmsg, elsewhere can we clear all pending head states while they are - * hot in the cache + /* At recvmsg() time we may access skb->dst or skb->sp depending on + * the IP options and the cmsg flags, elsewhere can we clear all + * pending head states while they are hot in the cache */ - if (likely(IPCB(skb)->opt.optlen == 0)) + if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp)) skb_release_head_state(skb); rc = __udp_enqueue_schedule_skb(sk, skb); -- cgit v1.2.3 From 0a94efb5acbb6980d7c9ab604372d93cd507e4d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jul 2017 08:36:15 -0400 Subject: workqueue: implicit ordered attribute should be overridable 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") automatically enabled ordered attribute for unbound workqueues w/ max_active == 1. Because ordered workqueues reject max_active and some attribute changes, this implicit ordered mode broke cases where the user creates an unbound workqueue w/ max_active == 1 and later explicitly changes the related attributes. This patch distinguishes explicit and implicit ordered setting and overrides from attribute changes if implicit. Signed-off-by: Tejun Heo Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") --- include/linux/workqueue.h | 4 +++- kernel/workqueue.c | 13 +++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c102ef65cb64..db6dc9dc0482 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -323,6 +323,7 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ + __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ @@ -422,7 +423,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...)
\ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ + __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index abe4a4971c24..7146ea70a62d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3744,8 +3744,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, return -EINVAL; /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + if (!list_empty(&wq->pwqs)) { + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) + return -EINVAL; + + wq->flags &= ~__WQ_ORDERED; + } ctx = apply_wqattrs_prepare(wq, attrs); if (!ctx) @@ -4129,13 +4133,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) struct pool_workqueue *pwq; /* disallow meddling with max_active for ordered workqueues */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); mutex_lock(&wq->mutex); + wq->flags &= ~__WQ_ORDERED; wq->saved_max_active = max_active; for_each_pwq(pwq, wq) @@ -5263,7 +5268,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) * attributes breaks ordering guarantee. Disallow exposing ordered * workqueues. */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return -EINVAL; wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); -- cgit v1.2.3 From 2eaa38d9fcba5294182268b8d11770cf3fdc9bc9 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 25 Jul 2017 11:08:15 +0200 Subject: net: phy: Remove trailing semicolon in macro definition Commit e5a03bfd873c2 ("phy: Add an mdio_device structure") introduced a spurious trailing semicolon. Remove it. Signed-off-by: Marc Gonzalez Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 2a9567bb8186..0bb5b212ab42 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -830,7 +830,7 @@ static inline int phy_read_status(struct phy_device *phydev) dev_err(&_phydev->mdio.dev, format, ##args) #define phydev_dbg(_phydev, format, args...) \ - dev_dbg(&_phydev->mdio.dev, format, ##args); + dev_dbg(&_phydev->mdio.dev, format, ##args) static inline const char *phydev_name(const struct phy_device *phydev) { -- cgit v1.2.3 From fc1ff45a07abf240aa0c6586c11465c86c8cab8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 15 Jul 2017 09:32:56 -0300 Subject: media: cec-notifier: small improvements Allow calling cec_notifier_set_phys_addr and cec_notifier_set_phys_addr_from_edid with a NULL notifier, in which case these functions do nothing. Add a cec_notifier_phys_addr_invalidate helper function (the notifier equivalent of cec_phys_addr_invalidate). These changes simplify drm CEC driver support. 
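As a minimal sketch of what this enables on the drm side, consider the hotplug path of a hypothetical HDMI encoder driver (the foo_hdmi names are invented for illustration; only the cec_notifier_* calls are from this patch):

    #include <media/cec-notifier.h>

    struct foo_hdmi {
    	struct cec_notifier *cec_notifier;	/* may be NULL: no CEC configured */
    };

    /* With NULL-tolerant notifier helpers, the driver needs no NULL checks
     * on either branch of the hotplug handler.
     */
    static void foo_hdmi_hotplug(struct foo_hdmi *hdmi, const struct edid *edid)
    {
    	if (edid)
    		/* parse the EDID and propagate the new physical address */
    		cec_notifier_set_phys_addr_from_edid(hdmi->cec_notifier, edid);
    	else
    		/* cable unplugged: invalidate the physical address */
    		cec_notifier_phys_addr_invalidate(hdmi->cec_notifier);
    }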
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-notifier.c | 6 ++++++ include/media/cec-notifier.h | 15 +++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/media/cec/cec-notifier.c b/drivers/media/cec/cec-notifier.c index 74dc1c32080e..08b619d0ea1e 100644 --- a/drivers/media/cec/cec-notifier.c +++ b/drivers/media/cec/cec-notifier.c @@ -87,6 +87,9 @@ EXPORT_SYMBOL_GPL(cec_notifier_put); void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa) { + if (n == NULL) + return; + mutex_lock(&n->lock); n->phys_addr = pa; if (n->callback) @@ -100,6 +103,9 @@ void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n, { u16 pa = CEC_PHYS_ADDR_INVALID; + if (n == NULL) + return; + if (edid && edid->extensions) pa = cec_get_edid_phys_addr((const u8 *)edid, EDID_LENGTH * (edid->extensions + 1), NULL); diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h index 298f996969df..a4f7429c4ae5 100644 --- a/include/media/cec-notifier.h +++ b/include/media/cec-notifier.h @@ -57,6 +57,7 @@ void cec_notifier_put(struct cec_notifier *n); * @pa: the CEC physical address * * Set a new CEC physical address. + * Does nothing if @n == NULL. */ void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa); @@ -66,6 +67,7 @@ void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa); * @edid: the struct edid pointer * * Parses the EDID to obtain the new CEC physical address and set it. + * Does nothing if @n == NULL. */ void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n, const struct edid *edid); @@ -118,4 +120,17 @@ static inline void cec_notifier_unregister(struct cec_notifier *n) #endif +/** + * cec_notifier_phys_addr_invalidate() - set the physical address to INVALID + * + * @n: the CEC notifier + * + * This is a simple helper function to invalidate the physical + * address. Does nothing if @n == NULL. + */ +static inline void cec_notifier_phys_addr_invalidate(struct cec_notifier *n) +{ + cec_notifier_set_phys_addr(n, CEC_PHYS_ADDR_INVALID); +} + #endif -- cgit v1.2.3 From b25db383928cecba356835583b16fa7008f97b3a Mon Sep 17 00:00:00 2001 From: Prabhakar Lad Date: Thu, 20 Jul 2017 04:56:31 -0400 Subject: media: platform: davinci: drop VPFE_CMD_S_CCDC_RAW_PARAMS drop VPFE_CMD_S_CCDC_RAW_PARAMS ioctl from dm355/dm644x for the following reasons: - This ioctl was never in the public API and was only defined in a kernel header. - The function set_params constantly mixes up pointers and phys_addr_t numbers. - This is part of a 'VPFE_CMD_S_CCDC_RAW_PARAMS' ioctl command that is described as an 'experimental ioctl that will change in future kernels'. - The code to allocate the table never gets called after we copy_from_user the user input over the kernel settings, and then compare them for inequality. - We then go on to use an address provided by user space as both the __user pointer for input and pass it through phys_to_virt to come up with a kernel pointer to copy the data to. This looks like a trivially exploitable root hole.
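The last bullet is the crux; a condensed, self-contained sketch of that bug class (simplified for illustration, not a verbatim excerpt of the removed code):

    #include <linux/io.h>
    #include <linux/uaccess.h>

    /* A user-controlled integer is trusted both as a physical address (fed
     * to phys_to_virt()) and as a __user source pointer, so a malicious
     * caller can have the kernel write attacker data to an arbitrary
     * physical page.
     */
    static int broken_load_table(unsigned long user_val, size_t len)
    {
    	void *kvaddr = phys_to_virt(user_val);	/* attacker picks the target */

    	if (copy_from_user(kvaddr, (void __user *)user_val, len))
    		return -EFAULT;
    	return 0;
    }

Removing the ioctl, rather than trying to validate the address, is the right call here since the interface was never part of the public API.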
Signed-off-by: Lad, Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/davinci/ccdc_hw_device.h | 10 -- drivers/media/platform/davinci/dm355_ccdc.c | 92 +-------------- drivers/media/platform/davinci/dm644x_ccdc.c | 151 +----------------------- drivers/media/platform/davinci/vpfe_capture.c | 75 ------------ include/media/davinci/dm644x_ccdc.h | 12 -- include/media/davinci/vpfe_capture.h | 10 -- 6 files changed, 4 insertions(+), 346 deletions(-) (limited to 'include') diff --git a/drivers/media/platform/davinci/ccdc_hw_device.h b/drivers/media/platform/davinci/ccdc_hw_device.h index 8f6688a7a111..f1b521045d64 100644 --- a/drivers/media/platform/davinci/ccdc_hw_device.h +++ b/drivers/media/platform/davinci/ccdc_hw_device.h @@ -42,16 +42,6 @@ struct ccdc_hw_ops { int (*set_hw_if_params) (struct vpfe_hw_if_param *param); /* get interface parameters */ int (*get_hw_if_params) (struct vpfe_hw_if_param *param); - /* - * Pointer to function to set parameters. Used - * for implementing VPFE_S_CCDC_PARAMS - */ - int (*set_params) (void *params); - /* - * Pointer to function to get parameter. Used - * for implementing VPFE_G_CCDC_PARAMS - */ - int (*get_params) (void *params); /* Pointer to function to configure ccdc */ int (*configure) (void); diff --git a/drivers/media/platform/davinci/dm355_ccdc.c b/drivers/media/platform/davinci/dm355_ccdc.c index 73db166dc338..6d492dc4c3a9 100644 --- a/drivers/media/platform/davinci/dm355_ccdc.c +++ b/drivers/media/platform/davinci/dm355_ccdc.c @@ -17,12 +17,7 @@ * This module is for configuring DM355 CCD controller of VPFE to capture * Raw yuv or Bayer RGB data from a decoder. CCDC has several modules * such as Defect Pixel Correction, Color Space Conversion etc to - * pre-process the Bayer RGB data, before writing it to SDRAM. This - * module also allows application to configure individual - * module parameters through VPFE_CMD_S_CCDC_RAW_PARAMS IOCTL. - * To do so, application include dm355_ccdc.h and vpfe_capture.h header - * files. The setparams() API is called by vpfe_capture driver - * to configure module parameters + * pre-process the Bayer RGB data, before writing it to SDRAM. 
* * TODO: 1) Raw bayer parameter settings and bayer capture * 2) Split module parameter structure to module specific ioctl structs @@ -260,90 +255,6 @@ static void ccdc_setwin(struct v4l2_rect *image_win, dev_dbg(ccdc_cfg.dev, "\nEnd of ccdc_setwin..."); } -static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam) -{ - if (ccdcparam->datasft < CCDC_DATA_NO_SHIFT || - ccdcparam->datasft > CCDC_DATA_SHIFT_6BIT) { - dev_dbg(ccdc_cfg.dev, "Invalid value of data shift\n"); - return -EINVAL; - } - - if (ccdcparam->mfilt1 < CCDC_NO_MEDIAN_FILTER1 || - ccdcparam->mfilt1 > CCDC_MEDIAN_FILTER1) { - dev_dbg(ccdc_cfg.dev, "Invalid value of median filter1\n"); - return -EINVAL; - } - - if (ccdcparam->mfilt2 < CCDC_NO_MEDIAN_FILTER2 || - ccdcparam->mfilt2 > CCDC_MEDIAN_FILTER2) { - dev_dbg(ccdc_cfg.dev, "Invalid value of median filter2\n"); - return -EINVAL; - } - - if ((ccdcparam->med_filt_thres < 0) || - (ccdcparam->med_filt_thres > CCDC_MED_FILT_THRESH)) { - dev_dbg(ccdc_cfg.dev, - "Invalid value of median filter threshold\n"); - return -EINVAL; - } - - if (ccdcparam->data_sz < CCDC_DATA_16BITS || - ccdcparam->data_sz > CCDC_DATA_8BITS) { - dev_dbg(ccdc_cfg.dev, "Invalid value of data size\n"); - return -EINVAL; - } - - if (ccdcparam->alaw.enable) { - if (ccdcparam->alaw.gamma_wd < CCDC_GAMMA_BITS_13_4 || - ccdcparam->alaw.gamma_wd > CCDC_GAMMA_BITS_09_0) { - dev_dbg(ccdc_cfg.dev, "Invalid value of ALAW\n"); - return -EINVAL; - } - } - - if (ccdcparam->blk_clamp.b_clamp_enable) { - if (ccdcparam->blk_clamp.sample_pixel < CCDC_SAMPLE_1PIXELS || - ccdcparam->blk_clamp.sample_pixel > CCDC_SAMPLE_16PIXELS) { - dev_dbg(ccdc_cfg.dev, - "Invalid value of sample pixel\n"); - return -EINVAL; - } - if (ccdcparam->blk_clamp.sample_ln < CCDC_SAMPLE_1LINES || - ccdcparam->blk_clamp.sample_ln > CCDC_SAMPLE_16LINES) { - dev_dbg(ccdc_cfg.dev, - "Invalid value of sample lines\n"); - return -EINVAL; - } - } - return 0; -} - -/* Parameter operations */ -static int ccdc_set_params(void __user *params) -{ - struct ccdc_config_params_raw ccdc_raw_params; - int x; - - /* only raw module parameters can be set through the IOCTL */ - if (ccdc_cfg.if_type != VPFE_RAW_BAYER) - return -EINVAL; - - x = copy_from_user(&ccdc_raw_params, params, sizeof(ccdc_raw_params)); - if (x) { - dev_dbg(ccdc_cfg.dev, "ccdc_set_params: error in copying ccdcparams, %d\n", - x); - return -EFAULT; - } - - if (!validate_ccdc_param(&ccdc_raw_params)) { - memcpy(&ccdc_cfg.bayer.config_params, - &ccdc_raw_params, - sizeof(ccdc_raw_params)); - return 0; - } - return -EINVAL; -} - /* This function will configure CCDC for YCbCr video capture */ static void ccdc_config_ycbcr(void) { @@ -939,7 +850,6 @@ static struct ccdc_hw_device ccdc_hw_dev = { .enable = ccdc_enable, .enable_out_to_sdram = ccdc_enable_output_to_sdram, .set_hw_if_params = ccdc_set_hw_if_params, - .set_params = ccdc_set_params, .configure = ccdc_configure, .set_buftype = ccdc_set_buftype, .get_buftype = ccdc_get_buftype, diff --git a/drivers/media/platform/davinci/dm644x_ccdc.c b/drivers/media/platform/davinci/dm644x_ccdc.c index 740fbc7a8c14..3b2d8a9317b8 100644 --- a/drivers/media/platform/davinci/dm644x_ccdc.c +++ b/drivers/media/platform/davinci/dm644x_ccdc.c @@ -17,13 +17,9 @@ * This module is for configuring CCD controller of DM6446 VPFE to capture * Raw yuv or Bayer RGB data from a decoder. CCDC has several modules * such as Defect Pixel Correction, Color Space Conversion etc to - * pre-process the Raw Bayer RGB data, before writing it to SDRAM. 
This - * module also allows application to configure individual - * module parameters through VPFE_CMD_S_CCDC_RAW_PARAMS IOCTL. - * To do so, application includes dm644x_ccdc.h and vpfe_capture.h header - * files. The setparams() API is called by vpfe_capture driver - * to configure module parameters. This file is named DM644x so that other - * variants such DM6443 may be supported using the same module. + * pre-process the Raw Bayer RGB data, before writing it to SDRAM. + * This file is named DM644x so that other variants such DM6443 + * may be supported using the same module. * * TODO: Test Raw bayer parameter settings and bayer capture * Split module parameter structure to module specific ioctl structs @@ -216,96 +212,8 @@ static void ccdc_readregs(void) dev_notice(ccdc_cfg.dev, "\nReading 0x%x to VERT_LINES...\n", val); } -static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam) -{ - if (ccdcparam->alaw.enable) { - u8 max_gamma = ccdc_gamma_width_max_bit(ccdcparam->alaw.gamma_wd); - u8 max_data = ccdc_data_size_max_bit(ccdcparam->data_sz); - - if ((ccdcparam->alaw.gamma_wd > CCDC_GAMMA_BITS_09_0) || - (ccdcparam->alaw.gamma_wd < CCDC_GAMMA_BITS_15_6) || - (max_gamma > max_data)) { - dev_dbg(ccdc_cfg.dev, "\nInvalid data line select"); - return -1; - } - } - return 0; -} - -static int ccdc_update_raw_params(struct ccdc_config_params_raw *raw_params) -{ - struct ccdc_config_params_raw *config_params = - &ccdc_cfg.bayer.config_params; - unsigned int *fpc_virtaddr = NULL; - unsigned int *fpc_physaddr = NULL; - - memcpy(config_params, raw_params, sizeof(*raw_params)); - /* - * allocate memory for fault pixel table and copy the user - * values to the table - */ - if (!config_params->fault_pxl.enable) - return 0; - - fpc_physaddr = (unsigned int *)config_params->fault_pxl.fpc_table_addr; - fpc_virtaddr = (unsigned int *)phys_to_virt( - (unsigned long)fpc_physaddr); - /* - * Allocate memory for FPC table if current - * FPC table buffer is not big enough to - * accommodate FPC Number requested - */ - if (raw_params->fault_pxl.fp_num != config_params->fault_pxl.fp_num) { - if (fpc_physaddr != NULL) { - free_pages((unsigned long)fpc_virtaddr, - get_order - (config_params->fault_pxl.fp_num * - FP_NUM_BYTES)); - } - - /* Allocate memory for FPC table */ - fpc_virtaddr = - (unsigned int *)__get_free_pages(GFP_KERNEL | GFP_DMA, - get_order(raw_params-> - fault_pxl.fp_num * - FP_NUM_BYTES)); - - if (fpc_virtaddr == NULL) { - dev_dbg(ccdc_cfg.dev, - "\nUnable to allocate memory for FPC"); - return -EFAULT; - } - fpc_physaddr = - (unsigned int *)virt_to_phys((void *)fpc_virtaddr); - } - - /* Copy number of fault pixels and FPC table */ - config_params->fault_pxl.fp_num = raw_params->fault_pxl.fp_num; - if (copy_from_user(fpc_virtaddr, - (void __user *)raw_params->fault_pxl.fpc_table_addr, - config_params->fault_pxl.fp_num * FP_NUM_BYTES)) { - dev_dbg(ccdc_cfg.dev, "\n copy_from_user failed"); - return -EFAULT; - } - config_params->fault_pxl.fpc_table_addr = (unsigned long)fpc_physaddr; - return 0; -} - static int ccdc_close(struct device *dev) { - struct ccdc_config_params_raw *config_params = - &ccdc_cfg.bayer.config_params; - unsigned int *fpc_physaddr = NULL, *fpc_virtaddr = NULL; - - fpc_physaddr = (unsigned int *)config_params->fault_pxl.fpc_table_addr; - - if (fpc_physaddr != NULL) { - fpc_virtaddr = (unsigned int *) - phys_to_virt((unsigned long)fpc_physaddr); - free_pages((unsigned long)fpc_virtaddr, - get_order(config_params->fault_pxl.fp_num * - FP_NUM_BYTES)); - } return 0; } 
@@ -339,29 +247,6 @@ static void ccdc_sbl_reset(void) vpss_clear_wbl_overflow(VPSS_PCR_CCDC_WBL_O); } -/* Parameter operations */ -static int ccdc_set_params(void __user *params) -{ - struct ccdc_config_params_raw ccdc_raw_params; - int x; - - if (ccdc_cfg.if_type != VPFE_RAW_BAYER) - return -EINVAL; - - x = copy_from_user(&ccdc_raw_params, params, sizeof(ccdc_raw_params)); - if (x) { - dev_dbg(ccdc_cfg.dev, "ccdc_set_params: error in copyingccdc params, %d\n", - x); - return -EFAULT; - } - - if (!validate_ccdc_param(&ccdc_raw_params)) { - if (!ccdc_update_raw_params(&ccdc_raw_params)) - return 0; - } - return -EINVAL; -} - /* * ccdc_config_ycbcr() * This function will configure CCDC for YCbCr video capture @@ -489,32 +374,6 @@ static void ccdc_config_black_compense(struct ccdc_black_compensation *bcomp) regw(val, CCDC_BLKCMP); } -static void ccdc_config_fpc(struct ccdc_fault_pixel *fpc) -{ - u32 val; - - /* Initially disable FPC */ - val = CCDC_FPC_DISABLE; - regw(val, CCDC_FPC); - - if (!fpc->enable) - return; - - /* Configure Fault pixel if needed */ - regw(fpc->fpc_table_addr, CCDC_FPC_ADDR); - dev_dbg(ccdc_cfg.dev, "\nWriting 0x%lx to FPC_ADDR...\n", - (fpc->fpc_table_addr)); - /* Write the FPC params with FPC disable */ - val = fpc->fp_num & CCDC_FPC_FPC_NUM_MASK; - regw(val, CCDC_FPC); - - dev_dbg(ccdc_cfg.dev, "\nWriting 0x%x to FPC...\n", val); - /* read the FPC register */ - val = regr(CCDC_FPC) | CCDC_FPC_ENABLE; - regw(val, CCDC_FPC); - dev_dbg(ccdc_cfg.dev, "\nWriting 0x%x to FPC...\n", val); -} - /* * ccdc_config_raw() * This function will configure CCDC for Raw capture mode @@ -569,9 +428,6 @@ static void ccdc_config_raw(void) /* Configure Black level compensation */ ccdc_config_black_compense(&config_params->blk_comp); - /* Configure Fault Pixel Correction */ - ccdc_config_fpc(&config_params->fault_pxl); - /* If data size is 8 bit then pack the data */ if ((config_params->data_sz == CCDC_DATA_8BITS) || config_params->alaw.enable) @@ -929,7 +785,6 @@ static struct ccdc_hw_device ccdc_hw_dev = { .reset = ccdc_sbl_reset, .enable = ccdc_enable, .set_hw_if_params = ccdc_set_hw_if_params, - .set_params = ccdc_set_params, .configure = ccdc_configure, .set_buftype = ccdc_set_buftype, .get_buftype = ccdc_get_buftype, diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c index 1831bf5ccca5..b1bf4a7e8eb7 100644 --- a/drivers/media/platform/davinci/vpfe_capture.c +++ b/drivers/media/platform/davinci/vpfe_capture.c @@ -280,45 +280,6 @@ void vpfe_unregister_ccdc_device(struct ccdc_hw_device *dev) } EXPORT_SYMBOL(vpfe_unregister_ccdc_device); -/* - * vpfe_get_ccdc_image_format - Get image parameters based on CCDC settings - */ -static int vpfe_get_ccdc_image_format(struct vpfe_device *vpfe_dev, - struct v4l2_format *f) -{ - struct v4l2_rect image_win; - enum ccdc_buftype buf_type; - enum ccdc_frmfmt frm_fmt; - - memset(f, 0, sizeof(*f)); - f->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; - ccdc_dev->hw_ops.get_image_window(&image_win); - f->fmt.pix.width = image_win.width; - f->fmt.pix.height = image_win.height; - f->fmt.pix.bytesperline = ccdc_dev->hw_ops.get_line_length(); - f->fmt.pix.sizeimage = f->fmt.pix.bytesperline * - f->fmt.pix.height; - buf_type = ccdc_dev->hw_ops.get_buftype(); - f->fmt.pix.pixelformat = ccdc_dev->hw_ops.get_pixel_format(); - frm_fmt = ccdc_dev->hw_ops.get_frame_format(); - if (frm_fmt == CCDC_FRMFMT_PROGRESSIVE) - f->fmt.pix.field = V4L2_FIELD_NONE; - else if (frm_fmt == CCDC_FRMFMT_INTERLACED) { - if (buf_type == 
CCDC_BUFTYPE_FLD_INTERLEAVED) - f->fmt.pix.field = V4L2_FIELD_INTERLACED; - else if (buf_type == CCDC_BUFTYPE_FLD_SEPARATED) - f->fmt.pix.field = V4L2_FIELD_SEQ_TB; - else { - v4l2_err(&vpfe_dev->v4l2_dev, "Invalid buf_type\n"); - return -EINVAL; - } - } else { - v4l2_err(&vpfe_dev->v4l2_dev, "Invalid frm_fmt\n"); - return -EINVAL; - } - return 0; -} - /* * vpfe_config_ccdc_image_format() * For a pix format, configure ccdc to setup the capture @@ -1697,41 +1658,6 @@ unlock_out: return ret; } - -static long vpfe_param_handler(struct file *file, void *priv, - bool valid_prio, unsigned int cmd, void *param) -{ - struct vpfe_device *vpfe_dev = video_drvdata(file); - int ret; - - v4l2_dbg(2, debug, &vpfe_dev->v4l2_dev, "vpfe_param_handler\n"); - - if (vpfe_dev->started) { - /* only allowed if streaming is not started */ - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "device already started\n"); - return -EBUSY; - } - - ret = mutex_lock_interruptible(&vpfe_dev->lock); - if (ret) - return ret; - - switch (cmd) { - case VPFE_CMD_S_CCDC_RAW_PARAMS: - ret = -EINVAL; - v4l2_warn(&vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); - break; - default: - ret = -ENOTTY; - } -unlock_out: - mutex_unlock(&vpfe_dev->lock); - return ret; -} - - /* vpfe capture ioctl operations */ static const struct v4l2_ioctl_ops vpfe_ioctl_ops = { .vidioc_querycap = vpfe_querycap, @@ -1754,7 +1680,6 @@ static const struct v4l2_ioctl_ops vpfe_ioctl_ops = { .vidioc_cropcap = vpfe_cropcap, .vidioc_g_selection = vpfe_g_selection, .vidioc_s_selection = vpfe_s_selection, - .vidioc_default = vpfe_param_handler, }; static struct vpfe_device *vpfe_initialize(void) diff --git a/include/media/davinci/dm644x_ccdc.h b/include/media/davinci/dm644x_ccdc.h index 7c909da29d43..6ea2ce241851 100644 --- a/include/media/davinci/dm644x_ccdc.h +++ b/include/media/davinci/dm644x_ccdc.h @@ -103,16 +103,6 @@ struct ccdc_black_compensation { char gb; }; -/* structure for fault pixel correction */ -struct ccdc_fault_pixel { - /* Enable or Disable fault pixel correction */ - unsigned char enable; - /* Number of fault pixel */ - unsigned short fp_num; - /* Address of fault pixel table */ - unsigned long fpc_table_addr; -}; - /* Structure for CCDC configuration parameters for raw capture mode passed * by application */ @@ -125,8 +115,6 @@ struct ccdc_config_params_raw { struct ccdc_black_clamp blk_clamp; /* Structure for Black Compensation */ struct ccdc_black_compensation blk_comp; - /* Structure for Fault Pixel Module Configuration */ - struct ccdc_fault_pixel fault_pxl; }; diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h index 8e1a4d88daa0..f003533602d0 100644 --- a/include/media/davinci/vpfe_capture.h +++ b/include/media/davinci/vpfe_capture.h @@ -183,14 +183,4 @@ struct vpfe_config_params { }; #endif /* End of __KERNEL__ */ -/** - * VPFE_CMD_S_CCDC_RAW_PARAMS - EXPERIMENTAL IOCTL to set raw capture params - * This can be used to configure modules such as defect pixel correction, - * color space conversion, culling etc. This is an experimental ioctl that - * will change in future kernels. So use this ioctl with care ! 
- * TODO: This is to be split into multiple ioctls and also explore the - * possibility of extending the v4l2 api to include this - **/ -#define VPFE_CMD_S_CCDC_RAW_PARAMS _IOW('V', BASE_VIDIOC_PRIVATE + 1, \ - void *) #endif /* _DAVINCI_VPFE_H */ -- cgit v1.2.3 From fdeaf7e3eb37c6dbc4b4ac97dbe1945d239eb788 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 24 Jul 2017 13:40:03 +0200 Subject: KVM: make pid available for uevents without debugfs Simplify and improve the code so that the PID is always available in the uevent even when debugfs is not available. This adds a userspace_pid field to struct kvm, as per Radim's suggestion, so that the PID can be retrieved on destruction too. Acked-by: Janosch Frank Fixes: 286de8f6ac9202 ("KVM: trigger uevents when creating or destroying a VM") Signed-off-by: Claudio Imbrenda Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 35 ++++++++++++----------------------- 2 files changed, 13 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 648b34cabb38..890b706d1943 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -445,6 +445,7 @@ struct kvm { struct kvm_stat_data **debugfs_stat_data; struct srcu_struct srcu; struct srcu_struct irq_srcu; + pid_t userspace_pid; }; #define kvm_err(fmt, ...) \ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 82987d457b8b..f3f74271f1a9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3883,7 +3883,6 @@ static const struct file_operations *stat_fops[] = { static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) { struct kobj_uevent_env *env; - char *tmp, *pathbuf = NULL; unsigned long long created, active; if (!kvm_dev.this_device || !kvm) @@ -3907,38 +3906,28 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) add_uevent_var(env, "CREATED=%llu", created); add_uevent_var(env, "COUNT=%llu", active); - if (type == KVM_EVENT_CREATE_VM) + if (type == KVM_EVENT_CREATE_VM) { add_uevent_var(env, "EVENT=create"); - else if (type == KVM_EVENT_DESTROY_VM) + kvm->userspace_pid = task_pid_nr(current); + } else if (type == KVM_EVENT_DESTROY_VM) { add_uevent_var(env, "EVENT=destroy"); + } + add_uevent_var(env, "PID=%d", kvm->userspace_pid); if (kvm->debugfs_dentry) { - char p[ITOA_MAX_LEN]; - - snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name); - tmp = strchrnul(p + 1, '-'); - *tmp = '\0'; - add_uevent_var(env, "PID=%s", p); - pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); - if (pathbuf) { - /* sizeof counts the final '\0' */ - int len = sizeof("STATS_PATH=") - 1; - const char *pvar = "STATS_PATH="; - - tmp = dentry_path_raw(kvm->debugfs_dentry, - pathbuf + len, - PATH_MAX - len); - if (!IS_ERR(tmp)) { - memcpy(tmp - len, pvar, len); - env->envp[env->envp_idx++] = tmp - len; - } + char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); + + if (p) { + tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); + if (!IS_ERR(tmp)) + add_uevent_var(env, "STATS_PATH=%s", tmp); + kfree(p); } } /* no need for checks, since we are adding at most only 5 keys */ env->envp[env->envp_idx++] = NULL; kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); kfree(env); - kfree(pathbuf); } static int kvm_init_debug(void) -- cgit v1.2.3 From bb67b496c338e15813f075f482067da930f52e39 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Tue, 18 Jul 2017 14:22:20 -0300 Subject: include/linux/vfio.h: Guard powerpc-specific functions with 
CONFIG_VFIO_SPAPR_EEH When CONFIG_EEH=y and CONFIG_VFIO_SPAPR_EEH=n, build fails with the following: drivers/vfio/pci/vfio_pci.o: In function `.vfio_pci_release': vfio_pci.c:(.text+0xa98): undefined reference to `.vfio_spapr_pci_eeh_release' drivers/vfio/pci/vfio_pci.o: In function `.vfio_pci_open': vfio_pci.c:(.text+0x1420): undefined reference to `.vfio_spapr_pci_eeh_open' In this case, vfio_pci.c should use the empty definitions of vfio_spapr_pci_eeh_open and vfio_spapr_pci_eeh_release functions. This patch fixes it by guarding these function definitions with CONFIG_VFIO_SPAPR_EEH, the symbol that controls whether vfio_spapr_eeh.c is built, which is where the non-empty versions of these functions are. We need to make use of the IS_ENABLED() macro because CONFIG_VFIO_SPAPR_EEH is a tristate option. This issue was found during a randconfig build. Logs are here: http://kisskb.ellerman.id.au/kisskb/buildresult/12982362/ Signed-off-by: Murilo Opsfelder Araujo Reviewed-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 586809abb273..a47b985341d1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -152,7 +152,7 @@ extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, size_t *data_size); struct pci_dev; -#ifdef CONFIG_EEH +#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, @@ -173,7 +173,7 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, { return -ENOTTY; } -#endif /* CONFIG_EEH */ +#endif /* CONFIG_VFIO_SPAPR_EEH */ /* * IRQfd - generic -- cgit v1.2.3 From 273752c9ff03eb83856601b2a3458218bb949e46 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 26 Jul 2017 09:35:09 -0400 Subject: dm, dax: Make sure dm_dax_flush() is called if device supports it Currently dm_dax_flush() is not being called, even if the underlying dax device supports write cache, because DAXDEV_WRITE_CACHE is not being propagated up to the DM dax device. If the underlying dax device supports write cache, set DAXDEV_WRITE_CACHE on the DM dax device. This will cause dm_dax_flush() to be called.
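For context, the new dax_write_cache_enabled() helper pairs with the existing dax_flush(); a minimal sketch of the consumer pattern this patch makes work (the wrapper function is hypothetical, the two dax_* calls match the prototypes in the hunks below):

    #include <linux/dax.h>

    /* Flush a just-written range through the device write cache, but only
     * when the device actually advertises one (DAXDEV_WRITE_CACHE set).
     */
    static void example_commit_range(struct dax_device *dax_dev, pgoff_t pgoff,
    				 void *addr, size_t len)
    {
    	if (dax_write_cache_enabled(dax_dev))
    		dax_flush(dax_dev, pgoff, addr, len);
    }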
Fixes: abebfbe2f7 ("dm: add ->flush() dax operation support") Signed-off-by: Vivek Goyal Acked-by: Dan Williams Signed-off-by: Mike Snitzer --- drivers/dax/super.c | 6 ++++++ drivers/md/dm-table.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/dax.h | 1 + 3 files changed, 42 insertions(+) (limited to 'include') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index ce9e563e6e1d..938eb4868f7f 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -278,6 +278,12 @@ void dax_write_cache(struct dax_device *dax_dev, bool wc) } EXPORT_SYMBOL_GPL(dax_write_cache); +bool dax_write_cache_enabled(struct dax_device *dax_dev) +{ + return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); +} +EXPORT_SYMBOL_GPL(dax_write_cache_enabled); + bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a39bcd9b982a..28a4071cdf85 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -20,6 +20,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "table" @@ -1630,6 +1631,37 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) return false; } +static int device_dax_write_cache_enabled(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct dax_device *dax_dev = dev->dax_dev; + + if (!dax_dev) + return false; + + if (dax_write_cache_enabled(dax_dev)) + return true; + return false; +} + +static int dm_table_supports_dax_write_cache(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, + device_dax_write_cache_enabled, NULL)) + return true; + } + + return false; +} + static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1785,6 +1817,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); + if (dm_table_supports_dax_write_cache(t)) + dax_write_cache(t->md->dax_dev, true); + /* Ensure that all underlying devices are non-rotational. */ if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); diff --git a/include/linux/dax.h b/include/linux/dax.h index 794811875732..df97b7af7e2c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -87,6 +87,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t size); void dax_write_cache(struct dax_device *dax_dev, bool wc); +bool dax_write_cache_enabled(struct dax_device *dax_dev); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From 1937f8a29f4a650bc27e0311b43b53509a34fd22 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:52:59 +0200 Subject: scsi: bnx2fc: Simplify CPU hotplug code The CPU hotplug related code of this driver can be simplified by: 1) Consolidating the callbacks into a single state. The CPU thread can be torn down on the CPU which goes offline. There is no point in delaying that to the CPU dead state 2) Let the core code invoke the online/offline callbacks and remove the extra for_each_online_cpu() loops. Signed-off-by: Thomas Gleixner Signed-off-by: Martin K. 
Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 69 +++++++++------------------------------ include/linux/cpuhotplug.h | 1 - 2 files changed, 15 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index c4ebf8c5d884..6844ba361616 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2624,12 +2624,11 @@ static struct fcoe_transport bnx2fc_transport = { }; /** - * bnx2fc_percpu_thread_create - Create a receive thread for an - * online CPU + * bnx2fc_cpu_online - Create a receive thread for an online CPU * * @cpu: cpu index for the online cpu */ -static void bnx2fc_percpu_thread_create(unsigned int cpu) +static int bnx2fc_cpu_online(unsigned int cpu) { struct bnx2fc_percpu_s *p; struct task_struct *thread; @@ -2639,15 +2638,17 @@ static void bnx2fc_percpu_thread_create(unsigned int cpu) thread = kthread_create_on_node(bnx2fc_percpu_io_thread, (void *)p, cpu_to_node(cpu), "bnx2fc_thread/%d", cpu); + if (IS_ERR(thread)) + return PTR_ERR(thread); + /* bind thread to the cpu */ - if (likely(!IS_ERR(thread))) { - kthread_bind(thread, cpu); - p->iothread = thread; - wake_up_process(thread); - } + kthread_bind(thread, cpu); + p->iothread = thread; + wake_up_process(thread); + return 0; } -static void bnx2fc_percpu_thread_destroy(unsigned int cpu) +static int bnx2fc_cpu_offline(unsigned int cpu) { struct bnx2fc_percpu_s *p; struct task_struct *thread; @@ -2661,7 +2662,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu) thread = p->iothread; p->iothread = NULL; - /* Free all work in the list */ list_for_each_entry_safe(work, tmp, &p->work_list, list) { list_del_init(&work->list); @@ -2673,20 +2673,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu) if (thread) kthread_stop(thread); -} - - -static int bnx2fc_cpu_online(unsigned int cpu) -{ - printk(PFX "CPU %x online: Create Rx thread\n", cpu); - bnx2fc_percpu_thread_create(cpu); - return 0; -} - -static int bnx2fc_cpu_dead(unsigned int cpu) -{ - printk(PFX "CPU %x offline: Remove Rx thread\n", cpu); - bnx2fc_percpu_thread_destroy(cpu); return 0; } @@ -2761,31 +2747,16 @@ static int __init bnx2fc_mod_init(void) spin_lock_init(&p->fp_work_lock); } - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2fc_percpu_thread_create(cpu); - - rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2fc:online", - bnx2fc_cpu_online, NULL); + rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2fc:online", + bnx2fc_cpu_online, bnx2fc_cpu_offline); if (rc < 0) - goto stop_threads; + goto stop_thread; bnx2fc_online_state = rc; - cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD, - "scsi/bnx2fc:dead", - NULL, bnx2fc_cpu_dead); - put_online_cpus(); - cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb); - return 0; -stop_threads: - for_each_online_cpu(cpu) - bnx2fc_percpu_thread_destroy(cpu); - put_online_cpus(); +stop_thread: kthread_stop(l2_thread); free_wq: destroy_workqueue(bnx2fc_wq); @@ -2804,7 +2775,6 @@ static void __exit bnx2fc_mod_exit(void) struct fcoe_percpu_s *bg; struct task_struct *l2_thread; struct sk_buff *skb; - unsigned int cpu = 0; /* * NOTE: Since cnic calls register_driver routine rtnl_lock, @@ -2845,16 +2815,7 @@ static void __exit bnx2fc_mod_exit(void) if (l2_thread) kthread_stop(l2_thread); - get_online_cpus(); - /* Destroy per cpu threads */ - for_each_online_cpu(cpu) { - bnx2fc_percpu_thread_destroy(cpu); - } - - 
cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state); - cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD); - - put_online_cpus(); + cpuhp_remove_state(bnx2fc_online_state); destroy_workqueue(bnx2fc_wq); /* diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b56573bf440d..2e7b1731ad12 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -39,7 +39,6 @@ enum cpuhp_state { CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, - CPUHP_SCSI_BNX2FC_DEAD, CPUHP_SCSI_BNX2I_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, -- cgit v1.2.3 From f9f22a86912f9d36b50e9b3b383fabfb9f22dd46 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:53:00 +0200 Subject: scsi: bnx2i: Simplify cpu hotplug code The CPU hotplug related code of this driver can be simplified by: 1) Consolidating the callbacks into a single state. The CPU thread can be torn down on the CPU which goes offline. There is no point in delaying that to the CPU dead state 2) Let the core code invoke the online/offline callbacks and remove the extra for_each_online_cpu() loops. Signed-off-by: Thomas Gleixner Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_init.c | 65 ++++++++++------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 15 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c index 7487b653e799..4ebcda8d9500 100644 --- a/drivers/scsi/bnx2i/bnx2i_init.c +++ b/drivers/scsi/bnx2i/bnx2i_init.c @@ -404,12 +404,11 @@ int bnx2i_get_stats(void *handle) /** - * bnx2i_percpu_thread_create - Create a receive thread for an - * online CPU + * bnx2i_cpu_online - Create a receive thread for an online CPU * * @cpu: cpu index for the online cpu */ -static void bnx2i_percpu_thread_create(unsigned int cpu) +static int bnx2i_cpu_online(unsigned int cpu) { struct bnx2i_percpu_s *p; struct task_struct *thread; @@ -419,16 +418,17 @@ static void bnx2i_percpu_thread_create(unsigned int cpu) thread = kthread_create_on_node(bnx2i_percpu_io_thread, (void *)p, cpu_to_node(cpu), "bnx2i_thread/%d", cpu); + if (IS_ERR(thread)) + return PTR_ERR(thread); + /* bind thread to the cpu */ - if (likely(!IS_ERR(thread))) { - kthread_bind(thread, cpu); - p->iothread = thread; - wake_up_process(thread); - } + kthread_bind(thread, cpu); + p->iothread = thread; + wake_up_process(thread); + return 0; } - -static void bnx2i_percpu_thread_destroy(unsigned int cpu) +static int bnx2i_cpu_offline(unsigned int cpu) { struct bnx2i_percpu_s *p; struct task_struct *thread; @@ -451,19 +451,6 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu) spin_unlock_bh(&p->p_work_lock); if (thread) kthread_stop(thread); -} - -static int bnx2i_cpu_online(unsigned int cpu) -{ - pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu); - bnx2i_percpu_thread_create(cpu); - return 0; -} - -static int bnx2i_cpu_dead(unsigned int cpu) -{ - pr_info("CPU %x offline: Remove Rx thread\n", cpu); - bnx2i_percpu_thread_destroy(cpu); return 0; } @@ -511,28 +498,14 @@ static int __init bnx2i_mod_init(void) p->iothread = NULL; } - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2i_percpu_thread_create(cpu); - - err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2i:online", - bnx2i_cpu_online, NULL); + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2i:online", + bnx2i_cpu_online, bnx2i_cpu_offline); if (err < 0) - goto 
remove_threads; + goto unreg_driver; bnx2i_online_state = err; - - cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD, - "scsi/bnx2i:dead", - NULL, bnx2i_cpu_dead); - put_online_cpus(); return 0; -remove_threads: - for_each_online_cpu(cpu) - bnx2i_percpu_thread_destroy(cpu); - put_online_cpus(); +unreg_driver: cnic_unregister_driver(CNIC_ULP_ISCSI); unreg_xport: iscsi_unregister_transport(&bnx2i_iscsi_transport); @@ -552,7 +525,6 @@ out: static void __exit bnx2i_mod_exit(void) { struct bnx2i_hba *hba; - unsigned cpu = 0; mutex_lock(&bnx2i_dev_lock); while (!list_empty(&adapter_list)) { @@ -570,14 +542,7 @@ static void __exit bnx2i_mod_exit(void) } mutex_unlock(&bnx2i_dev_lock); - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2i_percpu_thread_destroy(cpu); - - cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state); - cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD); - put_online_cpus(); + cpuhp_remove_state(bnx2i_online_state); iscsi_unregister_transport(&bnx2i_iscsi_transport); cnic_unregister_driver(CNIC_ULP_ISCSI); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2e7b1731ad12..82b30e638430 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -39,7 +39,6 @@ enum cpuhp_state { CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, - CPUHP_SCSI_BNX2I_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 6b84202c946cd3da3a8daa92c682510e9ed80321 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 16:24:59 +0800 Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") tried to fix the issue that it may overstep the chunk end for _sctp_walk_{params, errors} with 'chunk_end > offset(length) + sizeof(length)'. But it introduced a side effect: When processing INIT, it verifies the chunks with 'param.v == chunk_end' after iterating all params by sctp_walk_params(). With the check 'chunk_end > offset(length) + sizeof(length)', it would return before the last param is accessed, because the last param is usually the fwdtsn-supported param, whose size is 4, so that 'chunk_end == offset(length) + sizeof(length)'. This is a serious issue that even prevents sctp from completing its 4-way handshake: a client would always get an abort when connecting to a server, because INIT chunk verification fails on the server. The patch is to use 'chunk_end <= offset(length) + sizeof(length)' instead of 'chunk_end < offset(length) + sizeof(length)' for both _sctp_walk_params and _sctp_walk_errors. Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S.
Miller --- include/net/sctp/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 980807d7506f..45fd4c6056b5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ - (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ @@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ -- cgit v1.2.3 From a3287c41ff405025bc57b165a0f6cd698bbbc1be Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Jul 2017 16:30:34 +0100 Subject: drivers/perf: arm_pmu: Request PMU SPIs with IRQF_PER_CPU Since the PMU register interface is banked per CPU, CPU PMU interrupts cannot be handled by a CPU other than the one with the PMU asserting the interrupt. This means that migrating PMU SPIs, as we do during a CPU hotplug operation, doesn't make any sense and can lead to the IRQ being disabled entirely if we route a spurious IRQ to the new affinity target. This has been observed in practice on AMD Seattle, where CPUs on the non-boot cluster appear to take a spurious PMU IRQ when coming online, which is routed to CPU0 where it cannot be handled. This patch passes IRQF_PERCPU for PMU SPIs and forcefully sets their affinity prior to requesting them, ensuring that they cannot be migrated during hotplug events. This interacts badly with the DB8500 erratum workaround that ping-pongs the interrupt affinity from the handler, so we avoid passing IRQF_PERCPU in that case by allowing the IRQ flags to be overridden in the platdata.
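The DB8500 hunk below is the real user of that escape hatch; as an illustration, a board file for a hypothetical platform with the same constraint would provide something like this (only struct arm_pmu_platdata and its new irq_flags field come from this patch):

    #include <linux/interrupt.h>
    #include <linux/perf/arm_pmu.h>

    /* Deliberately omit IRQF_PERCPU so that the PMU IRQ affinity can still
     * be changed at runtime, e.g. by a workaround that migrates the IRQ
     * from within its own handler.
     */
    static struct arm_pmu_platdata example_pmu_platdata = {
    	.irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD,
    };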
Fixes: 3cf7ee98b848 ("drivers/perf: arm_pmu: move irq request/free into probe") Cc: Mark Rutland Cc: Linus Walleij Signed-off-by: Will Deacon --- arch/arm/mach-ux500/cpu-db8500.c | 1 + drivers/perf/arm_pmu.c | 41 ++++++++++++++++++++++++++-------------- include/linux/perf/arm_pmu.h | 4 ++++ 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 28083ef72819..71a34e8c345a 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -133,6 +133,7 @@ static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) static struct arm_pmu_platdata db8500_pmu_platdata = { .handle_irq = db8500_pmu_handler, + .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD, }; static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index dc459eb1246b..1c5e0f333779 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -569,22 +569,41 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) if (irq != other_irq) { pr_warn("mismatched PPIs detected.\n"); err = -EINVAL; + goto err_out; } } else { - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu); + unsigned long irq_flags; + + err = irq_force_affinity(irq, cpumask_of(cpu)); + + if (err && num_possible_cpus() > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + goto err_out; + } + + if (platdata && platdata->irq_flags) { + irq_flags = platdata->irq_flags; + } else { + irq_flags = IRQF_PERCPU | + IRQF_NOBALANCING | + IRQF_NO_THREAD; + } + + err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } + if (err) + goto err_out; cpumask_set_cpu(cpu, &armpmu->active_irqs); - return 0; + +err_out: + pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); + return err; } int armpmu_request_irqs(struct arm_pmu *armpmu) @@ -628,12 +647,6 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) enable_percpu_irq(irq, IRQ_TYPE_NONE); return 0; } - - if (irq_force_affinity(irq, cpumask_of(cpu)) && - num_possible_cpus() > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - } } return 0; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 1360dd6d5e61..af0f44effd44 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -24,10 +24,14 @@ * interrupt and passed the address of the low level handler, * and can be used to implement any platform specific handling * before or after calling it. + * + * @irq_flags: if non-zero, these flags will be passed to request_irq + * when requesting interrupts for this PMU device. */ struct arm_pmu_platdata { irqreturn_t (*handle_irq)(int irq, void *dev, irq_handler_t pmu_handler); + unsigned long irq_flags; }; #ifdef CONFIG_ARM_PMU -- cgit v1.2.3 From 8397913303abc9333f376a518a8368fa22ca5e6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 27 Jul 2017 12:21:11 +0200 Subject: genirq/cpuhotplug: Revert "Set force affinity flag on hotplug migration" That commit was part of the changes moving x86 to the generic CPU hotplug interrupt migration code. 
The force flag was required on x86 before the hierarchical irqdomain rework, but invoking set_affinity() with force=true stayed and had no side effects. At some point in the past, the force flag got repurposed to support setting the affinity of the exynos timer interrupt to a not-yet-online CPU, so the interrupt controller callback does not verify the supplied affinity mask against cpu_online_mask. Setting the flag in the CPU hotplug code causes the cpu online masking to be blocked on these irq controllers and results in potentially affining an interrupt to the CPU which is unplugged, i.e. instead of moving it away, it's just reassigned to it. As the force flag is no longer needed on x86, it's safe to revert that patch so the ARM irqchips which use the force flag work again. Add comments to that effect, so this won't happen again. Note: The online mask handling should be done in the generic code and the force flag and the masking in the irq chips removed altogether, but that's not a change possible for 4.13. Fixes: 77f85e66aa8b ("genirq/cpuhotplug: Set force affinity flag on hotplug migration") Reported-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Cc: Marc Zyngier Cc: Russell King Cc: LAK Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707271217590.3109@nanos Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 7 ++++++- kernel/irq/cpuhotplug.c | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 00db35b61e9e..d2d543794093 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -388,7 +388,12 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_mask_ack: ack and mask an interrupt source * @irq_unmask: unmask an interrupt source * @irq_eoi: end of interrupt - * @irq_set_affinity: set the CPU affinity on SMP machines + * @irq_set_affinity: Set the CPU affinity on SMP machines. If the force + * argument is true, it tells the driver to + * unconditionally apply the affinity setting. Sanity + * checks against the supplied affinity mask are not + * required. This is used for CPU hotplug where the + * target CPU is not yet set in the cpu_online_mask. * @irq_retrigger: resend an IRQ to the CPU * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @irq_set_wake: enable/disable power-management wake-on of an IRQ diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index aee8f7ec40af..638eb9c83d9f 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -95,8 +95,13 @@ static bool migrate_one_irq(struct irq_desc *desc) affinity = cpu_online_mask; brokeaff = true; } - - err = irq_do_set_affinity(d, affinity, true); + /* + * Do not set the force argument of irq_do_set_affinity() as this + * disables the masking of offline CPUs from the supplied affinity + * mask and therefore might keep/reassign the irq to the outgoing + * CPU. + */ + err = irq_do_set_affinity(d, affinity, false); if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err); -- cgit v1.2.3 From 0b794ffae7afa7c4e5accac8791c4b78e8d080ce Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 25 May 2017 15:11:26 +0300 Subject: net/mlx5: Fix mlx5_ifc_mtpps_reg_bits structure size Fix the miscalculation in the reserved_at_1a0 field.
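In the mlx5_ifc convention, the hex suffix of a reserved_at_* field names its starting bit offset and the array size gives its width in bits, so the declared widths must sum exactly to the register length that macros such as MLX5_ST_SZ_BYTES() derive from the struct. A hedged sanity check of the corrected layout (the 0x1e0-bit total for MTPPS is inferred from the fixed widths, not quoted from the PRM):

    /* Illustration only, not driver code: the trailing reserved field must
     * end exactly at the assumed register length, otherwise
     * MLX5_ST_SZ_BYTES(mtpps_reg) reports the wrong size to firmware.
     */
    #define MTPPS_REG_BITS	0x1e0	/* assumed total register width in bits */
    _Static_assert(0x1a0 + 0x40 == MTPPS_REG_BITS,
    	       "mtpps_reg reserved tail must close the register");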
Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 87869c04849a..fd98aef4545c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8175,7 +8175,7 @@ struct mlx5_ifc_mtpps_reg_bits { u8 out_pulse_duration[0x10]; u8 out_periodic_adjustment[0x10]; - u8 reserved_at_1a0[0x60]; + u8 reserved_at_1a0[0x40]; }; struct mlx5_ifc_mtppse_reg_bits { -- cgit v1.2.3 From fa3676885e3b5be1edfa1b2cc775e20a45b34a19 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 25 May 2017 16:09:34 +0300 Subject: net/mlx5e: Add field select to MTPPS register In order to mark the relevant fields while setting the MTPPS register, add a field select. Otherwise it can cause a misconfiguration in the firmware. Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 29 +++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 ++++ include/linux/mlx5/mlx5_ifc.h | 10 +++++--- 4 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 66f432385dbb..ab07233e2faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -53,6 +53,15 @@ enum { MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, }; +enum { + MLX5E_MTPPS_FS_ENABLE = BIT(0x0), + MLX5E_MTPPS_FS_PATTERN = BIT(0x2), + MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), +}; + void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, struct skb_shared_hwtstamps *hwts) { @@ -221,7 +230,10 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) /* For future use need to add a loop for finding all 1PPS out pins */ MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); - MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF); + MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta); + MLX5_SET(mtpps_reg, in, field_select, + MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ); mlx5_set_mtpps(priv->mdev, in, sizeof(in)); } @@ -257,8 +269,7 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, int pin = -1; int err = 0; - if (!MLX5_CAP_GEN(priv->mdev, pps) || - !MLX5_CAP_GEN(priv->mdev, pps_modify)) + if (!MLX5_PPS_CAP(priv->mdev)) return -EOPNOTSUPP; if (rq->extts.index >= tstamp->ptp_info.n_pins) @@ -277,6 +288,9 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN); MLX5_SET(mtpps_reg, in, pattern, pattern); MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_PATTERN | + MLX5E_MTPPS_FS_ENABLE); err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); if (err) @@ -302,7 +316,7 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, int pin = -1; s64 ns; - if (!MLX5_CAP_GEN(priv->mdev, pps_modify)) + if (!MLX5_PPS_CAP(priv->mdev)) return -EOPNOTSUPP; if (rq->perout.index >= tstamp->ptp_info.n_pins) @@ -337,7 +351,10
@@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC); MLX5_SET(mtpps_reg, in, enable, on); MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); - + MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_PATTERN | + MLX5E_MTPPS_FS_ENABLE | + MLX5E_MTPPS_FS_TIME_STAMP); return mlx5_set_mtpps(priv->mdev, in, sizeof(in)); } @@ -487,7 +504,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) #define MAX_PIN_NUM 8 tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL); if (tstamp->pps_pin_caps) { - if (MLX5_CAP_GEN(priv->mdev, pps)) + if (MLX5_PPS_CAP(priv->mdev)) mlx5e_get_pps_caps(priv, tstamp); if (tstamp->ptp_info.n_pins) mlx5e_init_pin_config(tstamp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index af51a5d2b912..52b9a64cd3a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -698,7 +698,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) else mlx5_core_dbg(dev, "port_module_event is not set\n"); - if (MLX5_CAP_GEN(dev, pps)) + if (MLX5_PPS_CAP(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); if (MLX5_CAP_GEN(dev, fpga)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 6a3d6bef7dd4..6a263e8d883a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -154,6 +154,11 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); +#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ + MLX5_CAP_GEN((mdev), pps_modify) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) + int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); void mlx5e_init(void); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fd98aef4545c..3030121b4746 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7749,8 +7749,10 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x7f]; + u8 reserved_at_0[0x7d]; + u8 mtpps_enh_out_per_adj[0x1]; + u8 mtpps_fs[0x1]; u8 pcie_performance_group[0x1]; }; @@ -8159,7 +8161,8 @@ struct mlx5_ifc_mtpps_reg_bits { u8 reserved_at_78[0x4]; u8 cap_pin_4_mode[0x4]; - u8 reserved_at_80[0x80]; + u8 field_select[0x20]; + u8 reserved_at_a0[0x60]; u8 enable[0x1]; u8 reserved_at_101[0xb]; @@ -8174,8 +8177,9 @@ struct mlx5_ifc_mtpps_reg_bits { u8 out_pulse_duration[0x10]; u8 out_periodic_adjustment[0x10]; + u8 enhanced_out_periodic_adjustment[0x20]; - u8 reserved_at_1a0[0x40]; + u8 reserved_at_1c0[0x20]; }; struct mlx5_ifc_mtppse_reg_bits { -- cgit v1.2.3 From c9f2c1ae123a751d4e4f949144500219354d5ee1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 Jul 2017 14:45:09 +0200 Subject: udp6: fix socket leak on early demux When an early demuxed packet reaches __udp6_lib_lookup_skb(), the sk reference is retrieved and used, but the relevant reference count is leaked and the socket destructor is never called. Beyond leaking the sk memory, if there are pending UDP packets in the receive queue, even the related accounted memory is leaked. 
In the long run, this will cause persistent forward allocation errors and no UDP skbs (both ipv4 and ipv6) will be able to reach the user-space. Fix this by explicitly accessing the early demux reference before the lookup, and properly decreasing the socket reference count after usage. Also drop the skb_steal_sock() in __udp6_lib_lookup_skb(), and the now obsoleted comment about "socket cache". The newly added code is derived from the current ipv4 code for the similar path. v1 -> v2: fixed the __udp6_lib_rcv() return code for resubmission, as suggested by Eric Reported-by: Sam Edwards Reported-by: Marc Haber Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast") Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 27 ++++++++++++++++++--------- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 56ce2d2a612d..cc8036987dcb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -260,6 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, } void udp_v4_early_demux(struct sk_buff *skb); +void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fac7cb9e3b0f..e6276fa3750b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1928,7 +1928,7 @@ drop: /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ -static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; @@ -1937,6 +1937,7 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) dst_release(old); } } +EXPORT_SYMBOL(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4a3e65626e8b..98fe4560e24c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct udp_table *udptable) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk; - sk = skb_steal_sock(skb); - if (unlikely(sk)) - return sk; return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), udptable, skb); @@ -804,6 +800,24 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp6_csum_init(skb, uh, proto)) goto csum_error; + /* Check if the socket is already available, e.g. due to early demux */ + sk = skb_steal_sock(skb); + if (sk) { + struct dst_entry *dst = skb_dst(skb); + int ret; + + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); + + ret = udpv6_queue_rcv_skb(sk, skb); + sock_put(sk); + + /* a return value > 0 means to resubmit the input */ + if (ret > 0) + return ret; + return 0; + } + /* * Multicast receive code */ @@ -812,11 +826,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); /* Unicast */ - - /* - * check socket cache ... must talk to Alan about his plans - * for sock caches... i'll skip this for now. 
- */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { int ret; -- cgit v1.2.3 From 37ef38f3f83891a2f413fb872bae7d0f9bb95b27 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 27 Jul 2017 16:15:52 -0500 Subject: tty: pl011: fix initialization order of QDF2400 E44 The work-around for Qualcomm Technologies QDF2400 Erratum 44 hinges on a global variable defined in the pl011 driver. The ACPI SPCR parsing code determines whether the work-around is needed, and if so, it changes the console name from "pl011" to "qdf2400_e44". The expectation is that the pl011 driver will implement the work-around when it sees the console name. The global variable qdf2400_e44_present is set when that happens. The problem is that work-around needs to be enabled when the pl011 driver probes, not when the console name is queried. However, sbsa_probe() is called before pl011_console_match(). The work-around appeared to work previously because the default console on QDF2400 platforms was always ttyAMA1. The first time sbsa_probe() is called (for ttyAMA0), qdf2400_e44_present is still false. Then pl011_console_match() is called, and it sets qdf2400_e44_present to true. All subsequent calls to sbsa_probe() enable the work-around. The solution is to move the global variable into spcr.c and let the pl011 driver query it during probe time. This works because all QDF2400 platforms require SPCR, so parse_spcr() will always be called. pl011_console_match still checks for the "qdf2400_e44" console name, but it doesn't do anything else special. Fixes: 5a0722b898f8 ("tty: pl011: use "qdf2400_e44" as the earlycon name for QDF2400 E44") Tested-by: Jeffrey Hugo Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/spcr.c | 36 ++++++++++++++++++++++++++++++++++-- drivers/tty/serial/amba-pl011.c | 37 +++++++++++++++++++------------------ include/linux/acpi.h | 1 + 3 files changed, 54 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index 4ac3e06b41d8..98aa8c808a33 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -16,6 +16,16 @@ #include #include +/* + * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as + * occasionally getting stuck as 1. To avoid the potential for a hang, check + * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART + * implementations, so only do so if an affected platform is detected in + * parse_spcr(). + */ +bool qdf2400_e44_present; +EXPORT_SYMBOL(qdf2400_e44_present); + /* * Some Qualcomm Datacenter Technologies SoCs have a defective UART BUSY bit. * Detect them by examining the OEM fields in the SPCR header, similiar to PCI @@ -147,8 +157,30 @@ int __init parse_spcr(bool earlycon) goto done; } - if (qdf2400_erratum_44_present(&table->header)) - uart = "qdf2400_e44"; + /* + * If the E44 erratum is required, then we need to tell the pl011 + * driver to implement the work-around. + * + * The global variable is used by the probe function when it + * creates the UARTs, whether or not they're used as a console. + * + * If the user specifies "traditional" earlycon, the qdf2400_e44 + * console name matches the EARLYCON_DECLARE() statement, and + * SPCR is not used. Parameter "earlycon" is false. + * + * If the user specifies "SPCR" earlycon, then we need to update + * the console name so that it also says "qdf2400_e44". Parameter + * "earlycon" is true. + * + * For consistency, if we change the console name, then we do it + * for everyone, not just earlycon. 
+ */ + if (qdf2400_erratum_44_present(&table->header)) { + qdf2400_e44_present = true; + if (earlycon) + uart = "qdf2400_e44"; + } + if (xgene_8250_erratum_present(table)) iotype = "mmio32"; diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8a857bb34fbb..1888d168a41c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -142,15 +142,7 @@ static struct vendor_data vendor_sbsa = { .fixed_options = true, }; -/* - * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as - * occasionally getting stuck as 1. To avoid the potential for a hang, check - * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART - * implementations, so only do so if an affected platform is detected in - * parse_spcr(). - */ -static bool qdf2400_e44_present = false; - +#ifdef CONFIG_ACPI_SPCR_TABLE static struct vendor_data vendor_qdt_qdf2400_e44 = { .reg_offset = pl011_std_offsets, .fr_busy = UART011_FR_TXFE, @@ -165,6 +157,7 @@ static struct vendor_data vendor_qdt_qdf2400_e44 = { .always_enabled = true, .fixed_options = true, }; +#endif static u16 pl011_st_offsets[REG_ARRAY_SIZE] = { [REG_DR] = UART01x_DR, @@ -2375,12 +2368,14 @@ static int __init pl011_console_match(struct console *co, char *name, int idx, resource_size_t addr; int i; - if (strcmp(name, "qdf2400_e44") == 0) { - pr_info_once("UART: Working around QDF2400 SoC erratum 44"); - qdf2400_e44_present = true; - } else if (strcmp(name, "pl011") != 0) { + /* + * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum + * have a distinct console name, so make sure we check for that. + * The actual implementation of the erratum occurs in the probe + * function. + */ + if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0)) return -ENODEV; - } if (uart_parse_earlycon(options, &iotype, &addr, &options)) return -ENODEV; @@ -2734,11 +2729,17 @@ static int sbsa_uart_probe(struct platform_device *pdev) } uap->port.irq = ret; - uap->reg_offset = vendor_sbsa.reg_offset; - uap->vendor = qdf2400_e44_present ? - &vendor_qdt_qdf2400_e44 : &vendor_sbsa; +#ifdef CONFIG_ACPI_SPCR_TABLE + if (qdf2400_e44_present) { + dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n"); + uap->vendor = &vendor_qdt_qdf2400_e44; + } else +#endif + uap->vendor = &vendor_sbsa; + + uap->reg_offset = uap->vendor->reg_offset; uap->fifosize = 32; - uap->port.iotype = vendor_sbsa.access_32b ? UPIO_MEM32 : UPIO_MEM; + uap->port.iotype = uap->vendor->access_32b ? UPIO_MEM32 : UPIO_MEM; uap->port.ops = &sbsa_uart_pops; uap->fixed_baud = baudrate; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c749eef1daa1..27b4b6615263 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1209,6 +1209,7 @@ static inline bool acpi_has_watchdog(void) { return false; } #endif #ifdef CONFIG_ACPI_SPCR_TABLE +extern bool qdf2400_e44_present; int parse_spcr(bool earlycon); #else static inline int parse_spcr(bool earlycon) { return 0; } -- cgit v1.2.3 From a627b0a7c15ee4d2c87a86d5be5c8167382e8d0d Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Sun, 30 Jul 2017 22:30:11 -0400 Subject: ext4: remove unused metadata accounting variables Two variables in ext4_inode_info, i_reserved_meta_blocks and i_allocated_meta_blocks, are unused. Removing them saves a little memory per in-memory inode and cleans up clutter in several tracepoints. Adjust tracepoint output from ext4_alloc_da_blocks() for consistency and fix a typo and whitespace near these changes. 
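For readers unfamiliar with the tracepoint macros being trimmed here: every TRACE_EVENT() keeps three sections in sync, since a field is declared in TP_STRUCT__entry(), filled in TP_fast_assign(), and printed in TP_printk(), which is why removing one variable touches each tracepoint in three places. A minimal sketch of the pattern, with a hypothetical event name, not code from this patch:

	TRACE_EVENT(ext4_sample_event,
		TP_PROTO(struct inode *inode),
		TP_ARGS(inode),

		TP_STRUCT__entry(
			__field(	dev_t,	dev			)
			__field(	int,	reserved_data_blocks	)
		),

		TP_fast_assign(
			__entry->dev = inode->i_sb->s_dev;
			__entry->reserved_data_blocks =
					EXT4_I(inode)->i_reserved_data_blocks;
		),

		TP_printk("dev %d,%d reserved_data_blocks %d",
			  MAJOR(__entry->dev), MINOR(__entry->dev),
			  __entry->reserved_data_blocks)
	);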
Signed-off-by: Eric Whitney Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/ext4.h | 6 ++---- fs/ext4/super.c | 2 -- include/trace/events/ext4.h | 35 ++++++++--------------------------- 3 files changed, 10 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9ebde0cd632e..dcbcd9d444d1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -961,7 +961,7 @@ struct ext4_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ @@ -1049,10 +1049,8 @@ struct ext4_inode_info { ext4_group_t i_last_alloc_group; /* allocation reservation info for delalloc */ - /* In case of bigalloc, these refer to clusters rather than blocks */ + /* In case of bigalloc, this refer to clusters rather than blocks */ unsigned int i_reserved_data_blocks; - unsigned int i_reserved_meta_blocks; - unsigned int i_allocated_meta_blocks; ext4_lblk_t i_da_metadata_calc_last_lblock; int i_da_metadata_calc_len; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0886fe82e9c4..d61a70e2193a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -978,8 +978,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_es_shk_nr = 0; ei->i_es_shrink_lblk = 0; ei->i_reserved_data_blocks = 0; - ei->i_reserved_meta_blocks = 0; - ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index dfae175ddebc..9c3bc3883d2f 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -937,21 +937,19 @@ TRACE_EVENT(ext4_alloc_da_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( unsigned int, data_blocks ) - __field( unsigned int, meta_blocks ) + __field( unsigned int, data_blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; ), - TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u", + TP_printk("dev %d,%d ino %lu reserved_data_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->data_blocks, __entry->meta_blocks) + __entry->data_blocks) ); TRACE_EVENT(ext4_mballoc_alloc, @@ -1153,8 +1151,6 @@ TRACE_EVENT(ext4_da_update_reserve_space, __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) - __field( int, allocated_meta_blocks ) __field( int, quota_claim ) __field( __u16, mode ) ), @@ -1166,22 +1162,16 @@ TRACE_EVENT(ext4_da_update_reserve_space, __entry->used_blocks = used_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = - EXT4_I(inode)->i_reserved_meta_blocks; - __entry->allocated_meta_blocks = - EXT4_I(inode)->i_allocated_meta_blocks; __entry->quota_claim = quota_claim; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " - "reserved_data_blocks %d reserved_meta_blocks %d " - "allocated_meta_blocks %d quota_claim %d", + 
"reserved_data_blocks %d quota_claim %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, - __entry->reserved_meta_blocks, __entry->allocated_meta_blocks, __entry->quota_claim) ); @@ -1195,7 +1185,6 @@ TRACE_EVENT(ext4_da_reserve_space, __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) __field( __u16, mode ) ), @@ -1204,17 +1193,15 @@ TRACE_EVENT(ext4_da_reserve_space, __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu " - "reserved_data_blocks %d reserved_meta_blocks %d", + "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, - __entry->reserved_data_blocks, - __entry->reserved_meta_blocks) + __entry->reserved_data_blocks) ); TRACE_EVENT(ext4_da_release_space, @@ -1228,8 +1215,6 @@ TRACE_EVENT(ext4_da_release_space, __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) - __field( int, allocated_meta_blocks ) __field( __u16, mode ) ), @@ -1239,19 +1224,15 @@ TRACE_EVENT(ext4_da_release_space, __entry->i_blocks = inode->i_blocks; __entry->freed_blocks = freed_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; - __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d " - "reserved_data_blocks %d reserved_meta_blocks %d " - "allocated_meta_blocks %d", + "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, - __entry->freed_blocks, __entry->reserved_data_blocks, - __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) + __entry->freed_blocks, __entry->reserved_data_blocks) ); DECLARE_EVENT_CLASS(ext4__bitmap_load, -- cgit v1.2.3 From 99f828436788f0155798145853607ca8f0e6de93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jul 2017 22:29:51 +0100 Subject: dma-buf/sync_file: Allow multiple sync_files to wrap a single dma-fence Up until recently sync_file were create to export a single dma-fence to userspace, and so we could canabalise a bit insie dma-fence to mark whether or not we had enable polling for the sync_file itself. However, with the advent of syncobj, we do allow userspace to create multiple sync_files for a single dma-fence. (Similarly, that the sw-sync validation framework also started returning multiple sync-files wrapping a single dma-fence for a syncpt also triggering the problem.) This patch reverts my suggestion in commit e24165537312 ("dma-buf/sync_file: only enable fence signalling on poll()") to use a single bit in the shared dma-fence and restores the sync_file->flags for tracking the bits individually. 
Reported-by: Gustavo Padovan Fixes: f1e8c67123cf ("dma-buf/sw-sync: Use an rbtree to sort fences in the timeline") Fixes: e9083420bbac ("drm: introduce sync objects (v4)") Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Cc: dri-devel@lists.freedesktop.org Cc: # v4.13-rc1+ Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170728212951.7818-1-chris@chris-wilson.co.uk (cherry picked from commit db1fc97ca0c0d3fdeeadf314d99a26188438940a) --- drivers/dma-buf/sync_file.c | 5 +++-- include/linux/sync_file.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index d7e219d2669d..66fb40d0ebdb 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -304,7 +304,7 @@ static int sync_file_release(struct inode *inode, struct file *file) { struct sync_file *sync_file = file->private_data; - if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) + if (test_bit(POLL_ENABLED, &sync_file->flags)) dma_fence_remove_callback(sync_file->fence, &sync_file->cb); dma_fence_put(sync_file->fence); kfree(sync_file); @@ -318,7 +318,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait) poll_wait(file, &sync_file->wq, wait); - if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) { + if (list_empty(&sync_file->cb.node) && + !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) { if (dma_fence_add_callback(sync_file->fence, &sync_file->cb, fence_check_cb_func) < 0) wake_up_all(&sync_file->wq); diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 5726107963b2..0ad87c434ae6 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -43,12 +43,13 @@ struct sync_file { #endif wait_queue_head_t wq; + unsigned long flags; struct dma_fence *fence; struct dma_fence_cb cb; }; -#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS +#define POLL_ENABLED 0 struct sync_file *sync_file_create(struct dma_fence *fence); struct dma_fence *sync_file_get_fence(int fd); -- cgit v1.2.3 From 9c80034921d1ece5c0e3241ba1645bce32387684 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 29 Jul 2017 14:11:43 +0200 Subject: i2c: rephrase explanation of I2C_CLASS_DEPRECATED Hopefully making clear that it is not needed for new drivers. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 00ca5b86a753..d501d3956f13 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -689,7 +689,8 @@ i2c_unlock_adapter(struct i2c_adapter *adapter) #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ -#define I2C_CLASS_DEPRECATED (1<<8) /* Warn users that adapter will stop using classes */ +/* Warn users that the adapter doesn't support classes anymore */ +#define I2C_CLASS_DEPRECATED (1<<8) /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -- cgit v1.2.3 From cb891fa6a1d5f52c5f5c07b6f7f4c6d65ea55fc0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 31 Jul 2017 16:52:36 +0200 Subject: udp6: fix jumbogram reception Since commit 67a51780aebb ("ipv6: udp: leverage scratch area helpers") udp6_recvmsg() read the skb len from the scratch area, to avoid a cache miss. 
But the UDP6 rx path supports RFC 2675 UDPv6 jumbograms, and their length exceeds the 16 bits available in the scratch area. As a side effect, the length returned by recvmsg() is truncated modulo 2^16, i.e. it becomes len % (1<<16). This commit addresses the issue by allocating one more bit in the IP6CB flags field and setting it for incoming jumbograms. That field is still in the first cacheline, so at recvmsg() time we can check it and fall back to accessing skb->len if required, without a measurable overhead. Fixes: 67a51780aebb ("ipv6: udp: leverage scratch area helpers") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/ipv6.h | 6 ++++++ net/ipv6/exthdrs.c | 1 + net/ipv6/udp.c | 11 ++++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e1b442996f81..474d6bbc158c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -128,6 +128,7 @@ struct inet6_skb_parm { #define IP6SKB_FRAGMENTED 16 #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 +#define IP6SKB_JUMBOGRAM 128 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -152,6 +153,11 @@ static inline int inet6_iif(const struct sk_buff *skb) return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } +static inline bool inet6_is_jumbogram(const struct sk_buff *skb) +{ + return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); +} + /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4996d734f1d2..3cec529c6113 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -756,6 +756,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; + IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM; return true; drop: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 98fe4560e24c..578142b7ca3e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -328,6 +328,15 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be EXPORT_SYMBOL_GPL(udp6_lib_lookup); #endif +/* do not use the scratch area len for jumbogram: their length execeeds the + * scratch area space; note that the IP6CB flags is still in the first + * cacheline, so checking for jumbograms is cheap + */ +static int udp6_skb_len(struct sk_buff *skb) +{ + return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb); +} + /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -358,7 +367,7 @@ try_again: if (!skb) return err; - ulen = udp_skb_len(skb); + ulen = udp6_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; -- cgit v1.2.3 From e17e8969f5c59a10083af5e260bdad6026872203 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Jul 2017 16:43:49 +0200 Subject: libceph: fallback for when there isn't a pool-specific choose_arg There is now a fallback to a choose_arg index of -1 if there isn't a pool-specific choose_arg set. If you create a per-pool weight-set, that works for that pool. Otherwise we try the compat/default one. If that doesn't exist either, then we use the normal CRUSH weights.
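The resulting lookup order is a two-level fallback; condensed from the patch below into one sketch (pi->id stands in here for the pool-specific index):

	struct crush_choose_arg_map *arg_map;

	arg_map = lookup_choose_arg_map(&map->crush->choose_args, pi->id);
	if (!arg_map)	/* no per-pool weight-set: try the compat/default one */
		arg_map = lookup_choose_arg_map(&map->crush->choose_args,
						CEPH_DEFAULT_CHOOSE_ARGS);
	/* if arg_map is still NULL, crush_do_rule() uses the normal weights */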
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 2 +- net/ceph/osdmap.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 92e165d417a6..07eed95e10c7 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -193,7 +193,7 @@ struct crush_choose_arg { struct crush_choose_arg_map { #ifdef __KERNEL__ struct rb_node node; - u64 choose_args_index; + s64 choose_args_index; #endif struct crush_choose_arg *args; /*!< replacement for each bucket in the crushmap */ diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 64ae9f89773a..eb57a06373ca 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -2301,10 +2301,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi, } } +/* + * Magic value used for a "default" fallback choose_args, used if the + * crush_choose_arg_map passed to do_crush() does not exist. If this + * also doesn't exist, fall back to canonical weights. + */ +#define CEPH_DEFAULT_CHOOSE_ARGS -1 + static int do_crush(struct ceph_osdmap *map, int ruleno, int x, int *result, int result_max, const __u32 *weight, int weight_max, - u64 choose_args_index) + s64 choose_args_index) { struct crush_choose_arg_map *arg_map; int r; @@ -2313,6 +2320,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, arg_map = lookup_choose_arg_map(&map->crush->choose_args, choose_args_index); + if (!arg_map) + arg_map = lookup_choose_arg_map(&map->crush->choose_args, + CEPH_DEFAULT_CHOOSE_ARGS); mutex_lock(&map->crush_workspace_mutex); r = crush_do_rule(map->crush, ruleno, x, result, result_max, -- cgit v1.2.3 From ae78dd8139ce93a528beb7f3914531b7a7be9e30 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 27 Jul 2017 17:59:14 +0200 Subject: libceph: make RECOVERY_DELETES feature create a new interval This is needed so that the OSDs can regenerate the missing set at the start of a new interval where support for recovery deletes changed. 
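In outline, the change snapshots one new osdmap flag and treats a flip of that flag like any other interval-changing event; the hunks below reduce to roughly this (a sketch showing only the new flag):

	bool recovery_deletes = ceph_osdmap_flag(osdc,
						 CEPH_OSDMAP_RECOVERY_DELETES);

	if (t->recovery_deletes != recovery_deletes)	/* flag flipped */
		force_resend = true;			/* new interval */

	t->recovery_deletes = recovery_deletes;	/* remember for next check */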
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/osd_client.h | 1 + include/linux/ceph/osdmap.h | 2 ++ include/linux/ceph/rados.h | 4 ++++ net/ceph/osd_client.c | 5 +++++ net/ceph/osdmap.c | 5 ++++- 5 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c6d96a5f46fd..adf670ecaf94 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -148,6 +148,7 @@ struct ceph_osd_request_target { int size; int min_size; bool sort_bitwise; + bool recovery_deletes; unsigned int flags; /* CEPH_OSD_FLAG_* */ bool paused; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index a0996cb9faed..af3444a5bfdd 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -272,6 +272,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, u32 new_pg_num, bool old_sort_bitwise, bool new_sort_bitwise, + bool old_recovery_deletes, + bool new_recovery_deletes, const struct ceph_pg *pgid); bool ceph_osds_changed(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 385db08bb8b2..b8281feda9c7 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -158,6 +158,10 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */ #define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */ #define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */ +#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */ +#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */ +#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */ +#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */ /* * The error code to return when an OSD can't handle a write diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b5f016cb9569..dcfbdd74dfd1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, bool legacy_change; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); + bool recovery_deletes = ceph_osdmap_flag(osdc, + CEPH_OSDMAP_RECOVERY_DELETES); enum calc_target_result ct_res; int ret; @@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, pi->pg_num, t->sort_bitwise, sort_bitwise, + t->recovery_deletes, + recovery_deletes, &last_pgid)) force_resend = true; @@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, t->pg_num = pi->pg_num; t->pg_num_mask = pi->pg_num_mask; t->sort_bitwise = sort_bitwise; + t->recovery_deletes = recovery_deletes; t->osd = acting.primary; } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 0bec71fa712e..f358d0bfa76b 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -2082,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, u32 new_pg_num, bool old_sort_bitwise, bool new_sort_bitwise, + bool old_recovery_deletes, + bool new_recovery_deletes, const struct ceph_pg *pgid) { return !osds_equal(old_acting, new_acting) || @@ -2089,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, old_size != new_size || old_min_size != 
new_min_size || ceph_pg_is_split(pgid, old_pg_num, new_pg_num) || - old_sort_bitwise != new_sort_bitwise; + old_sort_bitwise != new_sort_bitwise || + old_recovery_deletes != new_recovery_deletes; } static int calc_pg_rank(int osd, const struct ceph_osds *acting) -- cgit v1.2.3 From fd40559c8657418385e42f797e0b04bfc0add748 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 16:02:47 -0400 Subject: NFSv4: Fix EXCHANGE_ID corrupt verifier issue The verifier is allocated on the stack, but the EXCHANGE_ID RPC call was changed to be asynchronous by commit 8d89bd70bc939. If we interrupt the call to rpc_wait_for_completion_task(), we can therefore end up transmitting random stack contents in lieu of the verifier. Fixes: 8d89bd70bc939 ("NFS setup async exchange_id") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 11 ++++------- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 93c1d7352238..c458a1e28b91 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7460,7 +7460,7 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data) cdata->res.server_scope = NULL; } /* Save the EXCHANGE_ID verifier session trunk tests */ - memcpy(clp->cl_confirm.data, cdata->args.verifier->data, + memcpy(clp->cl_confirm.data, cdata->args.verifier.data, sizeof(clp->cl_confirm.data)); } out: @@ -7497,7 +7497,6 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, u32 sp4_how, struct rpc_xprt *xprt) { - nfs4_verifier verifier; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], .rpc_cred = cred, @@ -7521,8 +7520,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, return -ENOMEM; } - if (!xprt) - nfs4_init_boot_verifier(clp, &verifier); + nfs4_init_boot_verifier(clp, &calldata->args.verifier); status = nfs4_init_uniform_client_string(clp); if (status) @@ -7563,9 +7561,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.rpc_xprt = xprt; task_setup_data.flags = RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC; - calldata->args.verifier = &clp->cl_confirm; - } else { - calldata->args.verifier = &verifier; + memcpy(calldata->args.verifier.data, clp->cl_confirm.data, + sizeof(calldata->args.verifier.data)); } calldata->args.client = clp; #ifdef CONFIG_NFS_V4_1_MIGRATION diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fa3eb361d4f8..37c8af003275 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1785,7 +1785,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, int len = 0; encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); - encode_nfs4_verifier(xdr, args->verifier); + encode_nfs4_verifier(xdr, &args->verifier); encode_string(xdr, strlen(args->client->cl_owner_id), args->client->cl_owner_id); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ca3bcc4ed4e5..62cbcb842f99 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1235,7 +1235,7 @@ struct nfs41_state_protection { struct nfs41_exchange_id_args { struct nfs_client *client; - nfs4_verifier *verifier; + nfs4_verifier verifier; u32 flags; struct nfs41_state_protection state_protect; }; -- cgit v1.2.3 From d9535cb7b7603aeb549c697ecdf92024e4d0a650 Mon Sep 17 00:00:00
2001 From: Grygorii Strashko Date: Fri, 28 Jul 2017 17:30:02 -0500 Subject: ptp: introduce ptp auxiliary worker Many PTP drivers need to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handling delayed timestamps for RX/TX network packets. In most cases, such work is implemented using workqueues. Unfortunately, kernel workqueues might introduce significant delays in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune their execution policy and priority manually. Hence, a kthread_worker can be used instead of workqueues, as it creates a separate named kthread for each worker, and its execution policy and priority can be configured using the chrt tool. This problem was reported for two drivers, TI CPSW CPTS and dp83640, so instead of modifying each of these drivers it was proposed to add a PTP auxiliary worker to the PHC subsystem. The patch adds a PTP auxiliary worker to the PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in the ptp_clock_info structure, which a driver should assign if it needs to perform asynchronous or periodic work. The driver should return the delay until the next auxiliary work scheduling time (>=0) or a negative value if further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay), which allows scheduling the PTP auxiliary work. The name of the kthread_worker thread corresponds to the PTP PHC device name, "ptp%d". Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 42 ++++++++++++++++++++++++++++++++++++++++ drivers/ptp/ptp_private.h | 3 +++ include/linux/ptp_clock_kernel.h | 20 +++++++++++++++++++ 3 files changed, 65 insertions(+) (limited to 'include') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index b77435783ef3..7eacc1c4b3b1 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "ptp_private.h" @@ -184,6 +185,19 @@ static void delete_ptp_clock(struct posix_clock *pc) kfree(ptp); } +static void ptp_aux_kworker(struct kthread_work *work) +{ + struct ptp_clock *ptp = container_of(work, struct ptp_clock, + aux_work.work); + struct ptp_clock_info *info = ptp->info; + long delay; + + delay = info->do_aux_work(info); + + if (delay >= 0) + kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay); +} + /* public interface */ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, @@ -217,6 +231,20 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, mutex_init(&ptp->pincfg_mux); init_waitqueue_head(&ptp->tsev_wq); + if (ptp->info->do_aux_work) { + char *worker_name = kasprintf(GFP_KERNEL, "ptp%d", ptp->index); + + kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); + ptp->kworker = kthread_create_worker(0, worker_name ?
+ worker_name : info->name); + kfree(worker_name); + if (IS_ERR(ptp->kworker)) { + err = PTR_ERR(ptp->kworker); + pr_err("failed to create ptp aux_worker %d\n", err); + goto kworker_err; + } + } + err = ptp_populate_pin_groups(ptp); if (err) goto no_pin_groups; @@ -259,6 +287,9 @@ no_pps: no_device: ptp_cleanup_pin_groups(ptp); no_pin_groups: + if (ptp->kworker) + kthread_destroy_worker(ptp->kworker); +kworker_err: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); ida_simple_remove(&ptp_clocks_map, index); @@ -274,6 +305,11 @@ int ptp_clock_unregister(struct ptp_clock *ptp) ptp->defunct = 1; wake_up_interruptible(&ptp->tsev_wq); + if (ptp->kworker) { + kthread_cancel_delayed_work_sync(&ptp->aux_work); + kthread_destroy_worker(ptp->kworker); + } + /* Release the clock's resources. */ if (ptp->pps_source) pps_unregister_source(ptp->pps_source); @@ -339,6 +375,12 @@ int ptp_find_pin(struct ptp_clock *ptp, } EXPORT_SYMBOL(ptp_find_pin); +int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) +{ + return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay); +} +EXPORT_SYMBOL(ptp_schedule_worker); + /* module operations */ static void __exit ptp_exit(void) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index d95888974d0c..b86f1bfecd6f 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -56,6 +57,8 @@ struct ptp_clock { struct attribute_group pin_attr_group; /* 1st entry is a pointer to the real group, 2nd is NULL terminator */ const struct attribute_group *pin_attr_groups[2]; + struct kthread_worker *kworker; + struct kthread_delayed_work aux_work; }; /* diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index a026bfd089db..51349d124ee5 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -99,6 +99,11 @@ struct system_device_crosststamp; * parameter func: the desired function to use. * parameter chan: the function channel index to use. * + * @do_aux_work: Request driver to perform auxiliary (periodic) operations + * Driver should return delay of the next auxiliary work scheduling + * time (>=0) or negative value in case further scheduling + * is not required. + * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). * @@ -126,6 +131,7 @@ struct ptp_clock_info { struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); + long (*do_aux_work)(struct ptp_clock_info *ptp); }; struct ptp_clock; @@ -211,6 +217,16 @@ extern int ptp_clock_index(struct ptp_clock *ptp); int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan); +/** + * ptp_schedule_worker() - schedule ptp auxiliary work + * + * @ptp: The clock obtained from ptp_clock_register(). + * @delay: number of jiffies to wait before queuing + * See kthread_queue_delayed_work() for more info.
+ */ + +int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -225,6 +241,10 @@ static inline int ptp_clock_index(struct ptp_clock *ptp) static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } +static inline int ptp_schedule_worker(struct ptp_clock *ptp, + unsigned long delay) +{ return -EOPNOTSUPP; } + #endif #endif -- cgit v1.2.3 From cdbc78ba702650b02b9e3e957dbaa725423bdf1e Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 27 Jul 2017 10:42:35 -0600 Subject: drm/msm: Remove __user from __u64 data types __user should be used to identify user pointers and not __u64 variables containing pointers. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 26c54f6d595d..ad4eb2863e70 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -171,7 +171,7 @@ struct drm_msm_gem_submit_cmd { __u32 size; /* in, cmdstream size */ __u32 pad; __u32 nr_relocs; /* in, number of submit_reloc's */ - __u64 __user relocs; /* in, ptr to array of submit_reloc's */ + __u64 relocs; /* in, ptr to array of submit_reloc's */ }; /* Each buffer referenced elsewhere in the cmdstream submit (ie. the @@ -215,8 +215,8 @@ struct drm_msm_gem_submit { __u32 fence; /* out */ __u32 nr_bos; /* in, number of submit_bo's */ __u32 nr_cmds; /* in, number of submit_cmd's */ - __u64 __user bos; /* in, ptr to array of submit_bo's */ - __u64 __user cmds; /* in, ptr to array of submit_cmd's */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 cmds; /* in, ptr to array of submit_cmd's */ __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ }; -- cgit v1.2.3 From a477b9cd37aa81a490dfa3265b7ff4f2c5a92463 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Aug 2017 20:11:02 -0500 Subject: PCI: Add pci_reset_function_locked() The implementation of PCI workarounds may require that the device is reset from its probe function. This implies that the PCI device lock is already held, and makes calling pci_reset_function() impossible (since it will itself try to take that lock). Add pci_reset_function_locked(), which is the equivalent of pci_reset_function(), except that it requires the PCI device lock to be already held by the caller. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier [bhelgaas: folded in fix for conflict with 52354b9d1f46 ("PCI: Remove __pci_dev_reset() and pci_dev_reset()")] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # 4.11: 52354b9d1f46: PCI: Remove __pci_dev_reset() and pci_dev_reset() Cc: stable@vger.kernel.org # 4.11 --- drivers/pci/pci.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af0cc3456dc1..b4b7eab29400 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4259,6 +4259,41 @@ int pci_reset_function(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_reset_function); +/** + * pci_reset_function_locked - quiesce and reset a PCI device function + * @dev: PCI device to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. 
The PCI device must be responsive + * to PCI config space in order to use this function. + * + * This function does not just reset the PCI portion of a device, but + * clears all the state associated with the device. This function differs + * from __pci_reset_function() in that it saves and restores device state + * over the reset. It also differs from pci_reset_function() in that it + * requires the PCI device lock to be held. + * + * Returns 0 if the device function was successfully reset or negative if the + * device doesn't support resetting a single function. + */ +int pci_reset_function_locked(struct pci_dev *dev) +{ + int rc; + + rc = pci_probe_reset_function(dev); + if (rc) + return rc; + + pci_dev_save_and_disable(dev); + + rc = __pci_reset_function_locked(dev); + + pci_dev_restore(dev); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_reset_function_locked); + /** * pci_try_reset_function - quiesce and reset a PCI device function * @dev: PCI device to reset diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..a75c13673852 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1067,6 +1067,7 @@ void pcie_flr(struct pci_dev *dev); int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); +int pci_reset_function_locked(struct pci_dev *dev); int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); int pci_reset_slot(struct pci_slot *slot); -- cgit v1.2.3 From 6d29231000bbe0fb9e4893a9c68151ffdd3b5469 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 31 Jul 2017 10:31:27 +0200 Subject: mtd: nand: Declare tBERS, tR and tPROG as u64 to avoid integer overflow All timings in nand_sdr_timings are expressed in picoseconds but some of them may not fit in a u32.
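A quick worked example shows the overflow (values from the ONFI conversion below; 65535 is the largest le16 timing):

	u16 t_bers_us = 65535;			/* max ONFI value, in us */
	u64 t_bers_ps = 1000000ULL * t_bers_us;	/* 65,535,000,000 ps */
	/* U32_MAX is 4,294,967,295, so the result cannot fit in a u32,
	 * and the constant must be 1000000ULL so the multiplication is
	 * done in 64-bit arithmetic instead of wrapping before the
	 * widening store. */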
Signed-off-by: Boris Brezillon Fixes: 204e7ecd47e2 ("mtd: nand: Add a few more timings to nand_sdr_timings") Reported-by: Alexander Dahl Cc: Reviewed-by: Alexander Dahl Tested-by: Alexander Dahl Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_timings.c | 6 +++--- include/linux/mtd/nand.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c index f06312df3669..7e36d7d13c26 100644 --- a/drivers/mtd/nand/nand_timings.c +++ b/drivers/mtd/nand/nand_timings.c @@ -311,9 +311,9 @@ int onfi_init_data_interface(struct nand_chip *chip, struct nand_sdr_timings *timings = &iface->timings.sdr; /* microseconds -> picoseconds */ - timings->tPROG_max = 1000000UL * le16_to_cpu(params->t_prog); - timings->tBERS_max = 1000000UL * le16_to_cpu(params->t_bers); - timings->tR_max = 1000000UL * le16_to_cpu(params->t_r); + timings->tPROG_max = 1000000ULL * le16_to_cpu(params->t_prog); + timings->tBERS_max = 1000000ULL * le16_to_cpu(params->t_bers); + timings->tR_max = 1000000ULL * le16_to_cpu(params->t_r); /* nanoseconds -> picoseconds */ timings->tCCS_min = 1000UL * le16_to_cpu(params->t_ccs); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 892148c448cc..5216d2eb2289 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -681,10 +681,10 @@ struct nand_buffers { * @tWW_min: WP# transition to WE# low */ struct nand_sdr_timings { - u32 tBERS_max; + u64 tBERS_max; u32 tCCS_min; - u32 tPROG_max; - u32 tR_max; + u64 tPROG_max; + u64 tR_max; u32 tALH_min; u32 tADL_min; u32 tALS_min; -- cgit v1.2.3 From c994f778bb1cca8ebe7a4e528cefec233e93b5cc Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Tue, 1 Aug 2017 16:43:43 +0300 Subject: net/mlx4_en: Fix wrong indication of Wake-on-LAN (WoL) support Currently when WoL is supported but disabled, ethtool reports: "Supports Wake-on: d". Fix the indication of WoL support, so that the indication remains "g" all the time if the NIC supports WoL. Tested: As expected, when NIC supports WoL- ethtool reports: Supports Wake-on: g Wake-on: d when NIC doesn't support WoL- ethtool reports: Supports Wake-on: d Wake-on: d Fixes: 14c07b1358ed ("mlx4: Wake on LAN support") Signed-off-by: Inbar Karmy Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 15 ++++++++------- drivers/net/ethernet/mellanox/mlx4/fw.c | 4 ++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ include/linux/mlx4/device.h | 1 + 5 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c751a1d434ad..3d4e4a5d00d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -223,6 +223,7 @@ static void mlx4_en_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_caps *caps = &priv->mdev->dev->caps; int err = 0; u64 config = 0; u64 mask; @@ -235,24 +236,24 @@ static void mlx4_en_get_wol(struct net_device *netdev, mask = (priv->port == 1) ?
MLX4_DEV_CAP_FLAG_WOL_PORT1 : MLX4_DEV_CAP_FLAG_WOL_PORT2; - if (!(priv->mdev->dev->caps.flags & mask)) { + if (!(caps->flags & mask)) { wol->supported = 0; wol->wolopts = 0; return; } + if (caps->wol_port[priv->port]) + wol->supported = WAKE_MAGIC; + else + wol->supported = 0; + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL information\n"); return; } - if (config & MLX4_EN_WOL_MAGIC) - wol->supported = WAKE_MAGIC; - else - wol->supported = 0; - - if (config & MLX4_EN_WOL_ENABLED) + if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC)) wol->wolopts = WAKE_MAGIC; else wol->wolopts = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 37e84a59e751..c165f16623a9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -764,6 +764,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 +#define QUERY_DEV_CAP_WOL_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 @@ -920,6 +921,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; + MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET); + dev_cap->wol_port[1] = !!(field & 0x20); + dev_cap->wol_port[2] = !!(field & 0x40); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 5343a0599253..b52ba01aa486 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -129,6 +129,7 @@ struct mlx4_dev_cap { u32 dmfs_high_rate_qpn_range; struct mlx4_rate_limit_caps rl_caps; struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; + bool wol_port[MLX4_MAX_PORTS + 1]; }; struct mlx4_func_cap { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a27c9c13a36e..09b9bc17bce9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -424,6 +424,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + dev->caps.wol_port[1] = dev_cap->wol_port[1]; + dev->caps.wol_port[2] = dev_cap->wol_port[2]; /* Save uar page shift */ if (!mlx4_is_slave(dev)) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index aad5d81dfb44..b54517c05e9a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -620,6 +620,7 @@ struct mlx4_caps { u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; u32 vf_caps; + bool wol_port[MLX4_MAX_PORTS + 1]; struct mlx4_rate_limit_caps rl_caps; }; -- cgit v1.2.3 From 3898da947bbaf9e7fd5816e825978d360028bba2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 2 Aug 2017 17:55:54 +0200 Subject: KVM: avoid using rcu_dereference_protected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit During teardown, accesses to memslots and buses are using rcu_dereference_protected with an always-true condition because these accesses are done outside the usual mutexes. This is because the last reference is gone and there cannot be any concurrent modifications, but rcu_dereference_protected is ugly and unobvious. Instead, check the refcount in kvm_get_bus and __kvm_memslots. Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 6 ++++-- virt/kvm/kvm_main.c | 11 ++++------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 890b706d1943..21a6fd6c44af 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -477,7 +477,8 @@ struct kvm { static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock)); + lockdep_is_held(&kvm->slots_lock) || + !refcount_read(&kvm->users_count)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) @@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock)); + lockdep_is_held(&kvm->slots_lock) || + !refcount_read(&kvm->users_count)); } static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f3f74271f1a9..15252d723b54 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -717,10 +717,9 @@ out_err_no_srcu: hardware_disable_all(); out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) - kfree(rcu_access_pointer(kvm->buses[i])); + kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, - rcu_dereference_protected(kvm->memslots[i], 1)); + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); @@ -754,9 +753,8 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - struct kvm_io_bus *bus; + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); - bus = rcu_dereference_protected(kvm->buses[i], 1); if (bus) kvm_io_bus_destroy(bus); kvm->buses[i] = NULL; @@ -770,8 +768,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, - rcu_dereference_protected(kvm->memslots[i], 1)); + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); -- cgit v1.2.3 From 3ea277194daaeaa84ce75180ec7c7a2075027a68 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 2 Aug 2017 13:31:52 -0700 Subject: mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries Nadav Amit identified a theoretical race between page reclaim and mprotect due to TLB flushes being batched outside of the PTL being held. He described the race as follows:

	CPU0				CPU1
	----				----
					user accesses memory using RW PTE
					[PTE now cached in TLB]
	try_to_unmap_one()
	==> ptep_get_and_clear()
	==> set_tlb_ubc_flush_pending()
					mprotect(addr, PROT_READ)
					==> change_pte_range()
					==> [ PTE non-present - no flush ]

					user writes using cached RW PTE
	...
	try_to_unmap_flush()

The same type of race exists for reads when protecting for PROT_NONE and also exists for operations that can leave an old TLB entry behind such as munmap, mremap and madvise. For some operations like mprotect, it's not necessarily a data integrity issue but it is a correctness issue as there is a window where an mprotect that limits access still allows access. For munmap, it's potentially a data integrity issue although the race is massive as an munmap, mmap and return to userspace must all complete between the window when reclaim drops the PTL and flushes the TLB. However, it's theoretically possible so handle this issue by flushing the mm if reclaim is potentially currently batching TLB flushes. Other instances where a flush is required for a present pte should be ok as either the page lock is held preventing parallel reclaim or a page reference count is elevated preventing a parallel free leading to corruption. In the case of page_mkclean there isn't an obvious path that userspace could take advantage of without using the operations that are guarded by this patch. Other users, such as gup, racing with reclaim look just at PTEs. Huge page variants should be ok as they don't race with reclaim. mincore only looks at PTEs. userfault also should be ok as if a parallel reclaim takes place, it will either fault the page back in or read some of the data before the flush occurs, triggering a fault. Note that a variant of this patch was acked by Andy Lutomirski but this was for the x86 parts on top of his PCID work which didn't make the 4.13 merge window as expected. His ack is dropped from this version and there will be a follow-on patch on top of PCID that will include his ack. [akpm@linux-foundation.org: tweak comments] [akpm@linux-foundation.org: fix spello] Link: http://lkml.kernel.org/r/20170717155523.emckq2esjro6hf3z@suse.de Reported-by: Nadav Amit Signed-off-by: Mel Gorman Cc: Andy Lutomirski Cc: [v4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 4 ++++ mm/internal.h | 5 ++++- mm/madvise.c | 1 + mm/memory.c | 1 + mm/mprotect.c | 1 + mm/mremap.c | 1 + mm/rmap.c | 36 ++++++++++++++++++++++++++++++++++++ 7 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ff151814a02d..7f384bb62d8e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -494,6 +494,10 @@ struct mm_struct { * PROT_NONE or PROT_NUMA mapped page.
*/ bool tlb_flush_pending; +#endif +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* See flush_tlb_batched_pending() */ + bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/internal.h b/mm/internal.h index 24d88f084705..4ef49fc55e58 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -498,6 +498,7 @@ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); +void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { @@ -505,7 +506,9 @@ static inline void try_to_unmap_flush(void) static inline void try_to_unmap_flush_dirty(void) { } - +static inline void flush_tlb_batched_pending(struct mm_struct *mm) +{ +} #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; diff --git a/mm/madvise.c b/mm/madvise.c index 9976852f1e1c..47d8d8a25eae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -320,6 +320,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, tlb_remove_check_page_size_change(tlb, PAGE_SIZE); orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; diff --git a/mm/memory.c b/mm/memory.c index 0e517be91a89..f65beaad319b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1197,6 +1197,7 @@ again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; diff --git a/mm/mprotect.c b/mm/mprotect.c index 1a8c9ca83e48..4180ad8cc9c5 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -64,6 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, atomic_read(&vma->vm_mm->mm_users) == 1) target_node = numa_node_id(); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { oldpte = *pte; diff --git a/mm/mremap.c b/mm/mremap.c index cd8a1b199ef9..6e3d857458de 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -152,6 +152,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, diff --git a/mm/rmap.c b/mm/rmap.c index ced14f1af6dc..c8993c63eb25 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -604,6 +604,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) arch_tlbbatch_add_mm(&tlb_ubc->arch, mm); tlb_ubc->flush_required = true; + /* + * Ensure compiler does not re-order the setting of tlb_flush_batched + * before the PTE is cleared. + */ + barrier(); + mm->tlb_flush_batched = true; + /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() @@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) return should_defer; } + +/* + * Reclaim unmaps pages under the PTL but does not flush the TLB prior to + * releasing the PTL if TLB flushes are batched. It's possible for a parallel + * operation such as mprotect or munmap to race between reclaim unmapping + * the page and flushing the page.
If this race occurs, it potentially allows + * access to data via a stale TLB entry. Tracking all mm's that have TLB + * batching in flight would be expensive during reclaim so instead track + * whether TLB batching occurred in the past and if so then do a flush here + * if required. This will cost one additional flush per reclaim cycle paid + * by the first operation at risk such as mprotect and munmap. + * + * This must be called under the PTL so that an access to tlb_flush_batched + * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise + * via the PTL. + */ +void flush_tlb_batched_pending(struct mm_struct *mm) +{ + if (mm->tlb_flush_batched) { + flush_tlb_mm(mm); + + /* + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. + */ + barrier(); + mm->tlb_flush_batched = false; + } +} #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { -- cgit v1.2.3 From d16977f3a6cfbb5e9ce477f423a1bf343347c1ed Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Aug 2017 13:32:01 -0700 Subject: kthread: fix documentation build warning The kerneldoc comment for kthread_create() had an incorrect argument name, leading to a warning in the docs build. Correct it, and make one more small step toward a warning-free build. Link: http://lkml.kernel.org/r/20170724135916.7f486c6f@lwn.net Signed-off-by: Jonathan Corbet Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 4fec8b775895..82e197eeac91 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -15,7 +15,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), * @threadfn: the function to run in the thread * @data: data pointer for @threadfn() * @namefmt: printf-style format string for the thread name - * @...: arguments for @namefmt. + * @arg...: arguments for @namefmt. * * This macro will create a kthread on the current node, leaving it in * the stopped state. This is just a helper for kthread_create_on_node(); -- cgit v1.2.3 From 89affbf5d9ebb15c6460596822e8857ea2f9e735 Mon Sep 17 00:00:00 2001 From: Dima Zavin Date: Wed, 2 Aug 2017 13:32:18 -0700 Subject: cpuset: fix a deadlock due to incomplete patching of cpusets_enabled() In codepaths that use the begin/retry interface for reading mems_allowed_seq with irqs disabled, there exists a race condition that stalls the patch process after only modifying a subset of the static_branch call sites. This problem manifested itself as a deadlock in the slub allocator, inside get_any_partial. The loop reads mems_allowed_seq value (via read_mems_allowed_begin), performs the defrag operation, and then verifies the consistency of mems_allowed via the read_mems_allowed_retry and the cookie returned by xxx_begin. The issue here is that both begin and retry first check if cpusets are enabled via cpusets_enabled() static branch. This branch can be rewritten dynamically (via cpuset_inc) if a new cpuset is created. The x86 jump label code fully synchronizes across all CPUs for every entry it rewrites. If it rewrites only one of the callsites (specifically the one in read_mems_allowed_retry) and then waits for the smp_call_function(do_sync_core) to complete while a CPU is inside the begin/retry section with IRQs off and the mems_allowed value is changed, we can hang.
This is because begin() will always return 0 (since it wasn't patched yet) while retry() will test the 0 against the actual value of the seq counter. The fix is to use two different static keys: one for begin (pre_enable_key) and one for retry (enable_key). In cpuset_inc(), we first bump the pre_enable key to ensure that read_mems_allowed_begin() always returns a valid seqcount if we are enabling cpusets. Similarly, when disabling cpusets via cpuset_dec(), we first ensure that callers of read_mems_allowed_retry() will start ignoring the seqcount value before we let read_mems_allowed_begin() return 0. The relevant stack traces of the two stuck threads: CPU: 1 PID: 1415 Comm: mkdir Tainted: G L 4.9.36-00104-g540c51286237 #4 Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017 task: ffff8817f9c28000 task.stack: ffffc9000ffa4000 RIP: smp_call_function_many+0x1f9/0x260 Call Trace: smp_call_function+0x3b/0x70 on_each_cpu+0x2f/0x90 text_poke_bp+0x87/0xd0 arch_jump_label_transform+0x93/0x100 __jump_label_update+0x77/0x90 jump_label_update+0xaa/0xc0 static_key_slow_inc+0x9e/0xb0 cpuset_css_online+0x70/0x2e0 online_css+0x2c/0xa0 cgroup_apply_control_enable+0x27f/0x3d0 cgroup_mkdir+0x2b7/0x420 kernfs_iop_mkdir+0x5a/0x80 vfs_mkdir+0xf6/0x1a0 SyS_mkdir+0xb7/0xe0 entry_SYSCALL_64_fastpath+0x18/0xad ... CPU: 2 PID: 1 Comm: init Tainted: G L 4.9.36-00104-g540c51286237 #4 Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017 task: ffff8818087c0000 task.stack: ffffc90000030000 RIP: int3+0x39/0x70 Call Trace: <#DB> ? ___slab_alloc+0x28b/0x5a0 ? copy_process.part.40+0xf7/0x1de0 __slab_alloc.isra.80+0x54/0x90 copy_process.part.40+0xf7/0x1de0 copy_process.part.40+0xf7/0x1de0 kmem_cache_alloc_node+0x8a/0x280 copy_process.part.40+0xf7/0x1de0 _do_fork+0xe7/0x6c0 _raw_spin_unlock_irq+0x2d/0x60 trace_hardirqs_on_caller+0x136/0x1d0 entry_SYSCALL_64_fastpath+0x5/0xad do_syscall_64+0x27/0x350 SyS_clone+0x19/0x20 do_syscall_64+0x60/0x350 entry_SYSCALL64_slow_path+0x25/0x25 Link: http://lkml.kernel.org/r/20170731040113.14197-1-dmitriyz@waymo.com Fixes: 46e700abc44c ("mm, page_alloc: remove unnecessary taking of a seqlock when cpusets are disabled") Signed-off-by: Dima Zavin Reported-by: Cliff Spradlin Acked-by: Vlastimil Babka Cc: Peter Zijlstra Cc: Christopher Lameter Cc: Li Zefan Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 19 +++++++++++++++++-- kernel/cgroup/cpuset.c | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 119a3f9604b0..898cfe2eeb42 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -18,6 +18,19 @@ #ifdef CONFIG_CPUSETS +/* + * Static branch rewrites can happen in an arbitrary order for a given + * key. In code paths where we need to loop with read_mems_allowed_begin() and + * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need + * to ensure that begin() always gets rewritten before retry() in the + * disabled -> enabled transition. If not, then if local irqs are disabled + * around the loop, we can deadlock since retry() would always be + * comparing the latest value of the mems_allowed seqcount against 0 as + * begin() still would see cpusets_enabled() as false.
The enabled -> disabled + * transition should happen in reverse order for the same reasons (want to stop + * looking at real value of mems_allowed.sequence in retry() first). + */ +extern struct static_key_false cpusets_pre_enable_key; extern struct static_key_false cpusets_enabled_key; static inline bool cpusets_enabled(void) { @@ -32,12 +45,14 @@ static inline int nr_cpusets(void) static inline void cpuset_inc(void) { + static_branch_inc(&cpusets_pre_enable_key); static_branch_inc(&cpusets_enabled_key); } static inline void cpuset_dec(void) { static_branch_dec(&cpusets_enabled_key); + static_branch_dec(&cpusets_pre_enable_key); } extern int cpuset_init(void); @@ -115,7 +130,7 @@ extern void cpuset_print_current_mems_allowed(void); */ static inline unsigned int read_mems_allowed_begin(void) { - if (!cpusets_enabled()) + if (!static_branch_unlikely(&cpusets_pre_enable_key)) return 0; return read_seqcount_begin(&current->mems_allowed_seq); @@ -129,7 +144,7 @@ static inline unsigned int read_mems_allowed_begin(void) */ static inline bool read_mems_allowed_retry(unsigned int seq) { - if (!cpusets_enabled()) + if (!static_branch_unlikely(&cpusets_enabled_key)) return false; return read_seqcount_retry(&current->mems_allowed_seq, seq); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index ca8376e5008c..8d5151688504 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -63,6 +63,7 @@ #include #include +DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); /* See "Frequency meter" comments, below. */ -- cgit v1.2.3 From 1ee1c3f5b5cff959e0ac95a125bd15eaf88cc638 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 2 Aug 2017 13:32:27 -0700 Subject: mm: allow page_cache_get_speculative in interrupt context The kernel panics when calling the IRQ-safe __get_user_pages_fast in an NMI handler. The bug was introduced by commit 2947ba054a4d ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation"). The original x86 __get_user_page_fast used plain get_page() or page_ref_add(). However, the generic __get_user_page_fast uses page_cache_get_speculative(), which has VM_BUG_ON(in_interrupt()). There is no reason to prevent page_cache_get_speculative from being used in interrupt context. According to the author, putting a BUG_ON there is just because the code is not verifying correctness of interrupt races. I did some tests in interrupt context; no issues were found. So remove VM_BUG_ON(in_interrupt()) from page_cache_get_speculative(). Link: http://lkml.kernel.org/r/1501609146-59730-1-git-send-email-kan.liang@intel.com Fixes: 2947ba054a4d ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation") Signed-off-by: Kan Liang Cc: Jens Axboe Cc: Al Viro Cc: Kirill A.
Shutemov Cc: Ying Huang Cc: Nicholas Piggin Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index baa9344dcd10..79b36f57c3ba 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -163,8 +163,6 @@ void release_pages(struct page **pages, int nr, bool cold); */ static inline int page_cache_get_speculative(struct page *page) { - VM_BUG_ON(in_interrupt()); - #ifdef CONFIG_TINY_RCU # ifdef CONFIG_PREEMPT_COUNT VM_BUG_ON(!in_atomic() && !irqs_disabled()); -- cgit v1.2.3 From e1a10ef7fa876f8510aaec36ea5c0cf34baba410 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 3 Aug 2017 09:19:52 -0400 Subject: tcp: introduce tcp_rto_delta_us() helper for xmit timer fix Pure refactor. This helper will be required in the xmit timer fix later in the patch series. (Because the TLP logic will want to make this calculation.) Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++++++++++ net/ipv4/tcp_input.c | 5 +---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..ada65e767b28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1916,6 +1916,16 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +/* At how many usecs into the future should the RTO fire? */ +static inline s64 tcp_rto_delta_us(const struct sock *sk) +{ + const struct sk_buff *skb = tcp_write_queue_head(sk); + u32 rto = inet_csk(sk)->icsk_rto; + u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; +} + /* * Save and compile IPv4 options, return a pointer to it */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dad026fcfd09..b9ba8d55d8b8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3004,10 +3004,7 @@ void tcp_rearm_rto(struct sock *sk) /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { - struct sk_buff *skb = tcp_write_queue_head(sk); - u64 rto_time_stamp = skb->skb_mstamp + - jiffies_to_usecs(rto); - s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + s64 delta_us = tcp_rto_delta_us(sk); /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ -- cgit v1.2.3 From 931b3c1a832621b4bdcbaf783096fc267eb36fbe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Aug 2017 09:41:37 +0300 Subject: RDMA/mlx5: Fix existence check for extended address vector The extended address vector is the highest bit in a be32 variable, but it was compared with the lowest. This patch fixes the endianness of that check and removes the already-declared define.
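As a generic illustration of this class of endianness bug, consider testing a flag that lives in big-endian storage (a userspace sketch with hypothetical constants, not the mlx5 code; htonl() stands in for cpu_to_be32()): masking the raw big-endian bytes with an unconverted CPU-order constant examines the wrong bit on little-endian hosts, so the constant must be converted first, which is the pattern the fix uses.

/* Hypothetical sketch: FLAG_EXT is not an mlx5 constant. */
#include <arpa/inet.h>	/* htonl() plays the role of cpu_to_be32() */
#include <stdint.h>
#include <stdio.h>

#define FLAG_EXT 0x80000000u	/* highest bit, CPU byte order */

int main(void)
{
	uint32_t be_field = htonl(FLAG_EXT);	/* flag set, stored big-endian */

	/* Buggy on little-endian: masks big-endian bytes with a
	 * CPU-order constant, so the wrong bit position is tested. */
	printf("unconverted test: %d\n", !!(be_field & FLAG_EXT));

	/* Correct: convert the constant to big-endian before masking. */
	printf("converted test: %d\n", !!(be_field & htonl(FLAG_EXT)));
	return 0;
}

On a little-endian host the first test prints 0 even though the flag is set, while the converted test prints 1; on big-endian hosts the two happen to agree, which is why this class of bug can survive testing there.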
Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 2 +- include/linux/mlx5/qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 6f41270d80c0..f378dc0e7eaf 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 -#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, -- cgit v1.2.3 From 978d13d60c34818a41fc35962602bdfa5c03f214 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 4 Aug 2017 23:59:31 -0700 Subject: iscsi-target: Fix iscsi_np reset hung task during parallel delete This patch fixes a bug associated with iscsit_reset_np_thread() that can occur during parallel configfs rmdir of a single iscsi_np used across multiple iscsi-target instances, that would result in hung task(s) similar to below where configfs rmdir process context was blocked indefinitely waiting for iscsi_np->np_restart_comp to finish: [ 6726.112076] INFO: task dcp_proxy_node_:15550 blocked for more than 120 seconds. [ 6726.119440] Tainted: G W O 4.1.26-3321 #2 [ 6726.125045] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 6726.132927] dcp_proxy_node_ D ffff8803f202bc88 0 15550 1 0x00000000 [ 6726.140058] ffff8803f202bc88 ffff88085c64d960 ffff88083b3b1ad0 ffff88087fffeb08 [ 6726.147593] ffff8803f202c000 7fffffffffffffff ffff88083f459c28 ffff88083b3b1ad0 [ 6726.155132] ffff88035373c100 ffff8803f202bca8 ffffffff8168ced2 ffff8803f202bcb8 [ 6726.162667] Call Trace: [ 6726.165150] [] schedule+0x32/0x80 [ 6726.170156] [] schedule_timeout+0x214/0x290 [ 6726.176030] [] ? __send_signal+0x52/0x4a0 [ 6726.181728] [] wait_for_completion+0x96/0x100 [ 6726.187774] [] ? wake_up_state+0x10/0x10 [ 6726.193395] [] iscsit_reset_np_thread+0x62/0xe0 [iscsi_target_mod] [ 6726.201278] [] iscsit_tpg_disable_portal_group+0x96/0x190 [iscsi_target_mod] [ 6726.210033] [] lio_target_tpg_store_enable+0x4f/0xc0 [iscsi_target_mod] [ 6726.218351] [] configfs_write_file+0xaa/0x110 [ 6726.224392] [] vfs_write+0xa4/0x1b0 [ 6726.229576] [] SyS_write+0x41/0xb0 [ 6726.234659] [] system_call_fastpath+0x12/0x71 It would happen because each iscsit_reset_np_thread() sets state to ISCSI_NP_THREAD_RESET, sends SIGINT, and then blocks waiting for completion on iscsi_np->np_restart_comp. However, if iscsi_np was active processing a login request and more than a single iscsit_reset_np_thread() caller to the same iscsi_np was blocked on iscsi_np->np_restart_comp, iscsi_np kthread process context in __iscsi_target_login_thread() would flush pending signals and only perform a single completion of np->np_restart_comp before going back to sleep within transport-specific iscsit_transport->iscsi_accept_np code.
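To see why a single completion cannot release multiple waiters, the sketch below (a minimal userspace analogue written under stated assumptions, not the iscsi-target code: pthreads and a POSIX semaphore stand in for the np kthread and np->np_restart_comp) counts outstanding reset requests and answers each one, which is the shape of the fix described next.

/* Sketch only: reset_count and restart_comp are analogues of
 * np->np_reset_count and np->np_restart_comp, nothing more. */
#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NREQ 3				/* parallel reset requesters */

static atomic_int reset_count;		/* outstanding reset requests */
static atomic_int stop;
static sem_t restart_comp;

static void *np_thread(void *arg)	/* the lone service thread */
{
	(void)arg;
	while (!atomic_load(&stop)) {
		if (atomic_load(&reset_count) > 0) {
			atomic_fetch_sub(&reset_count, 1);
			sem_post(&restart_comp); /* one answer per request */
		} else {
			usleep(1000);	/* stand-in for accepting logins */
		}
	}
	return NULL;
}

static void *resetter(void *arg)
{
	atomic_fetch_add(&reset_count, 1);	/* request a reset... */
	sem_wait(&restart_comp);		/* ...wait to be answered */
	printf("resetter %ld released\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t w, r[NREQ];
	long i;

	sem_init(&restart_comp, 0, 0);
	pthread_create(&w, NULL, np_thread, NULL);
	for (i = 0; i < NREQ; i++)
		pthread_create(&r[i], NULL, resetter, (void *)i);
	for (i = 0; i < NREQ; i++)
		pthread_join(r[i], NULL);
	atomic_store(&stop, 1);
	pthread_join(w, NULL);
	sem_destroy(&restart_comp);
	return 0;
}

If the service thread instead posted at most one completion per reset cycle, regardless of how many requests were queued, NREQ - 1 of the joins above would hang, mirroring the hung configfs rmdir tasks in the trace.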
To address this bug, add an iscsi_np->np_reset_count and update __iscsi_target_login_thread() to keep completing np->np_restart_comp until ->np_reset_count has reached zero. Reported-by: Gary Guo Tested-by: Gary Guo Cc: Mike Christie Cc: Hannes Reinecke Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_login.c | 7 +++++-- include/target/iscsi/iscsi_target_core.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 12803de99400..5001261f5d69 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -418,6 +418,7 @@ int iscsit_reset_np_thread( return 0; } np->np_thread_state = ISCSI_NP_THREAD_RESET; + atomic_inc(&np->np_reset_count); if (np->np_thread) { spin_unlock_bh(&np->np_thread_lock); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index e9bdc8b86e7d..dc13afbd4c88 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1243,9 +1243,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) flush_signals(current); spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); + return 1; } else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) { spin_unlock_bh(&np->np_thread_lock); goto exit; @@ -1278,7 +1280,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto exit; } else if (rc < 0) { spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); iscsit_put_transport(conn->conn_transport); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 0ca1fb08805b..fb87d32f5e51 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -786,6 +786,7 @@ struct iscsi_np { int np_sock_type; enum np_thread_state_table np_thread_state; bool enabled; + atomic_t np_reset_count; enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; -- cgit v1.2.3 From 0cca6c8920ade95e2741b2062cf1397dc546fb0f Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Sun, 6 Aug 2017 16:00:05 +0200 Subject: pinctrl: generic: update references to Documentation/pinctrl.txt Update deprecated references to Documentation/pinctrl.txt since it has been moved to Documentation/driver-api/pinctl.rst. Signed-off-by: Ludovic Desroches Fixes: 5a9b73832e9e ("pinctrl.txt: move it to the driver-api book") Signed-off-by: Linus Walleij --- Documentation/gpio/gpio-legacy.txt | 2 +- MAINTAINERS | 2 +- include/linux/device.h | 2 +- include/linux/pinctrl/pinconf-generic.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/gpio/gpio-legacy.txt b/Documentation/gpio/gpio-legacy.txt index b34fd94f7089..5eacc147ea87 100644 --- a/Documentation/gpio/gpio-legacy.txt +++ b/Documentation/gpio/gpio-legacy.txt @@ -459,7 +459,7 @@ pin controller?
This is done by registering "ranges" of pins, which are essentially cross-reference tables. These are described in -Documentation/pinctrl.txt +Documentation/driver-api/pinctl.rst While the pin allocation is totally managed by the pinctrl subsystem, gpio (under gpiolib) is still maintained by gpio drivers. It may happen diff --git a/MAINTAINERS b/MAINTAINERS index f66488dfdbc9..2c85558aec19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10375,7 +10375,7 @@ L: linux-gpio@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git S: Maintained F: Documentation/devicetree/bindings/pinctrl/ -F: Documentation/pinctrl.txt +F: Documentation/driver-api/pinctl.rst F: drivers/pinctrl/ F: include/linux/pinctrl/ diff --git a/include/linux/device.h b/include/linux/device.h index 723cd54b94da..beabdbc08420 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -843,7 +843,7 @@ struct dev_links_info { * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. * @pins: For device pin management. - * See Documentation/pinctrl.txt for details. + * See Documentation/driver-api/pinctl.rst for details. * @msi_list: Hosts MSI descriptors * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 231d3075815a..e91d1b6a260d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -81,8 +81,8 @@ * it. * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a * value on the line. Use argument 1 to indicate high level, argument 0 to - * indicate low level. (Please see Documentation/pinctrl.txt, section - * "GPIO mode pitfalls" for a discussion around this parameter.) + * indicate low level. (Please see Documentation/driver-api/pinctl.rst, + * section "GPIO mode pitfalls" for a discussion around this parameter.) * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells * the driver which alternative power source to use. -- cgit v1.2.3 From 3be8eddd9d58a925b461b582fa5aa422a9c145ee Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 25 Jul 2017 09:27:33 -0700 Subject: drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk order. This is useful to allow GL to provide defined results for overlapping glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited window movement without first blitting to a temporary. x11perf -copywinwin100 goes from 1850/sec to 4850/sec. v2: Default to the same behavior as before when the flags aren't passed. 
(suggested by Boris) Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20170725162733.28007-2-eric@anholt.net Reviewed-by: Boris Brezillon --- drivers/gpu/drm/vc4/vc4_drv.c | 1 + drivers/gpu/drm/vc4/vc4_gem.c | 5 ++++- drivers/gpu/drm/vc4/vc4_render_cl.c | 21 ++++++++++++++++----- include/uapi/drm/vc4_drm.h | 11 +++++++++++ 4 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index e8f0e1790d5e..1c96edcb302b 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -99,6 +99,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_BRANCHES: case DRM_VC4_PARAM_SUPPORTS_ETC1: case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: + case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: args->value = true; break; default: diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 22a55cff7e64..d0c6bfb68c4e 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -1007,7 +1007,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct ww_acquire_ctx acquire_ctx; int ret = 0; - if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { + if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | + VC4_SUBMIT_CL_FIXED_RCL_ORDER | + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); return -EINVAL; } diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index e0539731130b..273984f71ae2 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -261,8 +261,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, uint8_t max_y_tile = args->max_y_tile; uint8_t xtiles = max_x_tile - min_x_tile + 1; uint8_t ytiles = max_y_tile - min_y_tile + 1; - uint8_t x, y; + uint8_t xi, yi; uint32_t size, loop_body_size; + bool positive_x = true; + bool positive_y = true; + + if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) { + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)) + positive_x = false; + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) + positive_y = false; + } size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; @@ -354,10 +363,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, rcl_u16(setup, args->height); rcl_u16(setup, args->color_write.bits); - for (y = min_y_tile; y <= max_y_tile; y++) { - for (x = min_x_tile; x <= max_x_tile; x++) { - bool first = (x == min_x_tile && y == min_y_tile); - bool last = (x == max_x_tile && y == max_y_tile); + for (yi = 0; yi < ytiles; yi++) { + int y = positive_y ? min_y_tile + yi : max_y_tile - yi; + for (xi = 0; xi < xtiles; xi++) { + int x = positive_x ? min_x_tile + xi : max_x_tile - xi; + bool first = (xi == 0 && yi == 0); + bool last = (xi == xtiles - 1 && yi == ytiles - 1); emit_tile(exec, setup, x, y, first, last); } diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 551628e571f9..afae87004963 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -155,6 +155,16 @@ struct drm_vc4_submit_cl { __u32 pad:24; #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) +/* By default, the kernel gets to choose the order that the tiles are + * rendered in. 
If this is set, then the tiles will be rendered in a + * raster order, with the right-to-left vs left-to-right and + * top-to-bottom vs bottom-to-top dictated by + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping + * blits to be implemented using the 3D engine. + */ +#define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) __u32 flags; /* Returned value of the seqno of this render job (for the @@ -294,6 +304,7 @@ struct drm_vc4_get_hang_state { #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 +#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 struct drm_vc4_get_param { __u32 param; -- cgit v1.2.3 From 8941a7cbcc5f06fb9e1d105911c928766d742861 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 7 Aug 2017 12:39:37 -0500 Subject: drm/tinydrm: Generalize tinydrm_xrgb8888_to_gray8() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds parameters for vaddr and clip to tinydrm_xrgb8888_to_gray8() to make it more generic. dma_buf_{begin,end}_cpu_access() are moved out to the repaper driver. The return type is changed to void to simplify error handling by callers. Signed-off-by: David Lechner Signed-off-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/1502127581-10517-2-git-send-email-david@lechnology.com --- drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c | 42 +++++++++----------------- drivers/gpu/drm/tinydrm/repaper.c | 28 +++++++++++++++-- include/drm/tinydrm/tinydrm-helpers.h | 3 +- 3 files changed, 41 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c index 75808bb84c9a..bd6cce093a85 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c @@ -185,7 +185,9 @@ EXPORT_SYMBOL(tinydrm_xrgb8888_to_rgb565); /** * tinydrm_xrgb8888_to_gray8 - Convert XRGB8888 to grayscale * @dst: 8-bit grayscale destination buffer + * @vaddr: XRGB8888 source buffer * @fb: DRM framebuffer + * @clip: Clip rectangle area to copy * * Drm doesn't have native monochrome or grayscale support. * Such drivers can announce the commonly supported XR24 format to userspace @@ -195,41 +197,31 @@ EXPORT_SYMBOL(tinydrm_xrgb8888_to_rgb565); * where 1 means foreground color and 0 background color. * * ITU BT.601 is used for the RGB -> luma (brightness) conversion. - * - * Returns: - * Zero on success, negative error code on failure. */ -int tinydrm_xrgb8888_to_gray8(u8 *dst, struct drm_framebuffer *fb) +void tinydrm_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb, + struct drm_clip_rect *clip) { - struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0); - struct dma_buf_attachment *import_attach = cma_obj->base.import_attach; - unsigned int x, y, pitch = fb->pitches[0]; - int ret = 0; + unsigned int len = (clip->x2 - clip->x1) * sizeof(u32); + unsigned int x, y; void *buf; u32 *src; if (WARN_ON(fb->format->format != DRM_FORMAT_XRGB8888)) - return -EINVAL; + return; /* * The cma memory is write-combined so reads are uncached. * Speed up by fetching one line at a time.
*/ - buf = kmalloc(pitch, GFP_KERNEL); + buf = kmalloc(len, GFP_KERNEL); if (!buf) - return -ENOMEM; - - if (import_attach) { - ret = dma_buf_begin_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - goto err_free; - } + return; - for (y = 0; y < fb->height; y++) { - src = cma_obj->vaddr + (y * pitch); - memcpy(buf, src, pitch); + for (y = clip->y1; y < clip->y2; y++) { + src = vaddr + (y * fb->pitches[0]); + src += clip->x1; + memcpy(buf, src, len); src = buf; - for (x = 0; x < fb->width; x++) { + for (x = clip->x1; x < clip->x2; x++) { u8 r = (*src & 0x00ff0000) >> 16; u8 g = (*src & 0x0000ff00) >> 8; u8 b = *src & 0x000000ff; @@ -240,13 +232,7 @@ int tinydrm_xrgb8888_to_gray8(u8 *dst, struct drm_framebuffer *fb) } } - if (import_attach) - ret = dma_buf_end_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); -err_free: kfree(buf); - - return ret; } EXPORT_SYMBOL(tinydrm_xrgb8888_to_gray8); diff --git a/drivers/gpu/drm/tinydrm/repaper.c b/drivers/gpu/drm/tinydrm/repaper.c index 3343d3f15a90..30dc97b3ff21 100644 --- a/drivers/gpu/drm/tinydrm/repaper.c +++ b/drivers/gpu/drm/tinydrm/repaper.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -525,11 +526,20 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, struct drm_clip_rect *clips, unsigned int num_clips) { + struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0); + struct dma_buf_attachment *import_attach = cma_obj->base.import_attach; struct tinydrm_device *tdev = fb->dev->dev_private; struct repaper_epd *epd = epd_from_tinydrm(tdev); + struct drm_clip_rect clip; u8 *buf = NULL; int ret = 0; + /* repaper can't do partial updates */ + clip.x1 = 0; + clip.x2 = fb->width; + clip.y1 = 0; + clip.y2 = fb->height; + mutex_lock(&tdev->dirty_lock); if (!epd->enabled) @@ -550,9 +560,21 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, goto out_unlock; } - ret = tinydrm_xrgb8888_to_gray8(buf, fb); - if (ret) - goto out_unlock; + if (import_attach) { + ret = dma_buf_begin_cpu_access(import_attach->dmabuf, + DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + } + + tinydrm_xrgb8888_to_gray8(buf, cma_obj->vaddr, fb, &clip); + + if (import_attach) { + ret = dma_buf_end_cpu_access(import_attach->dmabuf, + DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + } repaper_gray8_to_mono_reversed(buf, fb->width, fb->height); diff --git a/include/drm/tinydrm/tinydrm-helpers.h b/include/drm/tinydrm/tinydrm-helpers.h index a6c387f91eff..d554ded60ee9 100644 --- a/include/drm/tinydrm/tinydrm-helpers.h +++ b/include/drm/tinydrm/tinydrm-helpers.h @@ -43,7 +43,8 @@ void tinydrm_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb, void tinydrm_xrgb8888_to_rgb565(u16 *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_clip_rect *clip, bool swap); -int tinydrm_xrgb8888_to_gray8(u8 *dst, struct drm_framebuffer *fb); +void tinydrm_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb, + struct drm_clip_rect *clip); struct backlight_device *tinydrm_of_find_backlight(struct device *dev); int tinydrm_enable_backlight(struct backlight_device *backlight); -- cgit v1.2.3 From 0fb228d30b8d72bfee51f57e638d412324d44a11 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 1 Aug 2017 15:12:39 -0700 Subject: nvmet_fc: add defer_req callback for deferment of cmd buffer return At queue creation, the transport allocates a local job struct (struct nvmet_fc_fcp_iod) for each possible element of the queue. 
When a new CMD is received from the wire, a job struct is allocated from the queue and then used for the duration of the command. The job struct contains buffer space for the wire command iu. Thus, upon allocation of the job struct, the cmd iu buffer is copied to the job struct and the LLDD may immediately free/reuse the CMD IU buffer passed in the call. However, in some circumstances, due to the packetized nature of FC and the api of the FC LLDD, the LLDD may issue a hw command to send the wire response but not get the hw completion for that command (and upcall the nvmet_fc layer) before a new command is asynchronously received on the wire. In other words, it's possible for the initiator to get the response from the wire, thus believe a command slot is free, and send a new command iu. The new command iu may be received by the LLDD and passed to the transport before the LLDD had serviced the hw completion and made the teardown calls for the original job struct. As such, there is no job struct available for the new io. E.g. it appears as if the host sent more queue elements than the queue size. It didn't, based on its understanding. Rather than treat this as a hard connection failure, queue the new request until a job struct does free up. As the buffer isn't copied (there's no job struct yet), a special return value must be returned to the LLDD to signify to hold off on recycling the cmd iu buffer. And later, when a job struct is allocated and the buffer copied, a new LLDD callback is introduced to notify the LLDD and allow it to recycle its command iu buffer. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 212 +++++++++++++++++++++++++++++++++++------ include/linux/nvme-fc-driver.h | 7 ++ 2 files changed, 191 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 31ca55dfcb1d..1b7f2520a20d 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -114,6 +114,11 @@ struct nvmet_fc_tgtport { struct kref ref; }; +struct nvmet_fc_defer_fcp_req { + struct list_head req_list; + struct nvmefc_tgt_fcp_req *fcp_req; +}; + struct nvmet_fc_tgt_queue { bool ninetypercent; u16 qid; @@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue { struct nvmet_fc_tgt_assoc *assoc; struct nvmet_fc_fcp_iod *fod; /* array of fcp_iods */ struct list_head fod_list; + struct list_head pending_cmd_list; + struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; } __aligned(sizeof(unsigned long long)); @@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod); /* *********************** FC-NVME DMA Handling **************************** */ @@ -463,9 +472,9 @@ static struct nvmet_fc_fcp_iod * nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) { static struct nvmet_fc_fcp_iod *fod; - unsigned long flags; - spin_lock_irqsave(&queue->qlock, flags); + lockdep_assert_held(&queue->qlock); + fod = list_first_entry_or_null(&queue->fod_list, struct nvmet_fc_fcp_iod, fcp_list); if (fod) { @@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) * will "inherit" that reference.
*/ } - spin_unlock_irqrestore(&queue->qlock, flags); return fod; } +static void +nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_tgt_queue *queue, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + + /* + * put all admin cmds on hw queue id 0. All io commands go to + * the respective hw queue based on a modulo basis + */ + fcpreq->hwqid = queue->qid ? + ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; + + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); +} + static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; + struct nvmet_fc_defer_fcp_req *deferfcp; unsigned long flags; fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, @@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, fcpreq->nvmet_fc_private = NULL; - spin_lock_irqsave(&queue->qlock, flags); - list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; fod->abort = false; fod->aborted = false; fod->writedataactive = false; fod->fcpreq = NULL; + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); + + spin_lock_irqsave(&queue->qlock, flags); + deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (!deferfcp) { + list_add_tail(&fod->fcp_list, &fod->queue->fod_list); + spin_unlock_irqrestore(&queue->qlock, flags); + + /* Release reference taken at queue lookup and fod allocation */ + nvmet_fc_tgt_q_put(queue); + return; + } + + /* Re-use the fod for the next pending cmd that was deferred */ + list_del(&deferfcp->req_list); + + fcpreq = deferfcp->fcp_req; + + /* deferfcp can be reused for another IO at a later date */ + list_add_tail(&deferfcp->req_list, &queue->avail_defer_list); + spin_unlock_irqrestore(&queue->qlock, flags); + /* Save NVME CMD IO in fod */ + memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen); + + /* Setup new fcpreq to be processed */ + fcpreq->rspaddr = NULL; + fcpreq->rsplen = 0; + fcpreq->nvmet_fc_private = fod; + fod->fcpreq = fcpreq; + fod->active = true; + + /* inform LLDD IO is now being processed */ + tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq); + + /* Submit deferred IO for processing */ + nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq); + /* - * release the reference taken at queue lookup and fod allocation + * Leave the queue lookup get reference taken when + * fod was originally allocated. 
*/ - nvmet_fc_tgt_q_put(queue); - - tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->port = assoc->tgtport->port; queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); + INIT_LIST_HEAD(&queue->avail_defer_list); + INIT_LIST_HEAD(&queue->pending_cmd_list); atomic_set(&queue->connected, 0); atomic_set(&queue->sqtail, 0); atomic_set(&queue->rsn, 1); @@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; + struct nvmet_fc_defer_fcp_req *deferfcp; unsigned long flags; int i, writedataactive; bool disconnect; @@ -666,6 +733,35 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) } } } + + /* Cleanup defer'ed IOs in queue */ + list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) { + list_del(&deferfcp->req_list); + kfree(deferfcp); + } + + for (;;) { + deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (!deferfcp) + break; + + list_del(&deferfcp->req_list); + spin_unlock_irqrestore(&queue->qlock, flags); + + tgtport->ops->defer_rcv(&tgtport->fc_target_port, + deferfcp->fcp_req); + + tgtport->ops->fcp_abort(&tgtport->fc_target_port, + deferfcp->fcp_req); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, + deferfcp->fcp_req); + + kfree(deferfcp); + + spin_lock_irqsave(&queue->qlock, flags); + } spin_unlock_irqrestore(&queue->qlock, flags); flush_workqueue(queue->work_q); @@ -2172,11 +2268,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc * layer for processing. * - * The nvmet-fc layer will copy cmd payload to an internal structure for - * processing. As such, upon completion of the routine, the LLDD may - * immediately free/reuse the CMD IU buffer passed in the call. + * The nvmet_fc layer allocates a local job structure (struct + * nvmet_fc_fcp_iod) from the queue for the io and copies the + * CMD IU buffer to the job structure. As such, on a successful + * completion (returns 0), the LLDD may immediately free/reuse + * the CMD IU buffer passed in the call. + * + * However, in some circumstances, due to the packetized nature of FC + * and the api of the FC LLDD which may issue a hw command to send the + * response, but the LLDD may not get the hw completion for that command + * and upcall the nvmet_fc layer before a new command may be + * asynchronously received - it's possible for a command to be received + * before the LLDD and nvmet_fc have recycled the job structure. It gives + * the appearance of more commands received than fits in the sq. + * To alleviate this scenario, a temporary queue is maintained in the + * transport for pending LLDD requests waiting for a queue job structure. + * In these "overrun" cases, a temporary queue element is allocated, + * the LLDD request and CMD iu buffer information remembered, and the + * routine returns a -EOVERFLOW status. Subsequently, when a queue job + * structure is freed, it is immediately reallocated for anything on the + * pending request list. The LLDD's defer_rcv() callback is called, + * informing the LLDD that it may reuse the CMD IU buffer, and the io + * is then started normally with the transport. - * If this routine returns error, the lldd should abort the exchange.
+ * The LLDD, when receiving an -EOVERFLOW completion status, is to treat + * the completion as successful but must not reuse the CMD IU buffer + * until the LLDD's defer_rcv() callback has been called for the + * corresponding struct nvmefc_tgt_fcp_req pointer. + * + * If there is any other condition in which an error occurs, the + * transport will return a non-zero status indicating the error. + * In all cases other than -EOVERFLOW, the transport has not accepted the + * request and the LLDD should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU * was received on. @@ -2194,6 +2317,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, struct nvme_fc_cmd_iu *cmdiu = cmdiubuf; struct nvmet_fc_tgt_queue *queue; struct nvmet_fc_fcp_iod *fod; + struct nvmet_fc_defer_fcp_req *deferfcp; + unsigned long flags; /* validate iu, so the connection id can be used to find the queue */ if ((cmdiubuf_len != sizeof(*cmdiu)) || @@ -2214,29 +2339,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, * when the fod is freed. */ + spin_lock_irqsave(&queue->qlock, flags); + fod = nvmet_fc_alloc_fcp_iod(queue); - if (!fod) { + if (fod) { + spin_unlock_irqrestore(&queue->qlock, flags); + + fcpreq->nvmet_fc_private = fod; + fod->fcpreq = fcpreq; + + memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); + + nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq); + + return 0; + } + + if (!tgtport->ops->defer_rcv) { + spin_unlock_irqrestore(&queue->qlock, flags); /* release the queue lookup reference */ nvmet_fc_tgt_q_put(queue); return -ENOENT; } - fcpreq->nvmet_fc_private = fod; - fod->fcpreq = fcpreq; - /* - * put all admin cmds on hw queue id 0. All io commands go to - * the respective hw queue based on a modulo basis - */ - fcpreq->hwqid = queue->qid ? - ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; - memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); + deferfcp = list_first_entry_or_null(&queue->avail_defer_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (deferfcp) { + /* Just re-use one that was previously allocated */ + list_del(&deferfcp->req_list); + } else { + spin_unlock_irqrestore(&queue->qlock, flags); - if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) - queue_work_on(queue->cpu, queue->work_q, &fod->work); - else - nvmet_fc_handle_fcp_rqst(tgtport, fod); + /* Now we need to dynamically allocate one */ + deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL); + if (!deferfcp) { + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + return -ENOMEM; + } + spin_lock_irqsave(&queue->qlock, flags); } - return 0; + /* For now, use rspaddr / rsplen to save payload information */ + fcpreq->rspaddr = cmdiubuf; + fcpreq->rsplen = cmdiubuf_len; + deferfcp->fcp_req = fcpreq; + + /* defer processing till a fod becomes available */ + list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list); + + /* NOTE: the queue lookup reference is still valid */ + + spin_unlock_irqrestore(&queue->qlock, flags); + + return -EOVERFLOW; } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 6c8c5d8041b7..2591878c1d48 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -346,6 +346,11 @@ struct nvme_fc_remote_port { * indicating an FC transport Aborted status. * Entrypoint is Mandatory. * + * @defer_rcv: Called by the transport to signal the LLDD that it has + * begun processing of a previously received NVME CMD IU.
The LLDD + * is now free to re-use the rcv buffer associated with the + * nvmefc_tgt_fcp_req. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -846,6 +851,8 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_fcp_req *fcpreq); void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3 From 16af97dc5a8975371a83d9e30a64038b48f40a2d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 10 Aug 2017 15:23:56 -0700 Subject: mm: migrate: prevent racy access to tlb_flush_pending Patch series "fixes of TLB batching races", v6. It turns out that the Linux TLB batching mechanism suffers from various races. Races that are caused due to batching during reclamation were recently handled by Mel and this patch-set deals with others. The more fundamental issue is that concurrent updates of the page-tables allow for TLB flushes to be batched on one core, while another core changes the page-tables. This other core may assume a PTE change does not require a flush based on the updated PTE value, while it is unaware that TLB flushes are still pending. This behavior affects KSM (which may result in memory corruption) and MADV_FREE and MADV_DONTNEED (which may result in incorrect behavior). A proof-of-concept can easily produce the wrong behavior of MADV_DONTNEED. Memory corruption in KSM is harder to produce in practice, but was observed by hacking the kernel and adding a delay before flushing and replacing the KSM page. Finally, there is also one memory barrier missing, which may affect architectures with a weak memory model. This patch (of 7): Setting and clearing mm->tlb_flush_pending can be performed by multiple threads, since mmap_sem may only be acquired for read in task_numa_work(). If this happens, tlb_flush_pending might be cleared while one of the threads still changes PTEs and batches TLB flushes. This can lead to the same race between migration and change_protection_range() that led to the introduction of tlb_flush_pending. The result of this race was data corruption, which means that this patch also addresses a theoretically possible data corruption. An actual data corruption was not observed, yet the race was confirmed by adding an assertion to check that tlb_flush_pending is not set by two threads, adding artificial latency in change_protection_range() and using sysctl to reduce kernel.numa_balancing_scan_delay_ms. Link: http://lkml.kernel.org/r/20170802000818.4760-2-namit@vmware.com Fixes: 20841405940e ("mm: fix TLB flush race between migration, and change_protection_range") Signed-off-by: Nadav Amit Acked-by: Mel Gorman Acked-by: Rik van Riel Acked-by: Minchan Kim Cc: Andy Lutomirski Cc: Hugh Dickins Cc: "David S.
Miller" Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Russell King Cc: Sergey Senozhatsky Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 31 ++++++++++++++++++++++--------- kernel/fork.c | 2 +- mm/debug.c | 2 +- mm/mprotect.c | 4 ++-- 4 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f384bb62d8e..f58f76ee1dfa 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -493,7 +493,7 @@ struct mm_struct { * can move process memory needs to flush the TLB when moving a * PROT_NONE or PROT_NUMA mapped page. */ - bool tlb_flush_pending; + atomic_t tlb_flush_pending; #endif #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ @@ -532,33 +532,46 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { barrier(); - return mm->tlb_flush_pending; + return atomic_read(&mm->tlb_flush_pending) > 0; } -static inline void set_tlb_flush_pending(struct mm_struct *mm) + +static inline void init_tlb_flush_pending(struct mm_struct *mm) { - mm->tlb_flush_pending = true; + atomic_set(&mm->tlb_flush_pending, 0); +} + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ + atomic_inc(&mm->tlb_flush_pending); /* - * Guarantee that the tlb_flush_pending store does not leak into the + * Guarantee that the tlb_flush_pending increase does not leak into the * critical section updating the page tables */ smp_mb__before_spinlock(); } + /* Clearing is done after a TLB flush, which also provides a barrier. */ -static inline void clear_tlb_flush_pending(struct mm_struct *mm) +static inline void dec_tlb_flush_pending(struct mm_struct *mm) { barrier(); - mm->tlb_flush_pending = false; + atomic_dec(&mm->tlb_flush_pending); } #else static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { return false; } -static inline void set_tlb_flush_pending(struct mm_struct *mm) + +static inline void init_tlb_flush_pending(struct mm_struct *mm) { } -static inline void clear_tlb_flush_pending(struct mm_struct *mm) + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ +} + +static inline void dec_tlb_flush_pending(struct mm_struct *mm) { } #endif diff --git a/kernel/fork.c b/kernel/fork.c index 17921b0390b4..e075b7780421 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_aio(mm); mm_init_owner(mm, p); mmu_notifier_mm_init(mm); - clear_tlb_flush_pending(mm); + init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif diff --git a/mm/debug.c b/mm/debug.c index db1cd26d8752..d70103bb4731 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -159,7 +159,7 @@ void dump_mm(const struct mm_struct *mm) mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq, #endif #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) - mm->tlb_flush_pending, + atomic_read(&mm->tlb_flush_pending), #endif mm->def_flags, &mm->def_flags ); diff --git a/mm/mprotect.c b/mm/mprotect.c index 4180ad8cc9c5..bd0f409922cb 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -244,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, BUG_ON(addr >= end); pgd = pgd_offset(mm, addr); 
flush_cache_range(vma, addr, end); - set_tlb_flush_pending(mm); + inc_tlb_flush_pending(mm); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -256,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, /* Only flush the TLB if we actually modified any entries: */ if (pages) flush_tlb_range(vma, start, end); - clear_tlb_flush_pending(mm); + dec_tlb_flush_pending(mm); return pages; } -- cgit v1.2.3 From 0a2c40487f3e4215c6ab46e7f837036badfb542b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 10 Aug 2017 15:23:59 -0700 Subject: mm: migrate: fix barriers around tlb_flush_pending Reading tlb_flush_pending while the page-table lock is taken does not require a barrier, since the lock/unlock already acts as a barrier. Remove the barrier in mm_tlb_flush_pending() to address this issue. However, migrate_misplaced_transhuge_page() calls mm_tlb_flush_pending() while the page-table lock is already released, which may present a problem on architectures with a weak memory model (PPC). To deal with this case, a new parameter is added to mm_tlb_flush_pending() to indicate if it is read without the page-table lock taken, and to call smp_mb__after_unlock_lock() in this case. Link: http://lkml.kernel.org/r/20170802000818.4760-3-namit@vmware.com Signed-off-by: Nadav Amit Acked-by: Rik van Riel Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Andy Lutomirski Cc: Mel Gorman Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Russell King Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f58f76ee1dfa..0e478ebd2706 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -526,12 +526,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) /* * Memory barriers to keep this state in sync are graciously provided by * the page table locks, outside of which no page table modifications happen. - * The barriers below prevent the compiler from re-ordering the instructions - * around the memory barriers that are already present in the code. + * The barriers are used to ensure the order between tlb_flush_pending updates, + * which happen while the lock is not taken, and the PTE updates, which happen + * while the lock is taken, are serialized. */ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { - barrier(); return atomic_read(&mm->tlb_flush_pending) > 0; } @@ -554,7 +554,13 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm) /* Clearing is done after a TLB flush, which also provides a barrier. */ static inline void dec_tlb_flush_pending(struct mm_struct *mm) { - barrier(); + /* + * Guarantee that the tlb_flush_pending does not leak into the + * critical section, since we must order the PTE change and changes to + * the pending TLB flush indication. We could have relied on TLB flush + * as a memory barrier, but this behavior is not clearly documented.
+ */ + smp_mb__before_atomic(); atomic_dec(&mm->tlb_flush_pending); } #else -- cgit v1.2.3 From 56236a59556cfd3bae7bffb7e5f438b5ef0af880 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:05 -0700 Subject: mm: refactor TLB gathering API This is a preparatory patch for solving race problems caused by TLB batching. For that, we will increase/decrease the TLB flush pending count of mm_struct whenever tlb_[gather|finish]_mmu is called. To keep that simple, this patch separates the architecture-specific part, renames it to arch_tlb_[gather|finish]_mmu, and has the generic part just call it. It shouldn't change any behavior. Link: http://lkml.kernel.org/r/20170802000818.4760-5-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 6 ++++-- arch/ia64/include/asm/tlb.h | 6 ++++-- arch/s390/include/asm/tlb.h | 12 ++++++------ arch/sh/include/asm/tlb.h | 6 ++++-- arch/um/include/asm/tlb.h | 8 +++++--- include/asm-generic/tlb.h | 7 ++++--- include/linux/mm_types.h | 6 ++++++ mm/memory.c | 28 +++++++++++++++++++++------- 8 files changed, 54 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 3f2eb76243e3..7f5b2a2d3861 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->fullmm = !(start | (end+1)); @@ -166,7 +167,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index fced197b9626..93cadc04ac62 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); @@ -185,7 +186,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start * collected.
*/ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 7317b3108a88..d574d0820dc8 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -47,10 +47,9 @@ struct mmu_table_batch { extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_gather_mmu(struct mmu_gather *tlb, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline void +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -76,8 +75,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -static inline void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 46e0d635e36f..89786560dbd4 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -47,7 +48,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { if (tlb->fullmm) flush_tlb_mm(tlb->mm); diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 600a2e9bfee2..2a901eca7145 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -80,12 +81,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/* tlb_finish_mmu +/* arch_tlb_finish_mmu * Called at the end of the shootdown operation to free up any resources * that were required. 
*/ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8afa4335e5b2..8f71521e7a44 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,10 +112,11 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); +void arch_tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, - unsigned long end); +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e478ebd2706..c605f2a3a68e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -522,6 +522,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return mm->cpu_vm_mask_var; } +struct mmu_gather; +extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end); +extern void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); + #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * Memory barriers to keep this state in sync are graciously provided by diff --git a/mm/memory.c b/mm/memory.c index f65beaad319b..34cba5113e06 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } -/* tlb_gather_mmu - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @fullmm argument is used when @mm is without - * users and we're going to destroy the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; @@ -275,7 +271,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * Called at the end of the shootdown operation to free up any resources * that were required. */ -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { struct mmu_gather_batch *batch, *next; @@ -398,6 +395,23 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +/* tlb_gather_mmu + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + arch_tlb_finish_mmu(tlb, start, end); +} + /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. 
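For readers skimming the refactor above: the entry points callers use are unchanged, since tlb_gather_mmu()/tlb_finish_mmu() are now thin generic wrappers around the arch hooks. A minimal sketch of the usual call pattern (illustrative only; unmap_vmas() stands in for whatever queues pages into the gather):

	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, start, end);	/* generic wrapper, calls arch_tlb_gather_mmu() */
	unmap_vmas(&tlb, vma, start, end);	/* batch pages, defer the TLB flush */
	tlb_finish_mmu(&tlb, start, end);	/* flush and release, via arch_tlb_finish_mmu() */
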
-- cgit v1.2.3 From 0a2dd266dd6b7a31503b5bbe63af05961a6b446d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:09 -0700 Subject: mm: make tlb_flush_pending global Currently, tlb_flush_pending is used only for CONFIG_[NUMA_BALANCING|COMPACTION] but upcoming patches to solve a subtle TLB flush batching problem will use it regardless of compaction/NUMA, so this patch removes the dependency. [akpm@linux-foundation.org: remove more ifdefs from world's ugliest printk statement] Link: http://lkml.kernel.org/r/20170802000818.4760-6-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Acked-by: Mel Gorman Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Heiko Carstens Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Russell King Cc: Sergey Senozhatsky Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 21 --------------------- mm/debug.c | 4 ---- 2 files changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c605f2a3a68e..892a7b0196fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -487,14 +487,12 @@ struct mm_struct { /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * An operation with batched TLB flushing is going on. Anything that * can move process memory needs to flush the TLB when moving a * PROT_NONE or PROT_NUMA mapped page. */ atomic_t tlb_flush_pending; -#endif #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ bool tlb_flush_batched; @@ -528,7 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, extern void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * Memory barriers to keep this state in sync are graciously provided by * the page table locks, outside of which no page table modifications happen.
@@ -569,24 +566,6 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm) smp_mb__before_atomic(); atomic_dec(&mm->tlb_flush_pending); } -#else -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) -{ - return false; -} - -static inline void init_tlb_flush_pending(struct mm_struct *mm) -{ -} - -static inline void inc_tlb_flush_pending(struct mm_struct *mm) -{ -} - -static inline void dec_tlb_flush_pending(struct mm_struct *mm) -{ -} -#endif struct vm_fault; diff --git a/mm/debug.c b/mm/debug.c index d70103bb4731..5715448ab0b5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) "tlb_flush_pending %d\n" -#endif "def_flags: %#lx(%pGv)\n", mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, @@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq, #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) atomic_read(&mm->tlb_flush_pending), -#endif mm->def_flags, &mm->def_flags ); } -- cgit v1.2.3 From 99baac21e4585f4258f919502c6e23f1e5edc98c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:12 -0700 Subject: mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem Nadav reported that parallel MADV_DONTNEED on the same range has a stale TLB problem, and Mel fixed it [1] and found the same problem in MADV_FREE [2]. Quote from Mel Gorman: "The race in question is CPU 0 running madv_free and updating some PTEs while CPU 1 is also running madv_free and looking at the same PTEs. CPU 1 may have writable TLB entries for a page but fail the pte_dirty check (because CPU 0 has updated it already) and potentially fail to flush. Hence, when madv_free on CPU 1 returns, there are still potentially writable TLB entries and the underlying PTE is still present so that a subsequent write does not necessarily propagate the dirty bit to the underlying PTE any more. Reclaim at some unknown time at the future may then see that the PTE is still clean and discard the page even though a write has happened in the meantime. I think this is possible but I could have missed some protection in madv_free that prevents it happening." This patch aims to solve both problems at once, and is also groundwork for another problem with the KSM, MADV_FREE and soft-dirty story [3]. The TLB batch API (tlb_[gather|finish]_mmu) uses [inc|dec]_tlb_flush_pending and mm_tlb_flush_nested so that, when tlb_finish_mmu is called, we can detect that parallel threads are going on. In that case, the TLB is flushed forcefully to prevent userspace from accessing memory via a stale TLB entry even though gathering the page table entries failed. I confirmed this patch works with the test program Nadav gave [4], so this patch supersedes "mm: Always flush VMA ranges affected by zap_page_range v2" in current mmotm. NOTE: This patch modifies the arch-specific TLB gathering interface (arm, ia64, s390, sh, um). Most architectures are straightforward, but s390 needs care because tlb_flush_mmu works only if mm->context.flush_mm is set to non-zero, which happens only when a pte entry really is cleared by ptep_get_and_clear and friends. However, this problem never changes the pte entries, but we still need to flush to prevent memory access through stale TLB entries.
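To make the race concrete before the references below, here is a simplified interleaving of the MADV_FREE case (an illustrative sketch, not code from this patch):

	/*
	 * CPU 0 (madv_free)                    CPU 1 (madv_free, same range)
	 *
	 * tlb_gather_mmu()
	 * clear pte_dirty, defer TLB flush     tlb_gather_mmu()
	 *                                      pte_dirty check fails (CPU 0
	 *                                      already cleared it), nothing
	 *                                      is queued for flushing
	 * tlb_finish_mmu() -> flushes          tlb_finish_mmu() -> no flush;
	 *                                      a stale writable TLB entry
	 *                                      survives on CPU 1
	 */

With this patch, CPU 1's tlb_finish_mmu() sees mm_tlb_flush_nested() return true (pending count > 1) and flushes the range regardless.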
[1] http://lkml.kernel.org/r/20170725101230.5v7gvnjmcnkzzql3@techsingularity.net [2] http://lkml.kernel.org/r/20170725100722.2dxnmgypmwnrfawp@suse.de [3] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com [4] https://patchwork.kernel.org/patch/9861621/ [minchan@kernel.org: decrease tlb flush pending count in tlb_finish_mmu] Link: http://lkml.kernel.org/r/20170808080821.GA31730@bbox Link: http://lkml.kernel.org/r/20170802000818.4760-7-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Reported-by: Nadav Amit Reported-by: Mel Gorman Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 7 ++++++- arch/ia64/include/asm/tlb.h | 4 +++- arch/s390/include/asm/tlb.h | 7 ++++++- arch/sh/include/asm/tlb.h | 4 ++-- arch/um/include/asm/tlb.h | 7 ++++++- include/asm-generic/tlb.h | 2 +- include/linux/mm_types.h | 8 ++++++++ mm/memory.c | 18 ++++++++++++++++-- 8 files changed, 48 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 7f5b2a2d3861..d5562f9ce600 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->range_start = start; + tlb->range_end = end; + } + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 93cadc04ac62..cbe5ac3699bf 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) + tlb->need_flush = 1; /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index d574d0820dc8..2eb8ff0d6fca 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + } + tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 89786560dbd4..51a8bc967e75 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { - if (tlb->fullmm) + if (tlb->fullmm || force) flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 2a901eca7145..344d95619d03 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + tlb->need_flush = 1; + } tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8f71521e7a44..faddde44de8c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, bool force); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 892a7b0196fd..3cadee0a3508 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending) > 0; } +/* + * Returns true if there are two or more TLB batching threads in parallel. + */ +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); diff --git a/mm/memory.c b/mm/memory.c index 34cba5113e06..e158f7ac6730 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * that were required.
*/ void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; + if (force) + __tlb_adjust_range(tlb, start, end - start); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); } void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - arch_tlb_finish_mmu(tlb, start, end); + /* + * If parallel threads are doing PTE changes on the same range under a + * non-exclusive lock (e.g., mmap_sem read-side) but defer the TLB + * flush by batching, a thread with a stale TLB entry can fail to flush + * it by observing pte_none|!pte_dirty, for example; so flush the TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); } /* -- cgit v1.2.3 From e4672e55d6f3428ae9f27542e05c891f2af71051 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 2 Aug 2017 13:56:01 +0200 Subject: drm: Extract drm_device.h I need this to untangle an include loop in the next patch. Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170802115604.12734-2-daniel.vetter@ffwll.ch --- include/drm/drmP.h | 175 +------------------------------------------ include/drm/drm_device.h | 190 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 174 deletions(-) create mode 100644 include/drm/drm_device.h (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 3aa3809ab524..3031c256105e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -82,19 +82,10 @@ #include #include #include - +#include struct module; -struct drm_device; -struct drm_agp_head; -struct drm_local_map; -struct drm_device_dma; -struct drm_gem_object; -struct drm_master; -struct drm_vblank_crtc; -struct drm_vma_offset_manager; - struct device_node; struct videomode; struct reservation_object; @@ -305,170 +296,6 @@ struct pci_controller; #define DRM_IF_VERSION(maj, min) (maj << 16 | min) -/** - * DRM device structure. This structure represent a complete card that - * may contain multiple heads. - */ -struct drm_device { - struct list_head legacy_dev_list;/**< list of devices per driver for stealth attach cleanup */ - int if_version; /**< Highest interface version set */ - - /** \name Lifetime Management */ - /*@{ */ - struct kref ref; /**< Object ref-count */ - struct device *dev; /**< Device structure of bus-device */ - struct drm_driver *driver; /**< DRM driver managing the device */ - void *dev_private; /**< DRM driver private data */ - struct drm_minor *control; /**< Control node */ - struct drm_minor *primary; /**< Primary node */ - struct drm_minor *render; /**< Render node */ - bool registered; - - /* currently active master for this device.
Protected by master_mutex */ - struct drm_master *master; - - atomic_t unplugged; /**< Flag whether dev is dead */ - struct inode *anon_inode; /**< inode for private address-space */ - char *unique; /**< unique name of the device */ - /*@} */ - - /** \name Locks */ - /*@{ */ - struct mutex struct_mutex; /**< For others */ - struct mutex master_mutex; /**< For drm_minor::master and drm_file::is_master */ - /*@} */ - - /** \name Usage Counters */ - /*@{ */ - int open_count; /**< Outstanding files open, protected by drm_global_mutex. */ - spinlock_t buf_lock; /**< For drm_device::buf_use and a few other things. */ - int buf_use; /**< Buffers in use -- cannot alloc */ - atomic_t buf_alloc; /**< Buffer allocation in progress */ - /*@} */ - - struct mutex filelist_mutex; - struct list_head filelist; - - /** \name Memory management */ - /*@{ */ - struct list_head maplist; /**< Linked list of regions */ - struct drm_open_hash map_hash; /**< User token hash table for maps */ - - /** \name Context handle management */ - /*@{ */ - struct list_head ctxlist; /**< Linked list of context handles */ - struct mutex ctxlist_mutex; /**< For ctxlist */ - - struct idr ctx_idr; - - struct list_head vmalist; /**< List of vmas (for debugging) */ - - /*@} */ - - /** \name DMA support */ - /*@{ */ - struct drm_device_dma *dma; /**< Optional pointer for DMA support */ - /*@} */ - - /** \name Context support */ - /*@{ */ - - __volatile__ long context_flag; /**< Context swapping flag */ - int last_context; /**< Last current context */ - /*@} */ - - /** - * @irq_enabled: - * - * Indicates that interrupt handling is enabled, specifically vblank - * handling. Drivers which don't use drm_irq_install() need to set this - * to true manually. - */ - bool irq_enabled; - int irq; - - /** - * @vblank_disable_immediate: - * - * If true, vblank interrupt will be disabled immediately when the - * refcount drops to zero, as opposed to via the vblank disable - * timer. - * - * This can be set to true it the hardware has a working vblank counter - * with high-precision timestamping (otherwise there are races) and the - * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() - * appropriately. See also @max_vblank_count and - * &drm_crtc_funcs.get_vblank_counter. - */ - bool vblank_disable_immediate; - - /** - * @vblank: - * - * Array of vblank tracking structures, one per &struct drm_crtc. For - * historical reasons (vblank support predates kernel modesetting) this - * is free-standing and not part of &struct drm_crtc itself. It must be - * initialized explicitly by calling drm_vblank_init(). - */ - struct drm_vblank_crtc *vblank; - - spinlock_t vblank_time_lock; /**< Protects vblank count and time updates during vblank enable/disable */ - spinlock_t vbl_lock; - - /** - * @max_vblank_count: - * - * Maximum value of the vblank registers. This value +1 will result in a - * wrap-around of the vblank register. It is used by the vblank core to - * handle wrap-arounds. - * - * If set to zero the vblank core will try to guess the elapsed vblanks - * between times when the vblank interrupt is disabled through - * high-precision timestamps. That approach is suffering from small - * races and imprecision over longer time periods, hence exposing a - * hardware vblank counter is always recommended. - * - * If non-zeor, &drm_crtc_funcs.get_vblank_counter must be set. 
- */ - u32 max_vblank_count; /**< size of vblank counter register */ - - /** - * List of events - */ - struct list_head vblank_event_list; - spinlock_t event_lock; - - /*@} */ - - struct drm_agp_head *agp; /**< AGP data */ - - struct pci_dev *pdev; /**< PCI device structure */ -#ifdef __alpha__ - struct pci_controller *hose; -#endif - - struct drm_sg_mem *sg; /**< Scatter gather memory */ - unsigned int num_crtcs; /**< Number of CRTCs on this device */ - - struct { - int context; - struct drm_hw_lock *lock; - } sigdata; - - struct drm_local_map *agp_buffer_map; - unsigned int agp_buffer_token; - - struct drm_mode_config mode_config; /**< Current mode config */ - - /** \name GEM information */ - /*@{ */ - struct mutex object_name_lock; - struct idr object_name_idr; - struct drm_vma_offset_manager *vma_offset_manager; - /*@} */ - int switch_power_state; -}; - /** * drm_drv_uses_atomic_modeset - check if the driver implements * atomic_commit() diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h new file mode 100644 index 000000000000..e21af87a2f3c --- /dev/null +++ b/include/drm/drm_device.h @@ -0,0 +1,190 @@ +#ifndef _DRM_DEVICE_H_ +#define _DRM_DEVICE_H_ + +#include +#include +#include +#include + +#include +#include + +struct drm_driver; +struct drm_minor; +struct drm_master; +struct drm_device_dma; +struct drm_vblank_crtc; +struct drm_sg_mem; +struct drm_local_map; +struct drm_vma_offset_manager; + +struct inode; + +struct pci_dev; +struct pci_controller; + +/** + * DRM device structure. This structure represent a complete card that + * may contain multiple heads. + */ +struct drm_device { + struct list_head legacy_dev_list;/**< list of devices per driver for stealth attach cleanup */ + int if_version; /**< Highest interface version set */ + + /** \name Lifetime Management */ + /*@{ */ + struct kref ref; /**< Object ref-count */ + struct device *dev; /**< Device structure of bus-device */ + struct drm_driver *driver; /**< DRM driver managing the device */ + void *dev_private; /**< DRM driver private data */ + struct drm_minor *control; /**< Control node */ + struct drm_minor *primary; /**< Primary node */ + struct drm_minor *render; /**< Render node */ + bool registered; + + /* currently active master for this device. Protected by master_mutex */ + struct drm_master *master; + + atomic_t unplugged; /**< Flag whether dev is dead */ + struct inode *anon_inode; /**< inode for private address-space */ + char *unique; /**< unique name of the device */ + /*@} */ + + /** \name Locks */ + /*@{ */ + struct mutex struct_mutex; /**< For others */ + struct mutex master_mutex; /**< For drm_minor::master and drm_file::is_master */ + /*@} */ + + /** \name Usage Counters */ + /*@{ */ + int open_count; /**< Outstanding files open, protected by drm_global_mutex. */ + spinlock_t buf_lock; /**< For drm_device::buf_use and a few other things. 
*/ + int buf_use; /**< Buffers in use -- cannot alloc */ + atomic_t buf_alloc; /**< Buffer allocation in progress */ + /*@} */ + + struct mutex filelist_mutex; + struct list_head filelist; + + /** \name Memory management */ + /*@{ */ + struct list_head maplist; /**< Linked list of regions */ + struct drm_open_hash map_hash; /**< User token hash table for maps */ + + /** \name Context handle management */ + /*@{ */ + struct list_head ctxlist; /**< Linked list of context handles */ + struct mutex ctxlist_mutex; /**< For ctxlist */ + + struct idr ctx_idr; + + struct list_head vmalist; /**< List of vmas (for debugging) */ + + /*@} */ + + /** \name DMA support */ + /*@{ */ + struct drm_device_dma *dma; /**< Optional pointer for DMA support */ + /*@} */ + + /** \name Context support */ + /*@{ */ + + __volatile__ long context_flag; /**< Context swapping flag */ + int last_context; /**< Last current context */ + /*@} */ + + /** + * @irq_enabled: + * + * Indicates that interrupt handling is enabled, specifically vblank + * handling. Drivers which don't use drm_irq_install() need to set this + * to true manually. + */ + bool irq_enabled; + int irq; + + /** + * @vblank_disable_immediate: + * + * If true, vblank interrupt will be disabled immediately when the + * refcount drops to zero, as opposed to via the vblank disable + * timer. + * + * This can be set to true if the hardware has a working vblank counter + * with high-precision timestamping (otherwise there are races) and the + * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() + * appropriately. See also @max_vblank_count and + * &drm_crtc_funcs.get_vblank_counter. + */ + bool vblank_disable_immediate; + + /** + * @vblank: + * + * Array of vblank tracking structures, one per &struct drm_crtc. For + * historical reasons (vblank support predates kernel modesetting) this + * is free-standing and not part of &struct drm_crtc itself. It must be + * initialized explicitly by calling drm_vblank_init(). + */ + struct drm_vblank_crtc *vblank; + + spinlock_t vblank_time_lock; /**< Protects vblank count and time updates during vblank enable/disable */ + spinlock_t vbl_lock; + + /** + * @max_vblank_count: + * + * Maximum value of the vblank registers. This value +1 will result in a + * wrap-around of the vblank register. It is used by the vblank core to + * handle wrap-arounds. + * + * If set to zero the vblank core will try to guess the elapsed vblanks + * between times when the vblank interrupt is disabled through + * high-precision timestamps. That approach is suffering from small + * races and imprecision over longer time periods, hence exposing a + * hardware vblank counter is always recommended. + * + * If non-zero, &drm_crtc_funcs.get_vblank_counter must be set.
+ */ + u32 max_vblank_count; /**< size of vblank counter register */ + + /** + * List of events + */ + struct list_head vblank_event_list; + spinlock_t event_lock; + + /*@} */ + + struct drm_agp_head *agp; /**< AGP data */ + + struct pci_dev *pdev; /**< PCI device structure */ +#ifdef __alpha__ + struct pci_controller *hose; +#endif + + struct drm_sg_mem *sg; /**< Scatter gather memory */ + unsigned int num_crtcs; /**< Number of CRTCs on this device */ + + struct { + int context; + struct drm_hw_lock *lock; + } sigdata; + + struct drm_local_map *agp_buffer_map; + unsigned int agp_buffer_token; + + struct drm_mode_config mode_config; /**< Current mode config */ + + /** \name GEM information */ + /*@{ */ + struct mutex object_name_lock; + struct idr object_name_idr; + struct drm_vma_offset_manager *vma_offset_manager; + /*@} */ + int switch_power_state; +}; + +#endif -- cgit v1.2.3 From c07dcd61a0e57c6840c23d90cb1beddad7d682a1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 2 Aug 2017 13:56:02 +0200 Subject: drm: Document device unplug infrastructure While at it, also ocd and give them a consistent drm_dev_ prefix, like the other device instance functionality. Plus move the functions into the right places. Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170802115604.12734-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_drv.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/drm_file.c | 2 +- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/drm_gem_cma_helper.c | 2 +- drivers/gpu/drm/drm_ioctl.c | 4 ++-- drivers/gpu/drm/drm_vm.c | 2 +- drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c | 2 +- drivers/gpu/drm/udl/udl_connector.c | 2 +- drivers/gpu/drm/udl/udl_drv.c | 2 +- drivers/gpu/drm/udl/udl_fb.c | 2 +- include/drm/drmP.h | 13 ------------- include/drm/drm_drv.h | 22 ++++++++++++++++++++-- 12 files changed, 51 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 2ed2d919beae..39191e5c240e 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -291,7 +291,7 @@ struct drm_minor *drm_minor_acquire(unsigned int minor_id) if (!minor) { return ERR_PTR(-ENODEV); - } else if (drm_device_is_unplugged(minor->dev)) { + } else if (drm_dev_is_unplugged(minor->dev)) { drm_dev_unref(minor->dev); return ERR_PTR(-ENODEV); } @@ -364,7 +364,22 @@ void drm_put_dev(struct drm_device *dev) } EXPORT_SYMBOL(drm_put_dev); -void drm_unplug_dev(struct drm_device *dev) +static void drm_device_set_unplugged(struct drm_device *dev) +{ + smp_wmb(); + atomic_set(&dev->unplugged, 1); +} + +/** + * drm_dev_unplug - unplug a DRM device + * @dev: DRM device + * + * This unplugs a hotpluggable DRM device, which makes it inaccessible to + * userspace operations. Entry-points can use drm_dev_is_unplugged(). This + * essentially unregisters the device like drm_dev_unregister(), but can be + * called while there are still open users of @dev. + */ +void drm_dev_unplug(struct drm_device *dev) { /* for a USB device */ if (drm_core_check_feature(dev, DRIVER_MODESET)) @@ -383,7 +398,7 @@ void drm_unplug_dev(struct drm_device *dev) } mutex_unlock(&drm_global_mutex); } -EXPORT_SYMBOL(drm_unplug_dev); +EXPORT_SYMBOL(drm_dev_unplug); /* * DRM internal mount @@ -835,6 +850,9 @@ EXPORT_SYMBOL(drm_dev_register); * drm_dev_register() but does not deallocate the device. The caller must call * drm_dev_unref() to drop their final reference. 
* + * A special form of unregistering for hotpluggable devices is drm_dev_unplug(), + * which can be called while there are still open users of @dev. + * * This should be called first in the device teardown code to make sure * userspace can't access the device instance any more. */ diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 59b75a974357..b3c6e997ccdb 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -436,7 +436,7 @@ int drm_release(struct inode *inode, struct file *filp) if (!--dev->open_count) { drm_lastclose(dev); - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) drm_put_dev(dev); } mutex_unlock(&drm_global_mutex); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index a8d396bed6a4..ad4e9cfe48a2 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1001,7 +1001,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) struct drm_vma_offset_node *node; int ret; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; drm_vma_offset_lock_lookup(dev->vma_offset_manager); diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 275ab872b34f..f4e00571839d 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -390,7 +390,7 @@ unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, struct drm_device *dev = priv->minor->dev; struct drm_vma_offset_node *node; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; drm_vma_offset_lock_lookup(dev->vma_offset_manager); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 8bfeb32f8a10..d920b2118a39 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -716,7 +716,7 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, struct drm_device *dev = file_priv->minor->dev; int retcode; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; retcode = drm_ioctl_permit(flags, file_priv); @@ -765,7 +765,7 @@ long drm_ioctl(struct file *filp, dev = file_priv->minor->dev; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END; diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 1170b3209a12..13a59ed2afbc 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -631,7 +631,7 @@ int drm_legacy_mmap(struct file *filp, struct vm_area_struct *vma) struct drm_device *dev = priv->minor->dev; int ret; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c index f224b54a30f6..177e9d861001 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c @@ -56,7 +56,7 @@ static const struct drm_connector_helper_funcs tinydrm_connector_hfuncs = { static enum drm_connector_status tinydrm_connector_detect(struct drm_connector *connector, bool force) { - if (drm_device_is_unplugged(connector->dev)) + if (drm_dev_is_unplugged(connector->dev)) return connector_status_disconnected; return connector->status; diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index d2f57c52f7db..9f9a49748d17 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ 
b/drivers/gpu/drm/udl/udl_connector.c @@ -96,7 +96,7 @@ static int udl_mode_valid(struct drm_connector *connector, static enum drm_connector_status udl_detect(struct drm_connector *connector, bool force) { - if (drm_device_is_unplugged(connector->dev)) + if (drm_dev_is_unplugged(connector->dev)) return connector_status_disconnected; return connector_status_connected; } diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 0f02e1acf0ba..96c44ede5d69 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -102,7 +102,7 @@ static void udl_usb_disconnect(struct usb_interface *interface) drm_kms_helper_poll_disable(dev); udl_fbdev_unplug(dev); udl_drop_usb(dev); - drm_unplug_dev(dev); + drm_dev_unplug(dev); } /* diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index a5c54dc60def..77a2c59c56a1 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -198,7 +198,7 @@ static int udl_fb_open(struct fb_info *info, int user) struct udl_device *udl = dev->dev_private; /* If the USB device is gone, we don't accept new opens */ - if (drm_device_is_unplugged(udl->ddev)) + if (drm_dev_is_unplugged(udl->ddev)) return -ENODEV; ufbdev->fb_count++; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 3031c256105e..7277783a4ff0 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -320,19 +320,6 @@ static __inline__ int drm_core_check_feature(struct drm_device *dev, return ((dev->driver->driver_features & feature) ? 1 : 0); } -static inline void drm_device_set_unplugged(struct drm_device *dev) -{ - smp_wmb(); - atomic_set(&dev->unplugged, 1); -} - -static inline int drm_device_is_unplugged(struct drm_device *dev) -{ - int ret = atomic_read(&dev->unplugged); - smp_rmb(); - return ret; -} - /******************************************************************/ /** \name Internal function definitions */ /*@{*/ diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 505c91354802..71bbaaec836d 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -30,7 +30,8 @@ #include #include -struct drm_device; +#include + struct drm_file; struct drm_gem_object; struct drm_master; @@ -613,7 +614,24 @@ void drm_dev_unregister(struct drm_device *dev); void drm_dev_ref(struct drm_device *dev); void drm_dev_unref(struct drm_device *dev); void drm_put_dev(struct drm_device *dev); -void drm_unplug_dev(struct drm_device *dev); +void drm_dev_unplug(struct drm_device *dev); + +/** + * drm_dev_is_unplugged - is a DRM device unplugged + * @dev: DRM device + * + * This function can be called to check whether a hotpluggable device is + * unplugged. Unplugging itself is signalled through drm_dev_unplug(). If a + * device is unplugged, these two functions guarantee that any store before + * calling drm_dev_unplug() is visible to callers of this function after it + * completes. + */ +static inline int drm_dev_is_unplugged(struct drm_device *dev) +{ + int ret = atomic_read(&dev->unplugged); + smp_rmb(); + return ret; +} + int drm_dev_set_unique(struct drm_device *dev, const char *name); -- cgit v1.2.3 From 7faf952a3030d304334fe527be339b63e9e2745f Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 10 Aug 2017 13:01:48 -0400 Subject: dma-buf: add reservation_object_copy_fences (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to copy all the fences in a reservation object to another one.
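A short usage sketch (a hypothetical caller; per the kernel-doc added below, both reservation objects must already be locked):

	int err;

	/* dst and src are struct reservation_object *, both locks held */
	err = reservation_object_copy_fences(dst, src);
	if (err)
		return err;	/* -ENOMEM: dst is left unchanged */
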
v2: handle NULL src_list Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/1502384509-10465-2-git-send-email-alexander.deucher@amd.com --- drivers/dma-buf/reservation.c | 60 +++++++++++++++++++++++++++++++++++++++++++ include/linux/reservation.h | 3 +++ 2 files changed, 63 insertions(+) (limited to 'include') diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 87f8f5747500..d4881f91c43b 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -261,6 +261,66 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, } EXPORT_SYMBOL(reservation_object_add_excl_fence); +/** +* reservation_object_copy_fences - Copy all fences from src to dst. +* @dst: the destination reservation object +* @src: the source reservation object +* +* Copy all fences from src to dst. Both src->lock as well as dst->lock must be +* held. +*/ +int reservation_object_copy_fences(struct reservation_object *dst, + struct reservation_object *src) +{ + struct reservation_object_list *src_list, *dst_list; + struct dma_fence *old, *new; + size_t size; + unsigned i; + + src_list = reservation_object_get_list(src); + + if (src_list) { + size = offsetof(typeof(*src_list), + shared[src_list->shared_count]); + dst_list = kmalloc(size, GFP_KERNEL); + if (!dst_list) + return -ENOMEM; + + dst_list->shared_count = src_list->shared_count; + dst_list->shared_max = src_list->shared_count; + for (i = 0; i < src_list->shared_count; ++i) + dst_list->shared[i] = + dma_fence_get(src_list->shared[i]); + } else { + dst_list = NULL; + } + + kfree(dst->staged); + dst->staged = NULL; + + src_list = reservation_object_get_list(dst); + + old = reservation_object_get_excl(dst); + new = reservation_object_get_excl(src); + + dma_fence_get(new); + + preempt_disable(); + write_seqcount_begin(&dst->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(dst->fence_excl, new); + RCU_INIT_POINTER(dst->fence, dst_list); + write_seqcount_end(&dst->seq); + preempt_enable(); + + if (src_list) + kfree_rcu(src_list, rcu); + dma_fence_put(old); + + return 0; +} +EXPORT_SYMBOL(reservation_object_copy_fences); /** * reservation_object_get_fences_rcu - Get an object's shared and exclusive * fences without update side lock held diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 156cfd330b66..21fc84d82d41 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -254,6 +254,9 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj, unsigned *pshared_count, struct dma_fence ***pshared); +int reservation_object_copy_fences(struct reservation_object *dst, + struct reservation_object *src); + long reservation_object_wait_timeout_rcu(struct reservation_object *obj, bool wait_all, bool intr, unsigned long timeout); -- cgit v1.2.3 From 6a1c9510694fe1e901a3b5b53386eac069adcea6 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Tue, 15 Aug 2017 23:00:20 -0400 Subject: drm/amdkfd: Adding new IOCTL for scratch memory v2 v2: * Renamed ALLOC_MEMORY_OF_SCRATCH to SET_SCRATCH_BACKING_VA * Removed size parameter from the ioctl, it was unused * Removed hole in ioctl number space * No more call to write_config_static_mem * Return correct error code from ioctl Signed-off-by: Moses Reuben Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 37 ++++++++++++++++++++++
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++ .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 ++ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + include/uapi/linux/kfd_ioctl.h | 11 ++++++- 6 files changed, 55 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 65b506f19b46..7436d34b77ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -848,6 +848,40 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, return err; } +static int kfd_ioctl_set_scratch_backing_va(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_scratch_backing_va_args *args = data; + struct kfd_process_device *pdd; + struct kfd_dev *dev; + long err; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto bind_process_to_device_fail; + } + + pdd->qpd.sh_hidden_private_base = args->va_addr; + + mutex_unlock(&p->mutex); + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) + dev->kfd2kgd->set_scratch_backing_va( + dev->kgd, args->va_addr, pdd->qpd.vmid); + + return 0; + +bind_process_to_device_fail: + mutex_unlock(&p->mutex); + return err; +} #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ @@ -902,6 +936,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, kfd_ioctl_dbg_wave_control, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, + kfd_ioctl_set_scratch_backing_va, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 618ac65b6136..53a66e821624 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -270,6 +270,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", q->pipe, q->queue); + dqm->dev->kfd2kgd->set_scratch_backing_va( + dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); if (retval) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index fadc56a8be71..72c3cbabc0a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -24,6 +24,7 @@ #include "kfd_device_queue_manager.h" #include "cik_regs.h" #include "oss/oss_2_4_sh_mask.h" +#include "gca/gfx_7_2_sh_mask.h" static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -123,6 +124,7 @@ static int register_process_cik(struct device_queue_manager *dqm, } else { temp = get_sh_mem_bases_nybble_64(pdd); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 15e81ae9d2f4..40e9ddd096cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -135,6 +135,8 @@ static int register_process_vi(struct device_queue_manager *dqm, qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; + qpd->sh_mem_config |= 1 << + SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 30ce92c6e6a1..b397ec726400 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -432,6 +432,7 @@ struct qcm_process_device { uint32_t gds_size; uint32_t num_gws; uint32_t num_oac; + uint32_t sh_hidden_private_base; }; /* Data that is per-process-per device. */ diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d6833426fdef..1b9c5609d523 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -232,6 +232,12 @@ struct kfd_ioctl_wait_events_args { uint32_t wait_result; /* from KFD */ }; +struct kfd_ioctl_set_scratch_backing_va_args { + uint64_t va_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -286,7 +292,10 @@ struct kfd_ioctl_wait_events_args { #define AMDKFD_IOC_DBG_WAVE_CONTROL \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) +#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ + AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x11 +#define AMDKFD_COMMAND_END 0x12 #endif -- cgit v1.2.3 From 5d71dbc3a588690c3d66d76db8cd29973425ce6d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 15 Aug 2017 23:00:22 -0400 Subject: drm/amdkfd: Implement image tiling mode support v2 v2: Removed hole in ioctl number space Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 43 ++++++++++++++++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 28 ++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7436d34b77ab..e4a8c2e52cb2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -883,6 +883,46 @@ bind_process_to_device_fail: return err; } +static int kfd_ioctl_get_tile_config(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_tile_config_args *args = data; + struct kfd_dev *dev; + struct tile_config config; + int err = 0; + + dev = kfd_device_by_id(args->gpu_id); + + dev->kfd2kgd->get_tile_config(dev->kgd, &config); + + args->gb_addr_config = config.gb_addr_config; + args->num_banks = config.num_banks; + args->num_ranks = config.num_ranks; + + if (args->num_tile_configs > config.num_tile_configs) + args->num_tile_configs = config.num_tile_configs; + err = copy_to_user((void __user *)args->tile_config_ptr, + config.tile_config_ptr, + args->num_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_tile_configs = 0; + return -EFAULT; + } + + if 
(args->num_macro_tile_configs > config.num_macro_tile_configs) + args->num_macro_tile_configs = + config.num_macro_tile_configs; + err = copy_to_user((void __user *)args->macro_tile_config_ptr, + config.macro_tile_config_ptr, + args->num_macro_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_macro_tile_configs = 0; + return -EFAULT; + } + + return 0; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -939,6 +979,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, kfd_ioctl_set_scratch_backing_va, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, + kfd_ioctl_get_tile_config, 0) }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1b9c5609d523..7b4567bacfc2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -238,6 +238,29 @@ struct kfd_ioctl_set_scratch_backing_va_args { uint32_t pad; }; +struct kfd_ioctl_get_tile_config_args { + /* to KFD: pointer to tile array */ + uint64_t tile_config_ptr; + /* to KFD: pointer to macro tile array */ + uint64_t macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_tile_configs; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_macro_tile_configs; + + uint32_t gpu_id; /* to KFD */ + uint32_t gb_addr_config; /* from KFD */ + uint32_t num_banks; /* from KFD */ + uint32_t num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -295,7 +318,10 @@ struct kfd_ioctl_set_scratch_backing_va_args { #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) +#define AMDKFD_IOC_GET_TILE_CONFIG \ + AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x12 +#define AMDKFD_COMMAND_END 0x13 #endif -- cgit v1.2.3 From d20fa5a06d7bddb7823d14255982148fefb72950 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 11 Aug 2017 16:49:04 +0300 Subject: drm: omapdrm: hdmi: Pass HDMI core version as integer to HDMI audio The HDMI audio driver only needs to know which generation of HDMI transmitter it deals with, not the detailed SoC model. Pass the version number as an integer to prepare for removal of the OMAP SoC version from the omapdrm driver. 
Signed-off-by: Laurent Pinchart Acked-by: Mark Brown Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4.c | 2 +- drivers/gpu/drm/omapdrm/dss/hdmi5.c | 2 +- drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c | 2 +- drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c | 2 +- include/sound/omap-hdmi-audio.h | 2 +- sound/soc/omap/omap-hdmi-audio.c | 9 +++------ 6 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index 438f47d8ab69..4c131d710282 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -667,7 +667,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 4, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index 8ab76b50662a..a6adddeee5bb 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -694,7 +694,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 5, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c index 156a254705ea..ec78d61bc551 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c @@ -664,7 +664,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 4, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c index 4da36bcab977..2e2fcc3d6d4f 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c @@ -695,7 +695,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 5, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/include/sound/omap-hdmi-audio.h b/include/sound/omap-hdmi-audio.h index 1df2ff61a4dd..0e495ed8872e 100644 --- a/include/sound/omap-hdmi-audio.h +++ b/include/sound/omap-hdmi-audio.h @@ -39,7 +39,7 @@ struct omap_hdmi_audio_ops { /* HDMI audio initalization data */ struct omap_hdmi_audio_pdata { struct device *dev; - enum omapdss_version dss_version; + unsigned int version; phys_addr_t audio_dma_addr; const struct omap_hdmi_audio_ops *ops; diff --git a/sound/soc/omap/omap-hdmi-audio.c b/sound/soc/omap/omap-hdmi-audio.c index 888133f9e65d..3e9cc4842a1d 100644 --- a/sound/soc/omap/omap-hdmi-audio.c +++ b/sound/soc/omap/omap-hdmi-audio.c @@ -337,14 +337,11 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev) ad->dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; mutex_init(&ad->current_stream_lock); - switch (ha->dss_version) { - case OMAPDSS_VER_OMAP4430_ES1: - case OMAPDSS_VER_OMAP4430_ES2: - case OMAPDSS_VER_OMAP4: + switch (ha->version) { + case 4: dai_drv = &omap4_hdmi_dai; break; - case OMAPDSS_VER_OMAP5: - case OMAPDSS_VER_DRA7xx: + case 5: dai_drv = &omap5_hdmi_dai; 
break; default: -- cgit v1.2.3 From 739a6d5d640a9811beef6a828253ee184dd431c5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 11 Aug 2017 16:49:12 +0300 Subject: drm: omapdrm: Remove omapdrm platform data The omapdrm platform data are not used anymore, remove them. Signed-off-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Signed-off-by: Tomi Valkeinen --- include/linux/platform_data/omap_drm.h | 53 ---------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 include/linux/platform_data/omap_drm.h (limited to 'include') diff --git a/include/linux/platform_data/omap_drm.h b/include/linux/platform_data/omap_drm.h deleted file mode 100644 index f4e4a237ebd2..000000000000 --- a/include/linux/platform_data/omap_drm.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * DRM/KMS platform data for TI OMAP platforms - * - * Copyright (C) 2012 Texas Instruments - * Author: Rob Clark - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#ifndef __PLATFORM_DATA_OMAP_DRM_H__ -#define __PLATFORM_DATA_OMAP_DRM_H__ - -/* - * Optional platform data to configure the default configuration of which - * pipes/overlays/CRTCs are used.. if this is not provided, then instead the - * first CONFIG_DRM_OMAP_NUM_CRTCS are used, and they are each connected to - * one manager, with priority given to managers that are connected to - * detected devices. Remaining overlays are used as video planes. This - * should be a good default behavior for most cases, but yet there still - * might be times when you wish to do something different. - */ -struct omap_kms_platform_data { - /* overlays to use as CRTCs: */ - int ovl_cnt; - const int *ovl_ids; - - /* overlays to use as video planes: */ - int pln_cnt; - const int *pln_ids; - - int mgr_cnt; - const int *mgr_ids; - - int dev_cnt; - const char **dev_names; -}; - -struct omap_drm_platform_data { - uint32_t omaprev; - struct omap_kms_platform_data *kms_pdata; -}; - -#endif /* __PLATFORM_DATA_OMAP_DRM_H__ */ -- cgit v1.2.3 From d956e1293b9b43f3a9a508162cdbaa96cf02e6e0 Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Sun, 6 Aug 2017 17:41:08 +0200 Subject: drm/gem-cma-helper: Remove drm_gem_cma_dumb_map_offset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no more users of drm_gem_cma_dumb_map_offset(), so remove it. 
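For context, the former in-tree users were converted to the generic GEM core helper, so a CMA-backed driver now wires up drm_gem_dumb_map_offset() instead; a minimal sketch with a hypothetical driver struct:

/* Sketch: dumb-buffer hooks after the conversion. Only the
 * CMA-specific map_offset variant went away; create/destroy are
 * unchanged.
 */
static struct drm_driver my_driver = {		/* hypothetical */
	.dumb_create		= drm_gem_cma_dumb_create,
	.dumb_map_offset	= drm_gem_dumb_map_offset,	/* core GEM helper */
	.dumb_destroy		= drm_gem_dumb_destroy,
	/* ... */
};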
Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1502034068-51384-20-git-send-email-noralf@tronnes.org --- drivers/gpu/drm/drm_gem_cma_helper.c | 35 ----------------------------------- include/drm/drm_gem_cma_helper.h | 5 ----- 2 files changed, 40 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index f4e00571839d..373e33f22be4 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -264,41 +264,6 @@ int drm_gem_cma_dumb_create(struct drm_file *file_priv, } EXPORT_SYMBOL_GPL(drm_gem_cma_dumb_create); -/** - * drm_gem_cma_dumb_map_offset - return the fake mmap offset for a CMA GEM - * object - * @file_priv: DRM file-private structure containing the GEM object - * @drm: DRM device - * @handle: GEM object handle - * @offset: return location for the fake mmap offset - * - * This function look up an object by its handle and returns the fake mmap - * offset associated with it. Drivers using the CMA helpers should set this - * as their &drm_driver.dumb_map_offset callback. - * - * Returns: - * 0 on success or a negative error code on failure. - */ -int drm_gem_cma_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *drm, u32 handle, - u64 *offset) -{ - struct drm_gem_object *gem_obj; - - gem_obj = drm_gem_object_lookup(file_priv, handle); - if (!gem_obj) { - dev_err(drm->dev, "failed to lookup GEM object\n"); - return -EINVAL; - } - - *offset = drm_vma_node_offset_addr(&gem_obj->vma_node); - - drm_gem_object_put_unlocked(gem_obj); - - return 0; -} -EXPORT_SYMBOL_GPL(drm_gem_cma_dumb_map_offset); - const struct vm_operations_struct drm_gem_cma_vm_ops = { .open = drm_gem_vm_open, .close = drm_gem_vm_close, diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index b42529e0fae0..58a739bf15f1 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -73,11 +73,6 @@ int drm_gem_cma_dumb_create(struct drm_file *file_priv, struct drm_device *drm, struct drm_mode_create_dumb *args); -/* map memory region for DRM framebuffer to user space */ -int drm_gem_cma_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *drm, u32 handle, - u64 *offset); - /* set vm_flags and we can change the VM attribute to other one at here */ int drm_gem_cma_mmap(struct file *filp, struct vm_area_struct *vma); -- cgit v1.2.3 From 4c3dbb2c312c9fafbac30d98c523b8b1f3455d78 Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Sun, 13 Aug 2017 15:31:44 +0200 Subject: drm: Add GEM backed framebuffer library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This library provides helpers for drivers that don't subclass drm_framebuffer and are backed by drm_gem_object. The code is taken from drm_fb_cma_helper. 
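Wiring it up is a one-liner for drivers without special framebuffer needs; a minimal sketch, assuming an atomic driver (the helpers referenced are the ones added below):

#include <drm/drm_atomic_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>

/* Sketch: plug the new GEM framebuffer helper into the mode-config
 * hooks; drm_gem_fb_create() validates buffer sizes and wraps the
 * backing GEM objects in a plain drm_framebuffer.
 */
static const struct drm_mode_config_funcs my_mode_config_funcs = {
	.fb_create	= drm_gem_fb_create,
	.atomic_check	= drm_atomic_helper_check,
	.atomic_commit	= drm_atomic_helper_commit,
};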
Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1502631125-13557-2-git-send-email-noralf@tronnes.org --- Documentation/gpu/drm-kms-helpers.rst | 9 + drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 283 +++++++++++++++++++++++++++ include/drm/drm_framebuffer.h | 7 + include/drm/drm_gem_framebuffer_helper.h | 37 ++++ 5 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/drm_gem_framebuffer_helper.c create mode 100644 include/drm/drm_gem_framebuffer_helper.h (limited to 'include') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 7c5e2549a58a..13dd237418cc 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -296,3 +296,12 @@ Auxiliary Modeset Helpers .. kernel-doc:: drivers/gpu/drm/drm_modeset_helper.c :export: + +Framebuffer GEM Helper Reference +================================ + +.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c + :export: diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 24a066e1841c..a8acc197dec3 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -33,7 +33,7 @@ drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \ drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \ drm_kms_helper_common.o drm_dp_dual_mode_helper.o \ drm_simple_kms_helper.o drm_modeset_helper.o \ - drm_scdc_helper.o + drm_scdc_helper.o drm_gem_framebuffer_helper.o drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c new file mode 100644 index 000000000000..d54a083dc5dd --- /dev/null +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -0,0 +1,283 @@ +/* + * drm gem framebuffer helper functions + * + * Copyright (C) 2017 Noralf Trønnes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * DOC: overview + * + * This library provides helpers for drivers that don't subclass + * &drm_framebuffer and and use &drm_gem_object for their backing storage. + * + * Drivers without additional needs to validate framebuffers can simply use + * drm_gem_fb_create() and everything is wired up automatically. But all + * parts can be used individually. + */ + +/** + * drm_gem_fb_get_obj() - Get GEM object for framebuffer + * @fb: The framebuffer + * @plane: Which plane + * + * Returns the GEM object for given framebuffer. 
+ */ +struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb, + unsigned int plane) +{ + if (plane >= 4) + return NULL; + + return fb->obj[plane]; +} +EXPORT_SYMBOL_GPL(drm_gem_fb_get_obj); + +static struct drm_framebuffer * +drm_gem_fb_alloc(struct drm_device *dev, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object **obj, unsigned int num_planes, + const struct drm_framebuffer_funcs *funcs) +{ + struct drm_framebuffer *fb; + int ret, i; + + fb = kzalloc(sizeof(*fb), GFP_KERNEL); + if (!fb) + return ERR_PTR(-ENOMEM); + + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + + for (i = 0; i < num_planes; i++) + fb->obj[i] = obj[i]; + + ret = drm_framebuffer_init(dev, fb, funcs); + if (ret) { + DRM_DEV_ERROR(dev->dev, "Failed to init framebuffer: %d\n", + ret); + kfree(fb); + return ERR_PTR(ret); + } + + return fb; +} + +/** + * drm_gem_fb_destroy - Free GEM backed framebuffer + * @fb: DRM framebuffer + * + * Frees a GEM backed framebuffer with its backing buffer(s) and the structure + * itself. Drivers can use this as their &drm_framebuffer_funcs->destroy + * callback. + */ +void drm_gem_fb_destroy(struct drm_framebuffer *fb) +{ + int i; + + for (i = 0; i < 4; i++) + drm_gem_object_put_unlocked(fb->obj[i]); + + drm_framebuffer_cleanup(fb); + kfree(fb); +} +EXPORT_SYMBOL(drm_gem_fb_destroy); + +/** + * drm_gem_fb_create_handle - Create handle for GEM backed framebuffer + * @fb: DRM framebuffer + * @file: drm file + * @handle: handle created + * + * Drivers can use this as their &drm_framebuffer_funcs->create_handle + * callback. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file, + unsigned int *handle) +{ + return drm_gem_handle_create(file, fb->obj[0], handle); +} +EXPORT_SYMBOL(drm_gem_fb_create_handle); + +/** + * drm_gem_fb_create_with_funcs() - helper function for the + * &drm_mode_config_funcs.fb_create + * callback + * @dev: DRM device + * @file: drm file for the ioctl call + * @mode_cmd: metadata from the userspace fb creation request + * @funcs: vtable to be used for the new framebuffer object + * + * This can be used to set &drm_framebuffer_funcs for drivers that need the + * &drm_framebuffer_funcs.dirty callback. Use drm_gem_fb_create() if you don't + * need to change &drm_framebuffer_funcs. + * The function does buffer size validation. + */ +struct drm_framebuffer * +drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd, + const struct drm_framebuffer_funcs *funcs) +{ + const struct drm_format_info *info; + struct drm_gem_object *objs[4]; + struct drm_framebuffer *fb; + int ret, i; + + info = drm_get_format_info(dev, mode_cmd); + if (!info) + return ERR_PTR(-EINVAL); + + for (i = 0; i < info->num_planes; i++) { + unsigned int width = mode_cmd->width / (i ? info->hsub : 1); + unsigned int height = mode_cmd->height / (i ? 
info->vsub : 1); + unsigned int min_size; + + objs[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]); + if (!objs[i]) { + DRM_DEV_ERROR(dev->dev, "Failed to lookup GEM\n"); + ret = -ENOENT; + goto err_gem_object_put; + } + + min_size = (height - 1) * mode_cmd->pitches[i] + + width * info->cpp[i] + + mode_cmd->offsets[i]; + + if (objs[i]->size < min_size) { + drm_gem_object_put_unlocked(objs[i]); + ret = -EINVAL; + goto err_gem_object_put; + } + } + + fb = drm_gem_fb_alloc(dev, mode_cmd, objs, i, funcs); + if (IS_ERR(fb)) { + ret = PTR_ERR(fb); + goto err_gem_object_put; + } + + return fb; + +err_gem_object_put: + for (i--; i >= 0; i--) + drm_gem_object_put_unlocked(objs[i]); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_funcs); + +static const struct drm_framebuffer_funcs drm_gem_fb_funcs = { + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, +}; + +/** + * drm_gem_fb_create() - &drm_mode_config_funcs.fb_create callback function + * @dev: DRM device + * @file: drm file for the ioctl call + * @mode_cmd: metadata from the userspace fb creation request + * + * If your hardware has special alignment or pitch requirements these should be + * checked before calling this function. The function does buffer size + * validation. Use drm_gem_fb_create_with_funcs() if you need to set + * &drm_framebuffer_funcs.dirty. + */ +struct drm_framebuffer * +drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + return drm_gem_fb_create_with_funcs(dev, file, mode_cmd, + &drm_gem_fb_funcs); +} +EXPORT_SYMBOL_GPL(drm_gem_fb_create); + +/** + * drm_gem_fb_prepare_fb() - Prepare gem framebuffer + * @plane: Which plane + * @state: Plane state attach fence to + * + * This can be used as the &drm_plane_helper_funcs.prepare_fb hook. + * + * This function checks if the plane FB has an dma-buf attached, extracts + * the exclusive fence and attaches it to plane state for the atomic helper + * to wait on. + * + * There is no need for &drm_plane_helper_funcs.cleanup_fb hook for simple + * gem based framebuffer drivers which have their buffers always pinned in + * memory. + */ +int drm_gem_fb_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct dma_buf *dma_buf; + struct dma_fence *fence; + + if ((plane->state->fb == state->fb) || !state->fb) + return 0; + + dma_buf = drm_gem_fb_get_obj(state->fb, 0)->dma_buf; + if (dma_buf) { + fence = reservation_object_get_excl_rcu(dma_buf->resv); + drm_atomic_set_fence_for_plane(state, fence); + } + + return 0; +} +EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb); + +/** + * drm_gem_fbdev_fb_create - Create a drm_framebuffer for fbdev emulation + * @dev: DRM device + * @sizes: fbdev size description + * @pitch_align: optional pitch alignment + * @obj: GEM object backing the framebuffer + * @funcs: vtable to be used for the new framebuffer object + * + * This function creates a framebuffer for use with fbdev emulation. + * + * Returns: + * Pointer to a drm_framebuffer on success or an error pointer on failure. 
+ */ +struct drm_framebuffer * +drm_gem_fbdev_fb_create(struct drm_device *dev, + struct drm_fb_helper_surface_size *sizes, + unsigned int pitch_align, struct drm_gem_object *obj, + const struct drm_framebuffer_funcs *funcs) +{ + struct drm_mode_fb_cmd2 mode_cmd = { 0 }; + + mode_cmd.width = sizes->surface_width; + mode_cmd.height = sizes->surface_height; + mode_cmd.pitches[0] = sizes->surface_width * + DIV_ROUND_UP(sizes->surface_bpp, 8); + if (pitch_align) + mode_cmd.pitches[0] = roundup(mode_cmd.pitches[0], + pitch_align); + mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, + sizes->surface_depth); + if (obj->size < mode_cmd.pitches[0] * mode_cmd.height) + return ERR_PTR(-EINVAL); + + return drm_gem_fb_alloc(dev, &mode_cmd, &obj, 1, funcs); +} +EXPORT_SYMBOL(drm_gem_fbdev_fb_create); diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 5244f059d23a..b6996ddb19d6 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -190,6 +190,13 @@ struct drm_framebuffer { * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ struct list_head filp_head; + /** + * @obj: GEM objects backing the framebuffer, one per plane (optional). + * + * This is used by the GEM framebuffer helpers, see e.g. + * drm_gem_fb_create(). + */ + struct drm_gem_object *obj[4]; }; #define obj_to_fb(x) container_of(x, struct drm_framebuffer, base) diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h new file mode 100644 index 000000000000..db9cfa07235e --- /dev/null +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -0,0 +1,37 @@ +#ifndef __DRM_GEM_FB_HELPER_H__ +#define __DRM_GEM_FB_HELPER_H__ + +struct drm_device; +struct drm_file; +struct drm_fb_helper_surface_size; +struct drm_framebuffer; +struct drm_framebuffer_funcs; +struct drm_gem_object; +struct drm_mode_fb_cmd2; +struct drm_plane; +struct drm_plane_state; + +struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb, + unsigned int plane); +void drm_gem_fb_destroy(struct drm_framebuffer *fb); +int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file, + unsigned int *handle); + +struct drm_framebuffer * +drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd, + const struct drm_framebuffer_funcs *funcs); +struct drm_framebuffer * +drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd); + +int drm_gem_fb_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *state); + +struct drm_framebuffer * +drm_gem_fbdev_fb_create(struct drm_device *dev, + struct drm_fb_helper_surface_size *sizes, + unsigned int pitch_align, struct drm_gem_object *obj, + const struct drm_framebuffer_funcs *funcs); + +#endif -- cgit v1.2.3 From 373533f80b89f0f4fb59b65c2942d1b20b91319c Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 7 Aug 2017 11:13:41 +0200 Subject: drm/ttm: make ttm_mem_type_manager_func debug more useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide the drm printer directly instead of just the callback. 
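Decoupling the callback from a fixed prefix means the caller now picks the sink; a minimal sketch of a hypothetical debugfs dump reusing the same manager callback with a seq_file-backed printer:

#include <drm/drm_print.h>

/* Sketch: TTM's out-of-memory path below keeps using
 * drm_debug_printer(TTM_PFX); a debugfs file can feed the very same
 * ->debug() callback into a seq_file instead.
 */
static int my_mm_show(struct seq_file *m, void *unused)	/* hypothetical */
{
	struct ttm_mem_type_manager *man = m->private;
	struct drm_printer p = drm_seq_file_printer(m);

	(*man->func->debug)(man, &p);
	return 0;
}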
Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 +++---- drivers/gpu/drm/nouveau/nouveau_ttm.c | 6 ++++-- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo_manager.c | 5 ++--- drivers/gpu/drm/virtio/virtgpu_ttm.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 4 ++-- include/drm/ttm/ttm_bo_driver.h | 5 +++-- 8 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 5e6b90c6794f..26818b0a0cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -253,18 +253,17 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, * amdgpu_gtt_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. */ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_gtt_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a2c59a08b2bd..3f86b47984a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -204,18 +204,17 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, * amdgpu_vram_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. 
*/ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); } diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 999c35a25498..b0ad7fcefcf5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -179,7 +179,8 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, } static void -nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) +nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, + struct drm_printer *printer) { } @@ -252,7 +253,8 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man, } static void -nv04_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) +nv04_gart_manager_debug(struct ttm_mem_type_manager *man, + struct drm_printer *printer) { } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d3463ebc0b25..58e7fcea620e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -70,6 +70,7 @@ static inline int ttm_mem_type_from_place(const struct ttm_place *place, static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) { struct ttm_mem_type_manager *man = &bdev->man[mem_type]; + struct drm_printer p = drm_debug_printer(TTM_PFX); pr_err(" has_type: %d\n", man->has_type); pr_err(" use_type: %d\n", man->use_type); @@ -79,7 +80,7 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) pr_err(" available_caching: 0x%08X\n", man->available_caching); pr_err(" default_caching: 0x%08X\n", man->default_caching); if (mem_type != TTM_PL_SYSTEM) - (*man->func->debug)(man, TTM_PFX); + (*man->func->debug)(man, &p); } static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c index 90a6c0b03afc..a7c232dc39cb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c @@ -136,13 +136,12 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) } static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&rman->lock); - drm_mm_print(&rman->mm, &p); + drm_mm_print(&rman->mm, printer); spin_unlock(&rman->lock); } diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index e695d74eaa9f..cd389c5eaef5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -192,7 +192,7 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) } static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index d2b03d4a3c86..f2f9d88131f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -157,9 +157,9 @@ static int vmw_gmrid_man_takedown(struct ttm_mem_type_manager *man) } static void vmw_gmrid_man_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer 
*printer) { - pr_info("%s: No debug info available for the GMR id manager\n", prefix); + drm_printf(printer, "No debug info available for the GMR id manager\n"); } const struct ttm_mem_type_manager_func vmw_gmrid_manager_func = { diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d30850e07936..5f821a9b3a1f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -229,13 +229,14 @@ struct ttm_mem_type_manager_func { * struct ttm_mem_type_manager member debug * * @man: Pointer to a memory type manager. - * @prefix: Prefix to be used in printout to identify the caller. + * @printer: Prefix to be used in printout to identify the caller. * * This function is called to print out the state of the memory * type manager to aid debugging of out-of-memory conditions. * It may not be called from within atomic context. */ - void (*debug)(struct ttm_mem_type_manager *man, const char *prefix); + void (*debug)(struct ttm_mem_type_manager *man, + struct drm_printer *printer); }; /** -- cgit v1.2.3 From f44d85389e17b2e960620c1c6d89bda978a11f2b Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 24 Aug 2017 16:08:14 +0100 Subject: drm: rename u32 in __u32 in uapi All other fields use __ Cc: Ben Widawsky Fixes: db1689aa61b ("drm: Create a format/modifier blob") Signed-off-by: Lionel Landwerlin Signed-off-by: Daniel Stone Reviewed-by: Chris Wilson Reviewed-by: Emil Velikov Reviewed-by: Ben Widawsky Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20170824150814.5878-1-lionel.g.landwerlin@intel.com --- include/uapi/drm/drm_mode.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index a2bb7161f020..54fc38c3c3f1 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -715,24 +715,24 @@ struct drm_mode_atomic { struct drm_format_modifier_blob { #define FORMAT_BLOB_CURRENT 1 /* Version of this blob format */ - u32 version; + __u32 version; /* Flags */ - u32 flags; + __u32 flags; /* Number of fourcc formats supported */ - u32 count_formats; + __u32 count_formats; /* Where in this blob the formats exist (in bytes) */ - u32 formats_offset; + __u32 formats_offset; /* Number of drm_format_modifiers */ - u32 count_modifiers; + __u32 count_modifiers; /* Where in this blob the modifiers exist (in bytes) */ - u32 modifiers_offset; + __u32 modifiers_offset; - /* u32 formats[] */ + /* __u32 formats[] */ /* struct drm_format_modifier modifiers[] */ }; -- cgit v1.2.3 From 2cfa0bb25d25aa183ea29f1f9c2bc65f3f2c2264 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 5 Jul 2017 01:37:55 -0700 Subject: drm/vmwgfx: Prepare to support fence fd Make the fields and flags available. 
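To sketch the intended use from user space (the kernel-side plumbing that acts on these flags lands in follow-up patches): fill the execbuf argument so the kernel exports a fence FD, then read it back from the fence representation. Names other than the uapi fields below are hypothetical:

#include <stdint.h>
#include <string.h>
#include <drm/vmwgfx_drm.h>

/* Sketch: request a fence FD for this submission. The import
 * direction works the other way around, with a valid FD in
 * imported_fence_fd and DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD set.
 */
static void my_fill_execbuf(struct drm_vmw_execbuf_arg *arg, void *cmds,
			    uint32_t size, struct drm_vmw_fence_rep *rep)
{
	memset(arg, 0, sizeof(*arg));
	arg->commands		= (uintptr_t)cmds;
	arg->command_size	= size;
	arg->version		= DRM_VMW_EXECBUF_VERSION;
	arg->flags		= DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD;
	arg->imported_fence_fd	= -1;	/* nothing imported */
	arg->fence_rep		= (uintptr_t)rep;
	/* after the ioctl, rep->fd would hold the exported FD (or -1) */
}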
Signed-off-by: Sinclair Yeh Reviewed-by: Deepak Singh Rawat Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 5 ----- include/uapi/drm/vmwgfx_drm.h | 11 ++++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 8c583fd16c79..178dabcdb198 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -4449,11 +4449,6 @@ int vmw_execbuf_ioctl(struct drm_device *dev, unsigned long data, arg.context_handle = (uint32_t) -1; break; case 2: - if (arg.pad64 != 0) { - DRM_ERROR("Unused IOCTL data not set to zero.\n"); - return -EINVAL; - } - break; default: break; } diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index d9dfde9aa757..0bc784f5e0db 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -297,13 +297,17 @@ union drm_vmw_surface_reference_arg { * @version: Allows expanding the execbuf ioctl parameters without breaking * backwards compatibility, since user-space will always tell the kernel * which version it uses. - * @flags: Execbuf flags. None currently. + * @flags: Execbuf flags. + * @imported_fence_fd: FD for a fence imported from another device * * Argument to the DRM_VMW_EXECBUF Ioctl. */ #define DRM_VMW_EXECBUF_VERSION 2 +#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0) +#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1) + struct drm_vmw_execbuf_arg { __u64 commands; __u32 command_size; @@ -312,7 +316,7 @@ struct drm_vmw_execbuf_arg { __u32 version; __u32 flags; __u32 context_handle; - __u32 pad64; + __s32 imported_fence_fd; }; /** @@ -328,6 +332,7 @@ struct drm_vmw_execbuf_arg { * @passed_seqno: The highest seqno number processed by the hardware * so far. This can be used to mark user-space fence objects as signaled, and * to determine whether a fence seqno might be stale. + * @fd: FD associated with the fence, -1 if not exported * @error: This member should've been set to -EFAULT on submission. * The following actions should be take on completion: * error == -EFAULT: Fence communication failed. The host is synchronized. @@ -345,7 +350,7 @@ struct drm_vmw_fence_rep { __u32 mask; __u32 seqno; __u32 passed_seqno; - __u32 pad64; + __s32 fd; __s32 error; }; -- cgit v1.2.3 From afaf59237843bf89823c33143beca6b262dff0ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:19 -0700 Subject: drm/syncobj: Rename fence_get to find_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function has far more in common with drm_syncobj_find than with any in the get/put functions. 
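A typical call site after the rename, for reference; a minimal sketch of the lookup-then-drop pattern (the wait is illustrative, amdgpu's converted caller follows below):

/* Sketch: resolve a syncobj handle to its current fence, use it,
 * and drop the reference.
 */
static int my_wait_syncobj(struct drm_file *file_priv, u32 handle)
{
	struct dma_fence *fence;
	long err;
	int ret;

	ret = drm_syncobj_find_fence(file_priv, handle, &fence);
	if (ret)
		return ret;

	err = dma_fence_wait(fence, true);	/* interruptible */
	dma_fence_put(fence);
	return err < 0 ? err : 0;
}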
Signed-off-by: Jason Ekstrand Acked-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/drm_syncobj.c | 10 +++++----- include/drm/drm_syncobj.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 15d4a28d73bb..269b835571eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1035,7 +1035,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, { int r; struct dma_fence *fence; - r = drm_syncobj_fence_get(p->filp, handle, &fence); + r = drm_syncobj_find_fence(p->filp, handle, &fence); if (r) return r; diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index a5b38a80a99a..0412b0b0a342 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -95,9 +95,9 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, } EXPORT_SYMBOL(drm_syncobj_replace_fence); -int drm_syncobj_fence_get(struct drm_file *file_private, - u32 handle, - struct dma_fence **fence) +int drm_syncobj_find_fence(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence) { struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle); int ret = 0; @@ -112,7 +112,7 @@ int drm_syncobj_fence_get(struct drm_file *file_private, drm_syncobj_put(syncobj); return ret; } -EXPORT_SYMBOL(drm_syncobj_fence_get); +EXPORT_SYMBOL(drm_syncobj_find_fence); /** * drm_syncobj_free - free a sync object. @@ -307,7 +307,7 @@ int drm_syncobj_export_sync_file(struct drm_file *file_private, if (fd < 0) return fd; - ret = drm_syncobj_fence_get(file_private, handle, &fence); + ret = drm_syncobj_find_fence(file_private, handle, &fence); if (ret) goto err_put_fd; diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 89976da542b1..7d4ad777132e 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -81,9 +81,9 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence); -int drm_syncobj_fence_get(struct drm_file *file_private, - u32 handle, - struct dma_fence **fence); +int drm_syncobj_find_fence(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence); void drm_syncobj_free(struct kref *kref); #endif -- cgit v1.2.3 From 309a5482fa9eb7bc754bf95a2cd89091b01c33d2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:20 -0700 Subject: drm/syncobj: Add a race-free drm_syncobj_fence_get helper (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The atomic exchange operation in drm_syncobj_replace_fence is sufficient for the case where it races with itself. However, if you have a race between a replace_fence and dma_fence_get(syncobj->fence), you may end up with the entire replace_fence happening between the point in time where the one thread gets the syncobj->fence pointer and when it calls dma_fence_get() on it. If this happens, then the reference may be dropped before we get a chance to get a new one. The new helper uses dma_fence_get_rcu_safe to get rid of the race. This is also needed because it allows us to do a bit more than just get a reference in drm_syncobj_fence_get should we wish to do so. 
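To spell the race out, a sketch with the interleaving in comments and the new helper's usage underneath (names per the patch):

/*
 * The race being closed:
 *
 *	CPU A				CPU B
 *	fence = syncobj->fence;
 *					drm_syncobj_replace_fence(...)
 *					dma_fence_put(old);  // last ref
 *	dma_fence_get(fence);		// too late
 *
 * dma_fence_get_rcu_safe() takes the reference inside an RCU read
 * section and re-checks that syncobj->fence still points at the
 * fence it got, retrying otherwise, so callers just do:
 */
struct dma_fence *fence = drm_syncobj_fence_get(syncobj);
if (fence) {
	/* fence is safely referenced here */
	dma_fence_put(fence);
}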
v2: - RCU isn't that scary - Call rcu_read_lock/unlock - Don't rename fence to _fence - Make the helper static inline Signed-off-by: Jason Ekstrand Acked-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 2 +- include/drm/drm_syncobj.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 0412b0b0a342..eea38d82645c 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -105,7 +105,7 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (!syncobj) return -ENOENT; - *fence = dma_fence_get(syncobj->fence); + *fence = drm_syncobj_fence_get(syncobj); if (!*fence) { ret = -EINVAL; } diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 7d4ad777132e..ce94d14c5087 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -77,6 +77,18 @@ drm_syncobj_put(struct drm_syncobj *obj) kref_put(&obj->refcount, drm_syncobj_free); } +static inline struct dma_fence * +drm_syncobj_fence_get(struct drm_syncobj *syncobj) +{ + struct dma_fence *fence; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&syncobj->fence); + rcu_read_unlock(); + + return fence; +} + struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, -- cgit v1.2.3 From 5e60a10eaebab93f823295cd7ec3848ba3b6e553 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 25 Aug 2017 10:52:22 -0700 Subject: drm/syncobj: add sync obj wait interface. (v8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This interface will allow sync object to be used to back Vulkan fences. This API is pretty much the vulkan fence waiting API, and I've ported the code from amdgpu. v2: accept relative timeout, pass remaining time back to userspace. v3: return to absolute timeouts. v4: absolute zero = poll, rewrite any/all code to have same operation for arrays return -EINVAL for 0 fences. v4.1: fixup fences allocation check, use u64_to_user_ptr v5: move to sec/nsec, and use timespec64 for calcs. v6: use -ETIME and drop the out status flag. (-ETIME is suggested by ickle, I can feel a shed painting) v7: talked to Daniel/Arnd, use ktime and ns everywhere. 
v8: be more careful in the timeout calculations use uint32_t for counter variables so we don't overflow gracefully handle -ENOENT being returned from dma_fence_wait_timeout Signed-off-by: Dave Airlie Reviewed-by: Jason Ekstrand Acked-by: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 2 + drivers/gpu/drm/drm_ioctl.c | 2 + drivers/gpu/drm/drm_syncobj.c | 142 +++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 12 ++++ 4 files changed, 158 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 4e906b82a170..534e5ac43bf8 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -167,3 +167,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index d920b2118a39..b4f443417a28 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index eea38d82645c..4e8563c36d6e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1,5 +1,7 @@ /* * Copyright 2017 Red Hat + * Parts ported from amdgpu (fence wait code). + * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +33,9 @@ * that contain an optional fence. The fence can be updated with a new * fence, or be NULL. * + * syncobj's can be waited upon, where it will wait for the underlying + * fence. + * * syncobj's can be export to fd's and back, these fd's are opaque and * have no other use case, except passing the syncobj between processes. * @@ -447,3 +452,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, return drm_syncobj_fd_to_handle(file_private, args->fd, &args->handle); } + +/** + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value + * + * @timeout_nsec: timeout nsec component in ns, 0 for poll + * + * Calculate the timeout in jiffies from an absolute time in sec/nsec. 
+ */ +static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec) +{ + ktime_t abs_timeout, now; + u64 timeout_ns, timeout_jiffies64; + + /* make 0 timeout means poll - absolute 0 doesn't seem valid */ + if (timeout_nsec == 0) + return 0; + + abs_timeout = ns_to_ktime(timeout_nsec); + now = ktime_get(); + + if (!ktime_after(abs_timeout, now)) + return 0; + + timeout_ns = ktime_to_ns(ktime_sub(abs_timeout, now)); + + timeout_jiffies64 = nsecs_to_jiffies64(timeout_ns); + /* clamp timeout to avoid infinite timeout */ + if (timeout_jiffies64 >= MAX_SCHEDULE_TIMEOUT - 1) + return MAX_SCHEDULE_TIMEOUT - 1; + + return timeout_jiffies64 + 1; +} + +static int drm_syncobj_wait_fences(struct drm_device *dev, + struct drm_file *file_private, + struct drm_syncobj_wait *wait, + struct dma_fence **fences) +{ + signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); + signed long ret = 0; + uint32_t first = ~0; + + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { + uint32_t i; + for (i = 0; i < wait->count_handles; i++) { + ret = dma_fence_wait_timeout(fences[i], true, timeout); + + /* Various dma_fence wait callbacks will return + * ENOENT to indicate that the fence has already + * been signaled. We need to sanitize this to 0 so + * we don't return early and the client doesn't see + * an unexpected error. + */ + if (ret == -ENOENT) + ret = 0; + + if (ret < 0) + return ret; + if (ret == 0) + break; + timeout = ret; + } + first = 0; + } else { + ret = dma_fence_wait_any_timeout(fences, + wait->count_handles, + true, timeout, + &first); + } + + if (ret < 0) + return ret; + + wait->first_signaled = first; + if (ret == 0) + return -ETIME; + return 0; +} + +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + uint32_t *handles; + struct dma_fence **fences; + int ret = 0; + uint32_t i; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + /* Get the handles from userspace */ + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), + GFP_KERNEL); + if (handles == NULL) + return -ENOMEM; + + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(uint32_t) * args->count_handles)) { + ret = -EFAULT; + goto err_free_handles; + } + + fences = kcalloc(args->count_handles, + sizeof(struct dma_fence *), GFP_KERNEL); + if (!fences) { + ret = -ENOMEM; + goto err_free_handles; + } + + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_find_fence(file_private, handles[i], + &fences[i]); + if (ret) + goto err_free_fence_array; + } + + ret = drm_syncobj_wait_fences(dev, file_private, + args, fences); + +err_free_fence_array: + for (i = 0; i < args->count_handles; i++) + dma_fence_put(fences[i]); + kfree(fences); +err_free_handles: + kfree(handles); + + return ret; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 101593ab10ac..0757c1a41821 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -718,6 +718,17 @@ struct drm_syncobj_handle { __u32 pad; }; +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -840,6 +851,7 @@ extern "C" { 
#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From 9c19fb10a5893d6501df4d0fb93d954d5fc1d91b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 07:39:25 -0700 Subject: drm/syncobj: Add a callback mechanism for replace_fence (v3) It is useful in certain circumstances to know when the fence is replaced in a syncobj. Specifically, it may be useful to know when the fence goes from NULL to something valid. This does make syncobj_replace_fence a little more expensive because it has to take a lock but, in the common case where there is no callback list, it spends a very short amount of time inside the lock. v2: - Don't lock in drm_syncobj_fence_get. We only really need to lock around fence_replace to make the callback work. v3: - Fix the cb_list comment to make kbuild happy Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 60 +++++++++++++++++++++++++++++++++++++++++-- include/drm/drm_syncobj.h | 39 ++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 4e8563c36d6e..bade497b3f1d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -80,6 +80,46 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, } EXPORT_SYMBOL(drm_syncobj_find); +static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + cb->func = func; + list_add_tail(&cb->node, &syncobj->cb_list); +} + +/** + * drm_syncobj_add_callback - adds a callback to syncobj::cb_list + * @syncobj: Sync object to which to add the callback + * @cb: Callback to add + * @func: Func to use when initializing the drm_syncobj_cb struct + * + * This adds a callback to be called next time the fence is replaced + */ +void drm_syncobj_add_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + spin_lock(&syncobj->lock); + drm_syncobj_add_callback_locked(syncobj, cb, func); + spin_unlock(&syncobj->lock); +} +EXPORT_SYMBOL(drm_syncobj_add_callback); + +/** + * drm_syncobj_add_callback - removes a callback to syncobj::cb_list + * @syncobj: Sync object from which to remove the callback + * @cb: Callback to remove + */ +void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb) +{ + spin_lock(&syncobj->lock); + list_del_init(&cb->node); + spin_unlock(&syncobj->lock); +} +EXPORT_SYMBOL(drm_syncobj_remove_callback); + /** * drm_syncobj_replace_fence - replace fence in a sync object. 
* @syncobj: Sync object to replace fence in @@ -91,10 +131,24 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence) { struct dma_fence *old_fence; + struct drm_syncobj_cb *cur, *tmp; if (fence) dma_fence_get(fence); - old_fence = xchg(&syncobj->fence, fence); + + spin_lock(&syncobj->lock); + + old_fence = syncobj->fence; + syncobj->fence = fence; + + if (fence != old_fence) { + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { + list_del_init(&cur->node); + cur->func(syncobj, cur); + } + } + + spin_unlock(&syncobj->lock); dma_fence_put(old_fence); } @@ -130,7 +184,7 @@ void drm_syncobj_free(struct kref *kref) struct drm_syncobj *syncobj = container_of(kref, struct drm_syncobj, refcount); - dma_fence_put(syncobj->fence); + drm_syncobj_replace_fence(syncobj, NULL); kfree(syncobj); } EXPORT_SYMBOL(drm_syncobj_free); @@ -146,6 +200,8 @@ static int drm_syncobj_create(struct drm_file *file_private, return -ENOMEM; kref_init(&syncobj->refcount); + INIT_LIST_HEAD(&syncobj->cb_list); + spin_lock_init(&syncobj->lock); idr_preload(GFP_KERNEL); spin_lock(&file_private->syncobj_table_lock); diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index ce94d14c5087..c00fee539822 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -28,6 +28,8 @@ #include "linux/dma-fence.h" +struct drm_syncobj_cb; + /** * struct drm_syncobj - sync object. * @@ -43,8 +45,21 @@ struct drm_syncobj { /** * @fence: * NULL or a pointer to the fence bound to this object. + * + * This field should not be used directly. Use drm_syncobj_fence_get + * and drm_syncobj_replace_fence instead. */ struct dma_fence *fence; + /** + * @cb_list: + * List of callbacks to call when the fence gets replaced + */ + struct list_head cb_list; + /** + * @lock: + * locks cb_list and write-locks fence. + */ + spinlock_t lock; /** * @file: * a file backing for this syncobj. @@ -52,6 +67,25 @@ struct drm_syncobj { struct file *file; }; +typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb); + +/** + * struct drm_syncobj_cb - callback for drm_syncobj_add_callback + * @node: used by drm_syncob_add_callback to append this struct to + * syncobj::cb_list + * @func: drm_syncobj_func_t to call + * + * This struct will be initialized by drm_syncobj_add_callback, additional + * data can be passed along by embedding drm_syncobj_cb in another struct. + * The callback will get called the next time drm_syncobj_replace_fence is + * called. + */ +struct drm_syncobj_cb { + struct list_head node; + drm_syncobj_func_t func; +}; + void drm_syncobj_free(struct kref *kref); /** @@ -91,6 +125,11 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj) struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); +void drm_syncobj_add_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func); +void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence); int drm_syncobj_find_fence(struct drm_file *file_private, -- cgit v1.2.3 From 1fc08218ed2a42c86af5c905fe4c00885376a07e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:25 -0700 Subject: drm/syncobj: Add a CREATE_SIGNALED flag This requests that the driver create the sync object such that it already has a signaled dma_fence attached. 
Because we don't need anything in particular (just something signaled), we use a dummy null fence. This is useful for Vulkan which has a similar flag that can be passed to vkCreateFence. Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 57 ++++++++++++++++++++++++++++++++++++++++--- include/uapi/drm/drm.h | 1 + 2 files changed, 55 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index bade497b3f1d..12db8c9564cd 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -154,6 +154,49 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, } EXPORT_SYMBOL(drm_syncobj_replace_fence); +struct drm_syncobj_null_fence { + struct dma_fence base; + spinlock_t lock; +}; + +static const char *drm_syncobj_null_fence_get_name(struct dma_fence *fence) +{ + return "syncobjnull"; +} + +static bool drm_syncobj_null_fence_enable_signaling(struct dma_fence *fence) +{ + dma_fence_enable_sw_signaling(fence); + return !dma_fence_is_signaled(fence); +} + +static const struct dma_fence_ops drm_syncobj_null_fence_ops = { + .get_driver_name = drm_syncobj_null_fence_get_name, + .get_timeline_name = drm_syncobj_null_fence_get_name, + .enable_signaling = drm_syncobj_null_fence_enable_signaling, + .wait = dma_fence_default_wait, + .release = NULL, +}; + +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) +{ + struct drm_syncobj_null_fence *fence; + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return -ENOMEM; + + spin_lock_init(&fence->lock); + dma_fence_init(&fence->base, &drm_syncobj_null_fence_ops, + &fence->lock, 0, 0); + dma_fence_signal(&fence->base); + + drm_syncobj_replace_fence(syncobj, &fence->base); + + dma_fence_put(&fence->base); + + return 0; +} + int drm_syncobj_find_fence(struct drm_file *file_private, u32 handle, struct dma_fence **fence) @@ -190,7 +233,7 @@ void drm_syncobj_free(struct kref *kref) EXPORT_SYMBOL(drm_syncobj_free); static int drm_syncobj_create(struct drm_file *file_private, - u32 *handle) + u32 *handle, uint32_t flags) { int ret; struct drm_syncobj *syncobj; @@ -203,6 +246,14 @@ static int drm_syncobj_create(struct drm_file *file_private, INIT_LIST_HEAD(&syncobj->cb_list); spin_lock_init(&syncobj->lock); + if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { + ret = drm_syncobj_assign_null_handle(syncobj); + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + } + idr_preload(GFP_KERNEL); spin_lock(&file_private->syncobj_table_lock); ret = idr_alloc(&file_private->syncobj_idr, syncobj, 1, 0, GFP_NOWAIT); @@ -438,11 +489,11 @@ drm_syncobj_create_ioctl(struct drm_device *dev, void *data, return -ENODEV; /* no valid flags yet */ - if (args->flags) + if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED) return -EINVAL; return drm_syncobj_create(file_private, - &args->handle); + &args->handle, args->flags); } int diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 0757c1a41821..ade7f68d32b5 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -700,6 +700,7 @@ struct drm_prime_handle { struct drm_syncobj_create { __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) __u32 flags; }; -- cgit v1.2.3 From e7aca5031a2fb51b6120864d0eff5478c95e6651 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:24 -0700 Subject: drm/syncobj: Allow wait for submit and signal behavior (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand Cc: Dave Airlie Cc: Chris Wilson Cc: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 252 ++++++++++++++++++++++++++++++++++-------- include/uapi/drm/drm.h | 1 + 2 files changed, 208 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 12db8c9564cd..cccd3bd194c6 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "drm_internal.h" #include @@ -88,6 +89,35 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, list_add_tail(&cb->node, &syncobj->cb_list); } +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, + struct dma_fence **fence, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + int ret; + + *fence = drm_syncobj_fence_get(syncobj); + if (*fence) + return 1; + + spin_lock(&syncobj->lock); + /* We've already tried once to get a fence and failed. Now that we + * have the lock, try one more time just to be sure we don't add a + * callback when a fence has already been set. 
+ */ + if (syncobj->fence) { + *fence = dma_fence_get(syncobj->fence); + ret = 1; + } else { + *fence = NULL; + drm_syncobj_add_callback_locked(syncobj, cb, func); + ret = 0; + } + spin_unlock(&syncobj->lock); + + return ret; +} + /** * drm_syncobj_add_callback - adds a callback to syncobj::cb_list * @syncobj: Sync object to which to add the callback @@ -560,6 +590,160 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, &args->handle); } +struct syncobj_wait_entry { + struct task_struct *task; + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + struct drm_syncobj_cb syncobj_cb; +}; + +static void syncobj_wait_fence_func(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, fence_cb); + + wake_up_process(wait->task); +} + +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, syncobj_cb); + + /* This happens inside the syncobj lock */ + wait->fence = dma_fence_get(syncobj->fence); + wake_up_process(wait->task); +} + +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, + uint32_t count, + uint32_t flags, + signed long timeout, + uint32_t *idx) +{ + struct syncobj_wait_entry *entries; + struct dma_fence *fence; + signed long ret; + uint32_t signaled_count, i; + + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + /* Walk the list of sync objects and initialize entries. We do + * this up-front so that we can properly return -EINVAL if there is + * a syncobj with a missing fence and then never have the chance of + * returning -EINVAL again. + */ + signaled_count = 0; + for (i = 0; i < count; ++i) { + entries[i].task = current; + entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); + if (!entries[i].fence) { + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + continue; + } else { + ret = -EINVAL; + goto cleanup_entries; + } + } + + if (dma_fence_is_signaled(entries[i].fence)) { + if (signaled_count == 0 && idx) + *idx = i; + signaled_count++; + } + } + + /* Initialize ret to the max of timeout and 1. That way, the + * default return value indicates a successful wait and not a + * timeout. + */ + ret = max_t(signed long, timeout, 1); + + if (signaled_count == count || + (signaled_count > 0 && + !(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL))) + goto cleanup_entries; + + /* There's a very annoying laxness in the dma_fence API here, in + * that backends are not required to automatically report when a + * fence is signaled prior to fence->ops->enable_signaling() being + * called. So here if we fail to match signaled_count, we need to + * fallthough and try a 0 timeout wait! 
+ */ + + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + for (i = 0; i < count; ++i) { + drm_syncobj_fence_get_or_add_callback(syncobjs[i], + &entries[i].fence, + &entries[i].syncobj_cb, + syncobj_wait_syncobj_func); + } + } + + do { + set_current_state(TASK_INTERRUPTIBLE); + + signaled_count = 0; + for (i = 0; i < count; ++i) { + fence = entries[i].fence; + if (!fence) + continue; + + if (dma_fence_is_signaled(fence) || + (!entries[i].fence_cb.func && + dma_fence_add_callback(fence, + &entries[i].fence_cb, + syncobj_wait_fence_func))) { + /* The fence has been signaled */ + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { + signaled_count++; + } else { + if (idx) + *idx = i; + goto done_waiting; + } + } + } + + if (signaled_count == count) + goto done_waiting; + + if (timeout == 0) { + /* If we are doing a 0 timeout wait and we got + * here, then we just timed out. + */ + ret = 0; + goto done_waiting; + } + + ret = schedule_timeout(ret); + + if (ret > 0 && signal_pending(current)) + ret = -ERESTARTSYS; + } while (ret > 0); + +done_waiting: + __set_current_state(TASK_RUNNING); + +cleanup_entries: + for (i = 0; i < count; ++i) { + if (entries[i].syncobj_cb.func) + drm_syncobj_remove_callback(syncobjs[i], + &entries[i].syncobj_cb); + if (entries[i].fence_cb.func) + dma_fence_remove_callback(entries[i].fence, + &entries[i].fence_cb); + dma_fence_put(entries[i].fence); + } + kfree(entries); + + return ret; +} + /** * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value * @@ -592,43 +776,19 @@ static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec) return timeout_jiffies64 + 1; } -static int drm_syncobj_wait_fences(struct drm_device *dev, - struct drm_file *file_private, - struct drm_syncobj_wait *wait, - struct dma_fence **fences) +static int drm_syncobj_array_wait(struct drm_device *dev, + struct drm_file *file_private, + struct drm_syncobj_wait *wait, + struct drm_syncobj **syncobjs) { signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); signed long ret = 0; uint32_t first = ~0; - if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { - uint32_t i; - for (i = 0; i < wait->count_handles; i++) { - ret = dma_fence_wait_timeout(fences[i], true, timeout); - - /* Various dma_fence wait callbacks will return - * ENOENT to indicate that the fence has already - * been signaled. We need to sanitize this to 0 so - * we don't return early and the client doesn't see - * an unexpected error. 
- */ - if (ret == -ENOENT) - ret = 0; - - if (ret < 0) - return ret; - if (ret == 0) - break; - timeout = ret; - } - first = 0; - } else { - ret = dma_fence_wait_any_timeout(fences, - wait->count_handles, - true, timeout, - &first); - } - + ret = drm_syncobj_array_wait_timeout(syncobjs, + wait->count_handles, + wait->flags, + timeout, &first); if (ret < 0) return ret; @@ -644,14 +804,15 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, { struct drm_syncobj_wait *args = data; uint32_t *handles; - struct dma_fence **fences; + struct drm_syncobj **syncobjs; int ret = 0; uint32_t i; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) return -ENODEV; - if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) return -EINVAL; if (args->count_handles == 0) @@ -670,27 +831,28 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, goto err_free_handles; } - fences = kcalloc(args->count_handles, - sizeof(struct dma_fence *), GFP_KERNEL); - if (!fences) { + syncobjs = kcalloc(args->count_handles, + sizeof(struct drm_syncobj *), GFP_KERNEL); + if (!syncobjs) { ret = -ENOMEM; goto err_free_handles; } for (i = 0; i < args->count_handles; i++) { - ret = drm_syncobj_find_fence(file_private, handles[i], - &fences[i]); - if (ret) + syncobjs[i] = drm_syncobj_find(file_private, handles[i]); + if (!syncobjs[i]) { + ret = -ENOENT; goto err_free_fence_array; + } } - ret = drm_syncobj_wait_fences(dev, file_private, - args, fences); + ret = drm_syncobj_array_wait(dev, file_private, + args, syncobjs); err_free_fence_array: - for (i = 0; i < args->count_handles; i++) - dma_fence_put(fences[i]); - kfree(fences); + while (i-- > 0) + drm_syncobj_put(syncobjs[i]); + kfree(syncobjs); err_free_handles: kfree(handles); diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index ade7f68d32b5..4c746597225e 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -720,6 +720,7 @@ struct drm_syncobj_handle { }; #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ -- cgit v1.2.3 From aa4035d2c7683d2f2fb0ffe8087abd9eabf6d54a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:27 -0700 Subject: drm/syncobj: Add a reset ioctl (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just resets the dma_fence to NULL so it looks like it's never been signaled. This will be useful once we add the new wait API for allowing wait on "submit and signal" behavior. 
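
For illustration, a minimal userspace sketch of driving the new ioctl
(hypothetical fd and handle values; raw ioctl(2) rather than any
wrapper library, assuming the uapi additions below):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm.h>	/* or <libdrm/drm.h>, for the uapi definitions */

	/* Reset a set of syncobjs back to the unsignaled state. */
	static int syncobj_reset(int fd, const uint32_t *handles,
				 uint32_t count)
	{
		struct drm_syncobj_array args = {
			.handles = (uintptr_t)handles,	/* user pointer passed as u64 */
			.count_handles = count,
			.pad = 0,	/* nonzero pad is rejected with -EINVAL */
		};

		return ioctl(fd, DRM_IOCTL_SYNCOBJ_RESET, &args);
	}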
v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Reviewed-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 2 ++ drivers/gpu/drm/drm_ioctl.c | 2 ++ drivers/gpu/drm/drm_syncobj.c | 33 +++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 7 +++++++ 4 files changed, 44 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 534e5ac43bf8..83f1615eb1ec 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -169,3 +169,5 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index b4f443417a28..16c5d51a43aa 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -659,6 +659,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 15e74ca61760..40d2ad293661 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -885,3 +885,36 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, return ret; } + +int +drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_array *args = data; + struct drm_syncobj **syncobjs; + uint32_t i; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->pad != 0) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + ret = drm_syncobj_array_find(file_private, + u64_to_user_ptr(args->handles), + args->count_handles, + &syncobjs); + if (ret < 0) + return ret; + + for (i = 0; i < args->count_handles; i++) + drm_syncobj_replace_fence(syncobjs[i], NULL); + + drm_syncobj_array_free(syncobjs, args->count_handles); + + return 0; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 4c746597225e..b037fdf9e43b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -731,6 +731,12 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -854,6 +860,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From ffa9443fb3d3eddf0fdf6ac473dc8b5c87f08f15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:28 -0700 Subject: drm/syncobj: Add a signal ioctl (v3) This IOCTL provides a mechanism for userspace to trigger a sync object directly. 
There are other ways that userspace can trigger a syncobj such as
submitting a dummy batch somewhere or hanging on to a triggered
sync_file and doing an import. This just provides an easy way to
manually trigger the sync object without weird hacks.

The motivation for this IOCTL is Vulkan fences. Vulkan lets you create
a fence already in the signaled state so that you can wait on it
immediately without stalling. We could also handle this with a new
create flag to ask the driver to create a syncobj that is already
signaled but the IOCTL seemed a bit cleaner and more generic.

v2:
 - Take an array of sync objects (Dave Airlie)

v3:
 - Throw -EINVAL if pad != 0

Signed-off-by: Jason Ekstrand
Signed-off-by: Dave Airlie
---
 drivers/gpu/drm/drm_internal.h | 2 ++
 drivers/gpu/drm/drm_ioctl.c | 2 ++
 drivers/gpu/drm/drm_syncobj.c | 36 ++++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h | 1 +
 4 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 83f1615eb1ec..fbc3f308fa19 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -171,3 +171,5 @@ int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
+int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 16c5d51a43aa..a9ae6dd2d593 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -661,6 +661,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 DRM_UNLOCKED|DRM_RENDER_ALLOW),
 DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
 DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };

 #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 40d2ad293661..0422b8c2c2e7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -918,3 +918,39 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,

 return 0;
 }
+
+int
+drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private)
+{
+ struct drm_syncobj_array *args = data;
+ struct drm_syncobj **syncobjs;
+ uint32_t i;
+ int ret;
+
+ if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+ return -ENODEV;
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ if (args->count_handles == 0)
+ return -EINVAL;
+
+ ret = drm_syncobj_array_find(file_private,
+ u64_to_user_ptr(args->handles),
+ args->count_handles,
+ &syncobjs);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < args->count_handles; i++) {
+ ret = drm_syncobj_assign_null_handle(syncobjs[i]);
+ if (ret < 0)
+ break;
+ }
+
+ drm_syncobj_array_free(syncobjs, args->count_handles);
+
+ return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b037fdf9e43b..97677cd6964d 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -861,6 +861,7 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
 #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)

 /**
 * Device specific ioctls should only be in their respective
headers -- cgit v1.2.3
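
As a usage sketch for the signal ioctl added above (hypothetical
descriptor and handle values; assumes an open render-node fd and a
syncobj handle created earlier, raw ioctl(2) interface):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm.h>	/* or <libdrm/drm.h> */

	/* Force a syncobj into the signaled state, e.g. to back a Vulkan
	 * fence created with VK_FENCE_CREATE_SIGNALED_BIT. */
	static int syncobj_signal_one(int fd, uint32_t handle)
	{
		uint32_t handles[1] = { handle };
		struct drm_syncobj_array args = {
			.handles = (uintptr_t)handles,
			.count_handles = 1,
			.pad = 0,	/* nonzero pad is rejected with -EINVAL */
		};

		return ioctl(fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &args);
	}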