From a7b8829d242b1a58107e9c02b09e93aec446d55c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 5 Jul 2017 20:30:01 +0200 Subject: iio: accel: st_accel: add SPI-3wire support Add SPI Serial Interface Mode (SIM) register information in st_sensor_settings look up table to support devices (like LSM303AGR accel sensor) that allow just SPI-3wire communication mode. SIM mode has to be configured before any other operation since it is not enabled by default and the driver is not able to read without that configuration. Whilst a fairly substantial patch, the actual logic is simple and it is better to have the generic fix than a band aid. Fixes: ddc05fa28606 (iio: st-accel: add support for lsm303agr accel) Signed-off-by: Lorenzo Bianconi Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 7 +++++++ include/linux/platform_data/st_sensors_pdata.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 497f2b3a5a62..97f1b465d04f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -105,6 +105,11 @@ struct st_sensor_fullscale { struct st_sensor_fullscale_avl fs_avl[ST_SENSORS_FULLSCALE_AVL_MAX]; }; +struct st_sensor_sim { + u8 addr; + u8 value; +}; + /** * struct st_sensor_bdu - ST sensor device block data update * @addr: address of the register. @@ -197,6 +202,7 @@ struct st_sensor_transfer_function { * @bdu: Block data update register. * @das: Data Alignment Selection register. * @drdy_irq: Data ready register of the sensor. + * @sim: SPI serial interface mode register of the sensor. * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read. * @bootime: samples to discard when sensor passing from power-down to power-up. */ @@ -213,6 +219,7 @@ struct st_sensor_settings { struct st_sensor_bdu bdu; struct st_sensor_das das; struct st_sensor_data_ready_irq drdy_irq; + struct st_sensor_sim sim; bool multi_read_bit; unsigned int bootime; }; diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index 79b0e4cdb814..f8274b0c6888 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -17,10 +17,12 @@ * Available only for accelerometer and pressure sensors. * Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet). * @open_drain: set the interrupt line to be open drain if possible. + * @spi_3wire: enable spi-3wire mode. */ struct st_sensors_platform_data { u8 drdy_int_pin; bool open_drain; + bool spi_3wire; }; #endif /* ST_SENSORS_PDATA_H */ -- cgit v1.2.3 From 7cfdfdc82a467c78af9132cb9c98e84415df34bc Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 10 Jul 2017 14:45:20 +0900 Subject: libata: Cleanup ata_read_log_page() The warning message "READ LOG DMA EXT failed, trying unqueued" in ata_read_log_page() as well as the macro name ATA_HORKAGE_NO_NCQ_LOG are confusing: the command READ LOG DMA EXT is not a queued NCQ command unless it is encapsulated in a RECEIVE FPDMA QUEUED command. From ACS-4 READ LOG DMA EXT description: "The device processes the READ LOG DMA EXT command in the NCQ feature set environment (see 4.13.6) if the READ LOG DMA EXT command is encapsulated in a RECEIVE FPDMA QUEUED command (see 7.30) with the inputs encapsulated as shown in 7.23.6."
To avoid confusion, fix the warning message to mention switching to PIO and not "unqueued" and rename the macro ATA_HORKAGE_NO_NCQ_LOG to ATA_HORKAGE_NO_DMA_LOG. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 6 +++--- include/linux/libata.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8453f9a4682f..fa7dd4394c02 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2083,7 +2083,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, retry: ata_tf_init(dev, &tf); if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) && - !(dev->horkage & ATA_HORKAGE_NO_NCQ_LOG)) { + !(dev->horkage & ATA_HORKAGE_NO_DMA_LOG)) { tf.command = ATA_CMD_READ_LOG_DMA_EXT; tf.protocol = ATA_PROT_DMA; dma = true; @@ -2102,8 +2102,8 @@ retry: buf, sectors * ATA_SECT_SIZE, 0); if (err_mask && dma) { - dev->horkage |= ATA_HORKAGE_NO_NCQ_LOG; - ata_dev_warn(dev, "READ LOG DMA EXT failed, trying unqueued\n"); + dev->horkage |= ATA_HORKAGE_NO_DMA_LOG; + ata_dev_warn(dev, "READ LOG DMA EXT failed, trying PIO\n"); goto retry; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 55de3da58b1c..931c32f1f18d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -435,7 +435,7 @@ enum { ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ - ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ + ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ -- cgit v1.2.3 From 36acbd9e8377c27570b887e2332a5e1f0b140e16 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Fri, 14 Jul 2017 18:16:44 +0530 Subject: mmc: host: omap_hsmmc: remove unused platform callbacks Remove unused callbacks in the omap_hsmmc_platform_data structure. Signed-off-by: Faiz Abbas Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap_hsmmc.c | 11 ----------- include/linux/platform_data/hsmmc-omap.h | 10 ---------- 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 7c12f3715676..04ff3c97a535 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -356,9 +356,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on, struct mmc_host *mmc = host->mmc; int ret = 0; - if (mmc_pdata(host)->set_power) - return mmc_pdata(host)->set_power(host->dev, power_on, vdd); - /* * If we don't see a Vcc regulator, assume it's a fixed * voltage always-on regulator.
@@ -366,9 +363,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on, if (IS_ERR(mmc->supply.vmmc)) return 0; - if (mmc_pdata(host)->before_set_reg) - mmc_pdata(host)->before_set_reg(host->dev, power_on, vdd); - ret = omap_hsmmc_set_pbias(host, false, 0); if (ret) return ret; @@ -400,9 +394,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on, return ret; } - if (mmc_pdata(host)->after_set_reg) - mmc_pdata(host)->after_set_reg(host->dev, power_on, vdd); - return 0; err_set_voltage: @@ -469,8 +460,6 @@ static int omap_hsmmc_reg_get(struct omap_hsmmc_host *host) int ret; struct mmc_host *mmc = host->mmc; - if (mmc_pdata(host)->set_power) - return 0; ret = mmc_regulator_get_supply(mmc); if (ret == -EPROBE_DEFER) diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h index 8e981be2e2c2..0ff1e0dba720 100644 --- a/include/linux/platform_data/hsmmc-omap.h +++ b/include/linux/platform_data/hsmmc-omap.h @@ -55,9 +55,6 @@ struct omap_hsmmc_platform_data { u32 caps; /* Used for the MMC driver on 2430 and later */ u32 pm_caps; /* PM capabilities of the mmc */ - /* use the internal clock */ - unsigned internal_clock:1; - /* nonremovable e.g. eMMC */ unsigned nonremovable:1; @@ -73,13 +70,6 @@ struct omap_hsmmc_platform_data { int gpio_cd; /* gpio (card detect) */ int gpio_cod; /* gpio (cover detect) */ int gpio_wp; /* gpio (write protect) */ - - int (*set_power)(struct device *dev, int power_on, int vdd); - void (*remux)(struct device *dev, int power_on); - /* Call back before enabling / disabling regulators */ - void (*before_set_reg)(struct device *dev, int power_on, int vdd); - /* Call back after enabling / disabling regulators */ - void (*after_set_reg)(struct device *dev, int power_on, int vdd); /* if we have special card, init it using this callback */ void (*init_card)(struct mmc_card *card); -- cgit v1.2.3 From c641e5b207ed7dfaa692820aeb5b6dde3de3e9b0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Jul 2017 17:55:29 +0200 Subject: ASoC: fix pcm-creation regression This reverts commit 99b04f4c4051 ("ASoC: add Component level pcm_new/pcm_free"), which started calling the pcm_new callback for every component in a *card* when creating a new pcm, something which does not seem to make any sense. This specifically led to memory leaks in systems with more than one platform component and where DMA memory is allocated in the platform-driver callback. For example, when both mcasp devices are being used on an am335x board, DMA memory would be allocated twice for every DAI link during probe. When CONFIG_SND_VERBOSE_PROCFS was set, this fortunately also led to warnings such as: WARNING: CPU: 0 PID: 565 at ../fs/proc/generic.c:346 proc_register+0x110/0x154 proc_dir_entry 'sub0/prealloc' already registered Since there seem to be no users of the new component callbacks, and the current implementation introduced a regression, let's revert the offending commit for now.
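As a sketch of the double allocation (illustrative only, not code from this patch): with the component-level callbacks, soc_new_pcm() walked every component registered on the card, so on a card with two platform components any pcm_new() implementation that preallocates DMA buffers ran twice per pcm:

	/* The per-card walk being reverted below: it invokes pcm_new() for
	 * BOTH platform components, so a pcm_new() that calls e.g.
	 * snd_pcm_lib_preallocate_pages_for_all() allocates DMA memory (and
	 * registers the "prealloc" procfs entries) twice for the same pcm.
	 */
	list_for_each_entry(component, &rtd->card->component_dev_list, card_list) {
		if (component->pcm_new)
			component->pcm_new(rtd);
	}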
Fixes: 99b04f4c4051 ("ASoC: add Component level pcm_new/pcm_free") Signed-off-by: Johan Hovold Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Mark Brown Cc: stable # 4.10 --- include/sound/soc.h | 6 ------ sound/soc/soc-core.c | 25 ------------------------- sound/soc/soc-pcm.c | 32 +++++++++----------------------- 3 files changed, 9 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 9c94b97c17f8..c4a8b1947566 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -795,10 +795,6 @@ struct snd_soc_component_driver { int (*suspend)(struct snd_soc_component *); int (*resume)(struct snd_soc_component *); - /* pcm creation and destruction */ - int (*pcm_new)(struct snd_soc_pcm_runtime *); - void (*pcm_free)(struct snd_pcm *); - /* DT */ int (*of_xlate_dai_name)(struct snd_soc_component *component, struct of_phandle_args *args, @@ -874,8 +870,6 @@ struct snd_soc_component { void (*remove)(struct snd_soc_component *); int (*suspend)(struct snd_soc_component *); int (*resume)(struct snd_soc_component *); - int (*pcm_new)(struct snd_soc_pcm_runtime *); - void (*pcm_free)(struct snd_pcm *); /* machine specific init */ int (*init)(struct snd_soc_component *component); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 921622a01944..c240e13ba057 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3171,8 +3171,6 @@ static int snd_soc_component_initialize(struct snd_soc_component *component, component->remove = component->driver->remove; component->suspend = component->driver->suspend; component->resume = component->driver->resume; - component->pcm_new = component->driver->pcm_new; - component->pcm_free = component->driver->pcm_free; dapm = &component->dapm; dapm->dev = dev; @@ -3360,25 +3358,6 @@ static void snd_soc_platform_drv_remove(struct snd_soc_component *component) platform->driver->remove(platform); } -static int snd_soc_platform_drv_pcm_new(struct snd_soc_pcm_runtime *rtd) -{ - struct snd_soc_platform *platform = rtd->platform; - - if (platform->driver->pcm_new) - return platform->driver->pcm_new(rtd); - else - return 0; -} - -static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm) -{ - struct snd_soc_pcm_runtime *rtd = pcm->private_data; - struct snd_soc_platform *platform = rtd->platform; - - if (platform->driver->pcm_free) - platform->driver->pcm_free(pcm); -} - /** * snd_soc_add_platform - Add a platform to the ASoC core * @dev: The parent device for the platform @@ -3402,10 +3381,6 @@ int snd_soc_add_platform(struct device *dev, struct snd_soc_platform *platform, platform->component.probe = snd_soc_platform_drv_probe; if (platform_drv->remove) platform->component.remove = snd_soc_platform_drv_remove; - if (platform_drv->pcm_new) - platform->component.pcm_new = snd_soc_platform_drv_pcm_new; - if (platform_drv->pcm_free) - platform->component.pcm_free = snd_soc_platform_drv_pcm_free; #ifdef CONFIG_DEBUG_FS platform->component.debugfs_prefix = "platform"; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index dcc5ece08668..553f7a76743c 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2628,25 +2628,12 @@ static int dpcm_fe_dai_close(struct snd_pcm_substream *fe_substream) return ret; } -static void soc_pcm_free(struct snd_pcm *pcm) -{ - struct snd_soc_pcm_runtime *rtd = pcm->private_data; - struct snd_soc_component *component; - - list_for_each_entry(component, &rtd->card->component_dev_list, - card_list) { - if (component->pcm_free) - 
component->pcm_free(pcm); - } -} - /* create a new pcm */ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num) { struct snd_soc_platform *platform = rtd->platform; struct snd_soc_dai *codec_dai; struct snd_soc_dai *cpu_dai = rtd->cpu_dai; - struct snd_soc_component *component; struct snd_pcm *pcm; char new_name[64]; int ret = 0, playback = 0, capture = 0; @@ -2756,18 +2743,17 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num) if (capture) snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &rtd->ops); - list_for_each_entry(component, &rtd->card->component_dev_list, card_list) { - if (component->pcm_new) { - ret = component->pcm_new(rtd); - if (ret < 0) { - dev_err(component->dev, - "ASoC: pcm constructor failed: %d\n", - ret); - return ret; - } + if (platform->driver->pcm_new) { + ret = platform->driver->pcm_new(rtd); + if (ret < 0) { + dev_err(platform->dev, + "ASoC: pcm constructor failed: %d\n", + ret); + return ret; } } - pcm->private_free = soc_pcm_free; + + pcm->private_free = platform->driver->pcm_free; out: dev_info(rtd->card->dev, "%s <-> %s mapping ok\n", (rtd->num_codecs > 1) ? "multicodec" : rtd->codec_dai->name, -- cgit v1.2.3 From 43fc509c3efb5c973991ee24c449ab2a0d71dd1e Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 20 Jul 2017 11:19:58 +0100 Subject: dma-coherent: introduce interface for default DMA pool Christoph noticed [1] that the default DMA pool in its current form overloads the DMA coherent infrastructure. In reply, Robin suggested [2] splitting the per-device vs. global pool interfaces, so that allocation/release from the default DMA pool is driven by the dma ops implementation. This patch implements Robin's idea and provides an interface to allocate/release/mmap the default (aka global) DMA pool. To make it clear that the existing *_from_coherent routines work on a per-device pool, rename them to *_from_dev_coherent.
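For illustration, a sketch of how an arch dma ops implementation might drive the split interfaces (hypothetical caller; only the helper signatures shown in the diff below are from this patch):

	static void *arch_dma_alloc(struct device *dev, size_t size,
				    dma_addr_t *dma_handle, gfp_t gfp,
				    unsigned long attrs)
	{
		void *ret;

		/* per-device coherent pool, as before */
		if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &ret))
			return ret;

		/* the default (global) pool is now an explicit, separate step */
		ret = dma_alloc_from_global_coherent(size, dma_handle);
		if (ret)
			return ret;

		/* ... otherwise fall back to allocating from generic memory ... */
		return NULL;
	}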
[1] https://lkml.org/lkml/2017/7/7/370 [2] https://lkml.org/lkml/2017/7/7/431 Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Suggested-by: Robin Murphy Tested-by: Andras Szemzo Reviewed-by: Robin Murphy Signed-off-by: Vladimir Murzin Signed-off-by: Christoph Hellwig --- arch/arc/mm/dma.c | 2 +- arch/arm/mm/dma-mapping.c | 2 +- arch/arm64/mm/dma-mapping.c | 4 +- arch/mips/mm/dma-default.c | 2 +- drivers/base/dma-coherent.c | 164 ++++++++++++++++++++++++++++---------------- drivers/base/dma-mapping.c | 2 +- include/linux/dma-mapping.h | 40 ++++++++--- 7 files changed, 144 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a07e6ecafbd..71d3efff99d3 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -117,7 +117,7 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e7380bafbfa6..fcf1473d6fed 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -851,7 +851,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_pfn(dev, dma_addr); unsigned long off = vma->vm_pgoff; - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e90cd1db42a8..f27d4dd04384 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -329,7 +329,7 @@ static int __swiotlb_mmap(struct device *dev, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; return __swiotlb_mmap_pfn(vma, pfn, size); @@ -706,7 +706,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index e08598c70b3e..8e78251eccc2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -232,7 +232,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma, else vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index 2ae24c28e70c..1c152aed6b82 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c @@ -25,7 +25,7 @@ static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *de { if (dev && dev->dma_mem) return dev->dma_mem; - return dma_coherent_default_memory; + return NULL; } static inline dma_addr_t dma_get_device_base(struct device *dev, 
@@ -165,34 +165,15 @@ void *dma_mark_declared_memory_occupied(struct device *dev, } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); -/** - * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area - * - * @dev: device from which we allocate memory - * @size: size of requested memory area - * @dma_handle: This will be filled with the correct dma handle - * @ret: This pointer will be filled with the virtual address - * to allocated area. - * - * This function should be only called from per-arch dma_alloc_coherent() - * to support allocation from per-device coherent memory pools. - * - * Returns 0 if dma_alloc_coherent should continue with allocating from - * generic memory areas, or !0 if dma_alloc_coherent should return @ret. - */ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) +static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, + ssize_t size, dma_addr_t *dma_handle) { - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); int order = get_order(size); unsigned long flags; int pageno; int dma_memory_map; + void *ret; - if (!mem) - return 0; - - *ret = NULL; spin_lock_irqsave(&mem->spinlock, flags); if (unlikely(size > (mem->size << PAGE_SHIFT))) @@ -203,21 +184,50 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size, goto err; /* - * Memory was found in the per-device area. + * Memory was found in the coherent area. */ - *dma_handle = dma_get_device_base(dev, mem) + (pageno << PAGE_SHIFT); - *ret = mem->virt_base + (pageno << PAGE_SHIFT); + *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); + ret = mem->virt_base + (pageno << PAGE_SHIFT); dma_memory_map = (mem->flags & DMA_MEMORY_MAP); spin_unlock_irqrestore(&mem->spinlock, flags); if (dma_memory_map) - memset(*ret, 0, size); + memset(ret, 0, size); else - memset_io(*ret, 0, size); + memset_io(ret, 0, size); - return 1; + return ret; err: spin_unlock_irqrestore(&mem->spinlock, flags); + return NULL; +} + +/** + * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool + * @dev: device from which we allocate memory + * @size: size of requested memory area + * @dma_handle: This will be filled with the correct dma handle + * @ret: This pointer will be filled with the virtual address + * to allocated area. + * + * This function should be only called from per-arch dma_alloc_coherent() + * to support allocation from per-device coherent memory pools. + * + * Returns 0 if dma_alloc_coherent should continue with allocating from + * generic memory areas, or !0 if dma_alloc_coherent should return @ret. 
+ */ +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + if (!mem) + return 0; + + *ret = __dma_alloc_from_coherent(mem, size, dma_handle); + if (*ret) + return 1; + /* * In the case where the allocation can not be satisfied from the * per-device area, try to fall back to generic memory if the @@ -225,25 +235,20 @@ err: */ return mem->flags & DMA_MEMORY_EXCLUSIVE; } -EXPORT_SYMBOL(dma_alloc_from_coherent); +EXPORT_SYMBOL(dma_alloc_from_dev_coherent); -/** - * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool - * @dev: device from which the memory was allocated - * @order: the order of pages allocated - * @vaddr: virtual address of allocated pages - * - * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, releases that memory. - * - * Returns 1 if we correctly released the memory, or 0 if - * dma_release_coherent() should proceed with releasing memory from - * generic pools. - */ -int dma_release_from_coherent(struct device *dev, int order, void *vaddr) +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle) { - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + if (!dma_coherent_default_memory) + return NULL; + + return __dma_alloc_from_coherent(dma_coherent_default_memory, size, + dma_handle); +} +static int __dma_release_from_coherent(struct dma_coherent_mem *mem, + int order, void *vaddr) +{ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; @@ -256,28 +261,39 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) } return 0; } -EXPORT_SYMBOL(dma_release_from_coherent); /** - * dma_mmap_from_coherent() - try to mmap the memory allocated from - * per-device coherent memory pool to userspace + * dma_release_from_dev_coherent() - free memory to device coherent memory pool * @dev: device from which the memory was allocated - * @vma: vm_area for the userspace memory - * @vaddr: cpu address returned by dma_alloc_from_coherent - * @size: size of the memory buffer allocated by dma_alloc_from_coherent - * @ret: result from remap_pfn_range() + * @order: the order of pages allocated + * @vaddr: virtual address of allocated pages * * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, maps that memory to the provided vma. + * coherent memory pool and if so, releases that memory. * - * Returns 1 if we correctly mapped the memory, or 0 if the caller should - * proceed with mapping memory from generic pools. + * Returns 1 if we correctly released the memory, or 0 if the caller should + * proceed with releasing memory from generic pools. 
*/ -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, - void *vaddr, size_t size, int *ret) +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr) { struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + return __dma_release_from_coherent(mem, order, vaddr); +} +EXPORT_SYMBOL(dma_release_from_dev_coherent); + +int dma_release_from_global_coherent(int order, void *vaddr) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_release_from_coherent(dma_coherent_default_memory, order, + vaddr); +} + +static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, + struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) +{ if (mem && vaddr >= mem->virt_base && vaddr + size <= (mem->virt_base + (mem->size << PAGE_SHIFT))) { unsigned long off = vma->vm_pgoff; @@ -296,7 +312,39 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, } return 0; } -EXPORT_SYMBOL(dma_mmap_from_coherent); + +/** + * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool + * @dev: device from which the memory was allocated + * @vma: vm_area for the userspace memory + * @vaddr: cpu address returned by dma_alloc_from_dev_coherent + * @size: size of the memory buffer allocated + * @ret: result from remap_pfn_range() + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, maps that memory to the provided vma. + * + * Returns 1 if we correctly mapped the memory, or 0 if the caller should + * proceed with mapping memory from generic pools. + */ +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, + void *vaddr, size_t size, int *ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); +} +EXPORT_SYMBOL(dma_mmap_from_dev_coherent); + +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, + size_t size, int *ret) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_mmap_from_coherent(dma_coherent_default_memory, vma, + vaddr, size, ret); +} /* * Support for reserved memory regions defined in device tree diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 5096755d185e..b555ff9dd8fc 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -235,7 +235,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 843ab866e0f4..03c0196a6f24 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -157,16 +157,40 @@ static inline int is_device_dma_capable(struct device *dev) * These three functions are only for dma allocator. * Don't use them in device drivers. 
*/ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); -int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, size_t size, int *ret); + +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle); +int dma_release_from_global_coherent(int order, void *vaddr); +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, + size_t size, int *ret); + #else -#define dma_alloc_from_coherent(dev, size, handle, ret) (0) -#define dma_release_from_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_coherent(dev, vma, vaddr, order, ret) (0) +#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) +#define dma_release_from_dev_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) + +static inline void *dma_alloc_from_global_coherent(ssize_t size, + dma_addr_t *dma_handle) +{ + return NULL; +} + +static inline int dma_release_from_global_coherent(int order, void *vaddr) +{ + return 0; +} + +static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, + void *cpu_addr, size_t size, + int *ret) +{ + return 0; +} #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ #ifdef CONFIG_HAS_DMA @@ -481,7 +505,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, BUG_ON(!ops); - if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; if (!arch_dma_alloc_attrs(&dev, &flag)) @@ -503,7 +527,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, BUG_ON(!ops); WARN_ON(irqs_disabled()); - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) return; if (!ops->free || !cpu_addr) -- cgit v1.2.3 From 832e4c83abc5ec25af77db6c8a0f36d78f1cf825 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 09:16:24 +0200 Subject: uuid: remove uuid_be Everything uses uuid_t now. 
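For any remaining out-of-tree users the conversion is mechanical; a hedged sketch (uuid_gen() and uuid_is_null() are the existing uuid_t helpers):

	uuid_t u;			/* was: uuid_be u; */

	uuid_gen(&u);			/* was: uuid_be_gen(&u); */
	if (uuid_is_null(&u))		/* was: !uuid_be_cmp(u, NULL_UUID_BE) */
		return -EINVAL;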
Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- include/linux/uuid.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 2251e1925ea4..33b0bdbb613c 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -84,26 +84,12 @@ int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); /* backwards compatibility, don't use in new code */ -typedef uuid_t uuid_be; -#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ - UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) -#define NULL_UUID_BE \ - UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) - #define uuid_le_gen(u) guid_gen(u) -#define uuid_be_gen(u) uuid_gen(u) #define uuid_le_to_bin(guid, u) guid_parse(guid, u) -#define uuid_be_to_bin(uuid, u) uuid_parse(uuid, u) static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) { return memcmp(&u1, &u2, sizeof(guid_t)); } -static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_t)); -} - #endif -- cgit v1.2.3 From a0c2d9c1de0f6be33fe817069b06663277153c20 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 21 Jul 2017 16:51:23 -0600 Subject: ACPI: NUMA: add missing include in acpi_numa.h Right now, if a file includes acpi_numa.h and doesn't happen to include linux/numa.h before it, it gets the following warning: ./include/acpi/acpi_numa.h:9:5: warning: "MAX_NUMNODES" is not defined [-Wundef] #if MAX_NUMNODES > 256 ^~~~~~~~~~~~ Signed-off-by: Ross Zwisler Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_numa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h index d4b72944ccda..1e3a74f94131 100644 --- a/include/acpi/acpi_numa.h +++ b/include/acpi/acpi_numa.h @@ -3,6 +3,7 @@ #ifdef CONFIG_ACPI_NUMA #include <linux/kernel.h> +#include <linux/numa.h> /* Proximity bitmap length */ #if MAX_NUMNODES > 256 -- cgit v1.2.3 From 6c423f5751b9f68bfe7c7545519d4c7159f93e1b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 24 Jul 2017 13:58:00 -0600 Subject: sched/wait: Clean up some documentation warnings A couple of kerneldoc comments in <linux/wait.h> had incorrect names for macro parameters, with this unsightly result: ./include/linux/wait.h:555: warning: No description found for parameter 'wq' ./include/linux/wait.h:555: warning: Excess function parameter 'wq_head' description in 'wait_event_interruptible_hrtimeout' ./include/linux/wait.h:759: warning: No description found for parameter 'wq_head' ./include/linux/wait.h:759: warning: Excess function parameter 'wq' description in 'wait_event_killable' Correct the comments and kill the warnings.
Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20170724135800.769c4042@lwn.net Signed-off-by: Ingo Molnar --- include/linux/wait.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index b289c96151ee..5b74e36c0ca8 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -529,13 +529,13 @@ do { \ /** * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses - * @wq_head: the waitqueue to wait on + * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, as a ktime_t * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq_head is woken up. + * The @condition is checked each time the waitqueue @wq is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -735,12 +735,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); /** * wait_event_killable - sleep until a condition gets true - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_KILLABLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. + * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. -- cgit v1.2.3 From d9f89b4e9290e46cd9b273e9ad0bff0f93e86fae Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sat, 1 Jul 2017 18:26:54 +0200 Subject: KVM: arm/arm64: PMU: Fix overflow interrupt injection kvm_pmu_overflow_set() is called from perf's interrupt handler, making the call of kvm_vgic_inject_irq() from it (introduced with "KVM: arm/arm64: PMU: remove request-less vcpu kick") a really bad idea, as it's quite easy to try and retake a lock that the interrupted context is already holding. The fix is to use a vcpu kick, leaving the interrupt injection to kvm_pmu_sync_hwstate(), like it was doing before the refactoring. We don't just revert, though, because before the refactoring the kick was request-less, leaving the vcpu exposed to the request-less vcpu kick race, and also because the kick was used unnecessarily from register access handlers.
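The resulting overflow handler (see the virt/kvm/arm/pmu.c hunks below) follows the standard pattern for signalling from interrupt context: record the state, then request and kick instead of injecting directly:

	/* perf IRQ context: must not take vgic locks via kvm_vgic_inject_irq() */
	vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
		kvm_vcpu_kick(vcpu);	/* injection happens in kvm_pmu_sync_hwstate() */
	}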
Reviewed-by: Christoffer Dall Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- include/kvm/arm_pmu.h | 2 -- virt/kvm/arm/pmu.c | 43 +++++++++++++++---------------------------- 3 files changed, 16 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 77862881ae86..2e070d3baf9f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - kvm_pmu_overflow_set(vcpu, p->regval & mask); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); else /* accessing PMOVSCLR_EL0 */ vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index f6e030617467..f87fe20fcb05 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -48,7 +48,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val); -void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu); bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu); @@ -86,7 +85,6 @@ static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {} -static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {} static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index fc8a723ff387..8a9c42366db7 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -203,11 +203,15 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) return reg; } -static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) +static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow = !!kvm_pmu_overflow_status(vcpu); + bool overflow; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return; + overflow = !!kvm_pmu_overflow_status(vcpu); if (pmu->irq_level == overflow) return; @@ -215,33 +219,11 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) if (likely(irqchip_in_kernel(vcpu->kvm))) { int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow, - &vcpu->arch.pmu); + pmu->irq_num, overflow, pmu); WARN_ON(ret); } } -/** - * kvm_pmu_overflow_set - set PMU overflow interrupt - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMOVSSET register - */ -void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) -{ - if (val == 0) - return; - - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; - kvm_pmu_check_overflow(vcpu); -} - -static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) -{ - if (!kvm_arm_pmu_v3_ready(vcpu)) - return; - kvm_pmu_check_overflow(vcpu); -} - bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; @@ -303,7 +285,7 @@ static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) } /** - * When perf event 
overflows, call kvm_pmu_overflow_set to set overflow status. + * When the perf event overflows, set the overflow status and inform the vcpu. */ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct perf_sample_data *data, @@ -313,7 +295,12 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; - kvm_pmu_overflow_set(vcpu, BIT(idx)); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + + if (kvm_pmu_overflow_status(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } } /** @@ -341,7 +328,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) reg = lower_32_bits(reg); vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; if (!reg) - kvm_pmu_overflow_set(vcpu, BIT(i)); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } -- cgit v1.2.3 From 2fd4167fadd1360ab015e4f0e88e51843e49556c Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 12 Jul 2017 10:58:19 -0600 Subject: nvme: fabrics commands should use the fctype field for data direction Fabrics commands with opcode 0x7F use the fctype field to indicate data direction. Signed-off-by: Jon Derrick Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Fixes: eb793e2c ("nvme.h: add NVMe over Fabrics definitions") --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bc74da018bdc..25d8225dbd04 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1006,7 +1006,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd) * Why can't we simply have a Fabrics In and Fabrics out command? */ if (unlikely(cmd->common.opcode == nvme_fabrics_command)) - return cmd->fabrics.opcode & 1; + return cmd->fabrics.fctype & 1; return cmd->common.opcode & 1; } -- cgit v1.2.3 From 9c5358e15ca12ed3dc3b1e51671dee5d155de8e0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 17 Jul 2017 13:59:39 -0700 Subject: nvme-fc: revise TRADDR parsing The FC-NVME spec hasn't locked down on the format string for TRADDR. Currently the spec is lobbying for "nn-<16hexdigits>:pn-<16hexdigits>" where the wwn's are hex values but not prefixed by 0x. Most implementations so far expect a string format of "nn-0x<16hexdigits>:pn-0x<16hexdigits>" to be used. The transport uses the match_u64 parser which requires a leading 0x prefix to set the base properly. If it's not there, a match will either fail or return a base 10 value. The resolution in T11 is pushing out. Therefore, to fix things now and to cover any eventuality and any implementations already in the field, this patch adds support for both formats. The change consists of replacing the token matching routine with a routine that validates the fixed string format, and then builds a local copy of the hex name with a 0x prefix before calling the system parser. Note: the same parser routine exists in both the initiator and target transports. Given this is about the only "shared" item, we chose to replicate rather than create an interdependency on some shared code.
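For illustration, the two traddr spellings the new parser accepts (the WWN values here are made up), matching the NVME_FC_TRADDR_* lengths defined in the nvme-fc.h hunk below:

	nn-0x20000090fae0b5f5:pn-0x10000090fae0b5f5	(43 characters, "max"/0x form)
	nn-20000090fae0b5f5:pn-10000090fae0b5f5 	(39 characters, "min" form)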
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 102 ++++++++++++++++++++++++----------------------- drivers/nvme/target/fc.c | 101 ++++++++++++++++++++++++---------------------- include/linux/nvme-fc.h | 19 +++++++++ 3 files changed, 125 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5630ca46c3b5..5c2a08ef08ba 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2805,66 +2805,70 @@ out_fail: return ERR_PTR(ret); } -enum { - FCT_TRADDR_ERR = 0, - FCT_TRADDR_WWNN = 1 << 0, - FCT_TRADDR_WWPN = 1 << 1, -}; struct nvmet_fc_traddr { u64 nn; u64 pn; }; -static const match_table_t traddr_opt_tokens = { - { FCT_TRADDR_WWNN, "nn-%s" }, - { FCT_TRADDR_WWPN, "pn-%s" }, - { FCT_TRADDR_ERR, NULL } -}; - static int -nvme_fc_parse_address(struct nvmet_fc_traddr *traddr, char *buf) +__nvme_fc_parse_u64(substring_t *sstr, u64 *val) { - substring_t args[MAX_OPT_ARGS]; - char *options, *o, *p; - int token, ret = 0; u64 token64; - options = o = kstrdup(buf, GFP_KERNEL); - if (!options) - return -ENOMEM; + if (match_u64(sstr, &token64)) + return -EINVAL; + *val = token64; - while ((p = strsep(&o, ":\n")) != NULL) { - if (!*p) - continue; + return 0; +} - token = match_token(p, traddr_opt_tokens, args); - switch (token) { - case FCT_TRADDR_WWNN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->nn = token64; - break; - case FCT_TRADDR_WWPN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->pn = token64; - break; - default: - pr_warn("unknown traddr token or missing value '%s'\n", - p); - ret = -EINVAL; - goto out; - } - } +/* + * This routine validates and extracts the WWN's from the TRADDR string. + * As kernel parsers need the 0x to determine number base, universally + * build string to parse with 0x prefix before parsing name strings. 
+ */ +static int +nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) +{ + char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; + substring_t wwn = { name, &name[sizeof(name)-1] }; + int nnoffset, pnoffset; + + /* validate it string one of the 2 allowed formats */ + if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && + !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], + "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { + nnoffset = NVME_FC_TRADDR_OXNNLEN; + pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + + NVME_FC_TRADDR_OXNNLEN; + } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && + !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], + "pn-", NVME_FC_TRADDR_NNLEN))) { + nnoffset = NVME_FC_TRADDR_NNLEN; + pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; + } else + goto out_einval; -out: - kfree(options); - return ret; + name[0] = '0'; + name[1] = 'x'; + name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; + + memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) + goto out_einval; + + memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) + goto out_einval; + + return 0; + +out_einval: + pr_warn("%s: bad traddr string\n", __func__); + return -EINVAL; } static struct nvme_ctrl * @@ -2878,11 +2882,11 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) unsigned long flags; int ret; - ret = nvme_fc_parse_address(&raddr, opts->traddr); + ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE); if (ret || !raddr.nn || !raddr.pn) return ERR_PTR(-EINVAL); - ret = nvme_fc_parse_address(&laddr, opts->host_traddr); + ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE); if (ret || !laddr.nn || !laddr.pn) return ERR_PTR(-EINVAL); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index d5801c150b1c..31ca55dfcb1d 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2293,66 +2293,70 @@ nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port, } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort); -enum { - FCT_TRADDR_ERR = 0, - FCT_TRADDR_WWNN = 1 << 0, - FCT_TRADDR_WWPN = 1 << 1, -}; struct nvmet_fc_traddr { u64 nn; u64 pn; }; -static const match_table_t traddr_opt_tokens = { - { FCT_TRADDR_WWNN, "nn-%s" }, - { FCT_TRADDR_WWPN, "pn-%s" }, - { FCT_TRADDR_ERR, NULL } -}; - static int -nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) +__nvme_fc_parse_u64(substring_t *sstr, u64 *val) { - substring_t args[MAX_OPT_ARGS]; - char *options, *o, *p; - int token, ret = 0; u64 token64; - options = o = kstrdup(buf, GFP_KERNEL); - if (!options) - return -ENOMEM; + if (match_u64(sstr, &token64)) + return -EINVAL; + *val = token64; - while ((p = strsep(&o, ":\n")) != NULL) { - if (!*p) - continue; + return 0; +} - token = match_token(p, traddr_opt_tokens, args); - switch (token) { - case FCT_TRADDR_WWNN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->nn = token64; - break; - case FCT_TRADDR_WWPN: - if (match_u64(args, &token64)) { - ret = -EINVAL; - goto out; - } - traddr->pn = token64; - break; - default: - pr_warn("unknown traddr token or missing value '%s'\n", - p); - ret = -EINVAL; - goto out; - } - } +/* + * This routine validates and extracts the WWN's from the TRADDR string. 
+ * As kernel parsers need the 0x to determine number base, universally + * build string to parse with 0x prefix before parsing name strings. + */ +static int +nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) +{ + char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; + substring_t wwn = { name, &name[sizeof(name)-1] }; + int nnoffset, pnoffset; + + /* validate it string one of the 2 allowed formats */ + if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && + !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], + "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { + nnoffset = NVME_FC_TRADDR_OXNNLEN; + pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + + NVME_FC_TRADDR_OXNNLEN; + } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && + !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && + !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], + "pn-", NVME_FC_TRADDR_NNLEN))) { + nnoffset = NVME_FC_TRADDR_NNLEN; + pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; + } else + goto out_einval; + + name[0] = '0'; + name[1] = 'x'; + name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; + + memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) + goto out_einval; + + memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); + if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) + goto out_einval; -out: - kfree(options); - return ret; + return 0; + +out_einval: + pr_warn("%s: bad traddr string\n", __func__); + return -EINVAL; } static int @@ -2370,7 +2374,8 @@ nvmet_fc_add_port(struct nvmet_port *port) /* map the traddr address info to a target port */ - ret = nvmet_fc_parse_traddr(&traddr, port->disc_addr.traddr); + ret = nvme_fc_parse_traddr(&traddr, port->disc_addr.traddr, + sizeof(port->disc_addr.traddr)); if (ret) return ret; diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 21c37e39e41a..36cca93a5ff2 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -334,5 +334,24 @@ struct fcnvme_ls_disconnect_acc { #define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */ #define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */ +/* + * TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>" + * the string is allowed to be specified with or without a "0x" prefix + * infront of the <16hexdigits>. Without is considered the "min" string + * and with is considered the "max" string. The hexdigits may be upper + * or lower case. + */ +#define NVME_FC_TRADDR_NNLEN 3 /* "?n-" */ +#define NVME_FC_TRADDR_OXNNLEN 5 /* "?n-0x" */ +#define NVME_FC_TRADDR_HEXNAMELEN 16 +#define NVME_FC_TRADDR_MINLENGTH \ + (2 * (NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1) +#define NVME_FC_TRADDR_MAXLENGTH \ + (2 * (NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1) +#define NVME_FC_TRADDR_MIN_PN_OFFSET \ + (NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1) +#define NVME_FC_TRADDR_MAX_PN_OFFSET \ + (NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1) + #endif /* _NVME_FC_H */ -- cgit v1.2.3 From dce4551cb2adb1ac9a30f8ab5299d614392b3cff Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 25 Jul 2017 17:57:47 +0200 Subject: udp: preserve head state for IP_CMSG_PASSSEC Paul Moore reported a SELinux/IP_PASSSEC regression caused by missing skb->sp at recvmsg() time. We need to preserve the skb head state to process the IP_CMSG_PASSSEC cmsg. 
With this commit we avoid releasing the skb head state in the BH even if a secpath is attached to the current skb, and store the skb status (with/without head states) in the scratch area, so that we can access it at skb deallocation time without incurring cache-miss penalties. This also avoids misusing the skb CB for ipv6 packets, as introduced by the commit 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing"). Also clean up the scratch area helpers implementation a bit, to reduce the code differences between 32- and 64-bit builds. Reported-by: Paul Moore Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue") Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing") Signed-off-by: Paolo Abeni Tested-by: Paul Moore Signed-off-by: David S. Miller --- include/net/udp.h | 33 ++++++++++++++++++++++----------- net/ipv4/udp.c | 38 ++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 972ce4baab6b..56ce2d2a612d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ -#if BITS_PER_LONG == 64 - -/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on - * cold cache lines at recvmsg time. - * skb->len can be stored on 16 bits since the udp header has been already - * validated and pulled. - */ struct udp_dev_scratch { - u32 truesize; + /* skb->truesize and the stateless bit are embedded in a single field; + * do not use a bitfield since the compiler emits better/smaller code + * this way + */ + u32 _tsize_state; + +#if BITS_PER_LONG == 64 + /* len and the bit needed to compute skb_csum_unnecessary + * will be on cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been + * already validated and pulled.
+ */ u16 len; bool is_linear; bool csum_unnecessary; +#endif }; +static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb) +{ + return (struct udp_dev_scratch *)&skb->dev_scratch; +} + +#if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; + return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; + return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; + return udp_skb_scratch(skb)->is_linear; } #else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b057653ceca9..d243772f6efc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1163,34 +1163,32 @@ out: return ret; } -#if BITS_PER_LONG == 64 +#define UDP_SKB_IS_STATELESS 0x80000000 + static void udp_set_dev_scratch(struct sk_buff *skb) { - struct udp_dev_scratch *scratch; + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); - scratch = (struct udp_dev_scratch *)&skb->dev_scratch; - scratch->truesize = skb->truesize; + scratch->_tsize_state = skb->truesize; +#if BITS_PER_LONG == 64 scratch->len = skb->len; scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); +#endif + if (likely(!skb->_skb_refdst)) + scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } static int udp_skb_truesize(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; -} -#else -static void udp_set_dev_scratch(struct sk_buff *skb) -{ - skb->dev_scratch = skb->truesize; + return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; } -static int udp_skb_truesize(struct sk_buff *skb) +static bool udp_skb_has_head_state(struct sk_buff *skb) { - return skb->dev_scratch; + return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS); } -#endif /* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial, @@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) unlock_sock_fast(sk, slow); } - /* we cleared the head states previously only if the skb lacks any IP - * options, see __udp_queue_rcv_skb(). + /* In the more common cases we cleared the head states previously, + * see __udp_queue_rcv_skb(). 
*/ - if (unlikely(IPCB(skb)->opt.optlen > 0)) + if (unlikely(udp_skb_has_head_state(skb))) skb_release_head_state(skb); consume_stateless_skb(skb); } @@ -1784,11 +1782,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* At recvmsg() time we need skb->dst to process IP options-related - * cmsg, elsewhere can we clear all pending head states while they are - * hot in the cache + /* At recvmsg() time we may access skb->dst or skb->sp depending on + * the IP options and the cmsg flags, elsewhere can we clear all + * pending head states while they are hot in the cache */ - if (likely(IPCB(skb)->opt.optlen == 0)) + if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp)) skb_release_head_state(skb); rc = __udp_enqueue_schedule_skb(sk, skb); -- cgit v1.2.3 From 0a94efb5acbb6980d7c9ab604372d93cd507e4d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jul 2017 08:36:15 -0400 Subject: workqueue: implicit ordered attribute should be overridable 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") automatically enabled ordered attribute for unbound workqueues w/ max_active == 1. Because ordered workqueues reject max_active and some attribute changes, this implicit ordered mode broke cases where the user creates an unbound workqueue w/ max_active == 1 and later explicitly changes the related attributes. This patch distinguishes explicit and implicit ordered setting and overrides from attribute changes if implicit. Signed-off-by: Tejun Heo Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") --- include/linux/workqueue.h | 4 +++- kernel/workqueue.c | 13 +++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c102ef65cb64..db6dc9dc0482 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -323,6 +323,7 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ + __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ @@ -422,7 +423,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...)
\ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ + __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index abe4a4971c24..7146ea70a62d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3744,8 +3744,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, return -EINVAL; /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + if (!list_empty(&wq->pwqs)) { + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) + return -EINVAL; + + wq->flags &= ~__WQ_ORDERED; + } ctx = apply_wqattrs_prepare(wq, attrs); if (!ctx) @@ -4129,13 +4133,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) struct pool_workqueue *pwq; /* disallow meddling with max_active for ordered workqueues */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); mutex_lock(&wq->mutex); + wq->flags &= ~__WQ_ORDERED; wq->saved_max_active = max_active; for_each_pwq(pwq, wq) @@ -5263,7 +5268,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) * attributes breaks ordering guarantee. Disallow exposing ordered * workqueues. */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return -EINVAL; wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); -- cgit v1.2.3 From 2eaa38d9fcba5294182268b8d11770cf3fdc9bc9 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 25 Jul 2017 11:08:15 +0200 Subject: net: phy: Remove trailing semicolon in macro definition Commit e5a03bfd873c2 ("phy: Add an mdio_device structure") introduced a spurious trailing semicolon. Remove it. Signed-off-by: Marc Gonzalez Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 2a9567bb8186..0bb5b212ab42 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -830,7 +830,7 @@ static inline int phy_read_status(struct phy_device *phydev) dev_err(&_phydev->mdio.dev, format, ##args) #define phydev_dbg(_phydev, format, args...) \ - dev_dbg(&_phydev->mdio.dev, format, ##args); + dev_dbg(&_phydev->mdio.dev, format, ##args) static inline const char *phydev_name(const struct phy_device *phydev) { -- cgit v1.2.3 From fc1ff45a07abf240aa0c6586c11465c86c8cab8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 15 Jul 2017 09:32:56 -0300 Subject: media: cec-notifier: small improvements Allow calling cec_notifier_set_phys_addr and cec_notifier_set_phys_addr_from_edid with a NULL notifier, in which case these functions do nothing. Add a cec_notifier_phys_addr_invalidate helper function (the notifier equivalent of cec_phys_addr_invalidate). These changes simplify drm CEC driver support. 
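As a minimal sketch of what this enables on the drm side, consider the hotplug path of a hypothetical HDMI encoder driver (the foo_hdmi names are invented for illustration; only the cec_notifier_* calls are from this patch):

    #include <media/cec-notifier.h>

    struct foo_hdmi {
    	struct cec_notifier *cec_notifier;	/* may be NULL: no CEC configured */
    };

    /* With NULL-tolerant notifier helpers, the driver needs no NULL checks
     * on either branch of the hotplug handler.
     */
    static void foo_hdmi_hotplug(struct foo_hdmi *hdmi, const struct edid *edid)
    {
    	if (edid)
    		/* parse the EDID and propagate the new physical address */
    		cec_notifier_set_phys_addr_from_edid(hdmi->cec_notifier, edid);
    	else
    		/* cable unplugged: invalidate the physical address */
    		cec_notifier_phys_addr_invalidate(hdmi->cec_notifier);
    }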
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-notifier.c | 6 ++++++ include/media/cec-notifier.h | 15 +++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/media/cec/cec-notifier.c b/drivers/media/cec/cec-notifier.c index 74dc1c32080e..08b619d0ea1e 100644 --- a/drivers/media/cec/cec-notifier.c +++ b/drivers/media/cec/cec-notifier.c @@ -87,6 +87,9 @@ EXPORT_SYMBOL_GPL(cec_notifier_put); void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa) { + if (n == NULL) + return; + mutex_lock(&n->lock); n->phys_addr = pa; if (n->callback) @@ -100,6 +103,9 @@ void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n, { u16 pa = CEC_PHYS_ADDR_INVALID; + if (n == NULL) + return; + if (edid && edid->extensions) pa = cec_get_edid_phys_addr((const u8 *)edid, EDID_LENGTH * (edid->extensions + 1), NULL); diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h index 298f996969df..a4f7429c4ae5 100644 --- a/include/media/cec-notifier.h +++ b/include/media/cec-notifier.h @@ -57,6 +57,7 @@ void cec_notifier_put(struct cec_notifier *n); * @pa: the CEC physical address * * Set a new CEC physical address. + * Does nothing if @n == NULL. */ void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa); @@ -66,6 +67,7 @@ void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa); * @edid: the struct edid pointer * * Parses the EDID to obtain the new CEC physical address and set it. + * Does nothing if @n == NULL. */ void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n, const struct edid *edid); @@ -118,4 +120,17 @@ static inline void cec_notifier_unregister(struct cec_notifier *n) #endif +/** + * cec_notifier_phys_addr_invalidate() - set the physical address to INVALID + * + * @n: the CEC notifier + * + * This is a simple helper function to invalidate the physical + * address. Does nothing if @n == NULL. + */ +static inline void cec_notifier_phys_addr_invalidate(struct cec_notifier *n) +{ + cec_notifier_set_phys_addr(n, CEC_PHYS_ADDR_INVALID); +} + #endif -- cgit v1.2.3 From b25db383928cecba356835583b16fa7008f97b3a Mon Sep 17 00:00:00 2001 From: Prabhakar Lad Date: Thu, 20 Jul 2017 04:56:31 -0400 Subject: media: platform: davinci: drop VPFE_CMD_S_CCDC_RAW_PARAMS drop VPFE_CMD_S_CCDC_RAW_PARAMS ioctl from dm355/dm644x for the following reasons: - This ioctl was never in the public API and was only defined in a kernel header. - The function set_params constantly mixes up pointers and phys_addr_t numbers. - This is part of a 'VPFE_CMD_S_CCDC_RAW_PARAMS' ioctl command that is described as an 'experimental ioctl that will change in future kernels'. - The code to allocate the table never gets called after we copy_from_user the user input over the kernel settings, and then compare them for inequality. - We then go on to use an address provided by user space as both the __user pointer for input and pass it through phys_to_virt to come up with a kernel pointer to copy the data to. This looks like a trivially exploitable root hole.
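The last bullet is the crux; a condensed, self-contained sketch of that bug class (simplified for illustration, not a verbatim excerpt of the removed code):

    #include <linux/io.h>
    #include <linux/uaccess.h>

    /* A user-controlled integer is trusted both as a physical address (fed
     * to phys_to_virt()) and as a __user source pointer, so a malicious
     * caller can have the kernel write attacker data to an arbitrary
     * physical page.
     */
    static int broken_load_table(unsigned long user_val, size_t len)
    {
    	void *kvaddr = phys_to_virt(user_val);	/* attacker picks the target */

    	if (copy_from_user(kvaddr, (void __user *)user_val, len))
    		return -EFAULT;
    	return 0;
    }

Removing the ioctl, rather than trying to validate the address, is the right call here since the interface was never part of the public API.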
Signed-off-by: Lad, Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/davinci/ccdc_hw_device.h | 10 -- drivers/media/platform/davinci/dm355_ccdc.c | 92 +-------------- drivers/media/platform/davinci/dm644x_ccdc.c | 151 +----------------------- drivers/media/platform/davinci/vpfe_capture.c | 75 ------------ include/media/davinci/dm644x_ccdc.h | 12 -- include/media/davinci/vpfe_capture.h | 10 -- 6 files changed, 4 insertions(+), 346 deletions(-) (limited to 'include') diff --git a/drivers/media/platform/davinci/ccdc_hw_device.h b/drivers/media/platform/davinci/ccdc_hw_device.h index 8f6688a7a111..f1b521045d64 100644 --- a/drivers/media/platform/davinci/ccdc_hw_device.h +++ b/drivers/media/platform/davinci/ccdc_hw_device.h @@ -42,16 +42,6 @@ struct ccdc_hw_ops { int (*set_hw_if_params) (struct vpfe_hw_if_param *param); /* get interface parameters */ int (*get_hw_if_params) (struct vpfe_hw_if_param *param); - /* - * Pointer to function to set parameters. Used - * for implementing VPFE_S_CCDC_PARAMS - */ - int (*set_params) (void *params); - /* - * Pointer to function to get parameter. Used - * for implementing VPFE_G_CCDC_PARAMS - */ - int (*get_params) (void *params); /* Pointer to function to configure ccdc */ int (*configure) (void); diff --git a/drivers/media/platform/davinci/dm355_ccdc.c b/drivers/media/platform/davinci/dm355_ccdc.c index 73db166dc338..6d492dc4c3a9 100644 --- a/drivers/media/platform/davinci/dm355_ccdc.c +++ b/drivers/media/platform/davinci/dm355_ccdc.c @@ -17,12 +17,7 @@ * This module is for configuring DM355 CCD controller of VPFE to capture * Raw yuv or Bayer RGB data from a decoder. CCDC has several modules * such as Defect Pixel Correction, Color Space Conversion etc to - * pre-process the Bayer RGB data, before writing it to SDRAM. This - * module also allows application to configure individual - * module parameters through VPFE_CMD_S_CCDC_RAW_PARAMS IOCTL. - * To do so, application include dm355_ccdc.h and vpfe_capture.h header - * files. The setparams() API is called by vpfe_capture driver - * to configure module parameters + * pre-process the Bayer RGB data, before writing it to SDRAM. 
* * TODO: 1) Raw bayer parameter settings and bayer capture * 2) Split module parameter structure to module specific ioctl structs @@ -260,90 +255,6 @@ static void ccdc_setwin(struct v4l2_rect *image_win, dev_dbg(ccdc_cfg.dev, "\nEnd of ccdc_setwin..."); } -static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam) -{ - if (ccdcparam->datasft < CCDC_DATA_NO_SHIFT || - ccdcparam->datasft > CCDC_DATA_SHIFT_6BIT) { - dev_dbg(ccdc_cfg.dev, "Invalid value of data shift\n"); - return -EINVAL; - } - - if (ccdcparam->mfilt1 < CCDC_NO_MEDIAN_FILTER1 || - ccdcparam->mfilt1 > CCDC_MEDIAN_FILTER1) { - dev_dbg(ccdc_cfg.dev, "Invalid value of median filter1\n"); - return -EINVAL; - } - - if (ccdcparam->mfilt2 < CCDC_NO_MEDIAN_FILTER2 || - ccdcparam->mfilt2 > CCDC_MEDIAN_FILTER2) { - dev_dbg(ccdc_cfg.dev, "Invalid value of median filter2\n"); - return -EINVAL; - } - - if ((ccdcparam->med_filt_thres < 0) || - (ccdcparam->med_filt_thres > CCDC_MED_FILT_THRESH)) { - dev_dbg(ccdc_cfg.dev, - "Invalid value of median filter threshold\n"); - return -EINVAL; - } - - if (ccdcparam->data_sz < CCDC_DATA_16BITS || - ccdcparam->data_sz > CCDC_DATA_8BITS) { - dev_dbg(ccdc_cfg.dev, "Invalid value of data size\n"); - return -EINVAL; - } - - if (ccdcparam->alaw.enable) { - if (ccdcparam->alaw.gamma_wd < CCDC_GAMMA_BITS_13_4 || - ccdcparam->alaw.gamma_wd > CCDC_GAMMA_BITS_09_0) { - dev_dbg(ccdc_cfg.dev, "Invalid value of ALAW\n"); - return -EINVAL; - } - } - - if (ccdcparam->blk_clamp.b_clamp_enable) { - if (ccdcparam->blk_clamp.sample_pixel < CCDC_SAMPLE_1PIXELS || - ccdcparam->blk_clamp.sample_pixel > CCDC_SAMPLE_16PIXELS) { - dev_dbg(ccdc_cfg.dev, - "Invalid value of sample pixel\n"); - return -EINVAL; - } - if (ccdcparam->blk_clamp.sample_ln < CCDC_SAMPLE_1LINES || - ccdcparam->blk_clamp.sample_ln > CCDC_SAMPLE_16LINES) { - dev_dbg(ccdc_cfg.dev, - "Invalid value of sample lines\n"); - return -EINVAL; - } - } - return 0; -} - -/* Parameter operations */ -static int ccdc_set_params(void __user *params) -{ - struct ccdc_config_params_raw ccdc_raw_params; - int x; - - /* only raw module parameters can be set through the IOCTL */ - if (ccdc_cfg.if_type != VPFE_RAW_BAYER) - return -EINVAL; - - x = copy_from_user(&ccdc_raw_params, params, sizeof(ccdc_raw_params)); - if (x) { - dev_dbg(ccdc_cfg.dev, "ccdc_set_params: error in copying ccdcparams, %d\n", - x); - return -EFAULT; - } - - if (!validate_ccdc_param(&ccdc_raw_params)) { - memcpy(&ccdc_cfg.bayer.config_params, - &ccdc_raw_params, - sizeof(ccdc_raw_params)); - return 0; - } - return -EINVAL; -} - /* This function will configure CCDC for YCbCr video capture */ static void ccdc_config_ycbcr(void) { @@ -939,7 +850,6 @@ static struct ccdc_hw_device ccdc_hw_dev = { .enable = ccdc_enable, .enable_out_to_sdram = ccdc_enable_output_to_sdram, .set_hw_if_params = ccdc_set_hw_if_params, - .set_params = ccdc_set_params, .configure = ccdc_configure, .set_buftype = ccdc_set_buftype, .get_buftype = ccdc_get_buftype, diff --git a/drivers/media/platform/davinci/dm644x_ccdc.c b/drivers/media/platform/davinci/dm644x_ccdc.c index 740fbc7a8c14..3b2d8a9317b8 100644 --- a/drivers/media/platform/davinci/dm644x_ccdc.c +++ b/drivers/media/platform/davinci/dm644x_ccdc.c @@ -17,13 +17,9 @@ * This module is for configuring CCD controller of DM6446 VPFE to capture * Raw yuv or Bayer RGB data from a decoder. CCDC has several modules * such as Defect Pixel Correction, Color Space Conversion etc to - * pre-process the Raw Bayer RGB data, before writing it to SDRAM. 
This - * module also allows application to configure individual - * module parameters through VPFE_CMD_S_CCDC_RAW_PARAMS IOCTL. - * To do so, application includes dm644x_ccdc.h and vpfe_capture.h header - * files. The setparams() API is called by vpfe_capture driver - * to configure module parameters. This file is named DM644x so that other - * variants such DM6443 may be supported using the same module. + * pre-process the Raw Bayer RGB data, before writing it to SDRAM. + * This file is named DM644x so that other variants such DM6443 + * may be supported using the same module. * * TODO: Test Raw bayer parameter settings and bayer capture * Split module parameter structure to module specific ioctl structs @@ -216,96 +212,8 @@ static void ccdc_readregs(void) dev_notice(ccdc_cfg.dev, "\nReading 0x%x to VERT_LINES...\n", val); } -static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam) -{ - if (ccdcparam->alaw.enable) { - u8 max_gamma = ccdc_gamma_width_max_bit(ccdcparam->alaw.gamma_wd); - u8 max_data = ccdc_data_size_max_bit(ccdcparam->data_sz); - - if ((ccdcparam->alaw.gamma_wd > CCDC_GAMMA_BITS_09_0) || - (ccdcparam->alaw.gamma_wd < CCDC_GAMMA_BITS_15_6) || - (max_gamma > max_data)) { - dev_dbg(ccdc_cfg.dev, "\nInvalid data line select"); - return -1; - } - } - return 0; -} - -static int ccdc_update_raw_params(struct ccdc_config_params_raw *raw_params) -{ - struct ccdc_config_params_raw *config_params = - &ccdc_cfg.bayer.config_params; - unsigned int *fpc_virtaddr = NULL; - unsigned int *fpc_physaddr = NULL; - - memcpy(config_params, raw_params, sizeof(*raw_params)); - /* - * allocate memory for fault pixel table and copy the user - * values to the table - */ - if (!config_params->fault_pxl.enable) - return 0; - - fpc_physaddr = (unsigned int *)config_params->fault_pxl.fpc_table_addr; - fpc_virtaddr = (unsigned int *)phys_to_virt( - (unsigned long)fpc_physaddr); - /* - * Allocate memory for FPC table if current - * FPC table buffer is not big enough to - * accommodate FPC Number requested - */ - if (raw_params->fault_pxl.fp_num != config_params->fault_pxl.fp_num) { - if (fpc_physaddr != NULL) { - free_pages((unsigned long)fpc_virtaddr, - get_order - (config_params->fault_pxl.fp_num * - FP_NUM_BYTES)); - } - - /* Allocate memory for FPC table */ - fpc_virtaddr = - (unsigned int *)__get_free_pages(GFP_KERNEL | GFP_DMA, - get_order(raw_params-> - fault_pxl.fp_num * - FP_NUM_BYTES)); - - if (fpc_virtaddr == NULL) { - dev_dbg(ccdc_cfg.dev, - "\nUnable to allocate memory for FPC"); - return -EFAULT; - } - fpc_physaddr = - (unsigned int *)virt_to_phys((void *)fpc_virtaddr); - } - - /* Copy number of fault pixels and FPC table */ - config_params->fault_pxl.fp_num = raw_params->fault_pxl.fp_num; - if (copy_from_user(fpc_virtaddr, - (void __user *)raw_params->fault_pxl.fpc_table_addr, - config_params->fault_pxl.fp_num * FP_NUM_BYTES)) { - dev_dbg(ccdc_cfg.dev, "\n copy_from_user failed"); - return -EFAULT; - } - config_params->fault_pxl.fpc_table_addr = (unsigned long)fpc_physaddr; - return 0; -} - static int ccdc_close(struct device *dev) { - struct ccdc_config_params_raw *config_params = - &ccdc_cfg.bayer.config_params; - unsigned int *fpc_physaddr = NULL, *fpc_virtaddr = NULL; - - fpc_physaddr = (unsigned int *)config_params->fault_pxl.fpc_table_addr; - - if (fpc_physaddr != NULL) { - fpc_virtaddr = (unsigned int *) - phys_to_virt((unsigned long)fpc_physaddr); - free_pages((unsigned long)fpc_virtaddr, - get_order(config_params->fault_pxl.fp_num * - FP_NUM_BYTES)); - } return 0; } 
@@ -339,29 +247,6 @@ static void ccdc_sbl_reset(void) vpss_clear_wbl_overflow(VPSS_PCR_CCDC_WBL_O); } -/* Parameter operations */ -static int ccdc_set_params(void __user *params) -{ - struct ccdc_config_params_raw ccdc_raw_params; - int x; - - if (ccdc_cfg.if_type != VPFE_RAW_BAYER) - return -EINVAL; - - x = copy_from_user(&ccdc_raw_params, params, sizeof(ccdc_raw_params)); - if (x) { - dev_dbg(ccdc_cfg.dev, "ccdc_set_params: error in copyingccdc params, %d\n", - x); - return -EFAULT; - } - - if (!validate_ccdc_param(&ccdc_raw_params)) { - if (!ccdc_update_raw_params(&ccdc_raw_params)) - return 0; - } - return -EINVAL; -} - /* * ccdc_config_ycbcr() * This function will configure CCDC for YCbCr video capture @@ -489,32 +374,6 @@ static void ccdc_config_black_compense(struct ccdc_black_compensation *bcomp) regw(val, CCDC_BLKCMP); } -static void ccdc_config_fpc(struct ccdc_fault_pixel *fpc) -{ - u32 val; - - /* Initially disable FPC */ - val = CCDC_FPC_DISABLE; - regw(val, CCDC_FPC); - - if (!fpc->enable) - return; - - /* Configure Fault pixel if needed */ - regw(fpc->fpc_table_addr, CCDC_FPC_ADDR); - dev_dbg(ccdc_cfg.dev, "\nWriting 0x%lx to FPC_ADDR...\n", - (fpc->fpc_table_addr)); - /* Write the FPC params with FPC disable */ - val = fpc->fp_num & CCDC_FPC_FPC_NUM_MASK; - regw(val, CCDC_FPC); - - dev_dbg(ccdc_cfg.dev, "\nWriting 0x%x to FPC...\n", val); - /* read the FPC register */ - val = regr(CCDC_FPC) | CCDC_FPC_ENABLE; - regw(val, CCDC_FPC); - dev_dbg(ccdc_cfg.dev, "\nWriting 0x%x to FPC...\n", val); -} - /* * ccdc_config_raw() * This function will configure CCDC for Raw capture mode @@ -569,9 +428,6 @@ static void ccdc_config_raw(void) /* Configure Black level compensation */ ccdc_config_black_compense(&config_params->blk_comp); - /* Configure Fault Pixel Correction */ - ccdc_config_fpc(&config_params->fault_pxl); - /* If data size is 8 bit then pack the data */ if ((config_params->data_sz == CCDC_DATA_8BITS) || config_params->alaw.enable) @@ -929,7 +785,6 @@ static struct ccdc_hw_device ccdc_hw_dev = { .reset = ccdc_sbl_reset, .enable = ccdc_enable, .set_hw_if_params = ccdc_set_hw_if_params, - .set_params = ccdc_set_params, .configure = ccdc_configure, .set_buftype = ccdc_set_buftype, .get_buftype = ccdc_get_buftype, diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c index 1831bf5ccca5..b1bf4a7e8eb7 100644 --- a/drivers/media/platform/davinci/vpfe_capture.c +++ b/drivers/media/platform/davinci/vpfe_capture.c @@ -280,45 +280,6 @@ void vpfe_unregister_ccdc_device(struct ccdc_hw_device *dev) } EXPORT_SYMBOL(vpfe_unregister_ccdc_device); -/* - * vpfe_get_ccdc_image_format - Get image parameters based on CCDC settings - */ -static int vpfe_get_ccdc_image_format(struct vpfe_device *vpfe_dev, - struct v4l2_format *f) -{ - struct v4l2_rect image_win; - enum ccdc_buftype buf_type; - enum ccdc_frmfmt frm_fmt; - - memset(f, 0, sizeof(*f)); - f->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; - ccdc_dev->hw_ops.get_image_window(&image_win); - f->fmt.pix.width = image_win.width; - f->fmt.pix.height = image_win.height; - f->fmt.pix.bytesperline = ccdc_dev->hw_ops.get_line_length(); - f->fmt.pix.sizeimage = f->fmt.pix.bytesperline * - f->fmt.pix.height; - buf_type = ccdc_dev->hw_ops.get_buftype(); - f->fmt.pix.pixelformat = ccdc_dev->hw_ops.get_pixel_format(); - frm_fmt = ccdc_dev->hw_ops.get_frame_format(); - if (frm_fmt == CCDC_FRMFMT_PROGRESSIVE) - f->fmt.pix.field = V4L2_FIELD_NONE; - else if (frm_fmt == CCDC_FRMFMT_INTERLACED) { - if (buf_type == 
CCDC_BUFTYPE_FLD_INTERLEAVED) - f->fmt.pix.field = V4L2_FIELD_INTERLACED; - else if (buf_type == CCDC_BUFTYPE_FLD_SEPARATED) - f->fmt.pix.field = V4L2_FIELD_SEQ_TB; - else { - v4l2_err(&vpfe_dev->v4l2_dev, "Invalid buf_type\n"); - return -EINVAL; - } - } else { - v4l2_err(&vpfe_dev->v4l2_dev, "Invalid frm_fmt\n"); - return -EINVAL; - } - return 0; -} - /* * vpfe_config_ccdc_image_format() * For a pix format, configure ccdc to setup the capture @@ -1697,41 +1658,6 @@ unlock_out: return ret; } - -static long vpfe_param_handler(struct file *file, void *priv, - bool valid_prio, unsigned int cmd, void *param) -{ - struct vpfe_device *vpfe_dev = video_drvdata(file); - int ret; - - v4l2_dbg(2, debug, &vpfe_dev->v4l2_dev, "vpfe_param_handler\n"); - - if (vpfe_dev->started) { - /* only allowed if streaming is not started */ - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "device already started\n"); - return -EBUSY; - } - - ret = mutex_lock_interruptible(&vpfe_dev->lock); - if (ret) - return ret; - - switch (cmd) { - case VPFE_CMD_S_CCDC_RAW_PARAMS: - ret = -EINVAL; - v4l2_warn(&vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); - break; - default: - ret = -ENOTTY; - } -unlock_out: - mutex_unlock(&vpfe_dev->lock); - return ret; -} - - /* vpfe capture ioctl operations */ static const struct v4l2_ioctl_ops vpfe_ioctl_ops = { .vidioc_querycap = vpfe_querycap, @@ -1754,7 +1680,6 @@ static const struct v4l2_ioctl_ops vpfe_ioctl_ops = { .vidioc_cropcap = vpfe_cropcap, .vidioc_g_selection = vpfe_g_selection, .vidioc_s_selection = vpfe_s_selection, - .vidioc_default = vpfe_param_handler, }; static struct vpfe_device *vpfe_initialize(void) diff --git a/include/media/davinci/dm644x_ccdc.h b/include/media/davinci/dm644x_ccdc.h index 7c909da29d43..6ea2ce241851 100644 --- a/include/media/davinci/dm644x_ccdc.h +++ b/include/media/davinci/dm644x_ccdc.h @@ -103,16 +103,6 @@ struct ccdc_black_compensation { char gb; }; -/* structure for fault pixel correction */ -struct ccdc_fault_pixel { - /* Enable or Disable fault pixel correction */ - unsigned char enable; - /* Number of fault pixel */ - unsigned short fp_num; - /* Address of fault pixel table */ - unsigned long fpc_table_addr; -}; - /* Structure for CCDC configuration parameters for raw capture mode passed * by application */ @@ -125,8 +115,6 @@ struct ccdc_config_params_raw { struct ccdc_black_clamp blk_clamp; /* Structure for Black Compensation */ struct ccdc_black_compensation blk_comp; - /* Structure for Fault Pixel Module Configuration */ - struct ccdc_fault_pixel fault_pxl; }; diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h index 8e1a4d88daa0..f003533602d0 100644 --- a/include/media/davinci/vpfe_capture.h +++ b/include/media/davinci/vpfe_capture.h @@ -183,14 +183,4 @@ struct vpfe_config_params { }; #endif /* End of __KERNEL__ */ -/** - * VPFE_CMD_S_CCDC_RAW_PARAMS - EXPERIMENTAL IOCTL to set raw capture params - * This can be used to configure modules such as defect pixel correction, - * color space conversion, culling etc. This is an experimental ioctl that - * will change in future kernels. So use this ioctl with care ! 
- * TODO: This is to be split into multiple ioctls and also explore the - * possibility of extending the v4l2 api to include this - **/ -#define VPFE_CMD_S_CCDC_RAW_PARAMS _IOW('V', BASE_VIDIOC_PRIVATE + 1, \ - void *) #endif /* _DAVINCI_VPFE_H */ -- cgit v1.2.3 From fdeaf7e3eb37c6dbc4b4ac97dbe1945d239eb788 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 24 Jul 2017 13:40:03 +0200 Subject: KVM: make pid available for uevents without debugfs Simplify and improve the code so that the PID is always available in the uevent even when debugfs is not available. This adds a userspace_pid field to struct kvm, as per Radim's suggestion, so that the PID can be retrieved on destruction too. Acked-by: Janosch Frank Fixes: 286de8f6ac9202 ("KVM: trigger uevents when creating or destroying a VM") Signed-off-by: Claudio Imbrenda Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 35 ++++++++++++----------------------- 2 files changed, 13 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 648b34cabb38..890b706d1943 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -445,6 +445,7 @@ struct kvm { struct kvm_stat_data **debugfs_stat_data; struct srcu_struct srcu; struct srcu_struct irq_srcu; + pid_t userspace_pid; }; #define kvm_err(fmt, ...) \ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 82987d457b8b..f3f74271f1a9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3883,7 +3883,6 @@ static const struct file_operations *stat_fops[] = { static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) { struct kobj_uevent_env *env; - char *tmp, *pathbuf = NULL; unsigned long long created, active; if (!kvm_dev.this_device || !kvm) @@ -3907,38 +3906,28 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) add_uevent_var(env, "CREATED=%llu", created); add_uevent_var(env, "COUNT=%llu", active); - if (type == KVM_EVENT_CREATE_VM) + if (type == KVM_EVENT_CREATE_VM) { add_uevent_var(env, "EVENT=create"); - else if (type == KVM_EVENT_DESTROY_VM) + kvm->userspace_pid = task_pid_nr(current); + } else if (type == KVM_EVENT_DESTROY_VM) { add_uevent_var(env, "EVENT=destroy"); + } + add_uevent_var(env, "PID=%d", kvm->userspace_pid); if (kvm->debugfs_dentry) { - char p[ITOA_MAX_LEN]; - - snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name); - tmp = strchrnul(p + 1, '-'); - *tmp = '\0'; - add_uevent_var(env, "PID=%s", p); - pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); - if (pathbuf) { - /* sizeof counts the final '\0' */ - int len = sizeof("STATS_PATH=") - 1; - const char *pvar = "STATS_PATH="; - - tmp = dentry_path_raw(kvm->debugfs_dentry, - pathbuf + len, - PATH_MAX - len); - if (!IS_ERR(tmp)) { - memcpy(tmp - len, pvar, len); - env->envp[env->envp_idx++] = tmp - len; - } + char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); + + if (p) { + tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); + if (!IS_ERR(tmp)) + add_uevent_var(env, "STATS_PATH=%s", tmp); + kfree(p); } } /* no need for checks, since we are adding at most only 5 keys */ env->envp[env->envp_idx++] = NULL; kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); kfree(env); - kfree(pathbuf); } static int kvm_init_debug(void) -- cgit v1.2.3 From bb67b496c338e15813f075f482067da930f52e39 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Tue, 18 Jul 2017 14:22:20 -0300 Subject: include/linux/vfio.h: Guard powerpc-specific functions with 
CONFIG_VFIO_SPAPR_EEH When CONFIG_EEH=y and CONFIG_VFIO_SPAPR_EEH=n, build fails with the following: drivers/vfio/pci/vfio_pci.o: In function `.vfio_pci_release': vfio_pci.c:(.text+0xa98): undefined reference to `.vfio_spapr_pci_eeh_release' drivers/vfio/pci/vfio_pci.o: In function `.vfio_pci_open': vfio_pci.c:(.text+0x1420): undefined reference to `.vfio_spapr_pci_eeh_open' In this case, vfio_pci.c should use the empty definitions of vfio_spapr_pci_eeh_open and vfio_spapr_pci_eeh_release functions. This patch fixes it by guarding these function definitions with CONFIG_VFIO_SPAPR_EEH, the symbol that controls whether vfio_spapr_eeh.c is built, which is where the non-empty versions of these functions are. We need to make use of the IS_ENABLED() macro because CONFIG_VFIO_SPAPR_EEH is a tristate option. This issue was found during a randconfig build. Logs are here: http://kisskb.ellerman.id.au/kisskb/buildresult/12982362/ Signed-off-by: Murilo Opsfelder Araujo Reviewed-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 586809abb273..a47b985341d1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -152,7 +152,7 @@ extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, size_t *data_size); struct pci_dev; -#ifdef CONFIG_EEH +#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, @@ -173,7 +173,7 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, { return -ENOTTY; } -#endif /* CONFIG_EEH */ +#endif /* CONFIG_VFIO_SPAPR_EEH */ /* * IRQfd - generic -- cgit v1.2.3 From 273752c9ff03eb83856601b2a3458218bb949e46 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 26 Jul 2017 09:35:09 -0400 Subject: dm, dax: Make sure dm_dax_flush() is called if device supports it Currently dm_dax_flush() is not being called, even if the underlying dax device supports write cache, because DAXDEV_WRITE_CACHE is not being propagated up to the DM dax device. If the underlying dax device supports write cache, set DAXDEV_WRITE_CACHE on the DM dax device. This will cause dm_dax_flush() to be called.
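For context, the new dax_write_cache_enabled() helper pairs with the existing dax_flush(); a minimal sketch of the consumer pattern this patch makes work (the wrapper function is hypothetical, the two dax_* calls match the prototypes in the hunks below):

    #include <linux/dax.h>

    /* Flush a just-written range through the device write cache, but only
     * when the device actually advertises one (DAXDEV_WRITE_CACHE set).
     */
    static void example_commit_range(struct dax_device *dax_dev, pgoff_t pgoff,
    				 void *addr, size_t len)
    {
    	if (dax_write_cache_enabled(dax_dev))
    		dax_flush(dax_dev, pgoff, addr, len);
    }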
Fixes: abebfbe2f7 ("dm: add ->flush() dax operation support") Signed-off-by: Vivek Goyal Acked-by: Dan Williams Signed-off-by: Mike Snitzer --- drivers/dax/super.c | 6 ++++++ drivers/md/dm-table.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/dax.h | 1 + 3 files changed, 42 insertions(+) (limited to 'include') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index ce9e563e6e1d..938eb4868f7f 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -278,6 +278,12 @@ void dax_write_cache(struct dax_device *dax_dev, bool wc) } EXPORT_SYMBOL_GPL(dax_write_cache); +bool dax_write_cache_enabled(struct dax_device *dax_dev) +{ + return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); +} +EXPORT_SYMBOL_GPL(dax_write_cache_enabled); + bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a39bcd9b982a..28a4071cdf85 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -20,6 +20,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "table" @@ -1630,6 +1631,37 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) return false; } +static int device_dax_write_cache_enabled(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct dax_device *dax_dev = dev->dax_dev; + + if (!dax_dev) + return false; + + if (dax_write_cache_enabled(dax_dev)) + return true; + return false; +} + +static int dm_table_supports_dax_write_cache(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, + device_dax_write_cache_enabled, NULL)) + return true; + } + + return false; +} + static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1785,6 +1817,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); + if (dm_table_supports_dax_write_cache(t)) + dax_write_cache(t->md->dax_dev, true); + /* Ensure that all underlying devices are non-rotational. */ if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); diff --git a/include/linux/dax.h b/include/linux/dax.h index 794811875732..df97b7af7e2c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -87,6 +87,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t size); void dax_write_cache(struct dax_device *dax_dev, bool wc); +bool dax_write_cache_enabled(struct dax_device *dax_dev); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From 1937f8a29f4a650bc27e0311b43b53509a34fd22 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:52:59 +0200 Subject: scsi: bnx2fc: Simplify CPU hotplug code The CPU hotplug related code of this driver can be simplified by: 1) Consolidating the callbacks into a single state. The CPU thread can be torn down on the CPU which goes offline. There is no point in delaying that to the CPU dead state 2) Let the core code invoke the online/offline callbacks and remove the extra for_each_online_cpu() loops. Signed-off-by: Thomas Gleixner Signed-off-by: Martin K. 
Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 69 +++++++++------------------------------ include/linux/cpuhotplug.h | 1 - 2 files changed, 15 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index c4ebf8c5d884..6844ba361616 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2624,12 +2624,11 @@ static struct fcoe_transport bnx2fc_transport = { }; /** - * bnx2fc_percpu_thread_create - Create a receive thread for an - * online CPU + * bnx2fc_cpu_online - Create a receive thread for an online CPU * * @cpu: cpu index for the online cpu */ -static void bnx2fc_percpu_thread_create(unsigned int cpu) +static int bnx2fc_cpu_online(unsigned int cpu) { struct bnx2fc_percpu_s *p; struct task_struct *thread; @@ -2639,15 +2638,17 @@ static void bnx2fc_percpu_thread_create(unsigned int cpu) thread = kthread_create_on_node(bnx2fc_percpu_io_thread, (void *)p, cpu_to_node(cpu), "bnx2fc_thread/%d", cpu); + if (IS_ERR(thread)) + return PTR_ERR(thread); + /* bind thread to the cpu */ - if (likely(!IS_ERR(thread))) { - kthread_bind(thread, cpu); - p->iothread = thread; - wake_up_process(thread); - } + kthread_bind(thread, cpu); + p->iothread = thread; + wake_up_process(thread); + return 0; } -static void bnx2fc_percpu_thread_destroy(unsigned int cpu) +static int bnx2fc_cpu_offline(unsigned int cpu) { struct bnx2fc_percpu_s *p; struct task_struct *thread; @@ -2661,7 +2662,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu) thread = p->iothread; p->iothread = NULL; - /* Free all work in the list */ list_for_each_entry_safe(work, tmp, &p->work_list, list) { list_del_init(&work->list); @@ -2673,20 +2673,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu) if (thread) kthread_stop(thread); -} - - -static int bnx2fc_cpu_online(unsigned int cpu) -{ - printk(PFX "CPU %x online: Create Rx thread\n", cpu); - bnx2fc_percpu_thread_create(cpu); - return 0; -} - -static int bnx2fc_cpu_dead(unsigned int cpu) -{ - printk(PFX "CPU %x offline: Remove Rx thread\n", cpu); - bnx2fc_percpu_thread_destroy(cpu); return 0; } @@ -2761,31 +2747,16 @@ static int __init bnx2fc_mod_init(void) spin_lock_init(&p->fp_work_lock); } - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2fc_percpu_thread_create(cpu); - - rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2fc:online", - bnx2fc_cpu_online, NULL); + rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2fc:online", + bnx2fc_cpu_online, bnx2fc_cpu_offline); if (rc < 0) - goto stop_threads; + goto stop_thread; bnx2fc_online_state = rc; - cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD, - "scsi/bnx2fc:dead", - NULL, bnx2fc_cpu_dead); - put_online_cpus(); - cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb); - return 0; -stop_threads: - for_each_online_cpu(cpu) - bnx2fc_percpu_thread_destroy(cpu); - put_online_cpus(); +stop_thread: kthread_stop(l2_thread); free_wq: destroy_workqueue(bnx2fc_wq); @@ -2804,7 +2775,6 @@ static void __exit bnx2fc_mod_exit(void) struct fcoe_percpu_s *bg; struct task_struct *l2_thread; struct sk_buff *skb; - unsigned int cpu = 0; /* * NOTE: Since cnic calls register_driver routine rtnl_lock, @@ -2845,16 +2815,7 @@ static void __exit bnx2fc_mod_exit(void) if (l2_thread) kthread_stop(l2_thread); - get_online_cpus(); - /* Destroy per cpu threads */ - for_each_online_cpu(cpu) { - bnx2fc_percpu_thread_destroy(cpu); - } - - 
cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state); - cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD); - - put_online_cpus(); + cpuhp_remove_state(bnx2fc_online_state); destroy_workqueue(bnx2fc_wq); /* diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b56573bf440d..2e7b1731ad12 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -39,7 +39,6 @@ enum cpuhp_state { CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, - CPUHP_SCSI_BNX2FC_DEAD, CPUHP_SCSI_BNX2I_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, -- cgit v1.2.3 From f9f22a86912f9d36b50e9b3b383fabfb9f22dd46 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:53:00 +0200 Subject: scsi: bnx2i: Simplify cpu hotplug code The CPU hotplug related code of this driver can be simplified by: 1) Consolidating the callbacks into a single state. The CPU thread can be torn down on the CPU which goes offline. There is no point in delaying that to the CPU dead state 2) Let the core code invoke the online/offline callbacks and remove the extra for_each_online_cpu() loops. Signed-off-by: Thomas Gleixner Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_init.c | 65 ++++++++++------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 15 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c index 7487b653e799..4ebcda8d9500 100644 --- a/drivers/scsi/bnx2i/bnx2i_init.c +++ b/drivers/scsi/bnx2i/bnx2i_init.c @@ -404,12 +404,11 @@ int bnx2i_get_stats(void *handle) /** - * bnx2i_percpu_thread_create - Create a receive thread for an - * online CPU + * bnx2i_cpu_online - Create a receive thread for an online CPU * * @cpu: cpu index for the online cpu */ -static void bnx2i_percpu_thread_create(unsigned int cpu) +static int bnx2i_cpu_online(unsigned int cpu) { struct bnx2i_percpu_s *p; struct task_struct *thread; @@ -419,16 +418,17 @@ static void bnx2i_percpu_thread_create(unsigned int cpu) thread = kthread_create_on_node(bnx2i_percpu_io_thread, (void *)p, cpu_to_node(cpu), "bnx2i_thread/%d", cpu); + if (IS_ERR(thread)) + return PTR_ERR(thread); + /* bind thread to the cpu */ - if (likely(!IS_ERR(thread))) { - kthread_bind(thread, cpu); - p->iothread = thread; - wake_up_process(thread); - } + kthread_bind(thread, cpu); + p->iothread = thread; + wake_up_process(thread); + return 0; } - -static void bnx2i_percpu_thread_destroy(unsigned int cpu) +static int bnx2i_cpu_offline(unsigned int cpu) { struct bnx2i_percpu_s *p; struct task_struct *thread; @@ -451,19 +451,6 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu) spin_unlock_bh(&p->p_work_lock); if (thread) kthread_stop(thread); -} - -static int bnx2i_cpu_online(unsigned int cpu) -{ - pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu); - bnx2i_percpu_thread_create(cpu); - return 0; -} - -static int bnx2i_cpu_dead(unsigned int cpu) -{ - pr_info("CPU %x offline: Remove Rx thread\n", cpu); - bnx2i_percpu_thread_destroy(cpu); return 0; } @@ -511,28 +498,14 @@ static int __init bnx2i_mod_init(void) p->iothread = NULL; } - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2i_percpu_thread_create(cpu); - - err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2i:online", - bnx2i_cpu_online, NULL); + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2i:online", + bnx2i_cpu_online, bnx2i_cpu_offline); if (err < 0) - goto 
remove_threads; + goto unreg_driver; bnx2i_online_state = err; - - cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD, - "scsi/bnx2i:dead", - NULL, bnx2i_cpu_dead); - put_online_cpus(); return 0; -remove_threads: - for_each_online_cpu(cpu) - bnx2i_percpu_thread_destroy(cpu); - put_online_cpus(); +unreg_driver: cnic_unregister_driver(CNIC_ULP_ISCSI); unreg_xport: iscsi_unregister_transport(&bnx2i_iscsi_transport); @@ -552,7 +525,6 @@ out: static void __exit bnx2i_mod_exit(void) { struct bnx2i_hba *hba; - unsigned cpu = 0; mutex_lock(&bnx2i_dev_lock); while (!list_empty(&adapter_list)) { @@ -570,14 +542,7 @@ static void __exit bnx2i_mod_exit(void) } mutex_unlock(&bnx2i_dev_lock); - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2i_percpu_thread_destroy(cpu); - - cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state); - cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD); - put_online_cpus(); + cpuhp_remove_state(bnx2i_online_state); iscsi_unregister_transport(&bnx2i_iscsi_transport); cnic_unregister_driver(CNIC_ULP_ISCSI); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2e7b1731ad12..82b30e638430 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -39,7 +39,6 @@ enum cpuhp_state { CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, - CPUHP_SCSI_BNX2I_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 6b84202c946cd3da3a8daa92c682510e9ed80321 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 16:24:59 +0800 Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") tried to fix the issue that it may overstep the chunk end for _sctp_walk_{params, errors} with 'chunk_end > offset(length) + sizeof(length)'. But it introduced a side effect: When processing INIT, it verifies the chunks with 'param.v == chunk_end' after iterating all params by sctp_walk_params(). With the check 'chunk_end > offset(length) + sizeof(length)', it would return before the last param is accessed, because the last param is usually the fwdtsn-supported param, whose size is 4, so that 'chunk_end == offset(length) + sizeof(length)'. This is a serious issue that even prevents sctp from completing its 4-way handshake: a client would always get an abort when connecting to a server, because INIT chunk verification fails on the server. The patch is to use 'chunk_end <= offset(length) + sizeof(length)' instead of 'chunk_end < offset(length) + sizeof(length)' for both _sctp_walk_params and _sctp_walk_errors. Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S.
Miller --- include/net/sctp/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 980807d7506f..45fd4c6056b5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ - (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ @@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ -- cgit v1.2.3 From a3287c41ff405025bc57b165a0f6cd698bbbc1be Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Jul 2017 16:30:34 +0100 Subject: drivers/perf: arm_pmu: Request PMU SPIs with IRQF_PER_CPU Since the PMU register interface is banked per CPU, CPU PMU interrupts cannot be handled by a CPU other than the one with the PMU asserting the interrupt. This means that migrating PMU SPIs, as we do during a CPU hotplug operation, doesn't make any sense and can lead to the IRQ being disabled entirely if we route a spurious IRQ to the new affinity target. This has been observed in practice on AMD Seattle, where CPUs on the non-boot cluster appear to take a spurious PMU IRQ when coming online, which is routed to CPU0 where it cannot be handled. This patch passes IRQF_PERCPU for PMU SPIs and forcefully sets their affinity prior to requesting them, ensuring that they cannot be migrated during hotplug events. This interacts badly with the DB8500 erratum workaround that ping-pongs the interrupt affinity from the handler, so we avoid passing IRQF_PERCPU in that case by allowing the IRQ flags to be overridden in the platdata.
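The DB8500 hunk below is the real user of that escape hatch; as an illustration, a board file for a hypothetical platform with the same constraint would provide something like this (only struct arm_pmu_platdata and its new irq_flags field come from this patch):

    #include <linux/interrupt.h>
    #include <linux/perf/arm_pmu.h>

    /* Deliberately omit IRQF_PERCPU so that the PMU IRQ affinity can still
     * be changed at runtime, e.g. by a workaround that migrates the IRQ
     * from within its own handler.
     */
    static struct arm_pmu_platdata example_pmu_platdata = {
    	.irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD,
    };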
Fixes: 3cf7ee98b848 ("drivers/perf: arm_pmu: move irq request/free into probe") Cc: Mark Rutland Cc: Linus Walleij Signed-off-by: Will Deacon --- arch/arm/mach-ux500/cpu-db8500.c | 1 + drivers/perf/arm_pmu.c | 41 ++++++++++++++++++++++++++-------------- include/linux/perf/arm_pmu.h | 4 ++++ 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 28083ef72819..71a34e8c345a 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -133,6 +133,7 @@ static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) static struct arm_pmu_platdata db8500_pmu_platdata = { .handle_irq = db8500_pmu_handler, + .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD, }; static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index dc459eb1246b..1c5e0f333779 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -569,22 +569,41 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) if (irq != other_irq) { pr_warn("mismatched PPIs detected.\n"); err = -EINVAL; + goto err_out; } } else { - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu); + unsigned long irq_flags; + + err = irq_force_affinity(irq, cpumask_of(cpu)); + + if (err && num_possible_cpus() > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + goto err_out; + } + + if (platdata && platdata->irq_flags) { + irq_flags = platdata->irq_flags; + } else { + irq_flags = IRQF_PERCPU | + IRQF_NOBALANCING | + IRQF_NO_THREAD; + } + + err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } + if (err) + goto err_out; cpumask_set_cpu(cpu, &armpmu->active_irqs); - return 0; + +err_out: + pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); + return err; } int armpmu_request_irqs(struct arm_pmu *armpmu) @@ -628,12 +647,6 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) enable_percpu_irq(irq, IRQ_TYPE_NONE); return 0; } - - if (irq_force_affinity(irq, cpumask_of(cpu)) && - num_possible_cpus() > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - } } return 0; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 1360dd6d5e61..af0f44effd44 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -24,10 +24,14 @@ * interrupt and passed the address of the low level handler, * and can be used to implement any platform specific handling * before or after calling it. + * + * @irq_flags: if non-zero, these flags will be passed to request_irq + * when requesting interrupts for this PMU device. */ struct arm_pmu_platdata { irqreturn_t (*handle_irq)(int irq, void *dev, irq_handler_t pmu_handler); + unsigned long irq_flags; }; #ifdef CONFIG_ARM_PMU -- cgit v1.2.3 From 8397913303abc9333f376a518a8368fa22ca5e6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 27 Jul 2017 12:21:11 +0200 Subject: genirq/cpuhotplug: Revert "Set force affinity flag on hotplug migration" That commit was part of the changes moving x86 to the generic CPU hotplug interrupt migration code. 
The force flag was required on x86 before the hierarchical irqdomain rework, but invoking set_affinity() with force=true stayed and had no side effects. At some point in the past, the force flag got repurposed to support setting the affinity of the exynos timer interrupt to a not-yet-online CPU, so the interrupt controller callback does not verify the supplied affinity mask against cpu_online_mask. Setting the flag in the CPU hotplug code causes the cpu online masking to be blocked on these irq controllers and results in potentially affining an interrupt to the CPU which is unplugged, i.e. instead of moving it away, it's just reassigned to it. As the force flag is no longer needed on x86, it's safe to revert that patch so the ARM irqchips which use the force flag work again. Add comments to that effect, so this won't happen again. Note: The online mask handling should be done in the generic code and the force flag and the masking in the irq chips removed altogether, but that's not a change possible for 4.13. Fixes: 77f85e66aa8b ("genirq/cpuhotplug: Set force affinity flag on hotplug migration") Reported-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Cc: Marc Zyngier Cc: Russell King Cc: LAK Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707271217590.3109@nanos Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 7 ++++++- kernel/irq/cpuhotplug.c | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 00db35b61e9e..d2d543794093 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -388,7 +388,12 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_mask_ack: ack and mask an interrupt source * @irq_unmask: unmask an interrupt source * @irq_eoi: end of interrupt - * @irq_set_affinity: set the CPU affinity on SMP machines + * @irq_set_affinity: Set the CPU affinity on SMP machines. If the force + * argument is true, it tells the driver to + * unconditionally apply the affinity setting. Sanity + * checks against the supplied affinity mask are not + * required. This is used for CPU hotplug where the + * target CPU is not yet set in the cpu_online_mask. * @irq_retrigger: resend an IRQ to the CPU * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @irq_set_wake: enable/disable power-management wake-on of an IRQ diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index aee8f7ec40af..638eb9c83d9f 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -95,8 +95,13 @@ static bool migrate_one_irq(struct irq_desc *desc) affinity = cpu_online_mask; brokeaff = true; } - - err = irq_do_set_affinity(d, affinity, true); + /* + * Do not set the force argument of irq_do_set_affinity() as this + * disables the masking of offline CPUs from the supplied affinity + * mask and therefore might keep/reassign the irq to the outgoing + * CPU. + */ + err = irq_do_set_affinity(d, affinity, false); if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err); -- cgit v1.2.3 From 0b794ffae7afa7c4e5accac8791c4b78e8d080ce Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 25 May 2017 15:11:26 +0300 Subject: net/mlx5: Fix mlx5_ifc_mtpps_reg_bits structure size Fix the miscalculation in the reserved_at_1a0 field.
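In the mlx5_ifc convention, the hex suffix of a reserved_at_* field names its starting bit offset and the array size gives its width in bits, so the declared widths must sum exactly to the register length that macros such as MLX5_ST_SZ_BYTES() derive from the struct. A hedged sanity check of the corrected layout (the 0x1e0-bit total for MTPPS is inferred from the fixed widths, not quoted from the PRM):

    /* Illustration only, not driver code: the trailing reserved field must
     * end exactly at the assumed register length, otherwise
     * MLX5_ST_SZ_BYTES(mtpps_reg) reports the wrong size to firmware.
     */
    #define MTPPS_REG_BITS	0x1e0	/* assumed total register width in bits */
    _Static_assert(0x1a0 + 0x40 == MTPPS_REG_BITS,
    	       "mtpps_reg reserved tail must close the register");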
Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 87869c04849a..fd98aef4545c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8175,7 +8175,7 @@ struct mlx5_ifc_mtpps_reg_bits { u8 out_pulse_duration[0x10]; u8 out_periodic_adjustment[0x10]; - u8 reserved_at_1a0[0x60]; + u8 reserved_at_1a0[0x40]; }; struct mlx5_ifc_mtppse_reg_bits { -- cgit v1.2.3 From fa3676885e3b5be1edfa1b2cc775e20a45b34a19 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 25 May 2017 16:09:34 +0300 Subject: net/mlx5e: Add field select to MTPPS register In order to mark the relevant fields while setting the MTPPS register, add a field select. Otherwise it can cause a misconfiguration in the firmware. Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 29 +++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 ++++ include/linux/mlx5/mlx5_ifc.h | 10 +++++--- 4 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 66f432385dbb..ab07233e2faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -53,6 +53,15 @@ enum { MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, }; +enum { + MLX5E_MTPPS_FS_ENABLE = BIT(0x0), + MLX5E_MTPPS_FS_PATTERN = BIT(0x2), + MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), +}; + void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, struct skb_shared_hwtstamps *hwts) { @@ -221,7 +230,10 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) /* For future use need to add a loop for finding all 1PPS out pins */ MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); - MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF); + MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta); + MLX5_SET(mtpps_reg, in, field_select, + MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ); mlx5_set_mtpps(priv->mdev, in, sizeof(in)); } @@ -257,8 +269,7 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, int pin = -1; int err = 0; - if (!MLX5_CAP_GEN(priv->mdev, pps) || - !MLX5_CAP_GEN(priv->mdev, pps_modify)) + if (!MLX5_PPS_CAP(priv->mdev)) return -EOPNOTSUPP; if (rq->extts.index >= tstamp->ptp_info.n_pins) @@ -277,6 +288,9 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN); MLX5_SET(mtpps_reg, in, pattern, pattern); MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_PATTERN | + MLX5E_MTPPS_FS_ENABLE); err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); if (err) @@ -302,7 +316,7 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, int pin = -1; s64 ns; - if (!MLX5_CAP_GEN(priv->mdev, pps_modify)) + if (!MLX5_PPS_CAP(priv->mdev)) return -EOPNOTSUPP; if (rq->perout.index >= tstamp->ptp_info.n_pins) @@ -337,7 +351,10
@@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC); MLX5_SET(mtpps_reg, in, enable, on); MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); - + MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_PATTERN | + MLX5E_MTPPS_FS_ENABLE | + MLX5E_MTPPS_FS_TIME_STAMP); return mlx5_set_mtpps(priv->mdev, in, sizeof(in)); } @@ -487,7 +504,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) #define MAX_PIN_NUM 8 tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL); if (tstamp->pps_pin_caps) { - if (MLX5_CAP_GEN(priv->mdev, pps)) + if (MLX5_PPS_CAP(priv->mdev)) mlx5e_get_pps_caps(priv, tstamp); if (tstamp->ptp_info.n_pins) mlx5e_init_pin_config(tstamp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index af51a5d2b912..52b9a64cd3a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -698,7 +698,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) else mlx5_core_dbg(dev, "port_module_event is not set\n"); - if (MLX5_CAP_GEN(dev, pps)) + if (MLX5_PPS_CAP(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); if (MLX5_CAP_GEN(dev, fpga)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 6a3d6bef7dd4..6a263e8d883a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -154,6 +154,11 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); +#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ + MLX5_CAP_GEN((mdev), pps_modify) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) + int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); void mlx5e_init(void); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fd98aef4545c..3030121b4746 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7749,8 +7749,10 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x7f]; + u8 reserved_at_0[0x7d]; + u8 mtpps_enh_out_per_adj[0x1]; + u8 mtpps_fs[0x1]; u8 pcie_performance_group[0x1]; }; @@ -8159,7 +8161,8 @@ struct mlx5_ifc_mtpps_reg_bits { u8 reserved_at_78[0x4]; u8 cap_pin_4_mode[0x4]; - u8 reserved_at_80[0x80]; + u8 field_select[0x20]; + u8 reserved_at_a0[0x60]; u8 enable[0x1]; u8 reserved_at_101[0xb]; @@ -8174,8 +8177,9 @@ struct mlx5_ifc_mtpps_reg_bits { u8 out_pulse_duration[0x10]; u8 out_periodic_adjustment[0x10]; + u8 enhanced_out_periodic_adjustment[0x20]; - u8 reserved_at_1a0[0x40]; + u8 reserved_at_1c0[0x20]; }; struct mlx5_ifc_mtppse_reg_bits { -- cgit v1.2.3 From c9f2c1ae123a751d4e4f949144500219354d5ee1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 Jul 2017 14:45:09 +0200 Subject: udp6: fix socket leak on early demux When an early demuxed packet reaches __udp6_lib_lookup_skb(), the sk reference is retrieved and used, but the relevant reference count is leaked and the socket destructor is never called. Beyond leaking the sk memory, if there are pending UDP packets in the receive queue, even the related accounted memory is leaked. 
In the long run, this will cause persistent forward allocation errors and no UDP skbs (both ipv4 and ipv6) will be able to reach the user-space. Fix this by explicitly accessing the early demux reference before the lookup, and properly decreasing the socket reference count after usage. Also drop the skb_steal_sock() in __udp6_lib_lookup_skb(), and the now obsoleted comment about "socket cache". The newly added code is derived from the current ipv4 code for the similar path. v1 -> v2: fixed the __udp6_lib_rcv() return code for resubmission, as suggested by Eric Reported-by: Sam Edwards Reported-by: Marc Haber Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast") Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 27 ++++++++++++++++++--------- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 56ce2d2a612d..cc8036987dcb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -260,6 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, } void udp_v4_early_demux(struct sk_buff *skb); +void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fac7cb9e3b0f..e6276fa3750b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1928,7 +1928,7 @@ drop: /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ -static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; @@ -1937,6 +1937,7 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) dst_release(old); } } +EXPORT_SYMBOL(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4a3e65626e8b..98fe4560e24c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct udp_table *udptable) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk; - sk = skb_steal_sock(skb); - if (unlikely(sk)) - return sk; return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), udptable, skb); @@ -804,6 +800,24 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp6_csum_init(skb, uh, proto)) goto csum_error; + /* Check if the socket is already available, e.g. due to early demux */ + sk = skb_steal_sock(skb); + if (sk) { + struct dst_entry *dst = skb_dst(skb); + int ret; + + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); + + ret = udpv6_queue_rcv_skb(sk, skb); + sock_put(sk); + + /* a return value > 0 means to resubmit the input */ + if (ret > 0) + return ret; + return 0; + } + /* * Multicast receive code */ @@ -812,11 +826,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); /* Unicast */ - - /* - * check socket cache ... must talk to Alan about his plans - * for sock caches... i'll skip this for now. 
- */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { int ret; -- cgit v1.2.3 From 37ef38f3f83891a2f413fb872bae7d0f9bb95b27 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 27 Jul 2017 16:15:52 -0500 Subject: tty: pl011: fix initialization order of QDF2400 E44 The work-around for Qualcomm Technologies QDF2400 Erratum 44 hinges on a global variable defined in the pl011 driver. The ACPI SPCR parsing code determines whether the work-around is needed, and if so, it changes the console name from "pl011" to "qdf2400_e44". The expectation is that the pl011 driver will implement the work-around when it sees the console name. The global variable qdf2400_e44_present is set when that happens. The problem is that work-around needs to be enabled when the pl011 driver probes, not when the console name is queried. However, sbsa_probe() is called before pl011_console_match(). The work-around appeared to work previously because the default console on QDF2400 platforms was always ttyAMA1. The first time sbsa_probe() is called (for ttyAMA0), qdf2400_e44_present is still false. Then pl011_console_match() is called, and it sets qdf2400_e44_present to true. All subsequent calls to sbsa_probe() enable the work-around. The solution is to move the global variable into spcr.c and let the pl011 driver query it during probe time. This works because all QDF2400 platforms require SPCR, so parse_spcr() will always be called. pl011_console_match still checks for the "qdf2400_e44" console name, but it doesn't do anything else special. Fixes: 5a0722b898f8 ("tty: pl011: use "qdf2400_e44" as the earlycon name for QDF2400 E44") Tested-by: Jeffrey Hugo Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/spcr.c | 36 ++++++++++++++++++++++++++++++++++-- drivers/tty/serial/amba-pl011.c | 37 +++++++++++++++++++------------------ include/linux/acpi.h | 1 + 3 files changed, 54 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index 4ac3e06b41d8..98aa8c808a33 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -16,6 +16,16 @@ #include #include +/* + * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as + * occasionally getting stuck as 1. To avoid the potential for a hang, check + * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART + * implementations, so only do so if an affected platform is detected in + * parse_spcr(). + */ +bool qdf2400_e44_present; +EXPORT_SYMBOL(qdf2400_e44_present); + /* * Some Qualcomm Datacenter Technologies SoCs have a defective UART BUSY bit. * Detect them by examining the OEM fields in the SPCR header, similiar to PCI @@ -147,8 +157,30 @@ int __init parse_spcr(bool earlycon) goto done; } - if (qdf2400_erratum_44_present(&table->header)) - uart = "qdf2400_e44"; + /* + * If the E44 erratum is required, then we need to tell the pl011 + * driver to implement the work-around. + * + * The global variable is used by the probe function when it + * creates the UARTs, whether or not they're used as a console. + * + * If the user specifies "traditional" earlycon, the qdf2400_e44 + * console name matches the EARLYCON_DECLARE() statement, and + * SPCR is not used. Parameter "earlycon" is false. + * + * If the user specifies "SPCR" earlycon, then we need to update + * the console name so that it also says "qdf2400_e44". Parameter + * "earlycon" is true. + * + * For consistency, if we change the console name, then we do it + * for everyone, not just earlycon. 
+ */ + if (qdf2400_erratum_44_present(&table->header)) { + qdf2400_e44_present = true; + if (earlycon) + uart = "qdf2400_e44"; + } + if (xgene_8250_erratum_present(table)) iotype = "mmio32"; diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8a857bb34fbb..1888d168a41c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -142,15 +142,7 @@ static struct vendor_data vendor_sbsa = { .fixed_options = true, }; -/* - * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as - * occasionally getting stuck as 1. To avoid the potential for a hang, check - * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART - * implementations, so only do so if an affected platform is detected in - * parse_spcr(). - */ -static bool qdf2400_e44_present = false; - +#ifdef CONFIG_ACPI_SPCR_TABLE static struct vendor_data vendor_qdt_qdf2400_e44 = { .reg_offset = pl011_std_offsets, .fr_busy = UART011_FR_TXFE, @@ -165,6 +157,7 @@ static struct vendor_data vendor_qdt_qdf2400_e44 = { .always_enabled = true, .fixed_options = true, }; +#endif static u16 pl011_st_offsets[REG_ARRAY_SIZE] = { [REG_DR] = UART01x_DR, @@ -2375,12 +2368,14 @@ static int __init pl011_console_match(struct console *co, char *name, int idx, resource_size_t addr; int i; - if (strcmp(name, "qdf2400_e44") == 0) { - pr_info_once("UART: Working around QDF2400 SoC erratum 44"); - qdf2400_e44_present = true; - } else if (strcmp(name, "pl011") != 0) { + /* + * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum + * have a distinct console name, so make sure we check for that. + * The actual implementation of the erratum occurs in the probe + * function. + */ + if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0)) return -ENODEV; - } if (uart_parse_earlycon(options, &iotype, &addr, &options)) return -ENODEV; @@ -2734,11 +2729,17 @@ static int sbsa_uart_probe(struct platform_device *pdev) } uap->port.irq = ret; - uap->reg_offset = vendor_sbsa.reg_offset; - uap->vendor = qdf2400_e44_present ? - &vendor_qdt_qdf2400_e44 : &vendor_sbsa; +#ifdef CONFIG_ACPI_SPCR_TABLE + if (qdf2400_e44_present) { + dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n"); + uap->vendor = &vendor_qdt_qdf2400_e44; + } else +#endif + uap->vendor = &vendor_sbsa; + + uap->reg_offset = uap->vendor->reg_offset; uap->fifosize = 32; - uap->port.iotype = vendor_sbsa.access_32b ? UPIO_MEM32 : UPIO_MEM; + uap->port.iotype = uap->vendor->access_32b ? UPIO_MEM32 : UPIO_MEM; uap->port.ops = &sbsa_uart_pops; uap->fixed_baud = baudrate; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c749eef1daa1..27b4b6615263 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1209,6 +1209,7 @@ static inline bool acpi_has_watchdog(void) { return false; } #endif #ifdef CONFIG_ACPI_SPCR_TABLE +extern bool qdf2400_e44_present; int parse_spcr(bool earlycon); #else static inline int parse_spcr(bool earlycon) { return 0; } -- cgit v1.2.3 From a627b0a7c15ee4d2c87a86d5be5c8167382e8d0d Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Sun, 30 Jul 2017 22:30:11 -0400 Subject: ext4: remove unused metadata accounting variables Two variables in ext4_inode_info, i_reserved_meta_blocks and i_allocated_meta_blocks, are unused. Removing them saves a little memory per in-memory inode and cleans up clutter in several tracepoints. Adjust tracepoint output from ext4_alloc_da_blocks() for consistency and fix a typo and whitespace near these changes. 
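For readers unfamiliar with the tracepoint macros being trimmed here: every TRACE_EVENT() keeps three sections in sync, since a field is declared in TP_STRUCT__entry(), filled in TP_fast_assign(), and printed in TP_printk(), which is why removing one variable touches each tracepoint in three places. A minimal sketch of the pattern, with a hypothetical event name, not code from this patch:

	TRACE_EVENT(ext4_sample_event,
		TP_PROTO(struct inode *inode),
		TP_ARGS(inode),

		TP_STRUCT__entry(
			__field(	dev_t,	dev			)
			__field(	int,	reserved_data_blocks	)
		),

		TP_fast_assign(
			__entry->dev = inode->i_sb->s_dev;
			__entry->reserved_data_blocks =
					EXT4_I(inode)->i_reserved_data_blocks;
		),

		TP_printk("dev %d,%d reserved_data_blocks %d",
			  MAJOR(__entry->dev), MINOR(__entry->dev),
			  __entry->reserved_data_blocks)
	);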
Signed-off-by: Eric Whitney Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/ext4.h | 6 ++---- fs/ext4/super.c | 2 -- include/trace/events/ext4.h | 35 ++++++++--------------------------- 3 files changed, 10 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9ebde0cd632e..dcbcd9d444d1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -961,7 +961,7 @@ struct ext4_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ @@ -1049,10 +1049,8 @@ struct ext4_inode_info { ext4_group_t i_last_alloc_group; /* allocation reservation info for delalloc */ - /* In case of bigalloc, these refer to clusters rather than blocks */ + /* In case of bigalloc, this refer to clusters rather than blocks */ unsigned int i_reserved_data_blocks; - unsigned int i_reserved_meta_blocks; - unsigned int i_allocated_meta_blocks; ext4_lblk_t i_da_metadata_calc_last_lblock; int i_da_metadata_calc_len; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0886fe82e9c4..d61a70e2193a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -978,8 +978,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_es_shk_nr = 0; ei->i_es_shrink_lblk = 0; ei->i_reserved_data_blocks = 0; - ei->i_reserved_meta_blocks = 0; - ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index dfae175ddebc..9c3bc3883d2f 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -937,21 +937,19 @@ TRACE_EVENT(ext4_alloc_da_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( unsigned int, data_blocks ) - __field( unsigned int, meta_blocks ) + __field( unsigned int, data_blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; ), - TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u", + TP_printk("dev %d,%d ino %lu reserved_data_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->data_blocks, __entry->meta_blocks) + __entry->data_blocks) ); TRACE_EVENT(ext4_mballoc_alloc, @@ -1153,8 +1151,6 @@ TRACE_EVENT(ext4_da_update_reserve_space, __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) - __field( int, allocated_meta_blocks ) __field( int, quota_claim ) __field( __u16, mode ) ), @@ -1166,22 +1162,16 @@ TRACE_EVENT(ext4_da_update_reserve_space, __entry->used_blocks = used_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = - EXT4_I(inode)->i_reserved_meta_blocks; - __entry->allocated_meta_blocks = - EXT4_I(inode)->i_allocated_meta_blocks; __entry->quota_claim = quota_claim; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " - "reserved_data_blocks %d reserved_meta_blocks %d " - "allocated_meta_blocks %d quota_claim %d", + 
"reserved_data_blocks %d quota_claim %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, - __entry->reserved_meta_blocks, __entry->allocated_meta_blocks, __entry->quota_claim) ); @@ -1195,7 +1185,6 @@ TRACE_EVENT(ext4_da_reserve_space, __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) __field( __u16, mode ) ), @@ -1204,17 +1193,15 @@ TRACE_EVENT(ext4_da_reserve_space, __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu " - "reserved_data_blocks %d reserved_meta_blocks %d", + "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, - __entry->reserved_data_blocks, - __entry->reserved_meta_blocks) + __entry->reserved_data_blocks) ); TRACE_EVENT(ext4_da_release_space, @@ -1228,8 +1215,6 @@ TRACE_EVENT(ext4_da_release_space, __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) - __field( int, allocated_meta_blocks ) __field( __u16, mode ) ), @@ -1239,19 +1224,15 @@ TRACE_EVENT(ext4_da_release_space, __entry->i_blocks = inode->i_blocks; __entry->freed_blocks = freed_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; - __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d " - "reserved_data_blocks %d reserved_meta_blocks %d " - "allocated_meta_blocks %d", + "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, - __entry->freed_blocks, __entry->reserved_data_blocks, - __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) + __entry->freed_blocks, __entry->reserved_data_blocks) ); DECLARE_EVENT_CLASS(ext4__bitmap_load, -- cgit v1.2.3 From 99f828436788f0155798145853607ca8f0e6de93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jul 2017 22:29:51 +0100 Subject: dma-buf/sync_file: Allow multiple sync_files to wrap a single dma-fence Up until recently sync_file were create to export a single dma-fence to userspace, and so we could canabalise a bit insie dma-fence to mark whether or not we had enable polling for the sync_file itself. However, with the advent of syncobj, we do allow userspace to create multiple sync_files for a single dma-fence. (Similarly, that the sw-sync validation framework also started returning multiple sync-files wrapping a single dma-fence for a syncpt also triggering the problem.) This patch reverts my suggestion in commit e24165537312 ("dma-buf/sync_file: only enable fence signalling on poll()") to use a single bit in the shared dma-fence and restores the sync_file->flags for tracking the bits individually. 
Reported-by: Gustavo Padovan Fixes: f1e8c67123cf ("dma-buf/sw-sync: Use an rbtree to sort fences in the timeline") Fixes: e9083420bbac ("drm: introduce sync objects (v4)") Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Cc: dri-devel@lists.freedesktop.org Cc: # v4.13-rc1+ Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170728212951.7818-1-chris@chris-wilson.co.uk (cherry picked from commit db1fc97ca0c0d3fdeeadf314d99a26188438940a) --- drivers/dma-buf/sync_file.c | 5 +++-- include/linux/sync_file.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index d7e219d2669d..66fb40d0ebdb 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -304,7 +304,7 @@ static int sync_file_release(struct inode *inode, struct file *file) { struct sync_file *sync_file = file->private_data; - if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) + if (test_bit(POLL_ENABLED, &sync_file->flags)) dma_fence_remove_callback(sync_file->fence, &sync_file->cb); dma_fence_put(sync_file->fence); kfree(sync_file); @@ -318,7 +318,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait) poll_wait(file, &sync_file->wq, wait); - if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) { + if (list_empty(&sync_file->cb.node) && + !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) { if (dma_fence_add_callback(sync_file->fence, &sync_file->cb, fence_check_cb_func) < 0) wake_up_all(&sync_file->wq); diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 5726107963b2..0ad87c434ae6 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -43,12 +43,13 @@ struct sync_file { #endif wait_queue_head_t wq; + unsigned long flags; struct dma_fence *fence; struct dma_fence_cb cb; }; -#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS +#define POLL_ENABLED 0 struct sync_file *sync_file_create(struct dma_fence *fence); struct dma_fence *sync_file_get_fence(int fd); -- cgit v1.2.3 From 9c80034921d1ece5c0e3241ba1645bce32387684 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 29 Jul 2017 14:11:43 +0200 Subject: i2c: rephrase explanation of I2C_CLASS_DEPRECATED Hopefully making clear that it is not needed for new drivers. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 00ca5b86a753..d501d3956f13 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -689,7 +689,8 @@ i2c_unlock_adapter(struct i2c_adapter *adapter) #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ -#define I2C_CLASS_DEPRECATED (1<<8) /* Warn users that adapter will stop using classes */ +/* Warn users that the adapter doesn't support classes anymore */ +#define I2C_CLASS_DEPRECATED (1<<8) /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -- cgit v1.2.3 From cb891fa6a1d5f52c5f5c07b6f7f4c6d65ea55fc0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 31 Jul 2017 16:52:36 +0200 Subject: udp6: fix jumbogram reception Since commit 67a51780aebb ("ipv6: udp: leverage scratch area helpers") udp6_recvmsg() read the skb len from the scratch area, to avoid a cache miss. 
But the UDP6 rx path supports RFC 2675 UDPv6 jumbograms, and their length exceeds the 16 bits available in the scratch area. As a side effect, the length returned by recvmsg() is truncated modulo 2^16, i.e. it becomes len % (1<<16). This commit addresses the issue by allocating one more bit in the IP6CB flags field and setting it for incoming jumbograms. That field is still in the first cacheline, so at recvmsg() time we can check it and fall back to accessing skb->len if required, without a measurable overhead. Fixes: 67a51780aebb ("ipv6: udp: leverage scratch area helpers") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/ipv6.h | 6 ++++++ net/ipv6/exthdrs.c | 1 + net/ipv6/udp.c | 11 ++++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e1b442996f81..474d6bbc158c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -128,6 +128,7 @@ struct inet6_skb_parm { #define IP6SKB_FRAGMENTED 16 #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 +#define IP6SKB_JUMBOGRAM 128 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -152,6 +153,11 @@ static inline int inet6_iif(const struct sk_buff *skb) return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } +static inline bool inet6_is_jumbogram(const struct sk_buff *skb) +{ + return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); +} + /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4996d734f1d2..3cec529c6113 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -756,6 +756,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; + IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM; return true; drop: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 98fe4560e24c..578142b7ca3e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -328,6 +328,15 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be EXPORT_SYMBOL_GPL(udp6_lib_lookup); #endif +/* do not use the scratch area len for jumbogram: their length execeeds the + * scratch area space; note that the IP6CB flags is still in the first + * cacheline, so checking for jumbograms is cheap + */ +static int udp6_skb_len(struct sk_buff *skb) +{ + return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb); +} + /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -358,7 +367,7 @@ try_again: if (!skb) return err; - ulen = udp_skb_len(skb); + ulen = udp6_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; -- cgit v1.2.3 From e17e8969f5c59a10083af5e260bdad6026872203 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Jul 2017 16:43:49 +0200 Subject: libceph: fallback for when there isn't a pool-specific choose_arg There is now a fallback to a choose_arg index of -1 if there isn't a pool-specific choose_arg set. If you create a per-pool weight-set, that works for that pool. Otherwise we try the compat/default one. If that doesn't exist either, then we use the normal CRUSH weights.
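The resulting lookup order is a two-level fallback; condensed from the patch below into one sketch (pi->id stands in here for the pool-specific index):

	struct crush_choose_arg_map *arg_map;

	arg_map = lookup_choose_arg_map(&map->crush->choose_args, pi->id);
	if (!arg_map)	/* no per-pool weight-set: try the compat/default one */
		arg_map = lookup_choose_arg_map(&map->crush->choose_args,
						CEPH_DEFAULT_CHOOSE_ARGS);
	/* if arg_map is still NULL, crush_do_rule() uses the normal weights */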
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 2 +- net/ceph/osdmap.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 92e165d417a6..07eed95e10c7 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -193,7 +193,7 @@ struct crush_choose_arg { struct crush_choose_arg_map { #ifdef __KERNEL__ struct rb_node node; - u64 choose_args_index; + s64 choose_args_index; #endif struct crush_choose_arg *args; /*!< replacement for each bucket in the crushmap */ diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 64ae9f89773a..eb57a06373ca 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -2301,10 +2301,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi, } } +/* + * Magic value used for a "default" fallback choose_args, used if the + * crush_choose_arg_map passed to do_crush() does not exist. If this + * also doesn't exist, fall back to canonical weights. + */ +#define CEPH_DEFAULT_CHOOSE_ARGS -1 + static int do_crush(struct ceph_osdmap *map, int ruleno, int x, int *result, int result_max, const __u32 *weight, int weight_max, - u64 choose_args_index) + s64 choose_args_index) { struct crush_choose_arg_map *arg_map; int r; @@ -2313,6 +2320,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, arg_map = lookup_choose_arg_map(&map->crush->choose_args, choose_args_index); + if (!arg_map) + arg_map = lookup_choose_arg_map(&map->crush->choose_args, + CEPH_DEFAULT_CHOOSE_ARGS); mutex_lock(&map->crush_workspace_mutex); r = crush_do_rule(map->crush, ruleno, x, result, result_max, -- cgit v1.2.3 From ae78dd8139ce93a528beb7f3914531b7a7be9e30 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 27 Jul 2017 17:59:14 +0200 Subject: libceph: make RECOVERY_DELETES feature create a new interval This is needed so that the OSDs can regenerate the missing set at the start of a new interval where support for recovery deletes changed. 
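In outline, the change snapshots one new osdmap flag and treats a flip of that flag like any other interval-changing event; the hunks below reduce to roughly this (a sketch showing only the new flag):

	bool recovery_deletes = ceph_osdmap_flag(osdc,
						 CEPH_OSDMAP_RECOVERY_DELETES);

	if (t->recovery_deletes != recovery_deletes)	/* flag flipped */
		force_resend = true;			/* new interval */

	t->recovery_deletes = recovery_deletes;	/* remember for next check */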
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/osd_client.h | 1 + include/linux/ceph/osdmap.h | 2 ++ include/linux/ceph/rados.h | 4 ++++ net/ceph/osd_client.c | 5 +++++ net/ceph/osdmap.c | 5 ++++- 5 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c6d96a5f46fd..adf670ecaf94 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -148,6 +148,7 @@ struct ceph_osd_request_target { int size; int min_size; bool sort_bitwise; + bool recovery_deletes; unsigned int flags; /* CEPH_OSD_FLAG_* */ bool paused; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index a0996cb9faed..af3444a5bfdd 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -272,6 +272,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, u32 new_pg_num, bool old_sort_bitwise, bool new_sort_bitwise, + bool old_recovery_deletes, + bool new_recovery_deletes, const struct ceph_pg *pgid); bool ceph_osds_changed(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 385db08bb8b2..b8281feda9c7 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -158,6 +158,10 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */ #define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */ #define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */ +#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */ +#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */ +#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */ +#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */ /* * The error code to return when an OSD can't handle a write diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b5f016cb9569..dcfbdd74dfd1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, bool legacy_change; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); + bool recovery_deletes = ceph_osdmap_flag(osdc, + CEPH_OSDMAP_RECOVERY_DELETES); enum calc_target_result ct_res; int ret; @@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, pi->pg_num, t->sort_bitwise, sort_bitwise, + t->recovery_deletes, + recovery_deletes, &last_pgid)) force_resend = true; @@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, t->pg_num = pi->pg_num; t->pg_num_mask = pi->pg_num_mask; t->sort_bitwise = sort_bitwise; + t->recovery_deletes = recovery_deletes; t->osd = acting.primary; } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 0bec71fa712e..f358d0bfa76b 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -2082,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, u32 new_pg_num, bool old_sort_bitwise, bool new_sort_bitwise, + bool old_recovery_deletes, + bool new_recovery_deletes, const struct ceph_pg *pgid) { return !osds_equal(old_acting, new_acting) || @@ -2089,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, old_size != new_size || old_min_size != 
new_min_size || ceph_pg_is_split(pgid, old_pg_num, new_pg_num) || - old_sort_bitwise != new_sort_bitwise; + old_sort_bitwise != new_sort_bitwise || + old_recovery_deletes != new_recovery_deletes; } static int calc_pg_rank(int osd, const struct ceph_osds *acting) -- cgit v1.2.3 From fd40559c8657418385e42f797e0b04bfc0add748 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 16:02:47 -0400 Subject: NFSv4: Fix EXCHANGE_ID corrupt verifier issue The verifier is allocated on the stack, but the EXCHANGE_ID RPC call was changed to be asynchronous by commit 8d89bd70bc939. If we interrupt the call to rpc_wait_for_completion_task(), we can therefore end up transmitting random stack contents in lieu of the verifier. Fixes: 8d89bd70bc939 ("NFS setup async exchange_id") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 11 ++++------- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 93c1d7352238..c458a1e28b91 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7460,7 +7460,7 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data) cdata->res.server_scope = NULL; } /* Save the EXCHANGE_ID verifier session trunk tests */ - memcpy(clp->cl_confirm.data, cdata->args.verifier->data, + memcpy(clp->cl_confirm.data, cdata->args.verifier.data, sizeof(clp->cl_confirm.data)); } out: @@ -7497,7 +7497,6 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, u32 sp4_how, struct rpc_xprt *xprt) { - nfs4_verifier verifier; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], .rpc_cred = cred, @@ -7521,8 +7520,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, return -ENOMEM; } - if (!xprt) - nfs4_init_boot_verifier(clp, &verifier); + nfs4_init_boot_verifier(clp, &calldata->args.verifier); status = nfs4_init_uniform_client_string(clp); if (status) @@ -7563,9 +7561,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.rpc_xprt = xprt; task_setup_data.flags = RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC; - calldata->args.verifier = &clp->cl_confirm; - } else { - calldata->args.verifier = &verifier; + memcpy(calldata->args.verifier.data, clp->cl_confirm.data, + sizeof(calldata->args.verifier.data)); } calldata->args.client = clp; #ifdef CONFIG_NFS_V4_1_MIGRATION diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fa3eb361d4f8..37c8af003275 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1785,7 +1785,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, int len = 0; encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); - encode_nfs4_verifier(xdr, args->verifier); + encode_nfs4_verifier(xdr, &args->verifier); encode_string(xdr, strlen(args->client->cl_owner_id), args->client->cl_owner_id); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ca3bcc4ed4e5..62cbcb842f99 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1235,7 +1235,7 @@ struct nfs41_state_protection { struct nfs41_exchange_id_args { struct nfs_client *client; - nfs4_verifier *verifier; + nfs4_verifier verifier; u32 flags; struct nfs41_state_protection state_protect; }; -- cgit v1.2.3 From d9535cb7b7603aeb549c697ecdf92024e4d0a650 Mon Sep 17 00:00:00
2001 From: Grygorii Strashko Date: Fri, 28 Jul 2017 17:30:02 -0500 Subject: ptp: introduce ptp auxiliary worker Many PTP drivers need to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handling delayed timestamps for RX/TX network packets. In most cases, such work is implemented using workqueues. Unfortunately, kernel workqueues might introduce significant delays in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune their execution policy and priority manually. Hence, a kthread_worker can be used instead of workqueues, as it creates a separate named kthread for each worker, and its execution policy and priority can be configured using the chrt tool. This problem was reported for two drivers, TI CPSW CPTS and dp83640, so instead of modifying each of these drivers it was proposed to add a PTP auxiliary worker to the PHC subsystem. The patch adds a PTP auxiliary worker to the PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in the ptp_clock_info structure, which a driver should assign if it needs to perform asynchronous or periodic work. The driver should return the delay until the next auxiliary work scheduling time (>=0) or a negative value if further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay), which allows scheduling the PTP auxiliary work. The name of the kthread_worker thread corresponds to the PTP PHC device name, "ptp%d". Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 42 ++++++++++++++++++++++++++++++++++++++++ drivers/ptp/ptp_private.h | 3 +++ include/linux/ptp_clock_kernel.h | 20 +++++++++++++++++++ 3 files changed, 65 insertions(+) (limited to 'include') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index b77435783ef3..7eacc1c4b3b1 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "ptp_private.h" @@ -184,6 +185,19 @@ static void delete_ptp_clock(struct posix_clock *pc) kfree(ptp); } +static void ptp_aux_kworker(struct kthread_work *work) +{ + struct ptp_clock *ptp = container_of(work, struct ptp_clock, + aux_work.work); + struct ptp_clock_info *info = ptp->info; + long delay; + + delay = info->do_aux_work(info); + + if (delay >= 0) + kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay); +} + /* public interface */ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, @@ -217,6 +231,20 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, mutex_init(&ptp->pincfg_mux); init_waitqueue_head(&ptp->tsev_wq); + if (ptp->info->do_aux_work) { + char *worker_name = kasprintf(GFP_KERNEL, "ptp%d", ptp->index); + + kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); + ptp->kworker = kthread_create_worker(0, worker_name ?
+ worker_name : info->name); + kfree(worker_name); + if (IS_ERR(ptp->kworker)) { + err = PTR_ERR(ptp->kworker); + pr_err("failed to create ptp aux_worker %d\n", err); + goto kworker_err; + } + } + err = ptp_populate_pin_groups(ptp); if (err) goto no_pin_groups; @@ -259,6 +287,9 @@ no_pps: no_device: ptp_cleanup_pin_groups(ptp); no_pin_groups: + if (ptp->kworker) + kthread_destroy_worker(ptp->kworker); +kworker_err: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); ida_simple_remove(&ptp_clocks_map, index); @@ -274,6 +305,11 @@ int ptp_clock_unregister(struct ptp_clock *ptp) ptp->defunct = 1; wake_up_interruptible(&ptp->tsev_wq); + if (ptp->kworker) { + kthread_cancel_delayed_work_sync(&ptp->aux_work); + kthread_destroy_worker(ptp->kworker); + } + /* Release the clock's resources. */ if (ptp->pps_source) pps_unregister_source(ptp->pps_source); @@ -339,6 +375,12 @@ int ptp_find_pin(struct ptp_clock *ptp, } EXPORT_SYMBOL(ptp_find_pin); +int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) +{ + return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay); +} +EXPORT_SYMBOL(ptp_schedule_worker); + /* module operations */ static void __exit ptp_exit(void) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index d95888974d0c..b86f1bfecd6f 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -56,6 +57,8 @@ struct ptp_clock { struct attribute_group pin_attr_group; /* 1st entry is a pointer to the real group, 2nd is NULL terminator */ const struct attribute_group *pin_attr_groups[2]; + struct kthread_worker *kworker; + struct kthread_delayed_work aux_work; }; /* diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index a026bfd089db..51349d124ee5 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -99,6 +99,11 @@ struct system_device_crosststamp; * parameter func: the desired function to use. * parameter chan: the function channel index to use. * + * @do_aux_work: Request driver to perform auxiliary (periodic) operations + * Driver should return delay of the next auxiliary work scheduling + * time (>=0) or negative value in case further scheduling + * is not required. + * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). * @@ -126,6 +131,7 @@ struct ptp_clock_info { struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); + long (*do_aux_work)(struct ptp_clock_info *ptp); }; struct ptp_clock; @@ -211,6 +217,16 @@ extern int ptp_clock_index(struct ptp_clock *ptp); int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan); +/** + * ptp_schedule_worker() - schedule ptp auxiliary work + * + * @ptp: The clock obtained from ptp_clock_register(). + * @delay: number of jiffies to wait before queuing + * See kthread_queue_delayed_work() for more info.
+ */ + +int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -225,6 +241,10 @@ static inline int ptp_clock_index(struct ptp_clock *ptp) static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } +static inline int ptp_schedule_worker(struct ptp_clock *ptp, + unsigned long delay) +{ return -EOPNOTSUPP; } + #endif #endif -- cgit v1.2.3 From cdbc78ba702650b02b9e3e957dbaa725423bdf1e Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 27 Jul 2017 10:42:35 -0600 Subject: drm/msm: Remove __user from __u64 data types __user should be used to identify user pointers and not __u64 variables containing pointers. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 26c54f6d595d..ad4eb2863e70 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -171,7 +171,7 @@ struct drm_msm_gem_submit_cmd { __u32 size; /* in, cmdstream size */ __u32 pad; __u32 nr_relocs; /* in, number of submit_reloc's */ - __u64 __user relocs; /* in, ptr to array of submit_reloc's */ + __u64 relocs; /* in, ptr to array of submit_reloc's */ }; /* Each buffer referenced elsewhere in the cmdstream submit (ie. the @@ -215,8 +215,8 @@ struct drm_msm_gem_submit { __u32 fence; /* out */ __u32 nr_bos; /* in, number of submit_bo's */ __u32 nr_cmds; /* in, number of submit_cmd's */ - __u64 __user bos; /* in, ptr to array of submit_bo's */ - __u64 __user cmds; /* in, ptr to array of submit_cmd's */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 cmds; /* in, ptr to array of submit_cmd's */ __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ }; -- cgit v1.2.3 From a477b9cd37aa81a490dfa3265b7ff4f2c5a92463 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Aug 2017 20:11:02 -0500 Subject: PCI: Add pci_reset_function_locked() The implementation of PCI workarounds may require that the device is reset from its probe function. This implies that the PCI device lock is already held, and makes calling pci_reset_function() impossible (since it will itself try to take that lock). Add pci_reset_function_locked(), which is the equivalent of pci_reset_function(), except that it requires the PCI device lock to be already held by the caller. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier [bhelgaas: folded in fix for conflict with 52354b9d1f46 ("PCI: Remove __pci_dev_reset() and pci_dev_reset()")] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # 4.11: 52354b9d1f46: PCI: Remove __pci_dev_reset() and pci_dev_reset() Cc: stable@vger.kernel.org # 4.11 --- drivers/pci/pci.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af0cc3456dc1..b4b7eab29400 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4259,6 +4259,41 @@ int pci_reset_function(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_reset_function); +/** + * pci_reset_function_locked - quiesce and reset a PCI device function + * @dev: PCI device to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. 
The PCI device must be responsive + * to PCI config space in order to use this function. + * + * This function does not just reset the PCI portion of a device, but + * clears all the state associated with the device. This function differs + * from __pci_reset_function() in that it saves and restores device state + * over the reset. It also differs from pci_reset_function() in that it + * requires the PCI device lock to be held. + * + * Returns 0 if the device function was successfully reset or negative if the + * device doesn't support resetting a single function. + */ +int pci_reset_function_locked(struct pci_dev *dev) +{ + int rc; + + rc = pci_probe_reset_function(dev); + if (rc) + return rc; + + pci_dev_save_and_disable(dev); + + rc = __pci_reset_function_locked(dev); + + pci_dev_restore(dev); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_reset_function_locked); + /** * pci_try_reset_function - quiesce and reset a PCI device function * @dev: PCI device to reset diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..a75c13673852 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1067,6 +1067,7 @@ void pcie_flr(struct pci_dev *dev); int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); +int pci_reset_function_locked(struct pci_dev *dev); int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); int pci_reset_slot(struct pci_slot *slot); -- cgit v1.2.3 From 6d29231000bbe0fb9e4893a9c68151ffdd3b5469 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 31 Jul 2017 10:31:27 +0200 Subject: mtd: nand: Declare tBERS, tR and tPROG as u64 to avoid integer overflow All timings in nand_sdr_timings are expressed in picoseconds but some of them may not fit in a u32.
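A quick worked example shows the overflow (values from the ONFI conversion below; 65535 is the largest le16 timing):

	u16 t_bers_us = 65535;			/* max ONFI value, in us */
	u64 t_bers_ps = 1000000ULL * t_bers_us;	/* 65,535,000,000 ps */
	/* U32_MAX is 4,294,967,295, so the result cannot fit in a u32,
	 * and the constant must be 1000000ULL so the multiplication is
	 * done in 64-bit arithmetic instead of wrapping before the
	 * widening store. */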
Signed-off-by: Boris Brezillon Fixes: 204e7ecd47e2 ("mtd: nand: Add a few more timings to nand_sdr_timings") Reported-by: Alexander Dahl Cc: Reviewed-by: Alexander Dahl Tested-by: Alexander Dahl Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_timings.c | 6 +++--- include/linux/mtd/nand.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c index f06312df3669..7e36d7d13c26 100644 --- a/drivers/mtd/nand/nand_timings.c +++ b/drivers/mtd/nand/nand_timings.c @@ -311,9 +311,9 @@ int onfi_init_data_interface(struct nand_chip *chip, struct nand_sdr_timings *timings = &iface->timings.sdr; /* microseconds -> picoseconds */ - timings->tPROG_max = 1000000UL * le16_to_cpu(params->t_prog); - timings->tBERS_max = 1000000UL * le16_to_cpu(params->t_bers); - timings->tR_max = 1000000UL * le16_to_cpu(params->t_r); + timings->tPROG_max = 1000000ULL * le16_to_cpu(params->t_prog); + timings->tBERS_max = 1000000ULL * le16_to_cpu(params->t_bers); + timings->tR_max = 1000000ULL * le16_to_cpu(params->t_r); /* nanoseconds -> picoseconds */ timings->tCCS_min = 1000UL * le16_to_cpu(params->t_ccs); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 892148c448cc..5216d2eb2289 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -681,10 +681,10 @@ struct nand_buffers { * @tWW_min: WP# transition to WE# low */ struct nand_sdr_timings { - u32 tBERS_max; + u64 tBERS_max; u32 tCCS_min; - u32 tPROG_max; - u32 tR_max; + u64 tPROG_max; + u64 tR_max; u32 tALH_min; u32 tADL_min; u32 tALS_min; -- cgit v1.2.3 From c994f778bb1cca8ebe7a4e528cefec233e93b5cc Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Tue, 1 Aug 2017 16:43:43 +0300 Subject: net/mlx4_en: Fix wrong indication of Wake-on-LAN (WoL) support Currently when WoL is supported but disabled, ethtool reports: "Supports Wake-on: d". Fix the indication of WoL support, so that the indication remains "g" all the time if the NIC supports WoL. Tested: As expected, when NIC supports WoL- ethtool reports: Supports Wake-on: g Wake-on: d when NIC doesn't support WoL- ethtool reports: Supports Wake-on: d Wake-on: d Fixes: 14c07b1358ed ("mlx4: Wake on LAN support") Signed-off-by: Inbar Karmy Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 15 ++++++++------- drivers/net/ethernet/mellanox/mlx4/fw.c | 4 ++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ include/linux/mlx4/device.h | 1 + 5 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c751a1d434ad..3d4e4a5d00d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -223,6 +223,7 @@ static void mlx4_en_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_caps *caps = &priv->mdev->dev->caps; int err = 0; u64 config = 0; u64 mask; @@ -235,24 +236,24 @@ static void mlx4_en_get_wol(struct net_device *netdev, mask = (priv->port == 1) ?
MLX4_DEV_CAP_FLAG_WOL_PORT1 : MLX4_DEV_CAP_FLAG_WOL_PORT2; - if (!(priv->mdev->dev->caps.flags & mask)) { + if (!(caps->flags & mask)) { wol->supported = 0; wol->wolopts = 0; return; } + if (caps->wol_port[priv->port]) + wol->supported = WAKE_MAGIC; + else + wol->supported = 0; + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL information\n"); return; } - if (config & MLX4_EN_WOL_MAGIC) - wol->supported = WAKE_MAGIC; - else - wol->supported = 0; - - if (config & MLX4_EN_WOL_ENABLED) + if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC)) wol->wolopts = WAKE_MAGIC; else wol->wolopts = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 37e84a59e751..c165f16623a9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -764,6 +764,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 +#define QUERY_DEV_CAP_WOL_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 @@ -920,6 +921,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; + MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET); + dev_cap->wol_port[1] = !!(field & 0x20); + dev_cap->wol_port[2] = !!(field & 0x40); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 5343a0599253..b52ba01aa486 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -129,6 +129,7 @@ struct mlx4_dev_cap { u32 dmfs_high_rate_qpn_range; struct mlx4_rate_limit_caps rl_caps; struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; + bool wol_port[MLX4_MAX_PORTS + 1]; }; struct mlx4_func_cap { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a27c9c13a36e..09b9bc17bce9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -424,6 +424,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + dev->caps.wol_port[1] = dev_cap->wol_port[1]; + dev->caps.wol_port[2] = dev_cap->wol_port[2]; /* Save uar page shift */ if (!mlx4_is_slave(dev)) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index aad5d81dfb44..b54517c05e9a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -620,6 +620,7 @@ struct mlx4_caps { u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; u32 vf_caps; + bool wol_port[MLX4_MAX_PORTS + 1]; struct mlx4_rate_limit_caps rl_caps; }; -- cgit v1.2.3 From 3898da947bbaf9e7fd5816e825978d360028bba2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 2 Aug 2017 17:55:54 +0200 Subject: KVM: avoid using rcu_dereference_protected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit During teardown, accesses to memslots and buses are using rcu_dereference_protected with an always-true condition because these accesses are done outside the usual mutexes. This is because the last reference is gone and there cannot be any concurrent modifications, but rcu_dereference_protected is ugly and unobvious. Instead, check the refcount in kvm_get_bus and __kvm_memslots. Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 6 ++++-- virt/kvm/kvm_main.c | 11 ++++------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 890b706d1943..21a6fd6c44af 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -477,7 +477,8 @@ struct kvm { static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock)); + lockdep_is_held(&kvm->slots_lock) || + !refcount_read(&kvm->users_count)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) @@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock)); + lockdep_is_held(&kvm->slots_lock) || + !refcount_read(&kvm->users_count)); } static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f3f74271f1a9..15252d723b54 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -717,10 +717,9 @@ out_err_no_srcu: hardware_disable_all(); out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) - kfree(rcu_access_pointer(kvm->buses[i])); + kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, - rcu_dereference_protected(kvm->memslots[i], 1)); + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); @@ -754,9 +753,8 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - struct kvm_io_bus *bus; + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); - bus = rcu_dereference_protected(kvm->buses[i], 1); if (bus) kvm_io_bus_destroy(bus); kvm->buses[i] = NULL; @@ -770,8 +768,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, - rcu_dereference_protected(kvm->memslots[i], 1)); + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); -- cgit v1.2.3 From 3ea277194daaeaa84ce75180ec7c7a2075027a68 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 2 Aug 2017 13:31:52 -0700 Subject: mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries Nadav Amit identified a theoretical race between page reclaim and mprotect due to TLB flushes being batched outside of the PTL being held. He described the race as follows:

	CPU0				CPU1
	----				----
					user accesses memory using RW PTE
					[PTE now cached in TLB]
	try_to_unmap_one()
	==> ptep_get_and_clear()
	==> set_tlb_ubc_flush_pending()
					mprotect(addr, PROT_READ)
					==> change_pte_range()
					==> [ PTE non-present - no flush ]

					user writes using cached RW PTE
	...
	try_to_unmap_flush()

The same type of race exists for reads when protecting for PROT_NONE and also exists for operations that can leave an old TLB entry behind such as munmap, mremap and madvise. For some operations like mprotect, it's not necessarily a data integrity issue but it is a correctness issue as there is a window where an mprotect that limits access still allows access. For munmap, it's potentially a data integrity issue although the race is massive as an munmap, mmap and return to userspace must all complete between the window when reclaim drops the PTL and flushes the TLB. However, it's theoretically possible so handle this issue by flushing the mm if reclaim is potentially currently batching TLB flushes. Other instances where a flush is required for a present pte should be ok as either the page lock is held preventing parallel reclaim or a page reference count is elevated preventing a parallel free leading to corruption. In the case of page_mkclean there isn't an obvious path that userspace could take advantage of without using the operations that are guarded by this patch. Other users, such as gup, racing with reclaim look just at PTEs. Huge page variants should be ok as they don't race with reclaim. mincore only looks at PTEs. userfault also should be ok as if a parallel reclaim takes place, it will either fault the page back in or read some of the data before the flush occurs, triggering a fault. Note that a variant of this patch was acked by Andy Lutomirski but this was for the x86 parts on top of his PCID work which didn't make the 4.13 merge window as expected. His ack is dropped from this version and there will be a follow-on patch on top of PCID that will include his ack. [akpm@linux-foundation.org: tweak comments] [akpm@linux-foundation.org: fix spello] Link: http://lkml.kernel.org/r/20170717155523.emckq2esjro6hf3z@suse.de Reported-by: Nadav Amit Signed-off-by: Mel Gorman Cc: Andy Lutomirski Cc: [v4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 4 ++++ mm/internal.h | 5 ++++- mm/madvise.c | 1 + mm/memory.c | 1 + mm/mprotect.c | 1 + mm/mremap.c | 1 + mm/rmap.c | 36 ++++++++++++++++++++++++++++++++++++ 7 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ff151814a02d..7f384bb62d8e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -494,6 +494,10 @@ struct mm_struct { * PROT_NONE or PROT_NUMA mapped page.
*/ bool tlb_flush_pending; +#endif +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* See flush_tlb_batched_pending() */ + bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/internal.h b/mm/internal.h index 24d88f084705..4ef49fc55e58 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -498,6 +498,7 @@ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); +void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { @@ -505,7 +506,9 @@ static inline void try_to_unmap_flush(void) static inline void try_to_unmap_flush_dirty(void) { } - +static inline void flush_tlb_batched_pending(struct mm_struct *mm) +{ +} #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; diff --git a/mm/madvise.c b/mm/madvise.c index 9976852f1e1c..47d8d8a25eae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -320,6 +320,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, tlb_remove_check_page_size_change(tlb, PAGE_SIZE); orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; diff --git a/mm/memory.c b/mm/memory.c index 0e517be91a89..f65beaad319b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1197,6 +1197,7 @@ again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; diff --git a/mm/mprotect.c b/mm/mprotect.c index 1a8c9ca83e48..4180ad8cc9c5 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -64,6 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, atomic_read(&vma->vm_mm->mm_users) == 1) target_node = numa_node_id(); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { oldpte = *pte; diff --git a/mm/mremap.c b/mm/mremap.c index cd8a1b199ef9..6e3d857458de 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -152,6 +152,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, diff --git a/mm/rmap.c b/mm/rmap.c index ced14f1af6dc..c8993c63eb25 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -604,6 +604,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) arch_tlbbatch_add_mm(&tlb_ubc->arch, mm); tlb_ubc->flush_required = true; + /* + * Ensure compiler does not re-order the setting of tlb_flush_batched + * before the PTE is cleared. + */ + barrier(); + mm->tlb_flush_batched = true; + /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() @@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) return should_defer; } + +/* + * Reclaim unmaps pages under the PTL but does not flush the TLB prior to + * releasing the PTL if TLB flushes are batched. It's possible for a parallel + * operation such as mprotect or munmap to race between reclaim unmapping + * the page and flushing the page.
If this race occurs, it potentially allows + * access to data via a stale TLB entry. Tracking all mm's that have TLB + * batching in flight would be expensive during reclaim so instead track + * whether TLB batching occurred in the past and if so then do a flush here + * if required. This will cost one additional flush per reclaim cycle paid + * by the first operation at risk such as mprotect and munmap. + * + * This must be called under the PTL so that an access to tlb_flush_batched + * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise + * via the PTL. + */ +void flush_tlb_batched_pending(struct mm_struct *mm) +{ + if (mm->tlb_flush_batched) { + flush_tlb_mm(mm); + + /* + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. + */ + barrier(); + mm->tlb_flush_batched = false; + } +} #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { -- cgit v1.2.3 From d16977f3a6cfbb5e9ce477f423a1bf343347c1ed Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 2 Aug 2017 13:32:01 -0700 Subject: kthread: fix documentation build warning The kerneldoc comment for kthread_create() had an incorrect argument name, leading to a warning in the docs build. Correct it, and make one more small step toward a warning-free build. Link: http://lkml.kernel.org/r/20170724135916.7f486c6f@lwn.net Signed-off-by: Jonathan Corbet Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 4fec8b775895..82e197eeac91 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -15,7 +15,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), * @threadfn: the function to run in the thread * @data: data pointer for @threadfn() * @namefmt: printf-style format string for the thread name - * @...: arguments for @namefmt. + * @arg...: arguments for @namefmt. * * This macro will create a kthread on the current node, leaving it in * the stopped state. This is just a helper for kthread_create_on_node(); -- cgit v1.2.3 From 89affbf5d9ebb15c6460596822e8857ea2f9e735 Mon Sep 17 00:00:00 2001 From: Dima Zavin Date: Wed, 2 Aug 2017 13:32:18 -0700 Subject: cpuset: fix a deadlock due to incomplete patching of cpusets_enabled() In codepaths that use the begin/retry interface for reading mems_allowed_seq with irqs disabled, there exists a race condition that stalls the patch process after only modifying a subset of the static_branch call sites. This problem manifested itself as a deadlock in the slub allocator, inside get_any_partial. The loop reads mems_allowed_seq value (via read_mems_allowed_begin), performs the defrag operation, and then verifies the consistency of mems_allowed via the read_mems_allowed_retry and the cookie returned by xxx_begin. The issue here is that both begin and retry first check if cpusets are enabled via cpusets_enabled() static branch. This branch can be rewritten dynamically (via cpuset_inc) if a new cpuset is created. The x86 jump label code fully synchronizes across all CPUs for every entry it rewrites. If it rewrites only one of the callsites (specifically the one in read_mems_allowed_retry) and then waits for the smp_call_function(do_sync_core) to complete while a CPU is inside the begin/retry section with IRQs off and the mems_allowed value is changed, we can hang.
This is because begin() will always return 0 (since it wasn't patched yet) while retry() will test the 0 against the actual value of the seq counter. The fix is to use two different static keys: one for begin (pre_enable_key) and one for retry (enable_key). In cpuset_inc(), we first bump the pre_enable key to ensure that read_mems_allowed_begin() always returns a valid seqcount if we are enabling cpusets. Similarly, when disabling cpusets via cpuset_dec(), we first ensure that callers of read_mems_allowed_retry() will start ignoring the seqcount value before we let read_mems_allowed_begin() return 0. The relevant stack traces of the two stuck threads: CPU: 1 PID: 1415 Comm: mkdir Tainted: G L 4.9.36-00104-g540c51286237 #4 Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017 task: ffff8817f9c28000 task.stack: ffffc9000ffa4000 RIP: smp_call_function_many+0x1f9/0x260 Call Trace: smp_call_function+0x3b/0x70 on_each_cpu+0x2f/0x90 text_poke_bp+0x87/0xd0 arch_jump_label_transform+0x93/0x100 __jump_label_update+0x77/0x90 jump_label_update+0xaa/0xc0 static_key_slow_inc+0x9e/0xb0 cpuset_css_online+0x70/0x2e0 online_css+0x2c/0xa0 cgroup_apply_control_enable+0x27f/0x3d0 cgroup_mkdir+0x2b7/0x420 kernfs_iop_mkdir+0x5a/0x80 vfs_mkdir+0xf6/0x1a0 SyS_mkdir+0xb7/0xe0 entry_SYSCALL_64_fastpath+0x18/0xad ... CPU: 2 PID: 1 Comm: init Tainted: G L 4.9.36-00104-g540c51286237 #4 Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017 task: ffff8818087c0000 task.stack: ffffc90000030000 RIP: int3+0x39/0x70 Call Trace: <#DB> ? ___slab_alloc+0x28b/0x5a0 ? copy_process.part.40+0xf7/0x1de0 __slab_alloc.isra.80+0x54/0x90 copy_process.part.40+0xf7/0x1de0 copy_process.part.40+0xf7/0x1de0 kmem_cache_alloc_node+0x8a/0x280 copy_process.part.40+0xf7/0x1de0 _do_fork+0xe7/0x6c0 _raw_spin_unlock_irq+0x2d/0x60 trace_hardirqs_on_caller+0x136/0x1d0 entry_SYSCALL_64_fastpath+0x5/0xad do_syscall_64+0x27/0x350 SyS_clone+0x19/0x20 do_syscall_64+0x60/0x350 entry_SYSCALL64_slow_path+0x25/0x25 Link: http://lkml.kernel.org/r/20170731040113.14197-1-dmitriyz@waymo.com Fixes: 46e700abc44c ("mm, page_alloc: remove unnecessary taking of a seqlock when cpusets are disabled") Signed-off-by: Dima Zavin Reported-by: Cliff Spradlin Acked-by: Vlastimil Babka Cc: Peter Zijlstra Cc: Christopher Lameter Cc: Li Zefan Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 19 +++++++++++++++++-- kernel/cgroup/cpuset.c | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 119a3f9604b0..898cfe2eeb42 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -18,6 +18,19 @@ #ifdef CONFIG_CPUSETS +/* + * Static branch rewrites can happen in an arbitrary order for a given + * key. In code paths where we need to loop with read_mems_allowed_begin() and + * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need + * to ensure that begin() always gets rewritten before retry() in the + * disabled -> enabled transition. If not, then if local irqs are disabled + * around the loop, we can deadlock since retry() would always be + * comparing the latest value of the mems_allowed seqcount against 0 as + * begin() still would see cpusets_enabled() as false.
The enabled -> disabled + * transition should happen in reverse order for the same reasons (want to stop + * looking at real value of mems_allowed.sequence in retry() first). + */ +extern struct static_key_false cpusets_pre_enable_key; extern struct static_key_false cpusets_enabled_key; static inline bool cpusets_enabled(void) { @@ -32,12 +45,14 @@ static inline int nr_cpusets(void) static inline void cpuset_inc(void) { + static_branch_inc(&cpusets_pre_enable_key); static_branch_inc(&cpusets_enabled_key); } static inline void cpuset_dec(void) { static_branch_dec(&cpusets_enabled_key); + static_branch_dec(&cpusets_pre_enable_key); } extern int cpuset_init(void); @@ -115,7 +130,7 @@ extern void cpuset_print_current_mems_allowed(void); */ static inline unsigned int read_mems_allowed_begin(void) { - if (!cpusets_enabled()) + if (!static_branch_unlikely(&cpusets_pre_enable_key)) return 0; return read_seqcount_begin(&current->mems_allowed_seq); @@ -129,7 +144,7 @@ static inline unsigned int read_mems_allowed_begin(void) */ static inline bool read_mems_allowed_retry(unsigned int seq) { - if (!cpusets_enabled()) + if (!static_branch_unlikely(&cpusets_enabled_key)) return false; return read_seqcount_retry(&current->mems_allowed_seq, seq); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index ca8376e5008c..8d5151688504 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -63,6 +63,7 @@ #include #include +DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); /* See "Frequency meter" comments, below. */ -- cgit v1.2.3 From 1ee1c3f5b5cff959e0ac95a125bd15eaf88cc638 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 2 Aug 2017 13:32:27 -0700 Subject: mm: allow page_cache_get_speculative in interrupt context The kernel panics when calling the IRQ-safe __get_user_pages_fast in an NMI handler. The bug was introduced by commit 2947ba054a4d ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation"). The original x86 __get_user_page_fast used plain get_page() or page_ref_add(). However, the generic __get_user_page_fast uses page_cache_get_speculative(), which has VM_BUG_ON(in_interrupt()). There is no reason to prevent page_cache_get_speculative from being used in interrupt context. According to the author, putting a BUG_ON there is just because the code is not verifying correctness of interrupt races. I did some tests in interrupt context; no issues were found. So remove VM_BUG_ON(in_interrupt()) from page_cache_get_speculative(). Link: http://lkml.kernel.org/r/1501609146-59730-1-git-send-email-kan.liang@intel.com Fixes: 2947ba054a4d ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation") Signed-off-by: Kan Liang Cc: Jens Axboe Cc: Al Viro Cc: Kirill A.
Shutemov Cc: Ying Huang Cc: Nicholas Piggin Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index baa9344dcd10..79b36f57c3ba 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -163,8 +163,6 @@ void release_pages(struct page **pages, int nr, bool cold); */ static inline int page_cache_get_speculative(struct page *page) { - VM_BUG_ON(in_interrupt()); - #ifdef CONFIG_TINY_RCU # ifdef CONFIG_PREEMPT_COUNT VM_BUG_ON(!in_atomic() && !irqs_disabled()); -- cgit v1.2.3 From e1a10ef7fa876f8510aaec36ea5c0cf34baba410 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 3 Aug 2017 09:19:52 -0400 Subject: tcp: introduce tcp_rto_delta_us() helper for xmit timer fix Pure refactor. This helper will be required in the xmit timer fix later in the patch series. (Because the TLP logic will want to make this calculation.) Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++++++++++ net/ipv4/tcp_input.c | 5 +---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..ada65e767b28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1916,6 +1916,16 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +/* At how many usecs into the future should the RTO fire? */ +static inline s64 tcp_rto_delta_us(const struct sock *sk) +{ + const struct sk_buff *skb = tcp_write_queue_head(sk); + u32 rto = inet_csk(sk)->icsk_rto; + u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; +} + /* * Save and compile IPv4 options, return a pointer to it */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dad026fcfd09..b9ba8d55d8b8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3004,10 +3004,7 @@ void tcp_rearm_rto(struct sock *sk) /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { - struct sk_buff *skb = tcp_write_queue_head(sk); - u64 rto_time_stamp = skb->skb_mstamp + - jiffies_to_usecs(rto); - s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + s64 delta_us = tcp_rto_delta_us(sk); /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ -- cgit v1.2.3 From 931b3c1a832621b4bdcbaf783096fc267eb36fbe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Aug 2017 09:41:37 +0300 Subject: RDMA/mlx5: Fix existence check for extended address vector The extended address vector is the highest bit in a be32 variable, but it was compared with the lowest. This patch fixes the endianness of that check and removes the already-declared define.
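As a generic illustration of this class of endianness bug, consider testing a flag that lives in big-endian storage (a userspace sketch with hypothetical constants, not the mlx5 code; htonl() stands in for cpu_to_be32()): masking the raw big-endian bytes with an unconverted CPU-order constant examines the wrong bit on little-endian hosts, so the constant must be converted first, which is the pattern the fix uses.

/* Hypothetical sketch: FLAG_EXT is not an mlx5 constant. */
#include <arpa/inet.h>	/* htonl() plays the role of cpu_to_be32() */
#include <stdint.h>
#include <stdio.h>

#define FLAG_EXT 0x80000000u	/* highest bit, CPU byte order */

int main(void)
{
	uint32_t be_field = htonl(FLAG_EXT);	/* flag set, stored big-endian */

	/* Buggy on little-endian: masks big-endian bytes with a
	 * CPU-order constant, so the wrong bit position is tested. */
	printf("unconverted test: %d\n", !!(be_field & FLAG_EXT));

	/* Correct: convert the constant to big-endian before masking. */
	printf("converted test: %d\n", !!(be_field & htonl(FLAG_EXT)));
	return 0;
}

On a little-endian host the first test prints 0 even though the flag is set, while the converted test prints 1; on big-endian hosts the two happen to agree, which is why this class of bug can survive testing there.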
Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 2 +- include/linux/mlx5/qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 6f41270d80c0..f378dc0e7eaf 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 -#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, -- cgit v1.2.3 From 978d13d60c34818a41fc35962602bdfa5c03f214 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 4 Aug 2017 23:59:31 -0700 Subject: iscsi-target: Fix iscsi_np reset hung task during parallel delete This patch fixes a bug associated with iscsit_reset_np_thread() that can occur during parallel configfs rmdir of a single iscsi_np used across multiple iscsi-target instances, that would result in hung task(s) similar to below where configfs rmdir process context was blocked indefinitely waiting for iscsi_np->np_restart_comp to finish: [ 6726.112076] INFO: task dcp_proxy_node_:15550 blocked for more than 120 seconds. [ 6726.119440] Tainted: G W O 4.1.26-3321 #2 [ 6726.125045] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 6726.132927] dcp_proxy_node_ D ffff8803f202bc88 0 15550 1 0x00000000 [ 6726.140058] ffff8803f202bc88 ffff88085c64d960 ffff88083b3b1ad0 ffff88087fffeb08 [ 6726.147593] ffff8803f202c000 7fffffffffffffff ffff88083f459c28 ffff88083b3b1ad0 [ 6726.155132] ffff88035373c100 ffff8803f202bca8 ffffffff8168ced2 ffff8803f202bcb8 [ 6726.162667] Call Trace: [ 6726.165150] [] schedule+0x32/0x80 [ 6726.170156] [] schedule_timeout+0x214/0x290 [ 6726.176030] [] ? __send_signal+0x52/0x4a0 [ 6726.181728] [] wait_for_completion+0x96/0x100 [ 6726.187774] [] ? wake_up_state+0x10/0x10 [ 6726.193395] [] iscsit_reset_np_thread+0x62/0xe0 [iscsi_target_mod] [ 6726.201278] [] iscsit_tpg_disable_portal_group+0x96/0x190 [iscsi_target_mod] [ 6726.210033] [] lio_target_tpg_store_enable+0x4f/0xc0 [iscsi_target_mod] [ 6726.218351] [] configfs_write_file+0xaa/0x110 [ 6726.224392] [] vfs_write+0xa4/0x1b0 [ 6726.229576] [] SyS_write+0x41/0xb0 [ 6726.234659] [] system_call_fastpath+0x12/0x71 It would happen because each iscsit_reset_np_thread() sets state to ISCSI_NP_THREAD_RESET, sends SIGINT, and then blocks waiting for completion on iscsi_np->np_restart_comp. However, if iscsi_np was active processing a login request and more than a single iscsit_reset_np_thread() caller to the same iscsi_np was blocked on iscsi_np->np_restart_comp, iscsi_np kthread process context in __iscsi_target_login_thread() would flush pending signals and only perform a single completion of np->np_restart_comp before going back to sleep within transport-specific iscsit_transport->iscsi_accept_np code.
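To see why a single completion cannot release multiple waiters, the sketch below (a minimal userspace analogue written under stated assumptions, not the iscsi-target code: pthreads and a POSIX semaphore stand in for the np kthread and np->np_restart_comp) counts outstanding reset requests and answers each one, which is the shape of the fix described next.

/* Sketch only: reset_count and restart_comp are analogues of
 * np->np_reset_count and np->np_restart_comp, nothing more. */
#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NREQ 3				/* parallel reset requesters */

static atomic_int reset_count;		/* outstanding reset requests */
static atomic_int stop;
static sem_t restart_comp;

static void *np_thread(void *arg)	/* the lone service thread */
{
	(void)arg;
	while (!atomic_load(&stop)) {
		if (atomic_load(&reset_count) > 0) {
			atomic_fetch_sub(&reset_count, 1);
			sem_post(&restart_comp); /* one answer per request */
		} else {
			usleep(1000);	/* stand-in for accepting logins */
		}
	}
	return NULL;
}

static void *resetter(void *arg)
{
	atomic_fetch_add(&reset_count, 1);	/* request a reset... */
	sem_wait(&restart_comp);		/* ...wait to be answered */
	printf("resetter %ld released\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t w, r[NREQ];
	long i;

	sem_init(&restart_comp, 0, 0);
	pthread_create(&w, NULL, np_thread, NULL);
	for (i = 0; i < NREQ; i++)
		pthread_create(&r[i], NULL, resetter, (void *)i);
	for (i = 0; i < NREQ; i++)
		pthread_join(r[i], NULL);
	atomic_store(&stop, 1);
	pthread_join(w, NULL);
	sem_destroy(&restart_comp);
	return 0;
}

If the service thread instead posted at most one completion per reset cycle, regardless of how many requests were queued, NREQ - 1 of the joins above would hang, mirroring the hung configfs rmdir tasks in the trace.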
To address this bug, add an iscsi_np->np_reset_count and update __iscsi_target_login_thread() to keep completing np->np_restart_comp until ->np_reset_count has reached zero. Reported-by: Gary Guo Tested-by: Gary Guo Cc: Mike Christie Cc: Hannes Reinecke Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_login.c | 7 +++++-- include/target/iscsi/iscsi_target_core.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 12803de99400..5001261f5d69 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -418,6 +418,7 @@ int iscsit_reset_np_thread( return 0; } np->np_thread_state = ISCSI_NP_THREAD_RESET; + atomic_inc(&np->np_reset_count); if (np->np_thread) { spin_unlock_bh(&np->np_thread_lock); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index e9bdc8b86e7d..dc13afbd4c88 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1243,9 +1243,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) flush_signals(current); spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); + return 1; } else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) { spin_unlock_bh(&np->np_thread_lock); goto exit; @@ -1278,7 +1280,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto exit; } else if (rc < 0) { spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); iscsit_put_transport(conn->conn_transport); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 0ca1fb08805b..fb87d32f5e51 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -786,6 +786,7 @@ struct iscsi_np { int np_sock_type; enum np_thread_state_table np_thread_state; bool enabled; + atomic_t np_reset_count; enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; -- cgit v1.2.3 From 0cca6c8920ade95e2741b2062cf1397dc546fb0f Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Sun, 6 Aug 2017 16:00:05 +0200 Subject: pinctrl: generic: update references to Documentation/pinctrl.txt Update deprecated references to Documentation/pinctrl.txt since it has been moved to Documentation/driver-api/pinctl.rst. Signed-off-by: Ludovic Desroches Fixes: 5a9b73832e9e ("pinctrl.txt: move it to the driver-api book") Signed-off-by: Linus Walleij --- Documentation/gpio/gpio-legacy.txt | 2 +- MAINTAINERS | 2 +- include/linux/device.h | 2 +- include/linux/pinctrl/pinconf-generic.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/gpio/gpio-legacy.txt b/Documentation/gpio/gpio-legacy.txt index b34fd94f7089..5eacc147ea87 100644 --- a/Documentation/gpio/gpio-legacy.txt +++ b/Documentation/gpio/gpio-legacy.txt @@ -459,7 +459,7 @@ pin controller?
This is done by registering "ranges" of pins, which are essentially cross-reference tables. These are described in -Documentation/pinctrl.txt +Documentation/driver-api/pinctl.rst While the pin allocation is totally managed by the pinctrl subsystem, gpio (under gpiolib) is still maintained by gpio drivers. It may happen diff --git a/MAINTAINERS b/MAINTAINERS index f66488dfdbc9..2c85558aec19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10375,7 +10375,7 @@ L: linux-gpio@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git S: Maintained F: Documentation/devicetree/bindings/pinctrl/ -F: Documentation/pinctrl.txt +F: Documentation/driver-api/pinctl.rst F: drivers/pinctrl/ F: include/linux/pinctrl/ diff --git a/include/linux/device.h b/include/linux/device.h index 723cd54b94da..beabdbc08420 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -843,7 +843,7 @@ struct dev_links_info { * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. * @pins: For device pin management. - * See Documentation/pinctrl.txt for details. + * See Documentation/driver-api/pinctl.rst for details. * @msi_list: Hosts MSI descriptors * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 231d3075815a..e91d1b6a260d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -81,8 +81,8 @@ * it. * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a * value on the line. Use argument 1 to indicate high level, argument 0 to - * indicate low level. (Please see Documentation/pinctrl.txt, section - * "GPIO mode pitfalls" for a discussion around this parameter.) + * indicate low level. (Please see Documentation/driver-api/pinctl.rst, + * section "GPIO mode pitfalls" for a discussion around this parameter.) * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells * the driver which alternative power source to use. -- cgit v1.2.3 From 3be8eddd9d58a925b461b582fa5aa422a9c145ee Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 25 Jul 2017 09:27:33 -0700 Subject: drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk order. This is useful to allow GL to provide defined results for overlapping glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited window movement without first blitting to a temporary. x11perf -copywinwin100 goes from 1850/sec to 4850/sec. v2: Default to the same behavior as before when the flags aren't passed. 
(suggested by Boris) Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20170725162733.28007-2-eric@anholt.net Reviewed-by: Boris Brezillon --- drivers/gpu/drm/vc4/vc4_drv.c | 1 + drivers/gpu/drm/vc4/vc4_gem.c | 5 ++++- drivers/gpu/drm/vc4/vc4_render_cl.c | 21 ++++++++++++++++----- include/uapi/drm/vc4_drm.h | 11 +++++++++++ 4 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index e8f0e1790d5e..1c96edcb302b 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -99,6 +99,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_BRANCHES: case DRM_VC4_PARAM_SUPPORTS_ETC1: case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: + case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: args->value = true; break; default: diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 22a55cff7e64..d0c6bfb68c4e 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -1007,7 +1007,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct ww_acquire_ctx acquire_ctx; int ret = 0; - if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { + if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | + VC4_SUBMIT_CL_FIXED_RCL_ORDER | + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); return -EINVAL; } diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index e0539731130b..273984f71ae2 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -261,8 +261,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, uint8_t max_y_tile = args->max_y_tile; uint8_t xtiles = max_x_tile - min_x_tile + 1; uint8_t ytiles = max_y_tile - min_y_tile + 1; - uint8_t x, y; + uint8_t xi, yi; uint32_t size, loop_body_size; + bool positive_x = true; + bool positive_y = true; + + if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) { + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)) + positive_x = false; + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) + positive_y = false; + } size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; @@ -354,10 +363,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, rcl_u16(setup, args->height); rcl_u16(setup, args->color_write.bits); - for (y = min_y_tile; y <= max_y_tile; y++) { - for (x = min_x_tile; x <= max_x_tile; x++) { - bool first = (x == min_x_tile && y == min_y_tile); - bool last = (x == max_x_tile && y == max_y_tile); + for (yi = 0; yi < ytiles; yi++) { + int y = positive_y ? min_y_tile + yi : max_y_tile - yi; + for (xi = 0; xi < xtiles; xi++) { + int x = positive_x ? min_x_tile + xi : max_x_tile - xi; + bool first = (xi == 0 && yi == 0); + bool last = (xi == xtiles - 1 && yi == ytiles - 1); emit_tile(exec, setup, x, y, first, last); } diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 551628e571f9..afae87004963 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -155,6 +155,16 @@ struct drm_vc4_submit_cl { __u32 pad:24; #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) +/* By default, the kernel gets to choose the order that the tiles are + * rendered in. 
If this is set, then the tiles will be rendered in a + * raster order, with the right-to-left vs left-to-right and + * top-to-bottom vs bottom-to-top dictated by + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping + * blits to be implemented using the 3D engine. + */ +#define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) __u32 flags; /* Returned value of the seqno of this render job (for the @@ -294,6 +304,7 @@ struct drm_vc4_get_hang_state { #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 +#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 struct drm_vc4_get_param { __u32 param; -- cgit v1.2.3 From 8941a7cbcc5f06fb9e1d105911c928766d742861 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 7 Aug 2017 12:39:37 -0500 Subject: drm/tinydrm: Generalize tinydrm_xrgb8888_to_gray8() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds parameters for vaddr and clip to tinydrm_xrgb8888_to_gray8() to make it more generic. dma_buf_{begin,end}_cpu_access() are moved out to the repaper driver. The return type is changed to void to simplify error handling by callers. Signed-off-by: David Lechner Signed-off-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/1502127581-10517-2-git-send-email-david@lechnology.com --- drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c | 42 +++++++++----------------- drivers/gpu/drm/tinydrm/repaper.c | 28 +++++++++++++++-- include/drm/tinydrm/tinydrm-helpers.h | 3 +- 3 files changed, 41 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c index 75808bb84c9a..bd6cce093a85 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c @@ -185,7 +185,9 @@ EXPORT_SYMBOL(tinydrm_xrgb8888_to_rgb565); /** * tinydrm_xrgb8888_to_gray8 - Convert XRGB8888 to grayscale * @dst: 8-bit grayscale destination buffer + * @vaddr: XRGB8888 source buffer * @fb: DRM framebuffer + * @clip: Clip rectangle area to copy * * Drm doesn't have native monochrome or grayscale support. * Such drivers can announce the commonly supported XR24 format to userspace @@ -195,41 +197,31 @@ EXPORT_SYMBOL(tinydrm_xrgb8888_to_rgb565); * where 1 means foreground color and 0 background color. * * ITU BT.601 is used for the RGB -> luma (brightness) conversion. - * - * Returns: - * Zero on success, negative error code on failure. */ -int tinydrm_xrgb8888_to_gray8(u8 *dst, struct drm_framebuffer *fb) +void tinydrm_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb, + struct drm_clip_rect *clip) { - struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0); - struct dma_buf_attachment *import_attach = cma_obj->base.import_attach; - unsigned int x, y, pitch = fb->pitches[0]; - int ret = 0; + unsigned int len = (clip->x2 - clip->x1) * sizeof(u32); + unsigned int x, y; void *buf; u32 *src; if (WARN_ON(fb->format->format != DRM_FORMAT_XRGB8888)) - return -EINVAL; + return; /* * The cma memory is write-combined so reads are uncached. * Speed up by fetching one line at a time.
*/ - buf = kmalloc(pitch, GFP_KERNEL); + buf = kmalloc(len, GFP_KERNEL); if (!buf) - return -ENOMEM; - - if (import_attach) { - ret = dma_buf_begin_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - goto err_free; - } + return; - for (y = 0; y < fb->height; y++) { - src = cma_obj->vaddr + (y * pitch); - memcpy(buf, src, pitch); + for (y = clip->y1; y < clip->y2; y++) { + src = vaddr + (y * fb->pitches[0]); + src += clip->x1; + memcpy(buf, src, len); src = buf; - for (x = 0; x < fb->width; x++) { + for (x = clip->x1; x < clip->x2; x++) { u8 r = (*src & 0x00ff0000) >> 16; u8 g = (*src & 0x0000ff00) >> 8; u8 b = *src & 0x000000ff; @@ -240,13 +232,7 @@ int tinydrm_xrgb8888_to_gray8(u8 *dst, struct drm_framebuffer *fb) } } - if (import_attach) - ret = dma_buf_end_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); -err_free: kfree(buf); - - return ret; } EXPORT_SYMBOL(tinydrm_xrgb8888_to_gray8); diff --git a/drivers/gpu/drm/tinydrm/repaper.c b/drivers/gpu/drm/tinydrm/repaper.c index 3343d3f15a90..30dc97b3ff21 100644 --- a/drivers/gpu/drm/tinydrm/repaper.c +++ b/drivers/gpu/drm/tinydrm/repaper.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -525,11 +526,20 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, struct drm_clip_rect *clips, unsigned int num_clips) { + struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0); + struct dma_buf_attachment *import_attach = cma_obj->base.import_attach; struct tinydrm_device *tdev = fb->dev->dev_private; struct repaper_epd *epd = epd_from_tinydrm(tdev); + struct drm_clip_rect clip; u8 *buf = NULL; int ret = 0; + /* repaper can't do partial updates */ + clip.x1 = 0; + clip.x2 = fb->width; + clip.y1 = 0; + clip.y2 = fb->height; + mutex_lock(&tdev->dirty_lock); if (!epd->enabled) @@ -550,9 +560,21 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, goto out_unlock; } - ret = tinydrm_xrgb8888_to_gray8(buf, fb); - if (ret) - goto out_unlock; + if (import_attach) { + ret = dma_buf_begin_cpu_access(import_attach->dmabuf, + DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + } + + tinydrm_xrgb8888_to_gray8(buf, cma_obj->vaddr, fb, &clip); + + if (import_attach) { + ret = dma_buf_end_cpu_access(import_attach->dmabuf, + DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + } repaper_gray8_to_mono_reversed(buf, fb->width, fb->height); diff --git a/include/drm/tinydrm/tinydrm-helpers.h b/include/drm/tinydrm/tinydrm-helpers.h index a6c387f91eff..d554ded60ee9 100644 --- a/include/drm/tinydrm/tinydrm-helpers.h +++ b/include/drm/tinydrm/tinydrm-helpers.h @@ -43,7 +43,8 @@ void tinydrm_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb, void tinydrm_xrgb8888_to_rgb565(u16 *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_clip_rect *clip, bool swap); -int tinydrm_xrgb8888_to_gray8(u8 *dst, struct drm_framebuffer *fb); +void tinydrm_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb, + struct drm_clip_rect *clip); struct backlight_device *tinydrm_of_find_backlight(struct device *dev); int tinydrm_enable_backlight(struct backlight_device *backlight); -- cgit v1.2.3 From 0fb228d30b8d72bfee51f57e638d412324d44a11 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 1 Aug 2017 15:12:39 -0700 Subject: nvmet_fc: add defer_req callback for deferment of cmd buffer return At queue creation, the transport allocates a local job struct (struct nvmet_fc_fcp_iod) for each possible element of the queue. 
When a new CMD is received from the wire, a job struct is allocated from the queue and then used for the duration of the command. The job struct contains buffer space for the wire command iu. Thus, upon allocation of the job struct, the cmd iu buffer is copied to the job struct and the LLDD may immediately free/reuse the CMD IU buffer passed in the call. However, in some circumstances, due to the packetized nature of FC and the api of the FC LLDD, the LLDD may issue a hw command to send the wire response but not get the hw completion for that command (and upcall the nvmet_fc layer) before a new command is asynchronously received on the wire. In other words, it's possible for the initiator to get the response from the wire, thus believe a command slot is free, and send a new command iu. The new command iu may be received by the LLDD and passed to the transport before the LLDD had serviced the hw completion and made the teardown calls for the original job struct. As such, there is no job struct available for the new io. E.g. it appears as if the host sent more queue elements than the queue size. It didn't, based on its understanding. Rather than treat this as a hard connection failure, queue the new request until a job struct does free up. As the buffer isn't copied (there's no job struct yet), a special return value must be returned to the LLDD to signify to hold off on recycling the cmd iu buffer. And later, when a job struct is allocated and the buffer copied, a new LLDD callback is introduced to notify the LLDD and allow it to recycle its command iu buffer. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 212 +++++++++++++++++++++++++++++++++++------ include/linux/nvme-fc-driver.h | 7 ++ 2 files changed, 191 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 31ca55dfcb1d..1b7f2520a20d 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -114,6 +114,11 @@ struct nvmet_fc_tgtport { struct kref ref; }; +struct nvmet_fc_defer_fcp_req { + struct list_head req_list; + struct nvmefc_tgt_fcp_req *fcp_req; +}; + struct nvmet_fc_tgt_queue { bool ninetypercent; u16 qid; @@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue { struct nvmet_fc_tgt_assoc *assoc; struct nvmet_fc_fcp_iod *fod; /* array of fcp_iods */ struct list_head fod_list; + struct list_head pending_cmd_list; + struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; } __aligned(sizeof(unsigned long long)); @@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod); /* *********************** FC-NVME DMA Handling **************************** */ @@ -463,9 +472,9 @@ static struct nvmet_fc_fcp_iod * nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) { static struct nvmet_fc_fcp_iod *fod; - unsigned long flags; - spin_lock_irqsave(&queue->qlock, flags); + lockdep_assert_held(&queue->qlock); + fod = list_first_entry_or_null(&queue->fod_list, struct nvmet_fc_fcp_iod, fcp_list); if (fod) { @@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) * will "inherit" that reference.
*/ } - spin_unlock_irqrestore(&queue->qlock, flags); return fod; } +static void +nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_tgt_queue *queue, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + + /* + * put all admin cmds on hw queue id 0. All io commands go to + * the respective hw queue based on a modulo basis + */ + fcpreq->hwqid = queue->qid ? + ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; + + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); +} + static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; + struct nvmet_fc_defer_fcp_req *deferfcp; unsigned long flags; fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, @@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, fcpreq->nvmet_fc_private = NULL; - spin_lock_irqsave(&queue->qlock, flags); - list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; fod->abort = false; fod->aborted = false; fod->writedataactive = false; fod->fcpreq = NULL; + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); + + spin_lock_irqsave(&queue->qlock, flags); + deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (!deferfcp) { + list_add_tail(&fod->fcp_list, &fod->queue->fod_list); + spin_unlock_irqrestore(&queue->qlock, flags); + + /* Release reference taken at queue lookup and fod allocation */ + nvmet_fc_tgt_q_put(queue); + return; + } + + /* Re-use the fod for the next pending cmd that was deferred */ + list_del(&deferfcp->req_list); + + fcpreq = deferfcp->fcp_req; + + /* deferfcp can be reused for another IO at a later date */ + list_add_tail(&deferfcp->req_list, &queue->avail_defer_list); + spin_unlock_irqrestore(&queue->qlock, flags); + /* Save NVME CMD IO in fod */ + memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen); + + /* Setup new fcpreq to be processed */ + fcpreq->rspaddr = NULL; + fcpreq->rsplen = 0; + fcpreq->nvmet_fc_private = fod; + fod->fcpreq = fcpreq; + fod->active = true; + + /* inform LLDD IO is now being processed */ + tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq); + + /* Submit deferred IO for processing */ + nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq); + /* - * release the reference taken at queue lookup and fod allocation + * Leave the queue lookup get reference taken when + * fod was originally allocated. 
*/ - nvmet_fc_tgt_q_put(queue); - - tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->port = assoc->tgtport->port; queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); + INIT_LIST_HEAD(&queue->avail_defer_list); + INIT_LIST_HEAD(&queue->pending_cmd_list); atomic_set(&queue->connected, 0); atomic_set(&queue->sqtail, 0); atomic_set(&queue->rsn, 1); @@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; + struct nvmet_fc_defer_fcp_req *deferfcp; unsigned long flags; int i, writedataactive; bool disconnect; @@ -666,6 +733,35 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) } } } + + /* Cleanup defer'ed IOs in queue */ + list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) { + list_del(&deferfcp->req_list); + kfree(deferfcp); + } + + for (;;) { + deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (!deferfcp) + break; + + list_del(&deferfcp->req_list); + spin_unlock_irqrestore(&queue->qlock, flags); + + tgtport->ops->defer_rcv(&tgtport->fc_target_port, + deferfcp->fcp_req); + + tgtport->ops->fcp_abort(&tgtport->fc_target_port, + deferfcp->fcp_req); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, + deferfcp->fcp_req); + + kfree(deferfcp); + + spin_lock_irqsave(&queue->qlock, flags); + } spin_unlock_irqrestore(&queue->qlock, flags); flush_workqueue(queue->work_q); @@ -2172,11 +2268,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc * layer for processing. * - * The nvmet-fc layer will copy cmd payload to an internal structure for - * processing. As such, upon completion of the routine, the LLDD may - * immediately free/reuse the CMD IU buffer passed in the call. + * The nvmet_fc layer allocates a local job structure (struct + * nvmet_fc_fcp_iod) from the queue for the io and copies the + * CMD IU buffer to the job structure. As such, on a successful + * completion (returns 0), the LLDD may immediately free/reuse + * the CMD IU buffer passed in the call. + * + * However, in some circumstances, due to the packetized nature of FC + * and the api of the FC LLDD which may issue a hw command to send the + * response, but the LLDD may not get the hw completion for that command + * and upcall the nvmet_fc layer before a new command may be + * asynchronously received - it's possible for a command to be received + * before the LLDD and nvmet_fc have recycled the job structure. It gives + * the appearance of more commands received than fits in the sq. + * To alleviate this scenario, a temporary queue is maintained in the + * transport for pending LLDD requests waiting for a queue job structure. + * In these "overrun" cases, a temporary queue element is allocated, + * the LLDD request and CMD iu buffer information remembered, and the + * routine returns a -EOVERFLOW status. Subsequently, when a queue job + * structure is freed, it is immediately reallocated for anything on the + * pending request list. The LLDD's defer_rcv() callback is called, + * informing the LLDD that it may reuse the CMD IU buffer, and the io + * is then started normally with the transport. - * If this routine returns error, the lldd should abort the exchange.
+ * The LLDD, when receiving an -EOVERFLOW completion status, is to treat + * the completion as successful but must not reuse the CMD IU buffer + * until the LLDD's defer_rcv() callback has been called for the + * corresponding struct nvmefc_tgt_fcp_req pointer. + * + * If there is any other condition in which an error occurs, the + * transport will return a non-zero status indicating the error. + * In all cases other than -EOVERFLOW, the transport has not accepted the + * request and the LLDD should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU * was received on. @@ -2194,6 +2317,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, struct nvme_fc_cmd_iu *cmdiu = cmdiubuf; struct nvmet_fc_tgt_queue *queue; struct nvmet_fc_fcp_iod *fod; + struct nvmet_fc_defer_fcp_req *deferfcp; + unsigned long flags; /* validate iu, so the connection id can be used to find the queue */ if ((cmdiubuf_len != sizeof(*cmdiu)) || @@ -2214,29 +2339,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, * when the fod is freed. */ + spin_lock_irqsave(&queue->qlock, flags); + fod = nvmet_fc_alloc_fcp_iod(queue); - if (!fod) { + if (fod) { + spin_unlock_irqrestore(&queue->qlock, flags); + + fcpreq->nvmet_fc_private = fod; + fod->fcpreq = fcpreq; + + memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); + + nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq); + + return 0; + } + + if (!tgtport->ops->defer_rcv) { + spin_unlock_irqrestore(&queue->qlock, flags); /* release the queue lookup reference */ nvmet_fc_tgt_q_put(queue); return -ENOENT; } - fcpreq->nvmet_fc_private = fod; - fod->fcpreq = fcpreq; - /* - * put all admin cmds on hw queue id 0. All io commands go to - * the respective hw queue based on a modulo basis - */ - fcpreq->hwqid = queue->qid ? - ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; - memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); + deferfcp = list_first_entry_or_null(&queue->avail_defer_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (deferfcp) { + /* Just re-use one that was previously allocated */ + list_del(&deferfcp->req_list); + } else { + spin_unlock_irqrestore(&queue->qlock, flags); - if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) - queue_work_on(queue->cpu, queue->work_q, &fod->work); - else - nvmet_fc_handle_fcp_rqst(tgtport, fod); + /* Now we need to dynamically allocate one */ + deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL); + if (!deferfcp) { + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + return -ENOMEM; + } + spin_lock_irqsave(&queue->qlock, flags); } - return 0; + /* For now, use rspaddr / rsplen to save payload information */ + fcpreq->rspaddr = cmdiubuf; + fcpreq->rsplen = cmdiubuf_len; + deferfcp->fcp_req = fcpreq; + + /* defer processing till a fod becomes available */ + list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list); + + /* NOTE: the queue lookup reference is still valid */ + + spin_unlock_irqrestore(&queue->qlock, flags); + + return -EOVERFLOW; } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 6c8c5d8041b7..2591878c1d48 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -346,6 +346,11 @@ struct nvme_fc_remote_port { * indicating an FC transport Aborted status. * Entrypoint is Mandatory. * + * @defer_rcv: Called by the transport to signal the LLDD that it has + * begun processing of a previously received NVME CMD IU.
The LLDD + * is now free to re-use the rcv buffer associated with the + * nvmefc_tgt_fcp_req. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -846,6 +851,8 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_fcp_req *fcpreq); void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3 From 16af97dc5a8975371a83d9e30a64038b48f40a2d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 10 Aug 2017 15:23:56 -0700 Subject: mm: migrate: prevent racy access to tlb_flush_pending Patch series "fixes of TLB batching races", v6. It turns out that the Linux TLB batching mechanism suffers from various races. Races that are caused due to batching during reclamation were recently handled by Mel and this patch-set deals with others. The more fundamental issue is that concurrent updates of the page-tables allow for TLB flushes to be batched on one core, while another core changes the page-tables. This other core may assume a PTE change does not require a flush based on the updated PTE value, while it is unaware that TLB flushes are still pending. This behavior affects KSM (which may result in memory corruption) and MADV_FREE and MADV_DONTNEED (which may result in incorrect behavior). A proof-of-concept can easily produce the wrong behavior of MADV_DONTNEED. Memory corruption in KSM is harder to produce in practice, but was observed by hacking the kernel and adding a delay before flushing and replacing the KSM page. Finally, there is also one memory barrier missing, which may affect architectures with a weak memory model. This patch (of 7): Setting and clearing mm->tlb_flush_pending can be performed by multiple threads, since mmap_sem may only be acquired for read in task_numa_work(). If this happens, tlb_flush_pending might be cleared while one of the threads still changes PTEs and batches TLB flushes. This can lead to the same race between migration and change_protection_range() that led to the introduction of tlb_flush_pending. The result of this race was data corruption, which means that this patch also addresses a theoretically possible data corruption. An actual data corruption was not observed, yet the race was confirmed by adding an assertion to check that tlb_flush_pending is not set by two threads, adding artificial latency in change_protection_range() and using sysctl to reduce kernel.numa_balancing_scan_delay_ms. Link: http://lkml.kernel.org/r/20170802000818.4760-2-namit@vmware.com Fixes: 20841405940e ("mm: fix TLB flush race between migration, and change_protection_range") Signed-off-by: Nadav Amit Acked-by: Mel Gorman Acked-by: Rik van Riel Acked-by: Minchan Kim Cc: Andy Lutomirski Cc: Hugh Dickins Cc: "David S.
Miller" Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Russell King Cc: Sergey Senozhatsky Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 31 ++++++++++++++++++++++--------- kernel/fork.c | 2 +- mm/debug.c | 2 +- mm/mprotect.c | 4 ++-- 4 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f384bb62d8e..f58f76ee1dfa 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -493,7 +493,7 @@ struct mm_struct { * can move process memory needs to flush the TLB when moving a * PROT_NONE or PROT_NUMA mapped page. */ - bool tlb_flush_pending; + atomic_t tlb_flush_pending; #endif #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ @@ -532,33 +532,46 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { barrier(); - return mm->tlb_flush_pending; + return atomic_read(&mm->tlb_flush_pending) > 0; } -static inline void set_tlb_flush_pending(struct mm_struct *mm) + +static inline void init_tlb_flush_pending(struct mm_struct *mm) { - mm->tlb_flush_pending = true; + atomic_set(&mm->tlb_flush_pending, 0); +} + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ + atomic_inc(&mm->tlb_flush_pending); /* - * Guarantee that the tlb_flush_pending store does not leak into the + * Guarantee that the tlb_flush_pending increase does not leak into the * critical section updating the page tables */ smp_mb__before_spinlock(); } + /* Clearing is done after a TLB flush, which also provides a barrier. */ -static inline void clear_tlb_flush_pending(struct mm_struct *mm) +static inline void dec_tlb_flush_pending(struct mm_struct *mm) { barrier(); - mm->tlb_flush_pending = false; + atomic_dec(&mm->tlb_flush_pending); } #else static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { return false; } -static inline void set_tlb_flush_pending(struct mm_struct *mm) + +static inline void init_tlb_flush_pending(struct mm_struct *mm) { } -static inline void clear_tlb_flush_pending(struct mm_struct *mm) + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ +} + +static inline void dec_tlb_flush_pending(struct mm_struct *mm) { } #endif diff --git a/kernel/fork.c b/kernel/fork.c index 17921b0390b4..e075b7780421 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_aio(mm); mm_init_owner(mm, p); mmu_notifier_mm_init(mm); - clear_tlb_flush_pending(mm); + init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif diff --git a/mm/debug.c b/mm/debug.c index db1cd26d8752..d70103bb4731 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -159,7 +159,7 @@ void dump_mm(const struct mm_struct *mm) mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq, #endif #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) - mm->tlb_flush_pending, + atomic_read(&mm->tlb_flush_pending), #endif mm->def_flags, &mm->def_flags ); diff --git a/mm/mprotect.c b/mm/mprotect.c index 4180ad8cc9c5..bd0f409922cb 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -244,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, BUG_ON(addr >= end); pgd = pgd_offset(mm, addr); 
flush_cache_range(vma, addr, end); - set_tlb_flush_pending(mm); + inc_tlb_flush_pending(mm); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -256,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, /* Only flush the TLB if we actually modified any entries: */ if (pages) flush_tlb_range(vma, start, end); - clear_tlb_flush_pending(mm); + dec_tlb_flush_pending(mm); return pages; } -- cgit v1.2.3 From 0a2c40487f3e4215c6ab46e7f837036badfb542b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 10 Aug 2017 15:23:59 -0700 Subject: mm: migrate: fix barriers around tlb_flush_pending Reading tlb_flush_pending while the page-table lock is taken does not require a barrier, since the lock/unlock already acts as a barrier. Remove the barrier in mm_tlb_flush_pending() to address this issue. However, migrate_misplaced_transhuge_page() calls mm_tlb_flush_pending() while the page-table lock is already released, which may present a problem on architectures with a weak memory model (PPC). To deal with this case, a new parameter is added to mm_tlb_flush_pending() to indicate if it is read without the page-table lock taken, and to call smp_mb__after_unlock_lock() in this case. Link: http://lkml.kernel.org/r/20170802000818.4760-3-namit@vmware.com Signed-off-by: Nadav Amit Acked-by: Rik van Riel Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Andy Lutomirski Cc: Mel Gorman Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Russell King Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f58f76ee1dfa..0e478ebd2706 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -526,12 +526,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) /* * Memory barriers to keep this state in sync are graciously provided by * the page table locks, outside of which no page table modifications happen. - * The barriers below prevent the compiler from re-ordering the instructions - * around the memory barriers that are already present in the code. + * The barriers are used to ensure the order between tlb_flush_pending updates, + * which happen while the lock is not taken, and the PTE updates, which happen + * while the lock is taken, are serialized. */ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { - barrier(); return atomic_read(&mm->tlb_flush_pending) > 0; } @@ -554,7 +554,13 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm) /* Clearing is done after a TLB flush, which also provides a barrier. */ static inline void dec_tlb_flush_pending(struct mm_struct *mm) { - barrier(); + /* + * Guarantee that the tlb_flush_pending does not leak into the + * critical section, since we must order the PTE change and changes to + * the pending TLB flush indication. We could have relied on TLB flush + * as a memory barrier, but this behavior is not clearly documented.
+ */ + smp_mb__before_atomic(); atomic_dec(&mm->tlb_flush_pending); } #else -- cgit v1.2.3 From 56236a59556cfd3bae7bffb7e5f438b5ef0af880 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:05 -0700 Subject: mm: refactor TLB gathering API This is a preparatory patch for solving race problems caused by TLB batching. For that, we will increase/decrease the TLB flush pending count of mm_struct whenever tlb_[gather|finish]_mmu is called. To keep that simple, this patch separates the architecture-specific part, renames it to arch_tlb_[gather|finish]_mmu, and has the generic part just call it. It shouldn't change any behavior. Link: http://lkml.kernel.org/r/20170802000818.4760-5-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 6 ++++-- arch/ia64/include/asm/tlb.h | 6 ++++-- arch/s390/include/asm/tlb.h | 12 ++++++------ arch/sh/include/asm/tlb.h | 6 ++++-- arch/um/include/asm/tlb.h | 8 +++++--- include/asm-generic/tlb.h | 7 ++++--- include/linux/mm_types.h | 6 ++++++ mm/memory.c | 28 +++++++++++++++++++++------- 8 files changed, 54 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 3f2eb76243e3..7f5b2a2d3861 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->fullmm = !(start | (end+1)); @@ -166,7 +167,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index fced197b9626..93cadc04ac62 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); @@ -185,7 +186,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start * collected.
*/ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 7317b3108a88..d574d0820dc8 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -47,10 +47,9 @@ struct mmu_table_batch { extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_gather_mmu(struct mmu_gather *tlb, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline void +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -76,8 +75,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -static inline void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 46e0d635e36f..89786560dbd4 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -47,7 +48,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { if (tlb->fullmm) flush_tlb_mm(tlb->mm); diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 600a2e9bfee2..2a901eca7145 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -80,12 +81,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/* tlb_finish_mmu +/* arch_tlb_finish_mmu * Called at the end of the shootdown operation to free up any resources * that were required. 
*/ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8afa4335e5b2..8f71521e7a44 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,10 +112,11 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); +void arch_tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, - unsigned long end); +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e478ebd2706..c605f2a3a68e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -522,6 +522,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return mm->cpu_vm_mask_var; } +struct mmu_gather; +extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end); +extern void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); + #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * Memory barriers to keep this state in sync are graciously provided by diff --git a/mm/memory.c b/mm/memory.c index f65beaad319b..34cba5113e06 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } -/* tlb_gather_mmu - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @fullmm argument is used when @mm is without - * users and we're going to destroy the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; @@ -275,7 +271,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * Called at the end of the shootdown operation to free up any resources * that were required. */ -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { struct mmu_gather_batch *batch, *next; @@ -398,6 +395,23 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +/* tlb_gather_mmu + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + arch_tlb_finish_mmu(tlb, start, end); +} + /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. 
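For readers skimming the refactor above: the entry points callers use are unchanged, since tlb_gather_mmu()/tlb_finish_mmu() are now thin generic wrappers around the arch hooks. A minimal sketch of the usual call pattern (illustrative only; unmap_vmas() stands in for whatever queues pages into the gather):

	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, start, end);	/* generic wrapper, calls arch_tlb_gather_mmu() */
	unmap_vmas(&tlb, vma, start, end);	/* batch pages, defer the TLB flush */
	tlb_finish_mmu(&tlb, start, end);	/* flush and release, via arch_tlb_finish_mmu() */
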
-- cgit v1.2.3 From 0a2dd266dd6b7a31503b5bbe63af05961a6b446d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:09 -0700 Subject: mm: make tlb_flush_pending global Currently, tlb_flush_pending is used only for CONFIG_[NUMA_BALANCING|COMPACTION] but upcoming patches to solve a subtle TLB flush batching problem will use it regardless of compaction/NUMA, so this patch removes the dependency. [akpm@linux-foundation.org: remove more ifdefs from world's ugliest printk statement] Link: http://lkml.kernel.org/r/20170802000818.4760-6-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Acked-by: Mel Gorman Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Heiko Carstens Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Russell King Cc: Sergey Senozhatsky Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 21 --------------------- mm/debug.c | 4 ---- 2 files changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c605f2a3a68e..892a7b0196fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -487,14 +487,12 @@ struct mm_struct { /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * An operation with batched TLB flushing is going on. Anything that * can move process memory needs to flush the TLB when moving a * PROT_NONE or PROT_NUMA mapped page. */ atomic_t tlb_flush_pending; -#endif #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ bool tlb_flush_batched; @@ -528,7 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, extern void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * Memory barriers to keep this state in sync are graciously provided by * the page table locks, outside of which no page table modifications happen.
@@ -569,24 +566,6 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm) smp_mb__before_atomic(); atomic_dec(&mm->tlb_flush_pending); } -#else -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) -{ - return false; -} - -static inline void init_tlb_flush_pending(struct mm_struct *mm) -{ -} - -static inline void inc_tlb_flush_pending(struct mm_struct *mm) -{ -} - -static inline void dec_tlb_flush_pending(struct mm_struct *mm) -{ -} -#endif struct vm_fault; diff --git a/mm/debug.c b/mm/debug.c index d70103bb4731..5715448ab0b5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) "tlb_flush_pending %d\n" -#endif "def_flags: %#lx(%pGv)\n", mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, @@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq, #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) atomic_read(&mm->tlb_flush_pending), -#endif mm->def_flags, &mm->def_flags ); } -- cgit v1.2.3 From 99baac21e4585f4258f919502c6e23f1e5edc98c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:12 -0700 Subject: mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem Nadav reported that parallel MADV_DONTNEED on the same range has a stale TLB problem, and Mel fixed it [1] and found the same problem in MADV_FREE [2]. Quote from Mel Gorman: "The race in question is CPU 0 running madv_free and updating some PTEs while CPU 1 is also running madv_free and looking at the same PTEs. CPU 1 may have writable TLB entries for a page but fail the pte_dirty check (because CPU 0 has updated it already) and potentially fail to flush. Hence, when madv_free on CPU 1 returns, there are still potentially writable TLB entries and the underlying PTE is still present so that a subsequent write does not necessarily propagate the dirty bit to the underlying PTE any more. Reclaim at some unknown time at the future may then see that the PTE is still clean and discard the page even though a write has happened in the meantime. I think this is possible but I could have missed some protection in madv_free that prevents it happening." This patch aims to solve both problems at once, and is also groundwork for another problem with the KSM, MADV_FREE and soft-dirty story [3]. The TLB batch API (tlb_[gather|finish]_mmu) uses [inc|dec]_tlb_flush_pending and mm_tlb_flush_nested so that, when tlb_finish_mmu is called, we can detect that parallel threads are going on. In that case, the TLB is flushed forcefully to prevent userspace from accessing memory via a stale TLB entry even though gathering the page table entries failed. I confirmed this patch works with the test program Nadav gave [4], so this patch supersedes "mm: Always flush VMA ranges affected by zap_page_range v2" in current mmotm. NOTE: This patch modifies the arch-specific TLB gathering interface (arm, ia64, s390, sh, um). Most architectures are straightforward, but s390 needs care because tlb_flush_mmu works only if mm->context.flush_mm is set to non-zero, which happens only when a pte entry really is cleared by ptep_get_and_clear and friends. However, this problem never changes the pte entries, but we still need to flush to prevent memory access through stale TLB entries.
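To make the race concrete before the references below, here is a simplified interleaving of the MADV_FREE case (an illustrative sketch, not code from this patch):

	/*
	 * CPU 0 (madv_free)                    CPU 1 (madv_free, same range)
	 *
	 * tlb_gather_mmu()
	 * clear pte_dirty, defer TLB flush     tlb_gather_mmu()
	 *                                      pte_dirty check fails (CPU 0
	 *                                      already cleared it), nothing
	 *                                      is queued for flushing
	 * tlb_finish_mmu() -> flushes          tlb_finish_mmu() -> no flush;
	 *                                      a stale writable TLB entry
	 *                                      survives on CPU 1
	 */

With this patch, CPU 1's tlb_finish_mmu() sees mm_tlb_flush_nested() return true (pending count > 1) and flushes the range regardless.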
[1] http://lkml.kernel.org/r/20170725101230.5v7gvnjmcnkzzql3@techsingularity.net [2] http://lkml.kernel.org/r/20170725100722.2dxnmgypmwnrfawp@suse.de [3] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com [4] https://patchwork.kernel.org/patch/9861621/ [minchan@kernel.org: decrease tlb flush pending count in tlb_finish_mmu] Link: http://lkml.kernel.org/r/20170808080821.GA31730@bbox Link: http://lkml.kernel.org/r/20170802000818.4760-7-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Reported-by: Nadav Amit Reported-by: Mel Gorman Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 7 ++++++- arch/ia64/include/asm/tlb.h | 4 +++- arch/s390/include/asm/tlb.h | 7 ++++++- arch/sh/include/asm/tlb.h | 4 ++-- arch/um/include/asm/tlb.h | 7 ++++++- include/asm-generic/tlb.h | 2 +- include/linux/mm_types.h | 8 ++++++++ mm/memory.c | 18 ++++++++++++++++-- 8 files changed, 48 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 7f5b2a2d3861..d5562f9ce600 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->range_start = start; + tlb->range_end = end; + } + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 93cadc04ac62..cbe5ac3699bf 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) + tlb->need_flush = 1; /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index d574d0820dc8..2eb8ff0d6fca 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + } + tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 89786560dbd4..51a8bc967e75 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { - if (tlb->fullmm) + if (tlb->fullmm || force) flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 2a901eca7145..344d95619d03 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + tlb->need_flush = 1; + } tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8f71521e7a44..faddde44de8c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, bool force); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 892a7b0196fd..3cadee0a3508 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending) > 0; } +/* + * Returns true if there are two or more TLB batching threads in parallel. + */ +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); diff --git a/mm/memory.c b/mm/memory.c index 34cba5113e06..e158f7ac6730 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * that were required.
*/ void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; + if (force) + __tlb_adjust_range(tlb, start, end - start); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); } void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - arch_tlb_finish_mmu(tlb, start, end); + /* + * If parallel threads are doing PTE changes on the same range under a + * non-exclusive lock (e.g., mmap_sem read-side) but defer the TLB + * flush by batching, a thread with a stale TLB entry can fail to flush + * it by observing pte_none|!pte_dirty, for example; so flush the TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); } /* -- cgit v1.2.3 From e4672e55d6f3428ae9f27542e05c891f2af71051 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 2 Aug 2017 13:56:01 +0200 Subject: drm: Extract drm_device.h I need this to untangle an include loop in the next patch. Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170802115604.12734-2-daniel.vetter@ffwll.ch --- include/drm/drmP.h | 175 +------------------------------------------ include/drm/drm_device.h | 190 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 174 deletions(-) create mode 100644 include/drm/drm_device.h (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 3aa3809ab524..3031c256105e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -82,19 +82,10 @@ #include #include #include - +#include struct module; -struct drm_device; -struct drm_agp_head; -struct drm_local_map; -struct drm_device_dma; -struct drm_gem_object; -struct drm_master; -struct drm_vblank_crtc; -struct drm_vma_offset_manager; - struct device_node; struct videomode; struct reservation_object; @@ -305,170 +296,6 @@ struct pci_controller; #define DRM_IF_VERSION(maj, min) (maj << 16 | min) -/** - * DRM device structure. This structure represent a complete card that - * may contain multiple heads. - */ -struct drm_device { - struct list_head legacy_dev_list;/**< list of devices per driver for stealth attach cleanup */ - int if_version; /**< Highest interface version set */ - - /** \name Lifetime Management */ - /*@{ */ - struct kref ref; /**< Object ref-count */ - struct device *dev; /**< Device structure of bus-device */ - struct drm_driver *driver; /**< DRM driver managing the device */ - void *dev_private; /**< DRM driver private data */ - struct drm_minor *control; /**< Control node */ - struct drm_minor *primary; /**< Primary node */ - struct drm_minor *render; /**< Render node */ - bool registered; - - /* currently active master for this device.
Protected by master_mutex */ - struct drm_master *master; - - atomic_t unplugged; /**< Flag whether dev is dead */ - struct inode *anon_inode; /**< inode for private address-space */ - char *unique; /**< unique name of the device */ - /*@} */ - - /** \name Locks */ - /*@{ */ - struct mutex struct_mutex; /**< For others */ - struct mutex master_mutex; /**< For drm_minor::master and drm_file::is_master */ - /*@} */ - - /** \name Usage Counters */ - /*@{ */ - int open_count; /**< Outstanding files open, protected by drm_global_mutex. */ - spinlock_t buf_lock; /**< For drm_device::buf_use and a few other things. */ - int buf_use; /**< Buffers in use -- cannot alloc */ - atomic_t buf_alloc; /**< Buffer allocation in progress */ - /*@} */ - - struct mutex filelist_mutex; - struct list_head filelist; - - /** \name Memory management */ - /*@{ */ - struct list_head maplist; /**< Linked list of regions */ - struct drm_open_hash map_hash; /**< User token hash table for maps */ - - /** \name Context handle management */ - /*@{ */ - struct list_head ctxlist; /**< Linked list of context handles */ - struct mutex ctxlist_mutex; /**< For ctxlist */ - - struct idr ctx_idr; - - struct list_head vmalist; /**< List of vmas (for debugging) */ - - /*@} */ - - /** \name DMA support */ - /*@{ */ - struct drm_device_dma *dma; /**< Optional pointer for DMA support */ - /*@} */ - - /** \name Context support */ - /*@{ */ - - __volatile__ long context_flag; /**< Context swapping flag */ - int last_context; /**< Last current context */ - /*@} */ - - /** - * @irq_enabled: - * - * Indicates that interrupt handling is enabled, specifically vblank - * handling. Drivers which don't use drm_irq_install() need to set this - * to true manually. - */ - bool irq_enabled; - int irq; - - /** - * @vblank_disable_immediate: - * - * If true, vblank interrupt will be disabled immediately when the - * refcount drops to zero, as opposed to via the vblank disable - * timer. - * - * This can be set to true it the hardware has a working vblank counter - * with high-precision timestamping (otherwise there are races) and the - * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() - * appropriately. See also @max_vblank_count and - * &drm_crtc_funcs.get_vblank_counter. - */ - bool vblank_disable_immediate; - - /** - * @vblank: - * - * Array of vblank tracking structures, one per &struct drm_crtc. For - * historical reasons (vblank support predates kernel modesetting) this - * is free-standing and not part of &struct drm_crtc itself. It must be - * initialized explicitly by calling drm_vblank_init(). - */ - struct drm_vblank_crtc *vblank; - - spinlock_t vblank_time_lock; /**< Protects vblank count and time updates during vblank enable/disable */ - spinlock_t vbl_lock; - - /** - * @max_vblank_count: - * - * Maximum value of the vblank registers. This value +1 will result in a - * wrap-around of the vblank register. It is used by the vblank core to - * handle wrap-arounds. - * - * If set to zero the vblank core will try to guess the elapsed vblanks - * between times when the vblank interrupt is disabled through - * high-precision timestamps. That approach is suffering from small - * races and imprecision over longer time periods, hence exposing a - * hardware vblank counter is always recommended. - * - * If non-zeor, &drm_crtc_funcs.get_vblank_counter must be set. 
- */ - u32 max_vblank_count; /**< size of vblank counter register */ - - /** - * List of events - */ - struct list_head vblank_event_list; - spinlock_t event_lock; - - /*@} */ - - struct drm_agp_head *agp; /**< AGP data */ - - struct pci_dev *pdev; /**< PCI device structure */ -#ifdef __alpha__ - struct pci_controller *hose; -#endif - - struct drm_sg_mem *sg; /**< Scatter gather memory */ - unsigned int num_crtcs; /**< Number of CRTCs on this device */ - - struct { - int context; - struct drm_hw_lock *lock; - } sigdata; - - struct drm_local_map *agp_buffer_map; - unsigned int agp_buffer_token; - - struct drm_mode_config mode_config; /**< Current mode config */ - - /** \name GEM information */ - /*@{ */ - struct mutex object_name_lock; - struct idr object_name_idr; - struct drm_vma_offset_manager *vma_offset_manager; - /*@} */ - int switch_power_state; -}; - /** * drm_drv_uses_atomic_modeset - check if the driver implements * atomic_commit() diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h new file mode 100644 index 000000000000..e21af87a2f3c --- /dev/null +++ b/include/drm/drm_device.h @@ -0,0 +1,190 @@ +#ifndef _DRM_DEVICE_H_ +#define _DRM_DEVICE_H_ + +#include +#include +#include +#include + +#include +#include + +struct drm_driver; +struct drm_minor; +struct drm_master; +struct drm_device_dma; +struct drm_vblank_crtc; +struct drm_sg_mem; +struct drm_local_map; +struct drm_vma_offset_manager; + +struct inode; + +struct pci_dev; +struct pci_controller; + +/** + * DRM device structure. This structure represent a complete card that + * may contain multiple heads. + */ +struct drm_device { + struct list_head legacy_dev_list;/**< list of devices per driver for stealth attach cleanup */ + int if_version; /**< Highest interface version set */ + + /** \name Lifetime Management */ + /*@{ */ + struct kref ref; /**< Object ref-count */ + struct device *dev; /**< Device structure of bus-device */ + struct drm_driver *driver; /**< DRM driver managing the device */ + void *dev_private; /**< DRM driver private data */ + struct drm_minor *control; /**< Control node */ + struct drm_minor *primary; /**< Primary node */ + struct drm_minor *render; /**< Render node */ + bool registered; + + /* currently active master for this device. Protected by master_mutex */ + struct drm_master *master; + + atomic_t unplugged; /**< Flag whether dev is dead */ + struct inode *anon_inode; /**< inode for private address-space */ + char *unique; /**< unique name of the device */ + /*@} */ + + /** \name Locks */ + /*@{ */ + struct mutex struct_mutex; /**< For others */ + struct mutex master_mutex; /**< For drm_minor::master and drm_file::is_master */ + /*@} */ + + /** \name Usage Counters */ + /*@{ */ + int open_count; /**< Outstanding files open, protected by drm_global_mutex. */ + spinlock_t buf_lock; /**< For drm_device::buf_use and a few other things. 
*/ + int buf_use; /**< Buffers in use -- cannot alloc */ + atomic_t buf_alloc; /**< Buffer allocation in progress */ + /*@} */ + + struct mutex filelist_mutex; + struct list_head filelist; + + /** \name Memory management */ + /*@{ */ + struct list_head maplist; /**< Linked list of regions */ + struct drm_open_hash map_hash; /**< User token hash table for maps */ + + /** \name Context handle management */ + /*@{ */ + struct list_head ctxlist; /**< Linked list of context handles */ + struct mutex ctxlist_mutex; /**< For ctxlist */ + + struct idr ctx_idr; + + struct list_head vmalist; /**< List of vmas (for debugging) */ + + /*@} */ + + /** \name DMA support */ + /*@{ */ + struct drm_device_dma *dma; /**< Optional pointer for DMA support */ + /*@} */ + + /** \name Context support */ + /*@{ */ + + __volatile__ long context_flag; /**< Context swapping flag */ + int last_context; /**< Last current context */ + /*@} */ + + /** + * @irq_enabled: + * + * Indicates that interrupt handling is enabled, specifically vblank + * handling. Drivers which don't use drm_irq_install() need to set this + * to true manually. + */ + bool irq_enabled; + int irq; + + /** + * @vblank_disable_immediate: + * + * If true, vblank interrupt will be disabled immediately when the + * refcount drops to zero, as opposed to via the vblank disable + * timer. + * + * This can be set to true if the hardware has a working vblank counter + * with high-precision timestamping (otherwise there are races) and the + * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() + * appropriately. See also @max_vblank_count and + * &drm_crtc_funcs.get_vblank_counter. + */ + bool vblank_disable_immediate; + + /** + * @vblank: + * + * Array of vblank tracking structures, one per &struct drm_crtc. For + * historical reasons (vblank support predates kernel modesetting) this + * is free-standing and not part of &struct drm_crtc itself. It must be + * initialized explicitly by calling drm_vblank_init(). + */ + struct drm_vblank_crtc *vblank; + + spinlock_t vblank_time_lock; /**< Protects vblank count and time updates during vblank enable/disable */ + spinlock_t vbl_lock; + + /** + * @max_vblank_count: + * + * Maximum value of the vblank registers. This value +1 will result in a + * wrap-around of the vblank register. It is used by the vblank core to + * handle wrap-arounds. + * + * If set to zero the vblank core will try to guess the elapsed vblanks + * between times when the vblank interrupt is disabled through + * high-precision timestamps. That approach is suffering from small + * races and imprecision over longer time periods, hence exposing a + * hardware vblank counter is always recommended. + * + * If non-zero, &drm_crtc_funcs.get_vblank_counter must be set.
+ */ + u32 max_vblank_count; /**< size of vblank counter register */ + + /** + * List of events + */ + struct list_head vblank_event_list; + spinlock_t event_lock; + + /*@} */ + + struct drm_agp_head *agp; /**< AGP data */ + + struct pci_dev *pdev; /**< PCI device structure */ +#ifdef __alpha__ + struct pci_controller *hose; +#endif + + struct drm_sg_mem *sg; /**< Scatter gather memory */ + unsigned int num_crtcs; /**< Number of CRTCs on this device */ + + struct { + int context; + struct drm_hw_lock *lock; + } sigdata; + + struct drm_local_map *agp_buffer_map; + unsigned int agp_buffer_token; + + struct drm_mode_config mode_config; /**< Current mode config */ + + /** \name GEM information */ + /*@{ */ + struct mutex object_name_lock; + struct idr object_name_idr; + struct drm_vma_offset_manager *vma_offset_manager; + /*@} */ + int switch_power_state; +}; + +#endif -- cgit v1.2.3 From c07dcd61a0e57c6840c23d90cb1beddad7d682a1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 2 Aug 2017 13:56:02 +0200 Subject: drm: Document device unplug infrastructure While at it, also ocd and give them a consistent drm_dev_ prefix, like the other device instance functionality. Plus move the functions into the right places. Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170802115604.12734-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_drv.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/drm_file.c | 2 +- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/drm_gem_cma_helper.c | 2 +- drivers/gpu/drm/drm_ioctl.c | 4 ++-- drivers/gpu/drm/drm_vm.c | 2 +- drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c | 2 +- drivers/gpu/drm/udl/udl_connector.c | 2 +- drivers/gpu/drm/udl/udl_drv.c | 2 +- drivers/gpu/drm/udl/udl_fb.c | 2 +- include/drm/drmP.h | 13 ------------- include/drm/drm_drv.h | 22 ++++++++++++++++++++-- 12 files changed, 51 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 2ed2d919beae..39191e5c240e 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -291,7 +291,7 @@ struct drm_minor *drm_minor_acquire(unsigned int minor_id) if (!minor) { return ERR_PTR(-ENODEV); - } else if (drm_device_is_unplugged(minor->dev)) { + } else if (drm_dev_is_unplugged(minor->dev)) { drm_dev_unref(minor->dev); return ERR_PTR(-ENODEV); } @@ -364,7 +364,22 @@ void drm_put_dev(struct drm_device *dev) } EXPORT_SYMBOL(drm_put_dev); -void drm_unplug_dev(struct drm_device *dev) +static void drm_device_set_unplugged(struct drm_device *dev) +{ + smp_wmb(); + atomic_set(&dev->unplugged, 1); +} + +/** + * drm_dev_unplug - unplug a DRM device + * @dev: DRM device + * + * This unplugs a hotpluggable DRM device, which makes it inaccessible to + * userspace operations. Entry-points can use drm_dev_is_unplugged(). This + * essentially unregisters the device like drm_dev_unregister(), but can be + * called while there are still open users of @dev. + */ +void drm_dev_unplug(struct drm_device *dev) { /* for a USB device */ if (drm_core_check_feature(dev, DRIVER_MODESET)) @@ -383,7 +398,7 @@ void drm_unplug_dev(struct drm_device *dev) } mutex_unlock(&drm_global_mutex); } -EXPORT_SYMBOL(drm_unplug_dev); +EXPORT_SYMBOL(drm_dev_unplug); /* * DRM internal mount @@ -835,6 +850,9 @@ EXPORT_SYMBOL(drm_dev_register); * drm_dev_register() but does not deallocate the device. The caller must call * drm_dev_unref() to drop their final reference. 
* + * A special form of unregistering for hotpluggable devices is drm_dev_unplug(), + * which can be called while there are still open users of @dev. + * * This should be called first in the device teardown code to make sure * userspace can't access the device instance any more. */ diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 59b75a974357..b3c6e997ccdb 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -436,7 +436,7 @@ int drm_release(struct inode *inode, struct file *filp) if (!--dev->open_count) { drm_lastclose(dev); - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) drm_put_dev(dev); } mutex_unlock(&drm_global_mutex); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index a8d396bed6a4..ad4e9cfe48a2 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1001,7 +1001,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) struct drm_vma_offset_node *node; int ret; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; drm_vma_offset_lock_lookup(dev->vma_offset_manager); diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 275ab872b34f..f4e00571839d 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -390,7 +390,7 @@ unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, struct drm_device *dev = priv->minor->dev; struct drm_vma_offset_node *node; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; drm_vma_offset_lock_lookup(dev->vma_offset_manager); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 8bfeb32f8a10..d920b2118a39 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -716,7 +716,7 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, struct drm_device *dev = file_priv->minor->dev; int retcode; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; retcode = drm_ioctl_permit(flags, file_priv); @@ -765,7 +765,7 @@ long drm_ioctl(struct file *filp, dev = file_priv->minor->dev; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END; diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 1170b3209a12..13a59ed2afbc 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -631,7 +631,7 @@ int drm_legacy_mmap(struct file *filp, struct vm_area_struct *vma) struct drm_device *dev = priv->minor->dev; int ret; - if (drm_device_is_unplugged(dev)) + if (drm_dev_is_unplugged(dev)) return -ENODEV; mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c index f224b54a30f6..177e9d861001 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c @@ -56,7 +56,7 @@ static const struct drm_connector_helper_funcs tinydrm_connector_hfuncs = { static enum drm_connector_status tinydrm_connector_detect(struct drm_connector *connector, bool force) { - if (drm_device_is_unplugged(connector->dev)) + if (drm_dev_is_unplugged(connector->dev)) return connector_status_disconnected; return connector->status; diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index d2f57c52f7db..9f9a49748d17 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ 
b/drivers/gpu/drm/udl/udl_connector.c @@ -96,7 +96,7 @@ static int udl_mode_valid(struct drm_connector *connector, static enum drm_connector_status udl_detect(struct drm_connector *connector, bool force) { - if (drm_device_is_unplugged(connector->dev)) + if (drm_dev_is_unplugged(connector->dev)) return connector_status_disconnected; return connector_status_connected; } diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 0f02e1acf0ba..96c44ede5d69 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -102,7 +102,7 @@ static void udl_usb_disconnect(struct usb_interface *interface) drm_kms_helper_poll_disable(dev); udl_fbdev_unplug(dev); udl_drop_usb(dev); - drm_unplug_dev(dev); + drm_dev_unplug(dev); } /* diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index a5c54dc60def..77a2c59c56a1 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -198,7 +198,7 @@ static int udl_fb_open(struct fb_info *info, int user) struct udl_device *udl = dev->dev_private; /* If the USB device is gone, we don't accept new opens */ - if (drm_device_is_unplugged(udl->ddev)) + if (drm_dev_is_unplugged(udl->ddev)) return -ENODEV; ufbdev->fb_count++; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 3031c256105e..7277783a4ff0 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -320,19 +320,6 @@ static __inline__ int drm_core_check_feature(struct drm_device *dev, return ((dev->driver->driver_features & feature) ? 1 : 0); } -static inline void drm_device_set_unplugged(struct drm_device *dev) -{ - smp_wmb(); - atomic_set(&dev->unplugged, 1); -} - -static inline int drm_device_is_unplugged(struct drm_device *dev) -{ - int ret = atomic_read(&dev->unplugged); - smp_rmb(); - return ret; -} - /******************************************************************/ /** \name Internal function definitions */ /*@{*/ diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 505c91354802..71bbaaec836d 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -30,7 +30,8 @@ #include #include -struct drm_device; +#include + struct drm_file; struct drm_gem_object; struct drm_master; @@ -613,7 +614,24 @@ void drm_dev_unregister(struct drm_device *dev); void drm_dev_ref(struct drm_device *dev); void drm_dev_unref(struct drm_device *dev); void drm_put_dev(struct drm_device *dev); -void drm_unplug_dev(struct drm_device *dev); +void drm_dev_unplug(struct drm_device *dev); + +/** + * drm_dev_is_unplugged - is a DRM device unplugged + * @dev: DRM device + * + * This function can be called to check whether a hotpluggable device is + * unplugged. Unplugging itself is signalled through drm_dev_unplug(). If a + * device is unplugged, these two functions guarantee that any store before + * calling drm_dev_unplug() is visible to callers of this function after it + * completes. + */ +static inline int drm_dev_is_unplugged(struct drm_device *dev) +{ + int ret = atomic_read(&dev->unplugged); + smp_rmb(); + return ret; +} + int drm_dev_set_unique(struct drm_device *dev, const char *name); -- cgit v1.2.3 From 7faf952a3030d304334fe527be339b63e9e2745f Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 10 Aug 2017 13:01:48 -0400 Subject: dma-buf: add reservation_object_copy_fences (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to copy all the fences in a reservation object to another one.
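A short usage sketch (a hypothetical caller; per the kernel-doc added below, both reservation objects must already be locked):

	int err;

	/* dst and src are struct reservation_object *, both locks held */
	err = reservation_object_copy_fences(dst, src);
	if (err)
		return err;	/* -ENOMEM: dst is left unchanged */
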
v2: handle NULL src_list Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/1502384509-10465-2-git-send-email-alexander.deucher@amd.com --- drivers/dma-buf/reservation.c | 60 +++++++++++++++++++++++++++++++++++++++++++ include/linux/reservation.h | 3 +++ 2 files changed, 63 insertions(+) (limited to 'include') diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 87f8f5747500..d4881f91c43b 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -261,6 +261,66 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, } EXPORT_SYMBOL(reservation_object_add_excl_fence); +/** +* reservation_object_copy_fences - Copy all fences from src to dst. +* @dst: the destination reservation object +* @src: the source reservation object +* +* Copy all fences from src to dst. Both src->lock as well as dst->lock must be +* held. +*/ +int reservation_object_copy_fences(struct reservation_object *dst, + struct reservation_object *src) +{ + struct reservation_object_list *src_list, *dst_list; + struct dma_fence *old, *new; + size_t size; + unsigned i; + + src_list = reservation_object_get_list(src); + + if (src_list) { + size = offsetof(typeof(*src_list), + shared[src_list->shared_count]); + dst_list = kmalloc(size, GFP_KERNEL); + if (!dst_list) + return -ENOMEM; + + dst_list->shared_count = src_list->shared_count; + dst_list->shared_max = src_list->shared_count; + for (i = 0; i < src_list->shared_count; ++i) + dst_list->shared[i] = + dma_fence_get(src_list->shared[i]); + } else { + dst_list = NULL; + } + + kfree(dst->staged); + dst->staged = NULL; + + src_list = reservation_object_get_list(dst); + + old = reservation_object_get_excl(dst); + new = reservation_object_get_excl(src); + + dma_fence_get(new); + + preempt_disable(); + write_seqcount_begin(&dst->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(dst->fence_excl, new); + RCU_INIT_POINTER(dst->fence, dst_list); + write_seqcount_end(&dst->seq); + preempt_enable(); + + if (src_list) + kfree_rcu(src_list, rcu); + dma_fence_put(old); + + return 0; +} +EXPORT_SYMBOL(reservation_object_copy_fences); /** * reservation_object_get_fences_rcu - Get an object's shared and exclusive * fences without update side lock held diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 156cfd330b66..21fc84d82d41 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -254,6 +254,9 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj, unsigned *pshared_count, struct dma_fence ***pshared); +int reservation_object_copy_fences(struct reservation_object *dst, + struct reservation_object *src); + long reservation_object_wait_timeout_rcu(struct reservation_object *obj, bool wait_all, bool intr, unsigned long timeout); -- cgit v1.2.3 From 6a1c9510694fe1e901a3b5b53386eac069adcea6 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Tue, 15 Aug 2017 23:00:20 -0400 Subject: drm/amdkfd: Adding new IOCTL for scratch memory v2 v2: * Renamed ALLOC_MEMORY_OF_SCRATCH to SET_SCRATCH_BACKING_VA * Removed size parameter from the ioctl, it was unused * Removed hole in ioctl number space * No more call to write_config_static_mem * Return correct error code from ioctl Signed-off-by: Moses Reuben Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 37 ++++++++++++++++++++++
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++ .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 ++ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + include/uapi/linux/kfd_ioctl.h | 11 ++++++- 6 files changed, 55 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 65b506f19b46..7436d34b77ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -848,6 +848,40 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, return err; } +static int kfd_ioctl_set_scratch_backing_va(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_scratch_backing_va_args *args = data; + struct kfd_process_device *pdd; + struct kfd_dev *dev; + long err; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto bind_process_to_device_fail; + } + + pdd->qpd.sh_hidden_private_base = args->va_addr; + + mutex_unlock(&p->mutex); + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) + dev->kfd2kgd->set_scratch_backing_va( + dev->kgd, args->va_addr, pdd->qpd.vmid); + + return 0; + +bind_process_to_device_fail: + mutex_unlock(&p->mutex); + return err; +} #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ @@ -902,6 +936,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, kfd_ioctl_dbg_wave_control, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, + kfd_ioctl_set_scratch_backing_va, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 618ac65b6136..53a66e821624 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -270,6 +270,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", q->pipe, q->queue); + dqm->dev->kfd2kgd->set_scratch_backing_va( + dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); if (retval) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index fadc56a8be71..72c3cbabc0a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -24,6 +24,7 @@ #include "kfd_device_queue_manager.h" #include "cik_regs.h" #include "oss/oss_2_4_sh_mask.h" +#include "gca/gfx_7_2_sh_mask.h" static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -123,6 +124,7 @@ static int register_process_cik(struct device_queue_manager *dqm, } else { temp = get_sh_mem_bases_nybble_64(pdd); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 15e81ae9d2f4..40e9ddd096cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -135,6 +135,8 @@ static int register_process_vi(struct device_queue_manager *dqm, qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; + qpd->sh_mem_config |= 1 << + SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 30ce92c6e6a1..b397ec726400 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -432,6 +432,7 @@ struct qcm_process_device { uint32_t gds_size; uint32_t num_gws; uint32_t num_oac; + uint32_t sh_hidden_private_base; }; /* Data that is per-process-per device. */ diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d6833426fdef..1b9c5609d523 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -232,6 +232,12 @@ struct kfd_ioctl_wait_events_args { uint32_t wait_result; /* from KFD */ }; +struct kfd_ioctl_set_scratch_backing_va_args { + uint64_t va_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -286,7 +292,10 @@ struct kfd_ioctl_wait_events_args { #define AMDKFD_IOC_DBG_WAVE_CONTROL \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) +#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ + AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x11 +#define AMDKFD_COMMAND_END 0x12 #endif -- cgit v1.2.3 From 5d71dbc3a588690c3d66d76db8cd29973425ce6d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 15 Aug 2017 23:00:22 -0400 Subject: drm/amdkfd: Implement image tiling mode support v2 v2: Removed hole in ioctl number space Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 43 ++++++++++++++++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 28 ++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7436d34b77ab..e4a8c2e52cb2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -883,6 +883,46 @@ bind_process_to_device_fail: return err; } +static int kfd_ioctl_get_tile_config(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_tile_config_args *args = data; + struct kfd_dev *dev; + struct tile_config config; + int err = 0; + + dev = kfd_device_by_id(args->gpu_id); + + dev->kfd2kgd->get_tile_config(dev->kgd, &config); + + args->gb_addr_config = config.gb_addr_config; + args->num_banks = config.num_banks; + args->num_ranks = config.num_ranks; + + if (args->num_tile_configs > config.num_tile_configs) + args->num_tile_configs = config.num_tile_configs; + err = copy_to_user((void __user *)args->tile_config_ptr, + config.tile_config_ptr, + args->num_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_tile_configs = 0; + return -EFAULT; + } + + if 
(args->num_macro_tile_configs > config.num_macro_tile_configs) + args->num_macro_tile_configs = + config.num_macro_tile_configs; + err = copy_to_user((void __user *)args->macro_tile_config_ptr, + config.macro_tile_config_ptr, + args->num_macro_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_macro_tile_configs = 0; + return -EFAULT; + } + + return 0; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -939,6 +979,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, kfd_ioctl_set_scratch_backing_va, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, + kfd_ioctl_get_tile_config, 0) }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1b9c5609d523..7b4567bacfc2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -238,6 +238,29 @@ struct kfd_ioctl_set_scratch_backing_va_args { uint32_t pad; }; +struct kfd_ioctl_get_tile_config_args { + /* to KFD: pointer to tile array */ + uint64_t tile_config_ptr; + /* to KFD: pointer to macro tile array */ + uint64_t macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_tile_configs; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_macro_tile_configs; + + uint32_t gpu_id; /* to KFD */ + uint32_t gb_addr_config; /* from KFD */ + uint32_t num_banks; /* from KFD */ + uint32_t num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -295,7 +318,10 @@ struct kfd_ioctl_set_scratch_backing_va_args { #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) +#define AMDKFD_IOC_GET_TILE_CONFIG \ + AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x12 +#define AMDKFD_COMMAND_END 0x13 #endif -- cgit v1.2.3 From d20fa5a06d7bddb7823d14255982148fefb72950 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 11 Aug 2017 16:49:04 +0300 Subject: drm: omapdrm: hdmi: Pass HDMI core version as integer to HDMI audio The HDMI audio driver only needs to know which generation of HDMI transmitter it deals with, not the detailed SoC model. Pass the version number as an integer to prepare for removal of the OMAP SoC version from the omapdrm driver. 
Signed-off-by: Laurent Pinchart Acked-by: Mark Brown Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4.c | 2 +- drivers/gpu/drm/omapdrm/dss/hdmi5.c | 2 +- drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c | 2 +- drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c | 2 +- include/sound/omap-hdmi-audio.h | 2 +- sound/soc/omap/omap-hdmi-audio.c | 9 +++------ 6 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index 438f47d8ab69..4c131d710282 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -667,7 +667,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 4, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index 8ab76b50662a..a6adddeee5bb 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -694,7 +694,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 5, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c index 156a254705ea..ec78d61bc551 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c @@ -664,7 +664,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 4, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c index 4da36bcab977..2e2fcc3d6d4f 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c @@ -695,7 +695,7 @@ static int hdmi_audio_register(struct device *dev) { struct omap_hdmi_audio_pdata pdata = { .dev = dev, - .dss_version = omapdss_get_version(), + .version = 5, .audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp), .ops = &hdmi_audio_ops, }; diff --git a/include/sound/omap-hdmi-audio.h b/include/sound/omap-hdmi-audio.h index 1df2ff61a4dd..0e495ed8872e 100644 --- a/include/sound/omap-hdmi-audio.h +++ b/include/sound/omap-hdmi-audio.h @@ -39,7 +39,7 @@ struct omap_hdmi_audio_ops { /* HDMI audio initalization data */ struct omap_hdmi_audio_pdata { struct device *dev; - enum omapdss_version dss_version; + unsigned int version; phys_addr_t audio_dma_addr; const struct omap_hdmi_audio_ops *ops; diff --git a/sound/soc/omap/omap-hdmi-audio.c b/sound/soc/omap/omap-hdmi-audio.c index 888133f9e65d..3e9cc4842a1d 100644 --- a/sound/soc/omap/omap-hdmi-audio.c +++ b/sound/soc/omap/omap-hdmi-audio.c @@ -337,14 +337,11 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev) ad->dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; mutex_init(&ad->current_stream_lock); - switch (ha->dss_version) { - case OMAPDSS_VER_OMAP4430_ES1: - case OMAPDSS_VER_OMAP4430_ES2: - case OMAPDSS_VER_OMAP4: + switch (ha->version) { + case 4: dai_drv = &omap4_hdmi_dai; break; - case OMAPDSS_VER_OMAP5: - case OMAPDSS_VER_DRA7xx: + case 5: dai_drv = &omap5_hdmi_dai; 
break; default: -- cgit v1.2.3 From 739a6d5d640a9811beef6a828253ee184dd431c5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 11 Aug 2017 16:49:12 +0300 Subject: drm: omapdrm: Remove omapdrm platform data The omapdrm platform data are not used anymore, remove them. Signed-off-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Signed-off-by: Tomi Valkeinen --- include/linux/platform_data/omap_drm.h | 53 ---------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 include/linux/platform_data/omap_drm.h (limited to 'include') diff --git a/include/linux/platform_data/omap_drm.h b/include/linux/platform_data/omap_drm.h deleted file mode 100644 index f4e4a237ebd2..000000000000 --- a/include/linux/platform_data/omap_drm.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * DRM/KMS platform data for TI OMAP platforms - * - * Copyright (C) 2012 Texas Instruments - * Author: Rob Clark - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#ifndef __PLATFORM_DATA_OMAP_DRM_H__ -#define __PLATFORM_DATA_OMAP_DRM_H__ - -/* - * Optional platform data to configure the default configuration of which - * pipes/overlays/CRTCs are used.. if this is not provided, then instead the - * first CONFIG_DRM_OMAP_NUM_CRTCS are used, and they are each connected to - * one manager, with priority given to managers that are connected to - * detected devices. Remaining overlays are used as video planes. This - * should be a good default behavior for most cases, but yet there still - * might be times when you wish to do something different. - */ -struct omap_kms_platform_data { - /* overlays to use as CRTCs: */ - int ovl_cnt; - const int *ovl_ids; - - /* overlays to use as video planes: */ - int pln_cnt; - const int *pln_ids; - - int mgr_cnt; - const int *mgr_ids; - - int dev_cnt; - const char **dev_names; -}; - -struct omap_drm_platform_data { - uint32_t omaprev; - struct omap_kms_platform_data *kms_pdata; -}; - -#endif /* __PLATFORM_DATA_OMAP_DRM_H__ */ -- cgit v1.2.3 From d956e1293b9b43f3a9a508162cdbaa96cf02e6e0 Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Sun, 6 Aug 2017 17:41:08 +0200 Subject: drm/gem-cma-helper: Remove drm_gem_cma_dumb_map_offset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no more users of drm_gem_cma_dumb_map_offset(), so remove it. 
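For context, the former in-tree users were converted to the generic GEM core helper, so a CMA-backed driver now wires up drm_gem_dumb_map_offset() instead; a minimal sketch with a hypothetical driver struct:

/* Sketch: dumb-buffer hooks after the conversion. Only the
 * CMA-specific map_offset variant went away; create/destroy are
 * unchanged.
 */
static struct drm_driver my_driver = {		/* hypothetical */
	.dumb_create		= drm_gem_cma_dumb_create,
	.dumb_map_offset	= drm_gem_dumb_map_offset,	/* core GEM helper */
	.dumb_destroy		= drm_gem_dumb_destroy,
	/* ... */
};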
Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1502034068-51384-20-git-send-email-noralf@tronnes.org --- drivers/gpu/drm/drm_gem_cma_helper.c | 35 ----------------------------------- include/drm/drm_gem_cma_helper.h | 5 ----- 2 files changed, 40 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index f4e00571839d..373e33f22be4 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -264,41 +264,6 @@ int drm_gem_cma_dumb_create(struct drm_file *file_priv, } EXPORT_SYMBOL_GPL(drm_gem_cma_dumb_create); -/** - * drm_gem_cma_dumb_map_offset - return the fake mmap offset for a CMA GEM - * object - * @file_priv: DRM file-private structure containing the GEM object - * @drm: DRM device - * @handle: GEM object handle - * @offset: return location for the fake mmap offset - * - * This function look up an object by its handle and returns the fake mmap - * offset associated with it. Drivers using the CMA helpers should set this - * as their &drm_driver.dumb_map_offset callback. - * - * Returns: - * 0 on success or a negative error code on failure. - */ -int drm_gem_cma_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *drm, u32 handle, - u64 *offset) -{ - struct drm_gem_object *gem_obj; - - gem_obj = drm_gem_object_lookup(file_priv, handle); - if (!gem_obj) { - dev_err(drm->dev, "failed to lookup GEM object\n"); - return -EINVAL; - } - - *offset = drm_vma_node_offset_addr(&gem_obj->vma_node); - - drm_gem_object_put_unlocked(gem_obj); - - return 0; -} -EXPORT_SYMBOL_GPL(drm_gem_cma_dumb_map_offset); - const struct vm_operations_struct drm_gem_cma_vm_ops = { .open = drm_gem_vm_open, .close = drm_gem_vm_close, diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index b42529e0fae0..58a739bf15f1 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -73,11 +73,6 @@ int drm_gem_cma_dumb_create(struct drm_file *file_priv, struct drm_device *drm, struct drm_mode_create_dumb *args); -/* map memory region for DRM framebuffer to user space */ -int drm_gem_cma_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *drm, u32 handle, - u64 *offset); - /* set vm_flags and we can change the VM attribute to other one at here */ int drm_gem_cma_mmap(struct file *filp, struct vm_area_struct *vma); -- cgit v1.2.3 From 4c3dbb2c312c9fafbac30d98c523b8b1f3455d78 Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Sun, 13 Aug 2017 15:31:44 +0200 Subject: drm: Add GEM backed framebuffer library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This library provides helpers for drivers that don't subclass drm_framebuffer and are backed by drm_gem_object. The code is taken from drm_fb_cma_helper. 
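Wiring it up is a one-liner for drivers without special framebuffer needs; a minimal sketch, assuming an atomic driver (the helpers referenced are the ones added below):

#include <drm/drm_atomic_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>

/* Sketch: plug the new GEM framebuffer helper into the mode-config
 * hooks; drm_gem_fb_create() validates buffer sizes and wraps the
 * backing GEM objects in a plain drm_framebuffer.
 */
static const struct drm_mode_config_funcs my_mode_config_funcs = {
	.fb_create	= drm_gem_fb_create,
	.atomic_check	= drm_atomic_helper_check,
	.atomic_commit	= drm_atomic_helper_commit,
};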
Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1502631125-13557-2-git-send-email-noralf@tronnes.org --- Documentation/gpu/drm-kms-helpers.rst | 9 + drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 283 +++++++++++++++++++++++++++ include/drm/drm_framebuffer.h | 7 + include/drm/drm_gem_framebuffer_helper.h | 37 ++++ 5 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/drm_gem_framebuffer_helper.c create mode 100644 include/drm/drm_gem_framebuffer_helper.h (limited to 'include') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 7c5e2549a58a..13dd237418cc 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -296,3 +296,12 @@ Auxiliary Modeset Helpers .. kernel-doc:: drivers/gpu/drm/drm_modeset_helper.c :export: + +Framebuffer GEM Helper Reference +================================ + +.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c + :export: diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 24a066e1841c..a8acc197dec3 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -33,7 +33,7 @@ drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \ drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \ drm_kms_helper_common.o drm_dp_dual_mode_helper.o \ drm_simple_kms_helper.o drm_modeset_helper.o \ - drm_scdc_helper.o + drm_scdc_helper.o drm_gem_framebuffer_helper.o drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c new file mode 100644 index 000000000000..d54a083dc5dd --- /dev/null +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -0,0 +1,283 @@ +/* + * drm gem framebuffer helper functions + * + * Copyright (C) 2017 Noralf Trønnes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * DOC: overview + * + * This library provides helpers for drivers that don't subclass + * &drm_framebuffer and and use &drm_gem_object for their backing storage. + * + * Drivers without additional needs to validate framebuffers can simply use + * drm_gem_fb_create() and everything is wired up automatically. But all + * parts can be used individually. + */ + +/** + * drm_gem_fb_get_obj() - Get GEM object for framebuffer + * @fb: The framebuffer + * @plane: Which plane + * + * Returns the GEM object for given framebuffer. 
+ */ +struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb, + unsigned int plane) +{ + if (plane >= 4) + return NULL; + + return fb->obj[plane]; +} +EXPORT_SYMBOL_GPL(drm_gem_fb_get_obj); + +static struct drm_framebuffer * +drm_gem_fb_alloc(struct drm_device *dev, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object **obj, unsigned int num_planes, + const struct drm_framebuffer_funcs *funcs) +{ + struct drm_framebuffer *fb; + int ret, i; + + fb = kzalloc(sizeof(*fb), GFP_KERNEL); + if (!fb) + return ERR_PTR(-ENOMEM); + + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + + for (i = 0; i < num_planes; i++) + fb->obj[i] = obj[i]; + + ret = drm_framebuffer_init(dev, fb, funcs); + if (ret) { + DRM_DEV_ERROR(dev->dev, "Failed to init framebuffer: %d\n", + ret); + kfree(fb); + return ERR_PTR(ret); + } + + return fb; +} + +/** + * drm_gem_fb_destroy - Free GEM backed framebuffer + * @fb: DRM framebuffer + * + * Frees a GEM backed framebuffer with its backing buffer(s) and the structure + * itself. Drivers can use this as their &drm_framebuffer_funcs->destroy + * callback. + */ +void drm_gem_fb_destroy(struct drm_framebuffer *fb) +{ + int i; + + for (i = 0; i < 4; i++) + drm_gem_object_put_unlocked(fb->obj[i]); + + drm_framebuffer_cleanup(fb); + kfree(fb); +} +EXPORT_SYMBOL(drm_gem_fb_destroy); + +/** + * drm_gem_fb_create_handle - Create handle for GEM backed framebuffer + * @fb: DRM framebuffer + * @file: drm file + * @handle: handle created + * + * Drivers can use this as their &drm_framebuffer_funcs->create_handle + * callback. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file, + unsigned int *handle) +{ + return drm_gem_handle_create(file, fb->obj[0], handle); +} +EXPORT_SYMBOL(drm_gem_fb_create_handle); + +/** + * drm_gem_fb_create_with_funcs() - helper function for the + * &drm_mode_config_funcs.fb_create + * callback + * @dev: DRM device + * @file: drm file for the ioctl call + * @mode_cmd: metadata from the userspace fb creation request + * @funcs: vtable to be used for the new framebuffer object + * + * This can be used to set &drm_framebuffer_funcs for drivers that need the + * &drm_framebuffer_funcs.dirty callback. Use drm_gem_fb_create() if you don't + * need to change &drm_framebuffer_funcs. + * The function does buffer size validation. + */ +struct drm_framebuffer * +drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd, + const struct drm_framebuffer_funcs *funcs) +{ + const struct drm_format_info *info; + struct drm_gem_object *objs[4]; + struct drm_framebuffer *fb; + int ret, i; + + info = drm_get_format_info(dev, mode_cmd); + if (!info) + return ERR_PTR(-EINVAL); + + for (i = 0; i < info->num_planes; i++) { + unsigned int width = mode_cmd->width / (i ? info->hsub : 1); + unsigned int height = mode_cmd->height / (i ? 
info->vsub : 1); + unsigned int min_size; + + objs[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]); + if (!objs[i]) { + DRM_DEV_ERROR(dev->dev, "Failed to lookup GEM\n"); + ret = -ENOENT; + goto err_gem_object_put; + } + + min_size = (height - 1) * mode_cmd->pitches[i] + + width * info->cpp[i] + + mode_cmd->offsets[i]; + + if (objs[i]->size < min_size) { + drm_gem_object_put_unlocked(objs[i]); + ret = -EINVAL; + goto err_gem_object_put; + } + } + + fb = drm_gem_fb_alloc(dev, mode_cmd, objs, i, funcs); + if (IS_ERR(fb)) { + ret = PTR_ERR(fb); + goto err_gem_object_put; + } + + return fb; + +err_gem_object_put: + for (i--; i >= 0; i--) + drm_gem_object_put_unlocked(objs[i]); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_funcs); + +static const struct drm_framebuffer_funcs drm_gem_fb_funcs = { + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, +}; + +/** + * drm_gem_fb_create() - &drm_mode_config_funcs.fb_create callback function + * @dev: DRM device + * @file: drm file for the ioctl call + * @mode_cmd: metadata from the userspace fb creation request + * + * If your hardware has special alignment or pitch requirements these should be + * checked before calling this function. The function does buffer size + * validation. Use drm_gem_fb_create_with_funcs() if you need to set + * &drm_framebuffer_funcs.dirty. + */ +struct drm_framebuffer * +drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + return drm_gem_fb_create_with_funcs(dev, file, mode_cmd, + &drm_gem_fb_funcs); +} +EXPORT_SYMBOL_GPL(drm_gem_fb_create); + +/** + * drm_gem_fb_prepare_fb() - Prepare gem framebuffer + * @plane: Which plane + * @state: Plane state attach fence to + * + * This can be used as the &drm_plane_helper_funcs.prepare_fb hook. + * + * This function checks if the plane FB has an dma-buf attached, extracts + * the exclusive fence and attaches it to plane state for the atomic helper + * to wait on. + * + * There is no need for &drm_plane_helper_funcs.cleanup_fb hook for simple + * gem based framebuffer drivers which have their buffers always pinned in + * memory. + */ +int drm_gem_fb_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct dma_buf *dma_buf; + struct dma_fence *fence; + + if ((plane->state->fb == state->fb) || !state->fb) + return 0; + + dma_buf = drm_gem_fb_get_obj(state->fb, 0)->dma_buf; + if (dma_buf) { + fence = reservation_object_get_excl_rcu(dma_buf->resv); + drm_atomic_set_fence_for_plane(state, fence); + } + + return 0; +} +EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb); + +/** + * drm_gem_fbdev_fb_create - Create a drm_framebuffer for fbdev emulation + * @dev: DRM device + * @sizes: fbdev size description + * @pitch_align: optional pitch alignment + * @obj: GEM object backing the framebuffer + * @funcs: vtable to be used for the new framebuffer object + * + * This function creates a framebuffer for use with fbdev emulation. + * + * Returns: + * Pointer to a drm_framebuffer on success or an error pointer on failure. 
+ */ +struct drm_framebuffer * +drm_gem_fbdev_fb_create(struct drm_device *dev, + struct drm_fb_helper_surface_size *sizes, + unsigned int pitch_align, struct drm_gem_object *obj, + const struct drm_framebuffer_funcs *funcs) +{ + struct drm_mode_fb_cmd2 mode_cmd = { 0 }; + + mode_cmd.width = sizes->surface_width; + mode_cmd.height = sizes->surface_height; + mode_cmd.pitches[0] = sizes->surface_width * + DIV_ROUND_UP(sizes->surface_bpp, 8); + if (pitch_align) + mode_cmd.pitches[0] = roundup(mode_cmd.pitches[0], + pitch_align); + mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, + sizes->surface_depth); + if (obj->size < mode_cmd.pitches[0] * mode_cmd.height) + return ERR_PTR(-EINVAL); + + return drm_gem_fb_alloc(dev, &mode_cmd, &obj, 1, funcs); +} +EXPORT_SYMBOL(drm_gem_fbdev_fb_create); diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 5244f059d23a..b6996ddb19d6 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -190,6 +190,13 @@ struct drm_framebuffer { * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ struct list_head filp_head; + /** + * @obj: GEM objects backing the framebuffer, one per plane (optional). + * + * This is used by the GEM framebuffer helpers, see e.g. + * drm_gem_fb_create(). + */ + struct drm_gem_object *obj[4]; }; #define obj_to_fb(x) container_of(x, struct drm_framebuffer, base) diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h new file mode 100644 index 000000000000..db9cfa07235e --- /dev/null +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -0,0 +1,37 @@ +#ifndef __DRM_GEM_FB_HELPER_H__ +#define __DRM_GEM_FB_HELPER_H__ + +struct drm_device; +struct drm_file; +struct drm_fb_helper_surface_size; +struct drm_framebuffer; +struct drm_framebuffer_funcs; +struct drm_gem_object; +struct drm_mode_fb_cmd2; +struct drm_plane; +struct drm_plane_state; + +struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb, + unsigned int plane); +void drm_gem_fb_destroy(struct drm_framebuffer *fb); +int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file, + unsigned int *handle); + +struct drm_framebuffer * +drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd, + const struct drm_framebuffer_funcs *funcs); +struct drm_framebuffer * +drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd); + +int drm_gem_fb_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *state); + +struct drm_framebuffer * +drm_gem_fbdev_fb_create(struct drm_device *dev, + struct drm_fb_helper_surface_size *sizes, + unsigned int pitch_align, struct drm_gem_object *obj, + const struct drm_framebuffer_funcs *funcs); + +#endif -- cgit v1.2.3 From 373533f80b89f0f4fb59b65c2942d1b20b91319c Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 7 Aug 2017 11:13:41 +0200 Subject: drm/ttm: make ttm_mem_type_manager_func debug more useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide the drm printer directly instead of just the callback. 
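Decoupling the callback from a fixed prefix means the caller now picks the sink; a minimal sketch of a hypothetical debugfs dump reusing the same manager callback with a seq_file-backed printer:

#include <drm/drm_print.h>

/* Sketch: TTM's out-of-memory path below keeps using
 * drm_debug_printer(TTM_PFX); a debugfs file can feed the very same
 * ->debug() callback into a seq_file instead.
 */
static int my_mm_show(struct seq_file *m, void *unused)	/* hypothetical */
{
	struct ttm_mem_type_manager *man = m->private;
	struct drm_printer p = drm_seq_file_printer(m);

	(*man->func->debug)(man, &p);
	return 0;
}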
Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 +++---- drivers/gpu/drm/nouveau/nouveau_ttm.c | 6 ++++-- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo_manager.c | 5 ++--- drivers/gpu/drm/virtio/virtgpu_ttm.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 4 ++-- include/drm/ttm/ttm_bo_driver.h | 5 +++-- 8 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 5e6b90c6794f..26818b0a0cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -253,18 +253,17 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, * amdgpu_gtt_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. */ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_gtt_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a2c59a08b2bd..3f86b47984a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -204,18 +204,17 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, * amdgpu_vram_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. 
*/ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); } diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 999c35a25498..b0ad7fcefcf5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -179,7 +179,8 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, } static void -nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) +nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, + struct drm_printer *printer) { } @@ -252,7 +253,8 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man, } static void -nv04_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) +nv04_gart_manager_debug(struct ttm_mem_type_manager *man, + struct drm_printer *printer) { } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d3463ebc0b25..58e7fcea620e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -70,6 +70,7 @@ static inline int ttm_mem_type_from_place(const struct ttm_place *place, static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) { struct ttm_mem_type_manager *man = &bdev->man[mem_type]; + struct drm_printer p = drm_debug_printer(TTM_PFX); pr_err(" has_type: %d\n", man->has_type); pr_err(" use_type: %d\n", man->use_type); @@ -79,7 +80,7 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) pr_err(" available_caching: 0x%08X\n", man->available_caching); pr_err(" default_caching: 0x%08X\n", man->default_caching); if (mem_type != TTM_PL_SYSTEM) - (*man->func->debug)(man, TTM_PFX); + (*man->func->debug)(man, &p); } static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c index 90a6c0b03afc..a7c232dc39cb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c @@ -136,13 +136,12 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) } static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&rman->lock); - drm_mm_print(&rman->mm, &p); + drm_mm_print(&rman->mm, printer); spin_unlock(&rman->lock); } diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index e695d74eaa9f..cd389c5eaef5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -192,7 +192,7 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) } static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index d2b03d4a3c86..f2f9d88131f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -157,9 +157,9 @@ static int vmw_gmrid_man_takedown(struct ttm_mem_type_manager *man) } static void vmw_gmrid_man_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer 
*printer) { - pr_info("%s: No debug info available for the GMR id manager\n", prefix); + drm_printf(printer, "No debug info available for the GMR id manager\n"); } const struct ttm_mem_type_manager_func vmw_gmrid_manager_func = { diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d30850e07936..5f821a9b3a1f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -229,13 +229,14 @@ struct ttm_mem_type_manager_func { * struct ttm_mem_type_manager member debug * * @man: Pointer to a memory type manager. - * @prefix: Prefix to be used in printout to identify the caller. + * @printer: Prefix to be used in printout to identify the caller. * * This function is called to print out the state of the memory * type manager to aid debugging of out-of-memory conditions. * It may not be called from within atomic context. */ - void (*debug)(struct ttm_mem_type_manager *man, const char *prefix); + void (*debug)(struct ttm_mem_type_manager *man, + struct drm_printer *printer); }; /** -- cgit v1.2.3 From f44d85389e17b2e960620c1c6d89bda978a11f2b Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 24 Aug 2017 16:08:14 +0100 Subject: drm: rename u32 in __u32 in uapi All other fields use __ Cc: Ben Widawsky Fixes: db1689aa61b ("drm: Create a format/modifier blob") Signed-off-by: Lionel Landwerlin Signed-off-by: Daniel Stone Reviewed-by: Chris Wilson Reviewed-by: Emil Velikov Reviewed-by: Ben Widawsky Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20170824150814.5878-1-lionel.g.landwerlin@intel.com --- include/uapi/drm/drm_mode.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index a2bb7161f020..54fc38c3c3f1 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -715,24 +715,24 @@ struct drm_mode_atomic { struct drm_format_modifier_blob { #define FORMAT_BLOB_CURRENT 1 /* Version of this blob format */ - u32 version; + __u32 version; /* Flags */ - u32 flags; + __u32 flags; /* Number of fourcc formats supported */ - u32 count_formats; + __u32 count_formats; /* Where in this blob the formats exist (in bytes) */ - u32 formats_offset; + __u32 formats_offset; /* Number of drm_format_modifiers */ - u32 count_modifiers; + __u32 count_modifiers; /* Where in this blob the modifiers exist (in bytes) */ - u32 modifiers_offset; + __u32 modifiers_offset; - /* u32 formats[] */ + /* __u32 formats[] */ /* struct drm_format_modifier modifiers[] */ }; -- cgit v1.2.3 From 2cfa0bb25d25aa183ea29f1f9c2bc65f3f2c2264 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 5 Jul 2017 01:37:55 -0700 Subject: drm/vmwgfx: Prepare to support fence fd Make the fields and flags available. 
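To sketch the intended use from user space (the kernel-side plumbing that acts on these flags lands in follow-up patches): fill the execbuf argument so the kernel exports a fence FD, then read it back from the fence representation. Names other than the uapi fields below are hypothetical:

#include <stdint.h>
#include <string.h>
#include <drm/vmwgfx_drm.h>

/* Sketch: request a fence FD for this submission. The import
 * direction works the other way around, with a valid FD in
 * imported_fence_fd and DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD set.
 */
static void my_fill_execbuf(struct drm_vmw_execbuf_arg *arg, void *cmds,
			    uint32_t size, struct drm_vmw_fence_rep *rep)
{
	memset(arg, 0, sizeof(*arg));
	arg->commands		= (uintptr_t)cmds;
	arg->command_size	= size;
	arg->version		= DRM_VMW_EXECBUF_VERSION;
	arg->flags		= DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD;
	arg->imported_fence_fd	= -1;	/* nothing imported */
	arg->fence_rep		= (uintptr_t)rep;
	/* after the ioctl, rep->fd would hold the exported FD (or -1) */
}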
Signed-off-by: Sinclair Yeh Reviewed-by: Deepak Singh Rawat Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 5 ----- include/uapi/drm/vmwgfx_drm.h | 11 ++++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 8c583fd16c79..178dabcdb198 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -4449,11 +4449,6 @@ int vmw_execbuf_ioctl(struct drm_device *dev, unsigned long data, arg.context_handle = (uint32_t) -1; break; case 2: - if (arg.pad64 != 0) { - DRM_ERROR("Unused IOCTL data not set to zero.\n"); - return -EINVAL; - } - break; default: break; } diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index d9dfde9aa757..0bc784f5e0db 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -297,13 +297,17 @@ union drm_vmw_surface_reference_arg { * @version: Allows expanding the execbuf ioctl parameters without breaking * backwards compatibility, since user-space will always tell the kernel * which version it uses. - * @flags: Execbuf flags. None currently. + * @flags: Execbuf flags. + * @imported_fence_fd: FD for a fence imported from another device * * Argument to the DRM_VMW_EXECBUF Ioctl. */ #define DRM_VMW_EXECBUF_VERSION 2 +#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0) +#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1) + struct drm_vmw_execbuf_arg { __u64 commands; __u32 command_size; @@ -312,7 +316,7 @@ struct drm_vmw_execbuf_arg { __u32 version; __u32 flags; __u32 context_handle; - __u32 pad64; + __s32 imported_fence_fd; }; /** @@ -328,6 +332,7 @@ struct drm_vmw_execbuf_arg { * @passed_seqno: The highest seqno number processed by the hardware * so far. This can be used to mark user-space fence objects as signaled, and * to determine whether a fence seqno might be stale. + * @fd: FD associated with the fence, -1 if not exported * @error: This member should've been set to -EFAULT on submission. * The following actions should be take on completion: * error == -EFAULT: Fence communication failed. The host is synchronized. @@ -345,7 +350,7 @@ struct drm_vmw_fence_rep { __u32 mask; __u32 seqno; __u32 passed_seqno; - __u32 pad64; + __s32 fd; __s32 error; }; -- cgit v1.2.3 From afaf59237843bf89823c33143beca6b262dff0ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:19 -0700 Subject: drm/syncobj: Rename fence_get to find_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function has far more in common with drm_syncobj_find than with any in the get/put functions. 
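A typical call site after the rename, for reference; a minimal sketch of the lookup-then-drop pattern (the wait is illustrative, amdgpu's converted caller follows below):

/* Sketch: resolve a syncobj handle to its current fence, use it,
 * and drop the reference.
 */
static int my_wait_syncobj(struct drm_file *file_priv, u32 handle)
{
	struct dma_fence *fence;
	long err;
	int ret;

	ret = drm_syncobj_find_fence(file_priv, handle, &fence);
	if (ret)
		return ret;

	err = dma_fence_wait(fence, true);	/* interruptible */
	dma_fence_put(fence);
	return err < 0 ? err : 0;
}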
Signed-off-by: Jason Ekstrand Acked-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/drm_syncobj.c | 10 +++++----- include/drm/drm_syncobj.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 15d4a28d73bb..269b835571eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1035,7 +1035,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, { int r; struct dma_fence *fence; - r = drm_syncobj_fence_get(p->filp, handle, &fence); + r = drm_syncobj_find_fence(p->filp, handle, &fence); if (r) return r; diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index a5b38a80a99a..0412b0b0a342 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -95,9 +95,9 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, } EXPORT_SYMBOL(drm_syncobj_replace_fence); -int drm_syncobj_fence_get(struct drm_file *file_private, - u32 handle, - struct dma_fence **fence) +int drm_syncobj_find_fence(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence) { struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle); int ret = 0; @@ -112,7 +112,7 @@ int drm_syncobj_fence_get(struct drm_file *file_private, drm_syncobj_put(syncobj); return ret; } -EXPORT_SYMBOL(drm_syncobj_fence_get); +EXPORT_SYMBOL(drm_syncobj_find_fence); /** * drm_syncobj_free - free a sync object. @@ -307,7 +307,7 @@ int drm_syncobj_export_sync_file(struct drm_file *file_private, if (fd < 0) return fd; - ret = drm_syncobj_fence_get(file_private, handle, &fence); + ret = drm_syncobj_find_fence(file_private, handle, &fence); if (ret) goto err_put_fd; diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 89976da542b1..7d4ad777132e 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -81,9 +81,9 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence); -int drm_syncobj_fence_get(struct drm_file *file_private, - u32 handle, - struct dma_fence **fence); +int drm_syncobj_find_fence(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence); void drm_syncobj_free(struct kref *kref); #endif -- cgit v1.2.3 From 309a5482fa9eb7bc754bf95a2cd89091b01c33d2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:20 -0700 Subject: drm/syncobj: Add a race-free drm_syncobj_fence_get helper (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The atomic exchange operation in drm_syncobj_replace_fence is sufficient for the case where it races with itself. However, if you have a race between a replace_fence and dma_fence_get(syncobj->fence), you may end up with the entire replace_fence happening between the point in time where the one thread gets the syncobj->fence pointer and when it calls dma_fence_get() on it. If this happens, then the reference may be dropped before we get a chance to get a new one. The new helper uses dma_fence_get_rcu_safe to get rid of the race. This is also needed because it allows us to do a bit more than just get a reference in drm_syncobj_fence_get should we wish to do so. 
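To spell the race out, a sketch with the interleaving in comments and the new helper's usage underneath (names per the patch):

/*
 * The race being closed:
 *
 *	CPU A				CPU B
 *	fence = syncobj->fence;
 *					drm_syncobj_replace_fence(...)
 *					dma_fence_put(old);  // last ref
 *	dma_fence_get(fence);		// too late
 *
 * dma_fence_get_rcu_safe() takes the reference inside an RCU read
 * section and re-checks that syncobj->fence still points at the
 * fence it got, retrying otherwise, so callers just do:
 */
struct dma_fence *fence = drm_syncobj_fence_get(syncobj);
if (fence) {
	/* fence is safely referenced here */
	dma_fence_put(fence);
}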
v2: - RCU isn't that scary - Call rcu_read_lock/unlock - Don't rename fence to _fence - Make the helper static inline Signed-off-by: Jason Ekstrand Acked-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 2 +- include/drm/drm_syncobj.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 0412b0b0a342..eea38d82645c 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -105,7 +105,7 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (!syncobj) return -ENOENT; - *fence = dma_fence_get(syncobj->fence); + *fence = drm_syncobj_fence_get(syncobj); if (!*fence) { ret = -EINVAL; } diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 7d4ad777132e..ce94d14c5087 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -77,6 +77,18 @@ drm_syncobj_put(struct drm_syncobj *obj) kref_put(&obj->refcount, drm_syncobj_free); } +static inline struct dma_fence * +drm_syncobj_fence_get(struct drm_syncobj *syncobj) +{ + struct dma_fence *fence; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&syncobj->fence); + rcu_read_unlock(); + + return fence; +} + struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, -- cgit v1.2.3 From 5e60a10eaebab93f823295cd7ec3848ba3b6e553 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 25 Aug 2017 10:52:22 -0700 Subject: drm/syncobj: add sync obj wait interface. (v8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This interface will allow sync object to be used to back Vulkan fences. This API is pretty much the vulkan fence waiting API, and I've ported the code from amdgpu. v2: accept relative timeout, pass remaining time back to userspace. v3: return to absolute timeouts. v4: absolute zero = poll, rewrite any/all code to have same operation for arrays return -EINVAL for 0 fences. v4.1: fixup fences allocation check, use u64_to_user_ptr v5: move to sec/nsec, and use timespec64 for calcs. v6: use -ETIME and drop the out status flag. (-ETIME is suggested by ickle, I can feel a shed painting) v7: talked to Daniel/Arnd, use ktime and ns everywhere. 
v8: be more careful in the timeout calculations use uint32_t for counter variables so we don't overflow gracefully handle -ENOENT being returned from dma_fence_wait_timeout Signed-off-by: Dave Airlie Reviewed-by: Jason Ekstrand Acked-by: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 2 + drivers/gpu/drm/drm_ioctl.c | 2 + drivers/gpu/drm/drm_syncobj.c | 142 +++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 12 ++++ 4 files changed, 158 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 4e906b82a170..534e5ac43bf8 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -167,3 +167,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index d920b2118a39..b4f443417a28 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index eea38d82645c..4e8563c36d6e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1,5 +1,7 @@ /* * Copyright 2017 Red Hat + * Parts ported from amdgpu (fence wait code). + * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +33,9 @@ * that contain an optional fence. The fence can be updated with a new * fence, or be NULL. * + * syncobj's can be waited upon, where it will wait for the underlying + * fence. + * * syncobj's can be export to fd's and back, these fd's are opaque and * have no other use case, except passing the syncobj between processes. * @@ -447,3 +452,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, return drm_syncobj_fd_to_handle(file_private, args->fd, &args->handle); } + +/** + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value + * + * @timeout_nsec: timeout nsec component in ns, 0 for poll + * + * Calculate the timeout in jiffies from an absolute time in sec/nsec. 
+ */ +static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec) +{ + ktime_t abs_timeout, now; + u64 timeout_ns, timeout_jiffies64; + + /* make 0 timeout means poll - absolute 0 doesn't seem valid */ + if (timeout_nsec == 0) + return 0; + + abs_timeout = ns_to_ktime(timeout_nsec); + now = ktime_get(); + + if (!ktime_after(abs_timeout, now)) + return 0; + + timeout_ns = ktime_to_ns(ktime_sub(abs_timeout, now)); + + timeout_jiffies64 = nsecs_to_jiffies64(timeout_ns); + /* clamp timeout to avoid infinite timeout */ + if (timeout_jiffies64 >= MAX_SCHEDULE_TIMEOUT - 1) + return MAX_SCHEDULE_TIMEOUT - 1; + + return timeout_jiffies64 + 1; +} + +static int drm_syncobj_wait_fences(struct drm_device *dev, + struct drm_file *file_private, + struct drm_syncobj_wait *wait, + struct dma_fence **fences) +{ + signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); + signed long ret = 0; + uint32_t first = ~0; + + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { + uint32_t i; + for (i = 0; i < wait->count_handles; i++) { + ret = dma_fence_wait_timeout(fences[i], true, timeout); + + /* Various dma_fence wait callbacks will return + * ENOENT to indicate that the fence has already + * been signaled. We need to sanitize this to 0 so + * we don't return early and the client doesn't see + * an unexpected error. + */ + if (ret == -ENOENT) + ret = 0; + + if (ret < 0) + return ret; + if (ret == 0) + break; + timeout = ret; + } + first = 0; + } else { + ret = dma_fence_wait_any_timeout(fences, + wait->count_handles, + true, timeout, + &first); + } + + if (ret < 0) + return ret; + + wait->first_signaled = first; + if (ret == 0) + return -ETIME; + return 0; +} + +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + uint32_t *handles; + struct dma_fence **fences; + int ret = 0; + uint32_t i; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + /* Get the handles from userspace */ + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), + GFP_KERNEL); + if (handles == NULL) + return -ENOMEM; + + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(uint32_t) * args->count_handles)) { + ret = -EFAULT; + goto err_free_handles; + } + + fences = kcalloc(args->count_handles, + sizeof(struct dma_fence *), GFP_KERNEL); + if (!fences) { + ret = -ENOMEM; + goto err_free_handles; + } + + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_find_fence(file_private, handles[i], + &fences[i]); + if (ret) + goto err_free_fence_array; + } + + ret = drm_syncobj_wait_fences(dev, file_private, + args, fences); + +err_free_fence_array: + for (i = 0; i < args->count_handles; i++) + dma_fence_put(fences[i]); + kfree(fences); +err_free_handles: + kfree(handles); + + return ret; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 101593ab10ac..0757c1a41821 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -718,6 +718,17 @@ struct drm_syncobj_handle { __u32 pad; }; +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -840,6 +851,7 @@ extern "C" { 
#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From 9c19fb10a5893d6501df4d0fb93d954d5fc1d91b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 07:39:25 -0700 Subject: drm/syncobj: Add a callback mechanism for replace_fence (v3) It is useful in certain circumstances to know when the fence is replaced in a syncobj. Specifically, it may be useful to know when the fence goes from NULL to something valid. This does make syncobj_replace_fence a little more expensive because it has to take a lock but, in the common case where there is no callback list, it spends a very short amount of time inside the lock. v2: - Don't lock in drm_syncobj_fence_get. We only really need to lock around fence_replace to make the callback work. v3: - Fix the cb_list comment to make kbuild happy Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 60 +++++++++++++++++++++++++++++++++++++++++-- include/drm/drm_syncobj.h | 39 ++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 4e8563c36d6e..bade497b3f1d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -80,6 +80,46 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, } EXPORT_SYMBOL(drm_syncobj_find); +static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + cb->func = func; + list_add_tail(&cb->node, &syncobj->cb_list); +} + +/** + * drm_syncobj_add_callback - adds a callback to syncobj::cb_list + * @syncobj: Sync object to which to add the callback + * @cb: Callback to add + * @func: Func to use when initializing the drm_syncobj_cb struct + * + * This adds a callback to be called next time the fence is replaced + */ +void drm_syncobj_add_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + spin_lock(&syncobj->lock); + drm_syncobj_add_callback_locked(syncobj, cb, func); + spin_unlock(&syncobj->lock); +} +EXPORT_SYMBOL(drm_syncobj_add_callback); + +/** + * drm_syncobj_add_callback - removes a callback to syncobj::cb_list + * @syncobj: Sync object from which to remove the callback + * @cb: Callback to remove + */ +void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb) +{ + spin_lock(&syncobj->lock); + list_del_init(&cb->node); + spin_unlock(&syncobj->lock); +} +EXPORT_SYMBOL(drm_syncobj_remove_callback); + /** * drm_syncobj_replace_fence - replace fence in a sync object. 
* @syncobj: Sync object to replace fence in @@ -91,10 +131,24 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence) { struct dma_fence *old_fence; + struct drm_syncobj_cb *cur, *tmp; if (fence) dma_fence_get(fence); - old_fence = xchg(&syncobj->fence, fence); + + spin_lock(&syncobj->lock); + + old_fence = syncobj->fence; + syncobj->fence = fence; + + if (fence != old_fence) { + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { + list_del_init(&cur->node); + cur->func(syncobj, cur); + } + } + + spin_unlock(&syncobj->lock); dma_fence_put(old_fence); } @@ -130,7 +184,7 @@ void drm_syncobj_free(struct kref *kref) struct drm_syncobj *syncobj = container_of(kref, struct drm_syncobj, refcount); - dma_fence_put(syncobj->fence); + drm_syncobj_replace_fence(syncobj, NULL); kfree(syncobj); } EXPORT_SYMBOL(drm_syncobj_free); @@ -146,6 +200,8 @@ static int drm_syncobj_create(struct drm_file *file_private, return -ENOMEM; kref_init(&syncobj->refcount); + INIT_LIST_HEAD(&syncobj->cb_list); + spin_lock_init(&syncobj->lock); idr_preload(GFP_KERNEL); spin_lock(&file_private->syncobj_table_lock); diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index ce94d14c5087..c00fee539822 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -28,6 +28,8 @@ #include "linux/dma-fence.h" +struct drm_syncobj_cb; + /** * struct drm_syncobj - sync object. * @@ -43,8 +45,21 @@ struct drm_syncobj { /** * @fence: * NULL or a pointer to the fence bound to this object. + * + * This field should not be used directly. Use drm_syncobj_fence_get + * and drm_syncobj_replace_fence instead. */ struct dma_fence *fence; + /** + * @cb_list: + * List of callbacks to call when the fence gets replaced + */ + struct list_head cb_list; + /** + * @lock: + * locks cb_list and write-locks fence. + */ + spinlock_t lock; /** * @file: * a file backing for this syncobj. @@ -52,6 +67,25 @@ struct drm_syncobj { struct file *file; }; +typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb); + +/** + * struct drm_syncobj_cb - callback for drm_syncobj_add_callback + * @node: used by drm_syncob_add_callback to append this struct to + * syncobj::cb_list + * @func: drm_syncobj_func_t to call + * + * This struct will be initialized by drm_syncobj_add_callback, additional + * data can be passed along by embedding drm_syncobj_cb in another struct. + * The callback will get called the next time drm_syncobj_replace_fence is + * called. + */ +struct drm_syncobj_cb { + struct list_head node; + drm_syncobj_func_t func; +}; + void drm_syncobj_free(struct kref *kref); /** @@ -91,6 +125,11 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj) struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); +void drm_syncobj_add_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func); +void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence); int drm_syncobj_find_fence(struct drm_file *file_private, -- cgit v1.2.3 From 1fc08218ed2a42c86af5c905fe4c00885376a07e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:25 -0700 Subject: drm/syncobj: Add a CREATE_SIGNALED flag This requests that the driver create the sync object such that it already has a signaled dma_fence attached. 
Because we don't need anything in particular (just something signaled), we use a dummy null fence. This is useful for Vulkan which has a similar flag that can be passed to vkCreateFence. Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 57 ++++++++++++++++++++++++++++++++++++++++--- include/uapi/drm/drm.h | 1 + 2 files changed, 55 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index bade497b3f1d..12db8c9564cd 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -154,6 +154,49 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, } EXPORT_SYMBOL(drm_syncobj_replace_fence); +struct drm_syncobj_null_fence { + struct dma_fence base; + spinlock_t lock; +}; + +static const char *drm_syncobj_null_fence_get_name(struct dma_fence *fence) +{ + return "syncobjnull"; +} + +static bool drm_syncobj_null_fence_enable_signaling(struct dma_fence *fence) +{ + dma_fence_enable_sw_signaling(fence); + return !dma_fence_is_signaled(fence); +} + +static const struct dma_fence_ops drm_syncobj_null_fence_ops = { + .get_driver_name = drm_syncobj_null_fence_get_name, + .get_timeline_name = drm_syncobj_null_fence_get_name, + .enable_signaling = drm_syncobj_null_fence_enable_signaling, + .wait = dma_fence_default_wait, + .release = NULL, +}; + +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) +{ + struct drm_syncobj_null_fence *fence; + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return -ENOMEM; + + spin_lock_init(&fence->lock); + dma_fence_init(&fence->base, &drm_syncobj_null_fence_ops, + &fence->lock, 0, 0); + dma_fence_signal(&fence->base); + + drm_syncobj_replace_fence(syncobj, &fence->base); + + dma_fence_put(&fence->base); + + return 0; +} + int drm_syncobj_find_fence(struct drm_file *file_private, u32 handle, struct dma_fence **fence) @@ -190,7 +233,7 @@ void drm_syncobj_free(struct kref *kref) EXPORT_SYMBOL(drm_syncobj_free); static int drm_syncobj_create(struct drm_file *file_private, - u32 *handle) + u32 *handle, uint32_t flags) { int ret; struct drm_syncobj *syncobj; @@ -203,6 +246,14 @@ static int drm_syncobj_create(struct drm_file *file_private, INIT_LIST_HEAD(&syncobj->cb_list); spin_lock_init(&syncobj->lock); + if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { + ret = drm_syncobj_assign_null_handle(syncobj); + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + } + idr_preload(GFP_KERNEL); spin_lock(&file_private->syncobj_table_lock); ret = idr_alloc(&file_private->syncobj_idr, syncobj, 1, 0, GFP_NOWAIT); @@ -438,11 +489,11 @@ drm_syncobj_create_ioctl(struct drm_device *dev, void *data, return -ENODEV; /* no valid flags yet */ - if (args->flags) + if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED) return -EINVAL; return drm_syncobj_create(file_private, - &args->handle); + &args->handle, args->flags); } int diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 0757c1a41821..ade7f68d32b5 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -700,6 +700,7 @@ struct drm_prime_handle { struct drm_syncobj_create { __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) __u32 flags; }; -- cgit v1.2.3 From e7aca5031a2fb51b6120864d0eff5478c95e6651 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:24 -0700 Subject: drm/syncobj: Allow wait for submit and signal behavior (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand Cc: Dave Airlie Cc: Chris Wilson Cc: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 252 ++++++++++++++++++++++++++++++++++-------- include/uapi/drm/drm.h | 1 + 2 files changed, 208 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 12db8c9564cd..cccd3bd194c6 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "drm_internal.h" #include @@ -88,6 +89,35 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, list_add_tail(&cb->node, &syncobj->cb_list); } +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, + struct dma_fence **fence, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + int ret; + + *fence = drm_syncobj_fence_get(syncobj); + if (*fence) + return 1; + + spin_lock(&syncobj->lock); + /* We've already tried once to get a fence and failed. Now that we + * have the lock, try one more time just to be sure we don't add a + * callback when a fence has already been set. 
+ */ + if (syncobj->fence) { + *fence = dma_fence_get(syncobj->fence); + ret = 1; + } else { + *fence = NULL; + drm_syncobj_add_callback_locked(syncobj, cb, func); + ret = 0; + } + spin_unlock(&syncobj->lock); + + return ret; +} + /** * drm_syncobj_add_callback - adds a callback to syncobj::cb_list * @syncobj: Sync object to which to add the callback @@ -560,6 +590,160 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, &args->handle); } +struct syncobj_wait_entry { + struct task_struct *task; + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + struct drm_syncobj_cb syncobj_cb; +}; + +static void syncobj_wait_fence_func(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, fence_cb); + + wake_up_process(wait->task); +} + +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, syncobj_cb); + + /* This happens inside the syncobj lock */ + wait->fence = dma_fence_get(syncobj->fence); + wake_up_process(wait->task); +} + +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, + uint32_t count, + uint32_t flags, + signed long timeout, + uint32_t *idx) +{ + struct syncobj_wait_entry *entries; + struct dma_fence *fence; + signed long ret; + uint32_t signaled_count, i; + + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + /* Walk the list of sync objects and initialize entries. We do + * this up-front so that we can properly return -EINVAL if there is + * a syncobj with a missing fence and then never have the chance of + * returning -EINVAL again. + */ + signaled_count = 0; + for (i = 0; i < count; ++i) { + entries[i].task = current; + entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); + if (!entries[i].fence) { + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + continue; + } else { + ret = -EINVAL; + goto cleanup_entries; + } + } + + if (dma_fence_is_signaled(entries[i].fence)) { + if (signaled_count == 0 && idx) + *idx = i; + signaled_count++; + } + } + + /* Initialize ret to the max of timeout and 1. That way, the + * default return value indicates a successful wait and not a + * timeout. + */ + ret = max_t(signed long, timeout, 1); + + if (signaled_count == count || + (signaled_count > 0 && + !(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL))) + goto cleanup_entries; + + /* There's a very annoying laxness in the dma_fence API here, in + * that backends are not required to automatically report when a + * fence is signaled prior to fence->ops->enable_signaling() being + * called. So here if we fail to match signaled_count, we need to + * fallthough and try a 0 timeout wait! 
+ */ + + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + for (i = 0; i < count; ++i) { + drm_syncobj_fence_get_or_add_callback(syncobjs[i], + &entries[i].fence, + &entries[i].syncobj_cb, + syncobj_wait_syncobj_func); + } + } + + do { + set_current_state(TASK_INTERRUPTIBLE); + + signaled_count = 0; + for (i = 0; i < count; ++i) { + fence = entries[i].fence; + if (!fence) + continue; + + if (dma_fence_is_signaled(fence) || + (!entries[i].fence_cb.func && + dma_fence_add_callback(fence, + &entries[i].fence_cb, + syncobj_wait_fence_func))) { + /* The fence has been signaled */ + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { + signaled_count++; + } else { + if (idx) + *idx = i; + goto done_waiting; + } + } + } + + if (signaled_count == count) + goto done_waiting; + + if (timeout == 0) { + /* If we are doing a 0 timeout wait and we got + * here, then we just timed out. + */ + ret = 0; + goto done_waiting; + } + + ret = schedule_timeout(ret); + + if (ret > 0 && signal_pending(current)) + ret = -ERESTARTSYS; + } while (ret > 0); + +done_waiting: + __set_current_state(TASK_RUNNING); + +cleanup_entries: + for (i = 0; i < count; ++i) { + if (entries[i].syncobj_cb.func) + drm_syncobj_remove_callback(syncobjs[i], + &entries[i].syncobj_cb); + if (entries[i].fence_cb.func) + dma_fence_remove_callback(entries[i].fence, + &entries[i].fence_cb); + dma_fence_put(entries[i].fence); + } + kfree(entries); + + return ret; +} + /** * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value * @@ -592,43 +776,19 @@ static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec) return timeout_jiffies64 + 1; } -static int drm_syncobj_wait_fences(struct drm_device *dev, - struct drm_file *file_private, - struct drm_syncobj_wait *wait, - struct dma_fence **fences) +static int drm_syncobj_array_wait(struct drm_device *dev, + struct drm_file *file_private, + struct drm_syncobj_wait *wait, + struct drm_syncobj **syncobjs) { signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); signed long ret = 0; uint32_t first = ~0; - if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { - uint32_t i; - for (i = 0; i < wait->count_handles; i++) { - ret = dma_fence_wait_timeout(fences[i], true, timeout); - - /* Various dma_fence wait callbacks will return - * ENOENT to indicate that the fence has already - * been signaled. We need to sanitize this to 0 so - * we don't return early and the client doesn't see - * an unexpected error. 
- */ - if (ret == -ENOENT) - ret = 0; - - if (ret < 0) - return ret; - if (ret == 0) - break; - timeout = ret; - } - first = 0; - } else { - ret = dma_fence_wait_any_timeout(fences, - wait->count_handles, - true, timeout, - &first); - } - + ret = drm_syncobj_array_wait_timeout(syncobjs, + wait->count_handles, + wait->flags, + timeout, &first); if (ret < 0) return ret; @@ -644,14 +804,15 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, { struct drm_syncobj_wait *args = data; uint32_t *handles; - struct dma_fence **fences; + struct drm_syncobj **syncobjs; int ret = 0; uint32_t i; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) return -ENODEV; - if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) return -EINVAL; if (args->count_handles == 0) @@ -670,27 +831,28 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, goto err_free_handles; } - fences = kcalloc(args->count_handles, - sizeof(struct dma_fence *), GFP_KERNEL); - if (!fences) { + syncobjs = kcalloc(args->count_handles, + sizeof(struct drm_syncobj *), GFP_KERNEL); + if (!syncobjs) { ret = -ENOMEM; goto err_free_handles; } for (i = 0; i < args->count_handles; i++) { - ret = drm_syncobj_find_fence(file_private, handles[i], - &fences[i]); - if (ret) + syncobjs[i] = drm_syncobj_find(file_private, handles[i]); + if (!syncobjs[i]) { + ret = -ENOENT; goto err_free_fence_array; + } } - ret = drm_syncobj_wait_fences(dev, file_private, - args, fences); + ret = drm_syncobj_array_wait(dev, file_private, + args, syncobjs); err_free_fence_array: - for (i = 0; i < args->count_handles; i++) - dma_fence_put(fences[i]); - kfree(fences); + while (i-- > 0) + drm_syncobj_put(syncobjs[i]); + kfree(syncobjs); err_free_handles: kfree(handles); diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index ade7f68d32b5..4c746597225e 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -720,6 +720,7 @@ struct drm_syncobj_handle { }; #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ -- cgit v1.2.3 From aa4035d2c7683d2f2fb0ffe8087abd9eabf6d54a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:27 -0700 Subject: drm/syncobj: Add a reset ioctl (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just resets the dma_fence to NULL so it looks like it's never been signaled. This will be useful once we add the new wait API for allowing wait on "submit and signal" behavior. 
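
For illustration, a minimal userspace sketch of driving the new ioctl
(hypothetical fd and handle values; raw ioctl(2) rather than any
wrapper library, assuming the uapi additions below):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm.h>	/* or <libdrm/drm.h>, for the uapi definitions */

	/* Reset a set of syncobjs back to the unsignaled state. */
	static int syncobj_reset(int fd, const uint32_t *handles,
				 uint32_t count)
	{
		struct drm_syncobj_array args = {
			.handles = (uintptr_t)handles,	/* user pointer passed as u64 */
			.count_handles = count,
			.pad = 0,	/* nonzero pad is rejected with -EINVAL */
		};

		return ioctl(fd, DRM_IOCTL_SYNCOBJ_RESET, &args);
	}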
v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Reviewed-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 2 ++ drivers/gpu/drm/drm_ioctl.c | 2 ++ drivers/gpu/drm/drm_syncobj.c | 33 +++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 7 +++++++ 4 files changed, 44 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 534e5ac43bf8..83f1615eb1ec 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -169,3 +169,5 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index b4f443417a28..16c5d51a43aa 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -659,6 +659,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 15e74ca61760..40d2ad293661 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -885,3 +885,36 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, return ret; } + +int +drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_array *args = data; + struct drm_syncobj **syncobjs; + uint32_t i; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->pad != 0) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + ret = drm_syncobj_array_find(file_private, + u64_to_user_ptr(args->handles), + args->count_handles, + &syncobjs); + if (ret < 0) + return ret; + + for (i = 0; i < args->count_handles; i++) + drm_syncobj_replace_fence(syncobjs[i], NULL); + + drm_syncobj_array_free(syncobjs, args->count_handles); + + return 0; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 4c746597225e..b037fdf9e43b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -731,6 +731,12 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -854,6 +860,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From ffa9443fb3d3eddf0fdf6ac473dc8b5c87f08f15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:28 -0700 Subject: drm/syncobj: Add a signal ioctl (v3) This IOCTL provides a mechanism for userspace to trigger a sync object directly. 
There are other ways that userspace can trigger a syncobj such as
submitting a dummy batch somewhere or hanging on to a triggered
sync_file and doing an import. This just provides an easy way to
manually trigger the sync object without weird hacks.

The motivation for this IOCTL is Vulkan fences. Vulkan lets you create
a fence already in the signaled state so that you can wait on it
immediately without stalling. We could also handle this with a new
create flag to ask the driver to create a syncobj that is already
signaled but the IOCTL seemed a bit cleaner and more generic.

v2:
 - Take an array of sync objects (Dave Airlie)

v3:
 - Throw -EINVAL if pad != 0

Signed-off-by: Jason Ekstrand
Signed-off-by: Dave Airlie
---
 drivers/gpu/drm/drm_internal.h | 2 ++
 drivers/gpu/drm/drm_ioctl.c | 2 ++
 drivers/gpu/drm/drm_syncobj.c | 36 ++++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h | 1 +
 4 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 83f1615eb1ec..fbc3f308fa19 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -171,3 +171,5 @@ int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
+int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 16c5d51a43aa..a9ae6dd2d593 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -661,6 +661,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 DRM_UNLOCKED|DRM_RENDER_ALLOW),
 DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
 DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };

 #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 40d2ad293661..0422b8c2c2e7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -918,3 +918,39 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,

 return 0;
 }
+
+int
+drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private)
+{
+ struct drm_syncobj_array *args = data;
+ struct drm_syncobj **syncobjs;
+ uint32_t i;
+ int ret;
+
+ if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+ return -ENODEV;
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ if (args->count_handles == 0)
+ return -EINVAL;
+
+ ret = drm_syncobj_array_find(file_private,
+ u64_to_user_ptr(args->handles),
+ args->count_handles,
+ &syncobjs);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < args->count_handles; i++) {
+ ret = drm_syncobj_assign_null_handle(syncobjs[i]);
+ if (ret < 0)
+ break;
+ }
+
+ drm_syncobj_array_free(syncobjs, args->count_handles);
+
+ return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b037fdf9e43b..97677cd6964d 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -861,6 +861,7 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
 #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)

 /**
 * Device specific ioctls should only be in their respective
headers -- cgit v1.2.3
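
As a usage sketch for the signal ioctl added above (hypothetical
descriptor and handle values; assumes an open render-node fd and a
syncobj handle created earlier, raw ioctl(2) interface):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm.h>	/* or <libdrm/drm.h> */

	/* Force a syncobj into the signaled state, e.g. to back a Vulkan
	 * fence created with VK_FENCE_CREATE_SIGNALED_BIT. */
	static int syncobj_signal_one(int fd, uint32_t handle)
	{
		uint32_t handles[1] = { handle };
		struct drm_syncobj_array args = {
			.handles = (uintptr_t)handles,
			.count_handles = 1,
			.pad = 0,	/* nonzero pad is rejected with -EINVAL */
		};

		return ioctl(fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &args);
	}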