diff options
Diffstat (limited to 'drivers')
281 files changed, 21554 insertions, 6403 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 83e5f7e1a20d..c6db511b5f73 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -256,7 +256,7 @@ config ACPI_PROCESSOR config ACPI_IPMI tristate "IPMI" - depends on IPMI_SI + depends on IPMI_HANDLER default n help This driver enables the ACPI to access the BMC controller. And it @@ -469,9 +469,8 @@ config ACPI_WATCHDOG config ACPI_EXTLOG tristate "Extended Error Log support" - depends on X86_MCE && X86_LOCAL_APIC + depends on X86_MCE && X86_LOCAL_APIC && EDAC select UEFI_CPER - select RAS default n help Certain usages such as Predictive Failure Analysis (PFA) require diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a15270a806fc..502ea4dc2080 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -229,7 +229,7 @@ static int __init extlog_init(void) if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) return -ENODEV; - if (get_edac_report_status() == EDAC_REPORTING_FORCE) { + if (edac_get_report_status() == EDAC_REPORTING_FORCE) { pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); return -EPERM; } @@ -285,8 +285,8 @@ static int __init extlog_init(void) * eMCA event report method has higher priority than EDAC method, * unless EDAC event report method is mandatory. */ - old_edac_report_status = get_edac_report_status(); - set_edac_report_status(EDAC_REPORTING_DISABLED); + old_edac_report_status = edac_get_report_status(); + edac_set_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,7 +308,7 @@ err: static void __exit extlog_exit(void) { - set_edac_report_status(old_edac_report_status); + edac_set_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c index 747c2ba98534..1b64419e2fec 100644 --- a/drivers/acpi/acpi_ipmi.c +++ b/drivers/acpi/acpi_ipmi.c @@ -429,8 +429,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data) if (msg->recv_type == IPMI_RESPONSE_RECV_TYPE && msg->msg.data_len == 1) { if (msg->msg.data[0] == IPMI_TIMEOUT_COMPLETION_CODE) { - dev_WARN_ONCE(dev, true, - "Unexpected response (timeout).\n"); + dev_dbg_once(dev, "Unexpected response (timeout).\n"); tx_msg->msg_done = ACPI_IPMI_TIMEOUT; } goto out_comp; diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 70b57d2229d6..ff6cb9e4c381 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -14,7 +14,6 @@ menuconfig ATA tristate "Serial ATA and Parallel ATA drivers (libata)" depends on HAS_IOMEM depends on BLOCK - depends on !(M32R || S390) || BROKEN select SCSI select GLOB ---help--- @@ -118,6 +117,15 @@ config AHCI_DA850 If unsure, say N. +config AHCI_DM816 + tristate "DaVinci DM816 AHCI SATA support" + depends on ARCH_OMAP2PLUS + help + This option enables support for the DaVinci DM816 SoC's + onboard AHCI SATA controller. + + If unsure, say N. + config AHCI_ST tristate "ST AHCI SATA support" depends on ARCH_STI @@ -885,14 +893,6 @@ config PATA_AT32 If unsure, say N. -config PATA_AT91 - tristate "PATA support for AT91SAM9260" - depends on ARM && SOC_AT91SAM9 - help - This option enables support for IDE devices on the Atmel AT91SAM9260 SoC. - - If unsure, say N. - config PATA_CMD640_PCI tristate "CMD640 PCI PATA support (Experimental)" depends on PCI diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 89a0a1915d36..3048cc100a46 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o obj-$(CONFIG_AHCI_BRCM) += ahci_brcm.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_CEVA) += ahci_ceva.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_DA850) += ahci_da850.o libahci.o libahci_platform.o +obj-$(CONFIG_AHCI_DM816) += ahci_dm816.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_IMX) += ahci_imx.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_MVEBU) += ahci_mvebu.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_OCTEON) += ahci_octeon.o @@ -91,7 +92,6 @@ obj-$(CONFIG_PATA_WINBOND) += pata_sl82c105.o # SFF PIO only obj-$(CONFIG_PATA_AT32) += pata_at32.o -obj-$(CONFIG_PATA_AT91) += pata_at91.o obj-$(CONFIG_PATA_CMD640_PCI) += pata_cmd640.o obj-$(CONFIG_PATA_FALCON) += pata_falcon.o obj-$(CONFIG_PATA_ISAPNP) += pata_isapnp.o diff --git a/drivers/ata/ahci_dm816.c b/drivers/ata/ahci_dm816.c new file mode 100644 index 000000000000..fbd827c3a75c --- /dev/null +++ b/drivers/ata/ahci_dm816.c @@ -0,0 +1,200 @@ +/* + * DaVinci DM816 AHCI SATA platform driver + * + * Copyright (C) 2017 BayLibre SAS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pm.h> +#include <linux/platform_device.h> +#include <linux/libata.h> +#include <linux/ahci_platform.h> + +#include "ahci.h" + +#define AHCI_DM816_DRV_NAME "ahci-dm816" + +#define AHCI_DM816_PHY_ENPLL(x) ((x) << 0) +#define AHCI_DM816_PHY_MPY(x) ((x) << 1) +#define AHCI_DM816_PHY_LOS(x) ((x) << 12) +#define AHCI_DM816_PHY_RXCDR(x) ((x) << 13) +#define AHCI_DM816_PHY_RXEQ(x) ((x) << 16) +#define AHCI_DM816_PHY_TXSWING(x) ((x) << 23) + +#define AHCI_DM816_P0PHYCR_REG 0x178 +#define AHCI_DM816_P1PHYCR_REG 0x1f8 + +#define AHCI_DM816_PLL_OUT 1500000000LU + +static const unsigned long pll_mpy_table[] = { + 400, 500, 600, 800, 825, 1000, 1200, + 1250, 1500, 1600, 1650, 2000, 2200, 2500 +}; + +static int ahci_dm816_get_mpy_bits(unsigned long refclk_rate) +{ + unsigned long pll_multiplier; + int i; + + /* + * We need to determine the value of the multiplier (MPY) bits. + * In order to include the 8.25 multiplier we need to first divide + * the refclk rate by 100. + */ + pll_multiplier = AHCI_DM816_PLL_OUT / (refclk_rate / 100); + + for (i = 0; i < ARRAY_SIZE(pll_mpy_table); i++) { + if (pll_mpy_table[i] == pll_multiplier) + return i; + } + + /* + * We should have divided evenly - if not, return an invalid + * value. + */ + return -1; +} + +static int ahci_dm816_phy_init(struct ahci_host_priv *hpriv, struct device *dev) +{ + unsigned long refclk_rate; + int mpy; + u32 val; + + /* + * We should have been supplied two clocks: the functional and + * keep-alive clock and the external reference clock. We need the + * rate of the latter to calculate the correct value of MPY bits. + */ + if (!hpriv->clks[1]) { + dev_err(dev, "reference clock not supplied\n"); + return -EINVAL; + } + + refclk_rate = clk_get_rate(hpriv->clks[1]); + if ((refclk_rate % 100) != 0) { + dev_err(dev, "reference clock rate must be divisible by 100\n"); + return -EINVAL; + } + + mpy = ahci_dm816_get_mpy_bits(refclk_rate); + if (mpy < 0) { + dev_err(dev, "can't calculate the MPY bits value\n"); + return -EINVAL; + } + + /* Enable the PHY and configure the first HBA port. */ + val = AHCI_DM816_PHY_MPY(mpy) | AHCI_DM816_PHY_LOS(1) | + AHCI_DM816_PHY_RXCDR(4) | AHCI_DM816_PHY_RXEQ(1) | + AHCI_DM816_PHY_TXSWING(3) | AHCI_DM816_PHY_ENPLL(1); + writel(val, hpriv->mmio + AHCI_DM816_P0PHYCR_REG); + + /* Configure the second HBA port. */ + val = AHCI_DM816_PHY_LOS(1) | AHCI_DM816_PHY_RXCDR(4) | + AHCI_DM816_PHY_RXEQ(1) | AHCI_DM816_PHY_TXSWING(3); + writel(val, hpriv->mmio + AHCI_DM816_P1PHYCR_REG); + + return 0; +} + +static int ahci_dm816_softreset(struct ata_link *link, + unsigned int *class, unsigned long deadline) +{ + int pmp, ret; + + pmp = sata_srst_pmp(link); + + /* + * There's an issue with the SATA controller on DM816 SoC: if we + * enable Port Multiplier support, but the drive is connected directly + * to the board, it can't be detected. As a workaround: if PMP is + * enabled, we first call ahci_do_softreset() and pass it the result of + * sata_srst_pmp(). If this call fails, we retry with pmp = 0. + */ + ret = ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready); + if (pmp && ret == -EBUSY) + return ahci_do_softreset(link, class, 0, + deadline, ahci_check_ready); + + return ret; +} + +static struct ata_port_operations ahci_dm816_port_ops = { + .inherits = &ahci_platform_ops, + .softreset = ahci_dm816_softreset, +}; + +static const struct ata_port_info ahci_dm816_port_info = { + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_dm816_port_ops, +}; + +static struct scsi_host_template ahci_dm816_platform_sht = { + AHCI_SHT(AHCI_DM816_DRV_NAME), +}; + +static int ahci_dm816_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ahci_host_priv *hpriv; + int rc; + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + rc = ahci_dm816_phy_init(hpriv, dev); + if (rc) + goto disable_resources; + + rc = ahci_platform_init_host(pdev, hpriv, + &ahci_dm816_port_info, + &ahci_dm816_platform_sht); + if (rc) + goto disable_resources; + + return 0; + +disable_resources: + ahci_platform_disable_resources(hpriv); + + return rc; +} + +static SIMPLE_DEV_PM_OPS(ahci_dm816_pm_ops, + ahci_platform_suspend, + ahci_platform_resume); + +static const struct of_device_id ahci_dm816_of_match[] = { + { .compatible = "ti,dm816-ahci", }, + { }, +}; +MODULE_DEVICE_TABLE(of, ahci_dm816_of_match); + +static struct platform_driver ahci_dm816_driver = { + .probe = ahci_dm816_probe, + .remove = ata_platform_remove_one, + .driver = { + .name = AHCI_DM816_DRV_NAME, + .of_match_table = ahci_dm816_of_match, + .pm = &ahci_dm816_pm_ops, + }, +}; +module_platform_driver(ahci_dm816_driver); + +MODULE_DESCRIPTION("DaVinci DM816 AHCI SATA platform driver"); +MODULE_AUTHOR("Bartosz Golaszewski <bgolaszewski@baylibre.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/ata/ahci_octeon.c b/drivers/ata/ahci_octeon.c index ea865fe953e1..5a44e089c6bb 100644 --- a/drivers/ata/ahci_octeon.c +++ b/drivers/ata/ahci_octeon.c @@ -38,11 +38,6 @@ static int ahci_octeon_probe(struct platform_device *pdev) int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Platform resource[0] is missing\n"); - return -ENODEV; - } - base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(base)) return PTR_ERR(base); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ca75823697dd..2d83b8c75965 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4910,7 +4910,7 @@ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, * LOCKING: * spin_lock_irqsave(host lock) */ -void ata_sg_clean(struct ata_queued_cmd *qc) +static void ata_sg_clean(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct scatterlist *sg = qc->sg; @@ -5902,9 +5902,9 @@ struct ata_port *ata_port_alloc(struct ata_host *host) INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); init_completion(&ap->park_req_pending); - init_timer_deferrable(&ap->fastdrain_timer); - ap->fastdrain_timer.function = ata_eh_fastdrain_timerfn; - ap->fastdrain_timer.data = (unsigned long)ap; + setup_deferrable_timer(&ap->fastdrain_timer, + ata_eh_fastdrain_timerfn, + (unsigned long)ap); ap->cbl = ATA_CBL_NONE; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1ac70744ae7b..49ba9834c715 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3393,46 +3393,6 @@ static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax, } /** - * ata_format_dsm_trim_descr() - SATL Write Same to ATA SCT Write Same - * @cmd: SCSI command being translated - * @lba: Starting sector - * @num: Number of sectors to be zero'd. - * - * Rewrite the WRITE SAME payload to be an SCT Write Same formatted - * descriptor. - * NOTE: Writes a pattern (0's) in the foreground. - * - * Return: Number of bytes copied into sglist. - */ -static size_t ata_format_sct_write_same(struct scsi_cmnd *cmd, u64 lba, u64 num) -{ - struct scsi_device *sdp = cmd->device; - size_t len = sdp->sector_size; - size_t r; - u16 *buf; - unsigned long flags; - - spin_lock_irqsave(&ata_scsi_rbuf_lock, flags); - buf = ((void *)ata_scsi_rbuf); - - put_unaligned_le16(0x0002, &buf[0]); /* SCT_ACT_WRITE_SAME */ - put_unaligned_le16(0x0101, &buf[1]); /* WRITE PTRN FG */ - put_unaligned_le64(lba, &buf[2]); - put_unaligned_le64(num, &buf[6]); - put_unaligned_le32(0u, &buf[10]); /* pattern */ - - WARN_ON(len > ATA_SCSI_RBUF_SIZE); - - if (len > ATA_SCSI_RBUF_SIZE) - len = ATA_SCSI_RBUF_SIZE; - - r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len); - spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags); - - return r; -} - -/** * ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same * @qc: Command to be translated * @@ -3462,32 +3422,31 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) if (unlikely(!dev->dma_mode)) goto invalid_opcode; + /* + * We only allow sending this command through the block layer, + * as it modifies the DATA OUT buffer, which would corrupt user + * memory for SG_IO commands. + */ + if (unlikely(blk_rq_is_passthrough(scmd->request))) + goto invalid_opcode; + if (unlikely(scmd->cmd_len < 16)) { fp = 15; goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); - if (unmap) { - /* If trim is not enabled the cmd is invalid. */ - if ((dev->horkage & ATA_HORKAGE_NOTRIM) || - !ata_id_has_trim(dev->id)) { - fp = 1; - bp = 3; - goto invalid_fld; - } - /* If the request is too large the cmd is invalid */ - if (n_block > 0xffff * trmax) { - fp = 2; - goto invalid_fld; - } - } else { - /* If write same is not available the cmd is invalid */ - if (!ata_id_sct_write_same(dev->id)) { - fp = 1; - bp = 3; - goto invalid_fld; - } + if (!unmap || + (dev->horkage & ATA_HORKAGE_NOTRIM) || + !ata_id_has_trim(dev->id)) { + fp = 1; + bp = 3; + goto invalid_fld; + } + /* If the request is too large the cmd is invalid */ + if (n_block > 0xffff * trmax) { + fp = 2; + goto invalid_fld; } /* @@ -3502,49 +3461,28 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) * For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count) * is defined as number of 512 byte blocks to be transferred. */ - if (unmap) { - size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block); - if (size != len) - goto invalid_param_len; - if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) { - /* Newer devices support queued TRIM commands */ - tf->protocol = ATA_PROT_NCQ; - tf->command = ATA_CMD_FPDMA_SEND; - tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f; - tf->nsect = qc->tag << 3; - tf->hob_feature = (size / 512) >> 8; - tf->feature = size / 512; + size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block); + if (size != len) + goto invalid_param_len; - tf->auxiliary = 1; - } else { - tf->protocol = ATA_PROT_DMA; - tf->hob_feature = 0; - tf->feature = ATA_DSM_TRIM; - tf->hob_nsect = (size / 512) >> 8; - tf->nsect = size / 512; - tf->command = ATA_CMD_DSM; - } - } else { - size = ata_format_sct_write_same(scmd, block, n_block); - if (size != len) - goto invalid_param_len; + if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) { + /* Newer devices support queued TRIM commands */ + tf->protocol = ATA_PROT_NCQ; + tf->command = ATA_CMD_FPDMA_SEND; + tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f; + tf->nsect = qc->tag << 3; + tf->hob_feature = (size / 512) >> 8; + tf->feature = size / 512; - tf->hob_feature = 0; - tf->feature = 0; - tf->hob_nsect = 0; - tf->nsect = 1; - tf->lbah = 0; - tf->lbam = 0; - tf->lbal = ATA_CMD_STANDBYNOW1; - tf->hob_lbah = 0; - tf->hob_lbam = 0; - tf->hob_lbal = 0; - tf->device = ATA_CMD_STANDBYNOW1; + tf->auxiliary = 1; + } else { tf->protocol = ATA_PROT_DMA; - tf->command = ATA_CMD_WRITE_LOG_DMA_EXT; - if (unlikely(dev->flags & ATA_DFLAG_PIO)) - tf->command = ATA_CMD_WRITE_LOG_EXT; + tf->hob_feature = 0; + tf->feature = ATA_DSM_TRIM; + tf->hob_nsect = (size / 512) >> 8; + tf->nsect = size / 512; + tf->command = ATA_CMD_DSM; } tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 | @@ -3619,10 +3557,6 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) case START_STOP: supported = 3; break; - case WRITE_SAME_16: - if (!ata_id_sct_write_same(dev->id)) - break; - /* fallthrough: if SCT ... only enable for ZBC */ case ZBC_IN: case ZBC_OUT: if (ata_id_zoned_cap(dev->id) || diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c deleted file mode 100644 index fd5b34f0d007..000000000000 --- a/drivers/ata/pata_at91.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * PATA driver for AT91SAM9260 Static Memory Controller - * with CompactFlash interface in True IDE mode - * - * Copyright (C) 2009 Matyukevich Sergey - * 2011 Igor Plyatov - * - * Based on: - * * generic platform driver by Paul Mundt: drivers/ata/pata_platform.c - * * pata_at32 driver by Kristoffer Nyborg Gregertsen - * * at91_ide driver by Stanislaw Gruszka - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/blkdev.h> -#include <linux/gfp.h> -#include <scsi/scsi_host.h> -#include <linux/ata.h> -#include <linux/clk.h> -#include <linux/libata.h> -#include <linux/mfd/syscon.h> -#include <linux/mfd/syscon/atmel-smc.h> -#include <linux/platform_device.h> -#include <linux/ata_platform.h> -#include <linux/platform_data/atmel.h> -#include <linux/regmap.h> -#include <linux/gpio.h> - -#define DRV_NAME "pata_at91" -#define DRV_VERSION "0.3" - -#define CF_IDE_OFFSET 0x00c00000 -#define CF_ALT_IDE_OFFSET 0x00e00000 -#define CF_IDE_RES_SIZE 0x08 -#define CS_PULSE_MAXIMUM 319 -#define ER_SMC_CALC 1 -#define ER_SMC_RECALC 2 - -struct at91_ide_info { - unsigned long mode; - unsigned int cs; - struct clk *mck; - void __iomem *ide_addr; - void __iomem *alt_addr; -}; - -/** - * struct smc_range - range of valid values for SMC register. - */ -struct smc_range { - int min; - int max; -}; - -struct regmap *smc; - -struct at91sam9_smc_generic_fields { - struct regmap_field *setup; - struct regmap_field *pulse; - struct regmap_field *cycle; - struct regmap_field *mode; -} fields; - -/** - * adjust_smc_value - adjust value for one of SMC registers. - * @value: adjusted value - * @range: array of SMC ranges with valid values - * @size: SMC ranges array size - * - * This returns the difference between input and output value or negative - * in case of invalid input value. - * If negative returned, then output value = maximal possible from ranges. - */ -static int adjust_smc_value(int *value, struct smc_range *range, int size) -{ - int maximum = (range + size - 1)->max; - int remainder; - - do { - if (*value < range->min) { - remainder = range->min - *value; - *value = range->min; /* nearest valid value */ - return remainder; - } else if ((range->min <= *value) && (*value <= range->max)) - return 0; - - range++; - } while (--size); - *value = maximum; - - return -1; /* invalid value */ -} - -/** - * calc_smc_vals - calculate SMC register values - * @dev: ATA device - * @setup: SMC_SETUP register value - * @pulse: SMC_PULSE register value - * @cycle: SMC_CYCLE register value - * - * This returns negative in case of invalid values for SMC registers: - * -ER_SMC_RECALC - recalculation required for SMC values, - * -ER_SMC_CALC - calculation failed (invalid input values). - * - * SMC use special coding scheme, see "Coding and Range of Timing - * Parameters" table from AT91SAM9 datasheets. - * - * SMC_SETUP = 128*setup[5] + setup[4:0] - * SMC_PULSE = 256*pulse[6] + pulse[5:0] - * SMC_CYCLE = 256*cycle[8:7] + cycle[6:0] - */ -static int calc_smc_vals(struct device *dev, - int *setup, int *pulse, int *cycle, int *cs_pulse) -{ - int ret_val; - int err = 0; - struct smc_range range_setup[] = { /* SMC_SETUP valid values */ - {.min = 0, .max = 31}, /* first range */ - {.min = 128, .max = 159} /* second range */ - }; - struct smc_range range_pulse[] = { /* SMC_PULSE valid values */ - {.min = 0, .max = 63}, /* first range */ - {.min = 256, .max = 319} /* second range */ - }; - struct smc_range range_cycle[] = { /* SMC_CYCLE valid values */ - {.min = 0, .max = 127}, /* first range */ - {.min = 256, .max = 383}, /* second range */ - {.min = 512, .max = 639}, /* third range */ - {.min = 768, .max = 895} /* fourth range */ - }; - - ret_val = adjust_smc_value(setup, range_setup, ARRAY_SIZE(range_setup)); - if (ret_val < 0) - dev_warn(dev, "maximal SMC Setup value\n"); - else - *cycle += ret_val; - - ret_val = adjust_smc_value(pulse, range_pulse, ARRAY_SIZE(range_pulse)); - if (ret_val < 0) - dev_warn(dev, "maximal SMC Pulse value\n"); - else - *cycle += ret_val; - - ret_val = adjust_smc_value(cycle, range_cycle, ARRAY_SIZE(range_cycle)); - if (ret_val < 0) - dev_warn(dev, "maximal SMC Cycle value\n"); - - *cs_pulse = *cycle; - if (*cs_pulse > CS_PULSE_MAXIMUM) { - dev_err(dev, "unable to calculate valid SMC settings\n"); - return -ER_SMC_CALC; - } - - ret_val = adjust_smc_value(cs_pulse, range_pulse, - ARRAY_SIZE(range_pulse)); - if (ret_val < 0) { - dev_warn(dev, "maximal SMC CS Pulse value\n"); - } else if (ret_val != 0) { - *cycle = *cs_pulse; - dev_warn(dev, "SMC Cycle extended\n"); - err = -ER_SMC_RECALC; - } - - return err; -} - -/** - * to_smc_format - convert values into SMC format - * @setup: SETUP value of SMC Setup Register - * @pulse: PULSE value of SMC Pulse Register - * @cycle: CYCLE value of SMC Cycle Register - * @cs_pulse: NCS_PULSE value of SMC Pulse Register - */ -static void to_smc_format(int *setup, int *pulse, int *cycle, int *cs_pulse) -{ - *setup = (*setup & 0x1f) | ((*setup & 0x80) >> 2); - *pulse = (*pulse & 0x3f) | ((*pulse & 0x100) >> 2); - *cycle = (*cycle & 0x7f) | ((*cycle & 0x300) >> 1); - *cs_pulse = (*cs_pulse & 0x3f) | ((*cs_pulse & 0x100) >> 2); -} - -static unsigned long calc_mck_cycles(unsigned long ns, unsigned long mck_hz) -{ - unsigned long mul; - - /* - * cycles = x [nsec] * f [Hz] / 10^9 [ns in sec] = - * x * (f / 1_000_000_000) = - * x * ((f * 65536) / 1_000_000_000) / 65536 = - * x * (((f / 10_000) * 65536) / 100_000) / 65536 = - */ - - mul = (mck_hz / 10000) << 16; - mul /= 100000; - - return (ns * mul + 65536) >> 16; /* rounding */ -} - -/** - * set_smc_timing - SMC timings setup. - * @dev: device - * @info: AT91 IDE info - * @ata: ATA timings - * - * Its assumed that write timings are same as read timings, - * cs_setup = 0 and cs_pulse = cycle. - */ -static void set_smc_timing(struct device *dev, struct ata_device *adev, - struct at91_ide_info *info, const struct ata_timing *ata) -{ - int ret = 0; - int use_iordy; - unsigned int t6z; /* data tristate time in ns */ - unsigned int cycle; /* SMC Cycle width in MCK ticks */ - unsigned int setup; /* SMC Setup width in MCK ticks */ - unsigned int pulse; /* CFIOR and CFIOW pulse width in MCK ticks */ - unsigned int cs_pulse; /* CS4 or CS5 pulse width in MCK ticks*/ - unsigned int tdf_cycles; /* SMC TDF MCK ticks */ - unsigned long mck_hz; /* MCK frequency in Hz */ - - t6z = (ata->mode < XFER_PIO_5) ? 30 : 20; - mck_hz = clk_get_rate(info->mck); - cycle = calc_mck_cycles(ata->cyc8b, mck_hz); - setup = calc_mck_cycles(ata->setup, mck_hz); - pulse = calc_mck_cycles(ata->act8b, mck_hz); - tdf_cycles = calc_mck_cycles(t6z, mck_hz); - - do { - ret = calc_smc_vals(dev, &setup, &pulse, &cycle, &cs_pulse); - } while (ret == -ER_SMC_RECALC); - - if (ret == -ER_SMC_CALC) - dev_err(dev, "Interface may not operate correctly\n"); - - dev_dbg(dev, "SMC Setup=%u, Pulse=%u, Cycle=%u, CS Pulse=%u\n", - setup, pulse, cycle, cs_pulse); - to_smc_format(&setup, &pulse, &cycle, &cs_pulse); - /* disable or enable waiting for IORDY signal */ - use_iordy = ata_pio_need_iordy(adev); - if (use_iordy) - info->mode |= AT91_SMC_EXNWMODE_READY; - - if (tdf_cycles > 15) { - tdf_cycles = 15; - dev_warn(dev, "maximal SMC TDF Cycles value\n"); - } - - dev_dbg(dev, "Use IORDY=%u, TDF Cycles=%u\n", use_iordy, tdf_cycles); - - regmap_fields_write(fields.setup, info->cs, - AT91SAM9_SMC_NRDSETUP(setup) | - AT91SAM9_SMC_NWESETUP(setup) | - AT91SAM9_SMC_NCS_NRDSETUP(0) | - AT91SAM9_SMC_NCS_WRSETUP(0)); - regmap_fields_write(fields.pulse, info->cs, - AT91SAM9_SMC_NRDPULSE(pulse) | - AT91SAM9_SMC_NWEPULSE(pulse) | - AT91SAM9_SMC_NCS_NRDPULSE(cs_pulse) | - AT91SAM9_SMC_NCS_WRPULSE(cs_pulse)); - regmap_fields_write(fields.cycle, info->cs, - AT91SAM9_SMC_NRDCYCLE(cycle) | - AT91SAM9_SMC_NWECYCLE(cycle)); - regmap_fields_write(fields.mode, info->cs, info->mode | - AT91_SMC_TDF_(tdf_cycles)); -} - -static void pata_at91_set_piomode(struct ata_port *ap, struct ata_device *adev) -{ - struct at91_ide_info *info = ap->host->private_data; - struct ata_timing timing; - int ret; - - /* Compute ATA timing and set it to SMC */ - ret = ata_timing_compute(adev, adev->pio_mode, &timing, 1000, 0); - if (ret) { - dev_warn(ap->dev, "Failed to compute ATA timing %d, " - "set PIO_0 timing\n", ret); - timing = *ata_timing_find_mode(XFER_PIO_0); - } - set_smc_timing(ap->dev, adev, info, &timing); -} - -static unsigned int pata_at91_data_xfer_noirq(struct ata_queued_cmd *qc, - unsigned char *buf, unsigned int buflen, int rw) -{ - struct at91_ide_info *info = qc->dev->link->ap->host->private_data; - unsigned int consumed; - unsigned int mode; - unsigned long flags; - - local_irq_save(flags); - regmap_fields_read(fields.mode, info->cs, &mode); - - /* set 16bit mode before writing data */ - regmap_fields_write(fields.mode, info->cs, (mode & ~AT91_SMC_DBW) | - AT91_SMC_DBW_16); - - consumed = ata_sff_data_xfer(qc, buf, buflen, rw); - - /* restore 8bit mode after data is written */ - regmap_fields_write(fields.mode, info->cs, (mode & ~AT91_SMC_DBW) | - AT91_SMC_DBW_8); - - local_irq_restore(flags); - return consumed; -} - -static struct scsi_host_template pata_at91_sht = { - ATA_PIO_SHT(DRV_NAME), -}; - -static struct ata_port_operations pata_at91_port_ops = { - .inherits = &ata_sff_port_ops, - - .sff_data_xfer = pata_at91_data_xfer_noirq, - .set_piomode = pata_at91_set_piomode, - .cable_detect = ata_cable_40wire, -}; - -static int at91sam9_smc_fields_init(struct device *dev) -{ - struct reg_field field = REG_FIELD(0, 0, 31); - - field.id_size = 8; - field.id_offset = AT91SAM9_SMC_GENERIC_BLK_SZ; - - field.reg = AT91SAM9_SMC_SETUP(AT91SAM9_SMC_GENERIC); - fields.setup = devm_regmap_field_alloc(dev, smc, field); - if (IS_ERR(fields.setup)) - return PTR_ERR(fields.setup); - - field.reg = AT91SAM9_SMC_PULSE(AT91SAM9_SMC_GENERIC); - fields.pulse = devm_regmap_field_alloc(dev, smc, field); - if (IS_ERR(fields.pulse)) - return PTR_ERR(fields.pulse); - - field.reg = AT91SAM9_SMC_CYCLE(AT91SAM9_SMC_GENERIC); - fields.cycle = devm_regmap_field_alloc(dev, smc, field); - if (IS_ERR(fields.cycle)) - return PTR_ERR(fields.cycle); - - field.reg = AT91SAM9_SMC_MODE(AT91SAM9_SMC_GENERIC); - fields.mode = devm_regmap_field_alloc(dev, smc, field); - - return PTR_ERR_OR_ZERO(fields.mode); -} - -static int pata_at91_probe(struct platform_device *pdev) -{ - struct at91_cf_data *board = dev_get_platdata(&pdev->dev); - struct device *dev = &pdev->dev; - struct at91_ide_info *info; - struct resource *mem_res; - struct ata_host *host; - struct ata_port *ap; - - int irq_flags = 0; - int irq = 0; - int ret; - - /* get platform resources: IO/CTL memories and irq/rst pins */ - - if (pdev->num_resources != 1) { - dev_err(&pdev->dev, "invalid number of resources\n"); - return -EINVAL; - } - - mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - if (!mem_res) { - dev_err(dev, "failed to get mem resource\n"); - return -EINVAL; - } - - irq = board->irq_pin; - - smc = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "atmel,smc"); - if (IS_ERR(smc)) - return PTR_ERR(smc); - - ret = at91sam9_smc_fields_init(dev); - if (ret < 0) - return ret; - - /* init ata host */ - - host = ata_host_alloc(dev, 1); - - if (!host) - return -ENOMEM; - - ap = host->ports[0]; - ap->ops = &pata_at91_port_ops; - ap->flags |= ATA_FLAG_SLAVE_POSS; - ap->pio_mask = ATA_PIO4; - - if (!gpio_is_valid(irq)) { - ap->flags |= ATA_FLAG_PIO_POLLING; - ata_port_desc(ap, "no IRQ, using PIO polling"); - } - - info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); - - if (!info) { - dev_err(dev, "failed to allocate memory for private data\n"); - return -ENOMEM; - } - - info->mck = clk_get(NULL, "mck"); - - if (IS_ERR(info->mck)) { - dev_err(dev, "failed to get access to mck clock\n"); - return -ENODEV; - } - - info->cs = board->chipselect; - info->mode = AT91_SMC_READMODE | AT91_SMC_WRITEMODE | - AT91_SMC_EXNWMODE_READY | AT91_SMC_BAT_SELECT | - AT91_SMC_DBW_8 | AT91_SMC_TDF_(0); - - info->ide_addr = devm_ioremap(dev, - mem_res->start + CF_IDE_OFFSET, CF_IDE_RES_SIZE); - - if (!info->ide_addr) { - dev_err(dev, "failed to map IO base\n"); - ret = -ENOMEM; - goto err_put; - } - - info->alt_addr = devm_ioremap(dev, - mem_res->start + CF_ALT_IDE_OFFSET, CF_IDE_RES_SIZE); - - if (!info->alt_addr) { - dev_err(dev, "failed to map CTL base\n"); - ret = -ENOMEM; - goto err_put; - } - - ap->ioaddr.cmd_addr = info->ide_addr; - ap->ioaddr.ctl_addr = info->alt_addr + 0x06; - ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr; - - ata_sff_std_ports(&ap->ioaddr); - - ata_port_desc(ap, "mmio cmd 0x%llx ctl 0x%llx", - (unsigned long long)mem_res->start + CF_IDE_OFFSET, - (unsigned long long)mem_res->start + CF_ALT_IDE_OFFSET); - - host->private_data = info; - - ret = ata_host_activate(host, gpio_is_valid(irq) ? gpio_to_irq(irq) : 0, - gpio_is_valid(irq) ? ata_sff_interrupt : NULL, - irq_flags, &pata_at91_sht); - if (ret) - goto err_put; - - return 0; - -err_put: - clk_put(info->mck); - return ret; -} - -static int pata_at91_remove(struct platform_device *pdev) -{ - struct ata_host *host = platform_get_drvdata(pdev); - struct at91_ide_info *info; - - if (!host) - return 0; - info = host->private_data; - - ata_host_detach(host); - - if (!info) - return 0; - - clk_put(info->mck); - - return 0; -} - -static struct platform_driver pata_at91_driver = { - .probe = pata_at91_probe, - .remove = pata_at91_remove, - .driver = { - .name = DRV_NAME, - }, -}; - -module_platform_driver(pata_at91_driver); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Driver for CF in True IDE mode on AT91SAM9260 SoC"); -MODULE_AUTHOR("Matyukevich Sergey"); -MODULE_VERSION(DRV_VERSION); - diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index e347e7acd8ed..0adcb40d2794 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1328,7 +1328,7 @@ static int pata_macio_pci_resume(struct pci_dev *pdev) } #endif /* CONFIG_PM_SLEEP */ -static struct of_device_id pata_macio_match[] = +static const struct of_device_id pata_macio_match[] = { { .name = "IDE", diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c index 252ba27fa63b..9730125530f6 100644 --- a/drivers/ata/pata_mpc52xx.c +++ b/drivers/ata/pata_mpc52xx.c @@ -847,7 +847,7 @@ mpc52xx_ata_resume(struct platform_device *op) } #endif -static struct of_device_id mpc52xx_ata_of_match[] = { +static const struct of_device_id mpc52xx_ata_of_match[] = { { .compatible = "fsl,mpc5200-ata", }, { .compatible = "mpc5200-ata", }, {}, diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c index 201a32d0627f..01161c1aef4d 100644 --- a/drivers/ata/pata_of_platform.c +++ b/drivers/ata/pata_of_platform.c @@ -67,7 +67,7 @@ static int pata_of_platform_probe(struct platform_device *ofdev) reg_shift, pio_mask, &pata_platform_sht); } -static struct of_device_id pata_of_platform_match[] = { +static const struct of_device_id pata_of_platform_match[] = { { .compatible = "ata-generic", }, { }, }; diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index a723ae929783..01734d54c69c 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1612,7 +1612,7 @@ static int sata_fsl_resume(struct platform_device *op) } #endif -static struct of_device_id fsl_sata_match[] = { +static const struct of_device_id fsl_sata_match[] = { { .compatible = "fsl,pq-sata", }, diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 00ce26d0c047..b66bcda88320 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -4286,7 +4286,7 @@ static int mv_platform_resume(struct platform_device *pdev) #endif #ifdef CONFIG_OF -static struct of_device_id mv_sata_dt_ids[] = { +static const struct of_device_id mv_sata_dt_ids[] = { { .compatible = "marvell,armada-370-sata", }, { .compatible = "marvell,orion-sata", }, {}, diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f744de7a0f9b..19df4918e37e 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -312,22 +312,6 @@ config BLK_DEV_SKD Use device /dev/skd$N amd /dev/skd$Np$M. -config BLK_DEV_OSD - tristate "OSD object-as-blkdev support" - depends on SCSI_OSD_ULD - ---help--- - Saying Y or M here will allow the exporting of a single SCSI - OSD (object-based storage) object as a Linux block device. - - For example, if you create a 2G object on an OSD device, - you can then use this module to present that 2G object as - a Linux block device. - - To compile this driver as a module, choose M here: the - module will be called osdblk. - - If unsure, say N. - config BLK_DEV_SX8 tristate "Promise SATA SX8 support" depends on PCI @@ -434,23 +418,6 @@ config ATA_OVER_ETH This driver provides Support for ATA over Ethernet block devices like the Coraid EtherDrive (R) Storage Blade. -config MG_DISK - tristate "mGine mflash, gflash support" - depends on ARM && GPIOLIB - help - mGine mFlash(gFlash) block device driver - -config MG_DISK_RES - int "Size of reserved area before MBR" - depends on MG_DISK - default 0 - help - Define size of reserved area that usually used for boot. Unit is KB. - All of the block device operation will be taken this value as start - offset - Examples: - 1024 => 1 MB - config SUNVDC tristate "Sun Virtual Disk Client support" depends on SUN_LDOMS @@ -512,19 +479,7 @@ config VIRTIO_BLK_SCSI Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on virtio-blk devices. This is only supported for the legacy virtio protocol and not enabled by default by any hypervisor. - Your probably want to virtio-scsi instead. - -config BLK_DEV_HD - bool "Very old hard disk (MFM/RLL/IDE) driver" - depends on HAVE_IDE - depends on !ARM || ARCH_RPC || BROKEN - help - This is a very old hard disk driver that lacks the enhanced - functionality of the newer ones. - - It is required for systems with ancient MFM/RLL/ESDI drives. - - If unsure, say N. + You probably want to use virtio-scsi instead. config BLK_DEV_RBD tristate "Rados block device (RBD)" diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 1e9661e26f29..ec8c36897b75 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -19,10 +19,8 @@ obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o -obj-$(CONFIG_MG_DISK) += mg_disk.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_SKD) += skd.o -obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o @@ -30,7 +28,6 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o -obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 2104b1b4ccda..fa69ecd52cb5 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -617,12 +617,12 @@ static void fd_error( void ) if (!fd_request) return; - fd_request->errors++; - if (fd_request->errors >= MAX_ERRORS) { + fd_request->error_count++; + if (fd_request->error_count >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); fd_end_request_cur(-EIO); } - else if (fd_request->errors == RECALIBRATE_ERRORS) { + else if (fd_request->error_count == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); if (SelectedDrive != -1) SUD.track = -1; @@ -1386,7 +1386,7 @@ static void setup_req_params( int drive ) ReqData = ReqBuffer + 512 * ReqCnt; if (UseTrackbuffer) - read_track = (ReqCmd == READ && fd_request->errors == 0); + read_track = (ReqCmd == READ && fd_request->error_count == 0); else read_track = 0; @@ -1409,8 +1409,10 @@ static struct request *set_next_request(void) fdc_queue = 0; if (q) { rq = blk_fetch_request(q); - if (rq) + if (rq) { + rq->error_count = 0; break; + } } } while (fdc_queue != old_pos); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 3adc32a3153b..4ec84d504780 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -134,28 +134,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) return page; } -static void brd_free_page(struct brd_device *brd, sector_t sector) -{ - struct page *page; - pgoff_t idx; - - spin_lock(&brd->brd_lock); - idx = sector >> PAGE_SECTORS_SHIFT; - page = radix_tree_delete(&brd->brd_pages, idx); - spin_unlock(&brd->brd_lock); - if (page) - __free_page(page); -} - -static void brd_zero_page(struct brd_device *brd, sector_t sector) -{ - struct page *page; - - page = brd_lookup_page(brd, sector); - if (page) - clear_highpage(page); -} - /* * Free all backing store pages and radix tree. This must only be called when * there are no other users of the device. @@ -212,24 +190,6 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) return 0; } -static void discard_from_brd(struct brd_device *brd, - sector_t sector, size_t n) -{ - while (n >= PAGE_SIZE) { - /* - * Don't want to actually discard pages here because - * re-allocating the pages can result in writeback - * deadlocks under heavy load. - */ - if (0) - brd_free_page(brd, sector); - else - brd_zero_page(brd, sector); - sector += PAGE_SIZE >> SECTOR_SHIFT; - n -= PAGE_SIZE; - } -} - /* * Copy n bytes from src to the brd starting at sector. Does not sleep. */ @@ -338,14 +298,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) goto io_error; - if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { - if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) || - bio->bi_iter.bi_size & ~PAGE_MASK) - goto io_error; - discard_from_brd(brd, sector, bio->bi_iter.bi_size); - goto out; - } - bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; int err; @@ -357,7 +309,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) sector += len >> SECTOR_SHIFT; } -out: bio_endio(bio); return BLK_QC_T_NONE; io_error: @@ -464,11 +415,6 @@ static struct brd_device *brd_alloc(int i) * is harmless) */ blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE); - - brd->brd_queue->limits.discard_granularity = PAGE_SIZE; - blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX); - brd->brd_queue->limits.discard_zeroes_data = 1; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); #ifdef CONFIG_BLK_DEV_RAM_DAX queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); #endif diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 8e1a4554951c..cd375503f7b0 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1864,8 +1864,7 @@ static void cciss_softirq_done(struct request *rq) /* set the residual count for pc requests */ if (blk_rq_is_passthrough(rq)) scsi_req(rq)->resid_len = c->err_info->ResidualCnt; - - blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO); + blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0); spin_lock_irqsave(&h->lock, flags); cmd_free(h, c); @@ -3140,18 +3139,19 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, { int retry_cmd = 0; struct request *rq = cmd->rq; + struct scsi_request *sreq = scsi_req(rq); - rq->errors = 0; + sreq->result = 0; if (timeout) - rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT); + sreq->result = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT); if (cmd->err_info->CommandStatus == 0) /* no error has occurred */ goto after_error_processing; switch (cmd->err_info->CommandStatus) { case CMD_TARGET_STATUS: - rq->errors = evaluate_target_status(h, cmd, &retry_cmd); + sreq->result = evaluate_target_status(h, cmd, &retry_cmd); break; case CMD_DATA_UNDERRUN: if (!blk_rq_is_passthrough(cmd->rq)) { @@ -3169,7 +3169,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_INVALID: dev_warn(&h->pdev->dev, "cciss: cmd %p is " "reported invalid\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3177,7 +3177,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_PROTOCOL_ERR: dev_warn(&h->pdev->dev, "cciss: cmd %p has " "protocol error\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3185,7 +3185,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_HARDWARE_ERR: dev_warn(&h->pdev->dev, "cciss: cmd %p had " " hardware error\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3193,7 +3193,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_CONNECTION_LOST: dev_warn(&h->pdev->dev, "cciss: cmd %p had " "connection lost\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3201,7 +3201,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_ABORTED: dev_warn(&h->pdev->dev, "cciss: cmd %p was " "aborted\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); @@ -3209,7 +3209,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_ABORT_FAILED: dev_warn(&h->pdev->dev, "cciss: cmd %p reports " "abort failed\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3224,21 +3224,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, } else dev_warn(&h->pdev->dev, "%p retried too many times\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); break; case CMD_TIMEOUT: dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_UNABORTABLE: dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3247,7 +3247,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, dev_warn(&h->pdev->dev, "cmd %p returned " "unknown status %x\n", cmd, cmd->err_info->CommandStatus); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3380,9 +3380,9 @@ static void do_cciss_request(struct request_queue *q) if (dma_mapping_error(&h->pdev->dev, temp64.val)) { dev_warn(&h->pdev->dev, "%s: error mapping page for DMA\n", __func__); - creq->errors = make_status_bytes(SAM_STAT_GOOD, - 0, DRIVER_OK, - DID_SOFT_ERROR); + scsi_req(creq)->result = + make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK, + DID_SOFT_ERROR); cmd_free(h, c); return; } @@ -3395,9 +3395,9 @@ static void do_cciss_request(struct request_queue *q) if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], (seg - (h->max_cmd_sgentries - 1)) * sizeof(SGDescriptor_struct))) { - creq->errors = make_status_bytes(SAM_STAT_GOOD, - 0, DRIVER_OK, - DID_SOFT_ERROR); + scsi_req(creq)->result = + make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK, + DID_SOFT_ERROR); cmd_free(h, c); return; } diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index de5c3ee8a790..494837e59f23 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -236,9 +236,6 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL"); seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); - - if (f & EE_IS_TRIM) - __seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim"); seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same"); seq_putc(m, '\n'); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 724d1c50fc52..d5da45bb03a6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -437,9 +437,6 @@ enum { /* is this a TRIM aka REQ_DISCARD? */ __EE_IS_TRIM, - /* our lower level cannot handle trim, - * and we want to fall back to zeroout instead */ - __EE_IS_TRIM_USE_ZEROOUT, /* In case a barrier failed, * we need to resubmit without the barrier flag. */ @@ -482,7 +479,6 @@ enum { #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) #define EE_IS_TRIM (1<<__EE_IS_TRIM) -#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT) #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) @@ -1561,8 +1557,6 @@ extern void start_resync_timer_fn(unsigned long data); extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); /* drbd_receiver.c */ -extern int drbd_issue_discard_or_zero_out(struct drbd_device *device, - sector_t start, unsigned int nr_sectors, bool discard); extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_ack_receiver(struct drbd_thread *thi); extern void drbd_send_ping_wf(struct work_struct *ws); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 92c60cbd04ee..84455c365f57 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -931,7 +931,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r p->qlim->io_min = cpu_to_be32(queue_io_min(q)); p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); p->qlim->discard_enabled = blk_queue_discard(q); - p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q); p->qlim->write_same_capable = !!q->limits.max_write_same_sectors; } else { q = device->rq_queue; @@ -941,7 +940,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r p->qlim->io_min = cpu_to_be32(queue_io_min(q)); p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); p->qlim->discard_enabled = 0; - p->qlim->discard_zeroes_data = 0; p->qlim->write_same_capable = 0; } } @@ -1668,7 +1666,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, (bio->bi_opf & REQ_FUA ? DP_FUA : 0) | (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) | (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) | - (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0); + (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) | + (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0); else return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 908c704e20aa..02255a0d68b9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1199,10 +1199,6 @@ static void decide_on_discard_support(struct drbd_device *device, struct drbd_connection *connection = first_peer_device(device)->connection; bool can_do = b ? blk_queue_discard(b) : true; - if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) { - can_do = false; - drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n"); - } if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { can_do = false; drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); @@ -1217,10 +1213,12 @@ static void decide_on_discard_support(struct drbd_device *device, blk_queue_discard_granularity(q, 512); q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); } else { queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); blk_queue_discard_granularity(q, 0); q->limits.max_discard_sectors = 0; + q->limits.max_write_zeroes_sectors = 0; } } @@ -1482,8 +1480,7 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis if (disk_conf->al_extents > drbd_al_extents_max(nbc)) disk_conf->al_extents = drbd_al_extents_max(nbc); - if (!blk_queue_discard(q) - || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) { + if (!blk_queue_discard(q)) { if (disk_conf->rs_discard_granularity) { disk_conf->rs_discard_granularity = 0; /* disable feature */ drbd_info(device, "rs_discard_granularity feature disabled\n"); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index aa6bf9692eff..1b0a2be24f39 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1448,105 +1448,14 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); } -/* - * We *may* ignore the discard-zeroes-data setting, if so configured. - * - * Assumption is that it "discard_zeroes_data=0" is only because the backend - * may ignore partial unaligned discards. - * - * LVM/DM thin as of at least - * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) - * Library version: 1.02.93-RHEL7 (2015-01-28) - * Driver version: 4.29.0 - * still behaves this way. - * - * For unaligned (wrt. alignment and granularity) or too small discards, - * we zero-out the initial (and/or) trailing unaligned partial chunks, - * but discard all the aligned full chunks. - * - * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1". - */ -int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard) -{ - struct block_device *bdev = device->ldev->backing_bdev; - struct request_queue *q = bdev_get_queue(bdev); - sector_t tmp, nr; - unsigned int max_discard_sectors, granularity; - int alignment; - int err = 0; - - if (!discard) - goto zero_out; - - /* Zero-sector (unknown) and one-sector granularities are the same. */ - granularity = max(q->limits.discard_granularity >> 9, 1U); - alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; - - max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); - max_discard_sectors -= max_discard_sectors % granularity; - if (unlikely(!max_discard_sectors)) - goto zero_out; - - if (nr_sectors < granularity) - goto zero_out; - - tmp = start; - if (sector_div(tmp, granularity) != alignment) { - if (nr_sectors < 2*granularity) - goto zero_out; - /* start + gran - (start + gran - align) % gran */ - tmp = start + granularity - alignment; - tmp = start + granularity - sector_div(tmp, granularity); - - nr = tmp - start; - err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); - nr_sectors -= nr; - start = tmp; - } - while (nr_sectors >= granularity) { - nr = min_t(sector_t, nr_sectors, max_discard_sectors); - err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); - nr_sectors -= nr; - start += nr; - } - zero_out: - if (nr_sectors) { - err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0); - } - return err != 0; -} - -static bool can_do_reliable_discards(struct drbd_device *device) -{ - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); - struct disk_conf *dc; - bool can_do; - - if (!blk_queue_discard(q)) - return false; - - if (q->limits.discard_zeroes_data) - return true; - - rcu_read_lock(); - dc = rcu_dereference(device->ldev->disk_conf); - can_do = dc->discard_zeroes_if_aligned; - rcu_read_unlock(); - return can_do; -} - static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) { - /* If the backend cannot discard, or does not guarantee - * read-back zeroes in discarded ranges, we fall back to - * zero-out. Unless configuration specifically requested - * otherwise. */ - if (!can_do_reliable_discards(device)) - peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; + struct block_device *bdev = device->ldev->backing_bdev; - if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, - peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT))) + if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9, + GFP_NOIO, 0)) peer_req->flags |= EE_WAS_ERROR; + drbd_endio_write_sec_final(peer_req); } @@ -2376,7 +2285,7 @@ static unsigned long wire_flags_to_bio_flags(u32 dpf) static unsigned long wire_flags_to_bio_op(u32 dpf) { if (dpf & DP_DISCARD) - return REQ_OP_DISCARD; + return REQ_OP_WRITE_ZEROES; else return REQ_OP_WRITE; } @@ -2567,7 +2476,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * op_flags = wire_flags_to_bio_flags(dp_flags); if (pi->cmd == P_TRIM) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_DISCARD); + D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); D_ASSERT(peer_device, peer_req->pages == NULL); } else if (peer_req->pages == NULL) { D_ASSERT(device, peer_req->i.size == 0); @@ -4880,7 +4789,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac if (get_ldev(device)) { struct drbd_peer_request *peer_req; - const int op = REQ_OP_DISCARD; + const int op = REQ_OP_WRITE_ZEROES; peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, size, 0, GFP_NOIO); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 652114ae1a8a..b5730e17b455 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -59,6 +59,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio drbd_req_make_private_bio(req, bio_src); req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0) + | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0) | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0); req->device = device; req->master_bio = bio_src; @@ -1148,10 +1149,10 @@ static int drbd_process_write_request(struct drbd_request *req) static void drbd_process_discard_req(struct drbd_request *req) { - int err = drbd_issue_discard_or_zero_out(req->device, - req->i.sector, req->i.size >> 9, true); + struct block_device *bdev = req->device->ldev->backing_bdev; - if (err) + if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, + GFP_NOIO, 0)) req->private_bio->bi_error = -EIO; bio_endio(req->private_bio); } @@ -1180,7 +1181,8 @@ drbd_submit_req_private_bio(struct drbd_request *req) if (get_ldev(device)) { if (drbd_insert_fault(device, type)) bio_io_error(bio); - else if (bio_op(bio) == REQ_OP_DISCARD) + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES || + bio_op(bio) == REQ_OP_DISCARD) drbd_process_discard_req(req); else generic_make_request(bio); @@ -1234,7 +1236,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long _drbd_start_io_acct(device, req); /* process discards always from our submitter thread */ - if (bio_op(bio) & REQ_OP_DISCARD) + if ((bio_op(bio) & REQ_OP_WRITE_ZEROES) || + (bio_op(bio) & REQ_OP_DISCARD)) goto queue_for_submitter_thread; if (rw == WRITE && req->private_bio && req->i.size diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3bff33f21435..1afcb4e02d8d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -174,7 +174,8 @@ void drbd_peer_request_endio(struct bio *bio) struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_device *device = peer_req->peer_device->device; bool is_write = bio_data_dir(bio) == WRITE; - bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD); + bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || + bio_op(bio) == REQ_OP_DISCARD; if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) drbd_warn(device, "%s: error=%d s=%llus\n", @@ -249,6 +250,7 @@ void drbd_request_endio(struct bio *bio) /* to avoid recursion in __req_mod */ if (unlikely(bio->bi_error)) { switch (bio_op(bio)) { + case REQ_OP_WRITE_ZEROES: case REQ_OP_DISCARD: if (bio->bi_error == -EOPNOTSUPP) what = DISCARD_COMPLETED_NOTSUPP; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 45b4384f650c..60d4c7653178 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2805,8 +2805,10 @@ static int set_next_request(void) fdc_queue = 0; if (q) { current_req = blk_fetch_request(q); - if (current_req) + if (current_req) { + current_req->error_count = 0; break; + } } } while (fdc_queue != old_pos); @@ -2866,7 +2868,7 @@ do_request: _floppy = floppy_type + DP->autodetect[DRS->probed_format]; } else probing = 0; - errors = &(current_req->errors); + errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); @@ -4207,9 +4209,7 @@ static int __init do_floppy_init(void) disks[drive]->fops = &floppy_fops; sprintf(disks[drive]->disk_name, "fd%d", drive); - init_timer(&motor_off_timer[drive]); - motor_off_timer[drive].data = drive; - motor_off_timer[drive].function = motor_off_callback; + setup_timer(&motor_off_timer[drive], motor_off_callback, drive); } err = register_blkdev(FLOPPY_MAJOR, "fd"); diff --git a/drivers/block/hd.c b/drivers/block/hd.c deleted file mode 100644 index 6043648da1e8..000000000000 --- a/drivers/block/hd.c +++ /dev/null @@ -1,803 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This is the low-level hd interrupt support. It traverses the - * request-list, using interrupts to jump between functions. As - * all the functions are called within interrupts, we may not - * sleep. Special care is recommended. - * - * modified by Drew Eckhardt to check nr of hd's from the CMOS. - * - * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug - * in the early extended-partition checks and added DM partitions - * - * IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", - * and general streamlining by Mark Lord. - * - * Removed 99% of above. Use Mark's ide driver for those options. - * This is now a lightweight ST-506 driver. (Paul Gortmaker) - * - * Modified 1995 Russell King for ARM processor. - * - * Bugfix: max_sectors must be <= 255 or the wheels tend to come - * off in a hurry once you queue things up - Paul G. 02/2001 - */ - -/* Uncomment the following if you want verbose error reports. */ -/* #define VERBOSE_ERRORS */ - -#include <linux/blkdev.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/interrupt.h> -#include <linux/timer.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/genhd.h> -#include <linux/string.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/blkpg.h> -#include <linux/ata.h> -#include <linux/hdreg.h> - -#define HD_IRQ 14 - -#define REALLY_SLOW_IO -#include <asm/io.h> -#include <linux/uaccess.h> - -#ifdef __arm__ -#undef HD_IRQ -#endif -#include <asm/irq.h> -#ifdef __arm__ -#define HD_IRQ IRQ_HARDDISK -#endif - -/* Hd controller regster ports */ - -#define HD_DATA 0x1f0 /* _CTL when writing */ -#define HD_ERROR 0x1f1 /* see err-bits */ -#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ -#define HD_SECTOR 0x1f3 /* starting sector */ -#define HD_LCYL 0x1f4 /* starting cylinder */ -#define HD_HCYL 0x1f5 /* high byte of starting cyl */ -#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ -#define HD_STATUS 0x1f7 /* see status-bits */ -#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ -#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ -#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ - -#define HD_CMD 0x3f6 /* used for resets */ -#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ - -/* Bits of HD_STATUS */ -#define ERR_STAT 0x01 -#define INDEX_STAT 0x02 -#define ECC_STAT 0x04 /* Corrected error */ -#define DRQ_STAT 0x08 -#define SEEK_STAT 0x10 -#define SERVICE_STAT SEEK_STAT -#define WRERR_STAT 0x20 -#define READY_STAT 0x40 -#define BUSY_STAT 0x80 - -/* Bits for HD_ERROR */ -#define MARK_ERR 0x01 /* Bad address mark */ -#define TRK0_ERR 0x02 /* couldn't find track 0 */ -#define ABRT_ERR 0x04 /* Command aborted */ -#define MCR_ERR 0x08 /* media change request */ -#define ID_ERR 0x10 /* ID field not found */ -#define MC_ERR 0x20 /* media changed */ -#define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ -#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ - -static DEFINE_SPINLOCK(hd_lock); -static struct request_queue *hd_queue; -static struct request *hd_req; - -#define TIMEOUT_VALUE (6*HZ) -#define HD_DELAY 0 - -#define MAX_ERRORS 16 /* Max read/write errors/sector */ -#define RESET_FREQ 8 /* Reset controller every 8th retry */ -#define RECAL_FREQ 4 /* Recalibrate every 4th retry */ -#define MAX_HD 2 - -#define STAT_OK (READY_STAT|SEEK_STAT) -#define OK_STATUS(s) (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK) - -static void recal_intr(void); -static void bad_rw_intr(void); - -static int reset; -static int hd_error; - -/* - * This struct defines the HD's and their types. - */ -struct hd_i_struct { - unsigned int head, sect, cyl, wpcom, lzone, ctl; - int unit; - int recalibrate; - int special_op; -}; - -#ifdef HD_TYPE -static struct hd_i_struct hd_info[] = { HD_TYPE }; -static int NR_HD = ARRAY_SIZE(hd_info); -#else -static struct hd_i_struct hd_info[MAX_HD]; -static int NR_HD; -#endif - -static struct gendisk *hd_gendisk[MAX_HD]; - -static struct timer_list device_timer; - -#define TIMEOUT_VALUE (6*HZ) - -#define SET_TIMER \ - do { \ - mod_timer(&device_timer, jiffies + TIMEOUT_VALUE); \ - } while (0) - -static void (*do_hd)(void) = NULL; -#define SET_HANDLER(x) \ -if ((do_hd = (x)) != NULL) \ - SET_TIMER; \ -else \ - del_timer(&device_timer); - - -#if (HD_DELAY > 0) - -#include <linux/i8253.h> - -unsigned long last_req; - -unsigned long read_timer(void) -{ - unsigned long t, flags; - int i; - - raw_spin_lock_irqsave(&i8253_lock, flags); - t = jiffies * 11932; - outb_p(0, 0x43); - i = inb_p(0x40); - i |= inb(0x40) << 8; - raw_spin_unlock_irqrestore(&i8253_lock, flags); - return(t - i); -} -#endif - -static void __init hd_setup(char *str, int *ints) -{ - int hdind = 0; - - if (ints[0] != 3) - return; - if (hd_info[0].head != 0) - hdind = 1; - hd_info[hdind].head = ints[2]; - hd_info[hdind].sect = ints[3]; - hd_info[hdind].cyl = ints[1]; - hd_info[hdind].wpcom = 0; - hd_info[hdind].lzone = ints[1]; - hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0); - NR_HD = hdind+1; -} - -static bool hd_end_request(int err, unsigned int bytes) -{ - if (__blk_end_request(hd_req, err, bytes)) - return true; - hd_req = NULL; - return false; -} - -static bool hd_end_request_cur(int err) -{ - return hd_end_request(err, blk_rq_cur_bytes(hd_req)); -} - -static void dump_status(const char *msg, unsigned int stat) -{ - char *name = "hd?"; - if (hd_req) - name = hd_req->rq_disk->disk_name; - -#ifdef VERBOSE_ERRORS - printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff); - if (stat & BUSY_STAT) printk("Busy "); - if (stat & READY_STAT) printk("DriveReady "); - if (stat & WRERR_STAT) printk("WriteFault "); - if (stat & SEEK_STAT) printk("SeekComplete "); - if (stat & DRQ_STAT) printk("DataRequest "); - if (stat & ECC_STAT) printk("CorrectedError "); - if (stat & INDEX_STAT) printk("Index "); - if (stat & ERR_STAT) printk("Error "); - printk("}\n"); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff); - if (hd_error & BBD_ERR) printk("BadSector "); - if (hd_error & ECC_ERR) printk("UncorrectableError "); - if (hd_error & ID_ERR) printk("SectorIdNotFound "); - if (hd_error & ABRT_ERR) printk("DriveStatusError "); - if (hd_error & TRK0_ERR) printk("TrackZeroNotFound "); - if (hd_error & MARK_ERR) printk("AddrMarkNotFound "); - printk("}"); - if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { - printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), - inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); - if (hd_req) - printk(", sector=%ld", blk_rq_pos(hd_req)); - } - printk("\n"); - } -#else - printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff); - } -#endif -} - -static void check_status(void) -{ - int i = inb_p(HD_STATUS); - - if (!OK_STATUS(i)) { - dump_status("check_status", i); - bad_rw_intr(); - } -} - -static int controller_busy(void) -{ - int retries = 100000; - unsigned char status; - - do { - status = inb_p(HD_STATUS); - } while ((status & BUSY_STAT) && --retries); - return status; -} - -static int status_ok(void) -{ - unsigned char status = inb_p(HD_STATUS); - - if (status & BUSY_STAT) - return 1; /* Ancient, but does it make sense??? */ - if (status & WRERR_STAT) - return 0; - if (!(status & READY_STAT)) - return 0; - if (!(status & SEEK_STAT)) - return 0; - return 1; -} - -static int controller_ready(unsigned int drive, unsigned int head) -{ - int retry = 100; - - do { - if (controller_busy() & BUSY_STAT) - return 0; - outb_p(0xA0 | (drive<<4) | head, HD_CURRENT); - if (status_ok()) - return 1; - } while (--retry); - return 0; -} - -static void hd_out(struct hd_i_struct *disk, - unsigned int nsect, - unsigned int sect, - unsigned int head, - unsigned int cyl, - unsigned int cmd, - void (*intr_addr)(void)) -{ - unsigned short port; - -#if (HD_DELAY > 0) - while (read_timer() - last_req < HD_DELAY) - /* nothing */; -#endif - if (reset) - return; - if (!controller_ready(disk->unit, head)) { - reset = 1; - return; - } - SET_HANDLER(intr_addr); - outb_p(disk->ctl, HD_CMD); - port = HD_DATA; - outb_p(disk->wpcom >> 2, ++port); - outb_p(nsect, ++port); - outb_p(sect, ++port); - outb_p(cyl, ++port); - outb_p(cyl >> 8, ++port); - outb_p(0xA0 | (disk->unit << 4) | head, ++port); - outb_p(cmd, ++port); -} - -static void hd_request (void); - -static int drive_busy(void) -{ - unsigned int i; - unsigned char c; - - for (i = 0; i < 500000 ; i++) { - c = inb_p(HD_STATUS); - if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK) - return 0; - } - dump_status("reset timed out", c); - return 1; -} - -static void reset_controller(void) -{ - int i; - - outb_p(4, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - outb_p(hd_info[0].ctl & 0x0f, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - if (drive_busy()) - printk("hd: controller still busy\n"); - else if ((hd_error = inb(HD_ERROR)) != 1) - printk("hd: controller reset failed: %02x\n", hd_error); -} - -static void reset_hd(void) -{ - static int i; - -repeat: - if (reset) { - reset = 0; - i = -1; - reset_controller(); - } else { - check_status(); - if (reset) - goto repeat; - } - if (++i < NR_HD) { - struct hd_i_struct *disk = &hd_info[i]; - disk->special_op = disk->recalibrate = 1; - hd_out(disk, disk->sect, disk->sect, disk->head-1, - disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd); - if (reset) - goto repeat; - } else - hd_request(); -} - -/* - * Ok, don't know what to do with the unexpected interrupts: on some machines - * doing a reset and a retry seems to result in an eternal loop. Right now I - * ignore it, and just set the timeout. - * - * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the - * drive enters "idle", "standby", or "sleep" mode, so if the status looks - * "good", we just ignore the interrupt completely. - */ -static void unexpected_hd_interrupt(void) -{ - unsigned int stat = inb_p(HD_STATUS); - - if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) { - dump_status("unexpected interrupt", stat); - SET_TIMER; - } -} - -/* - * bad_rw_intr() now tries to be a bit smarter and does things - * according to the error returned by the controller. - * -Mika Liljeberg (liljeber@cs.Helsinki.FI) - */ -static void bad_rw_intr(void) -{ - struct request *req = hd_req; - - if (req != NULL) { - struct hd_i_struct *disk = req->rq_disk->private_data; - if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - hd_end_request_cur(-EIO); - disk->special_op = disk->recalibrate = 1; - } else if (req->errors % RESET_FREQ == 0) - reset = 1; - else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0) - disk->special_op = disk->recalibrate = 1; - /* Otherwise just retry */ - } -} - -static inline int wait_DRQ(void) -{ - int retries; - int stat; - - for (retries = 0; retries < 100000; retries++) { - stat = inb_p(HD_STATUS); - if (stat & DRQ_STAT) - return 0; - } - dump_status("wait_DRQ", stat); - return -1; -} - -static void read_intr(void) -{ - struct request *req; - int i, retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if (i & DRQ_STAT) - goto ok_to_read; - } while (--retries > 0); - dump_status("read_intr", i); - bad_rw_intr(); - hd_request(); - return; - -ok_to_read: - req = hd_req; - insw(HD_DATA, bio_data(req->bio), 256); -#ifdef DEBUG - printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", - req->rq_disk->disk_name, blk_rq_pos(req) + 1, - blk_rq_sectors(req) - 1, bio_data(req->bio)+512); -#endif - if (hd_end_request(0, 512)) { - SET_HANDLER(&read_intr); - return; - } - - (void) inb_p(HD_STATUS); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -static void write_intr(void) -{ - struct request *req = hd_req; - int i; - int retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT)) - goto ok_to_write; - } while (--retries > 0); - dump_status("write_intr", i); - bad_rw_intr(); - hd_request(); - return; - -ok_to_write: - if (hd_end_request(0, 512)) { - SET_HANDLER(&write_intr); - outsw(HD_DATA, bio_data(req->bio), 256); - return; - } - -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -static void recal_intr(void) -{ - check_status(); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -/* - * This is another of the error-routines I don't know what to do with. The - * best idea seems to just set reset, and start all over again. - */ -static void hd_times_out(unsigned long dummy) -{ - char *name; - - do_hd = NULL; - - if (!hd_req) - return; - - spin_lock_irq(hd_queue->queue_lock); - reset = 1; - name = hd_req->rq_disk->disk_name; - printk("%s: timeout\n", name); - if (++hd_req->errors >= MAX_ERRORS) { -#ifdef DEBUG - printk("%s: too many errors\n", name); -#endif - hd_end_request_cur(-EIO); - } - hd_request(); - spin_unlock_irq(hd_queue->queue_lock); -} - -static int do_special_op(struct hd_i_struct *disk, struct request *req) -{ - if (disk->recalibrate) { - disk->recalibrate = 0; - hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr); - return reset; - } - if (disk->head > 16) { - printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - hd_end_request_cur(-EIO); - } - disk->special_op = 0; - return 1; -} - -/* - * The driver enables interrupts as much as possible. In order to do this, - * (a) the device-interrupt is disabled before entering hd_request(), - * and (b) the timeout-interrupt is disabled before the sti(). - * - * Interrupts are still masked (by default) whenever we are exchanging - * data/cmds with a drive, because some drives seem to have very poor - * tolerance for latency during I/O. The IDE driver has support to unmask - * interrupts for non-broken hardware, so use that driver if required. - */ -static void hd_request(void) -{ - unsigned int block, nsect, sec, track, head, cyl; - struct hd_i_struct *disk; - struct request *req; - - if (do_hd) - return; -repeat: - del_timer(&device_timer); - - if (!hd_req) { - hd_req = blk_fetch_request(hd_queue); - if (!hd_req) { - do_hd = NULL; - return; - } - } - req = hd_req; - - if (reset) { - reset_hd(); - return; - } - disk = req->rq_disk->private_data; - block = blk_rq_pos(req); - nsect = blk_rq_sectors(req); - if (block >= get_capacity(req->rq_disk) || - ((block+nsect) > get_capacity(req->rq_disk))) { - printk("%s: bad access: block=%d, count=%d\n", - req->rq_disk->disk_name, block, nsect); - hd_end_request_cur(-EIO); - goto repeat; - } - - if (disk->special_op) { - if (do_special_op(disk, req)) - goto repeat; - return; - } - sec = block % disk->sect + 1; - track = block / disk->sect; - head = track % disk->head; - cyl = track / disk->head; -#ifdef DEBUG - printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", - req->rq_disk->disk_name, - req_data_dir(req) == READ ? "read" : "writ", - cyl, head, sec, nsect, bio_data(req->bio)); -#endif - - switch (req_op(req)) { - case REQ_OP_READ: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, - &read_intr); - if (reset) - goto repeat; - break; - case REQ_OP_WRITE: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, - &write_intr); - if (reset) - goto repeat; - if (wait_DRQ()) { - bad_rw_intr(); - goto repeat; - } - outsw(HD_DATA, bio_data(req->bio), 256); - break; - default: - printk("unknown hd-command\n"); - hd_end_request_cur(-EIO); - break; - } -} - -static void do_hd_request(struct request_queue *q) -{ - hd_request(); -} - -static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct hd_i_struct *disk = bdev->bd_disk->private_data; - - geo->heads = disk->head; - geo->sectors = disk->sect; - geo->cylinders = disk->cyl; - return 0; -} - -/* - * Releasing a block device means we sync() it, so that it can safely - * be forgotten about... - */ - -static irqreturn_t hd_interrupt(int irq, void *dev_id) -{ - void (*handler)(void) = do_hd; - - spin_lock(hd_queue->queue_lock); - - do_hd = NULL; - del_timer(&device_timer); - if (!handler) - handler = unexpected_hd_interrupt; - handler(); - - spin_unlock(hd_queue->queue_lock); - - return IRQ_HANDLED; -} - -static const struct block_device_operations hd_fops = { - .getgeo = hd_getgeo, -}; - -static int __init hd_init(void) -{ - int drive; - - if (register_blkdev(HD_MAJOR, "hd")) - return -1; - - hd_queue = blk_init_queue(do_hd_request, &hd_lock); - if (!hd_queue) { - unregister_blkdev(HD_MAJOR, "hd"); - return -ENOMEM; - } - - blk_queue_max_hw_sectors(hd_queue, 255); - init_timer(&device_timer); - device_timer.function = hd_times_out; - blk_queue_logical_block_size(hd_queue, 512); - - if (!NR_HD) { - /* - * We don't know anything about the drive. This means - * that you *MUST* specify the drive parameters to the - * kernel yourself. - * - * If we were on an i386, we used to read this info from - * the BIOS or CMOS. This doesn't work all that well, - * since this assumes that this is a primary or secondary - * drive, and if we're using this legacy driver, it's - * probably an auxiliary controller added to recover - * legacy data off an ST-506 drive. Either way, it's - * definitely safest to have the user explicitly specify - * the information. - */ - printk("hd: no drives specified - use hd=cyl,head,sectors" - " on kernel command line\n"); - goto out; - } - - for (drive = 0 ; drive < NR_HD ; drive++) { - struct gendisk *disk = alloc_disk(64); - struct hd_i_struct *p = &hd_info[drive]; - if (!disk) - goto Enomem; - disk->major = HD_MAJOR; - disk->first_minor = drive << 6; - disk->fops = &hd_fops; - sprintf(disk->disk_name, "hd%c", 'a'+drive); - disk->private_data = p; - set_capacity(disk, p->head * p->sect * p->cyl); - disk->queue = hd_queue; - p->unit = drive; - hd_gendisk[drive] = disk; - printk("%s: %luMB, CHS=%d/%d/%d\n", - disk->disk_name, (unsigned long)get_capacity(disk)/2048, - p->cyl, p->head, p->sect); - } - - if (request_irq(HD_IRQ, hd_interrupt, 0, "hd", NULL)) { - printk("hd: unable to get IRQ%d for the hard disk driver\n", - HD_IRQ); - goto out1; - } - if (!request_region(HD_DATA, 8, "hd")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA); - goto out2; - } - if (!request_region(HD_CMD, 1, "hd(cmd)")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD); - goto out3; - } - - /* Let them fly */ - for (drive = 0; drive < NR_HD; drive++) - add_disk(hd_gendisk[drive]); - - return 0; - -out3: - release_region(HD_DATA, 8); -out2: - free_irq(HD_IRQ, NULL); -out1: - for (drive = 0; drive < NR_HD; drive++) - put_disk(hd_gendisk[drive]); - NR_HD = 0; -out: - del_timer(&device_timer); - unregister_blkdev(HD_MAJOR, "hd"); - blk_cleanup_queue(hd_queue); - return -1; -Enomem: - while (drive--) - put_disk(hd_gendisk[drive]); - goto out; -} - -static int __init parse_hd_setup(char *line) -{ - int ints[6]; - - (void) get_options(line, ARRAY_SIZE(ints), ints); - hd_setup(NULL, ints); - - return 1; -} -__setup("hd=", parse_hd_setup); - -late_initcall(hd_init); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0ecb6461ed81..994403efee19 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -445,32 +445,27 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) return ret; } -static inline void handle_partial_read(struct loop_cmd *cmd, long bytes) +static void lo_complete_rq(struct request *rq) { - if (bytes < 0 || op_is_write(req_op(cmd->rq))) - return; + struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - if (unlikely(bytes < blk_rq_bytes(cmd->rq))) { + if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio && + cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) { struct bio *bio = cmd->rq->bio; - bio_advance(bio, bytes); + bio_advance(bio, cmd->ret); zero_fill_bio(bio); } + + blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0); } static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); - struct request *rq = cmd->rq; - - handle_partial_read(cmd, ret); - if (ret > 0) - ret = 0; - else if (ret < 0) - ret = -EIO; - - blk_mq_complete_request(rq, ret); + cmd->ret = ret; + blk_mq_complete_request(cmd->rq); } static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, @@ -528,6 +523,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) case REQ_OP_FLUSH: return lo_req_flush(lo, rq); case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: return lo_discard(lo, rq, pos); case REQ_OP_WRITE: if (lo->transfer) @@ -826,7 +822,7 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = 0; q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, 0); - q->limits.discard_zeroes_data = 0; + blk_queue_max_write_zeroes_sectors(q, 0); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); return; } @@ -834,7 +830,7 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = inode->i_sb->s_blocksize; q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, UINT_MAX >> 9); - q->limits.discard_zeroes_data = 1; + blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } @@ -1660,6 +1656,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, switch (req_op(cmd->rq)) { case REQ_OP_FLUSH: case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: cmd->use_aio = false; break; default: @@ -1686,8 +1683,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd) ret = do_req_filebacked(lo, cmd->rq); failed: /* complete non-aio request */ - if (!cmd->use_aio || ret) - blk_mq_complete_request(cmd->rq, ret ? -EIO : 0); + if (!cmd->use_aio || ret) { + cmd->ret = ret ? -EIO : 0; + blk_mq_complete_request(cmd->rq); + } } static void loop_queue_work(struct kthread_work *work) @@ -1710,9 +1709,10 @@ static int loop_init_request(void *data, struct request *rq, return 0; } -static struct blk_mq_ops loop_mq_ops = { +static const struct blk_mq_ops loop_mq_ops = { .queue_rq = loop_queue_rq, .init_request = loop_init_request, + .complete = lo_complete_rq, }; static int loop_add(struct loop_device **l, int i) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index fb2237c73e61..fecd3f97ef8c 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -70,6 +70,7 @@ struct loop_cmd { struct request *rq; struct list_head list; bool use_aio; /* use AIO interface to handle I/O */ + long ret; struct kiocb iocb; }; diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c deleted file mode 100644 index 286f276f586e..000000000000 --- a/drivers/block/mg_disk.c +++ /dev/null @@ -1,1112 +0,0 @@ -/* - * drivers/block/mg_disk.c - * - * Support for the mGine m[g]flash IO mode. - * Based on legacy hd.c - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim <donari75@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/blkdev.h> -#include <linux/hdreg.h> -#include <linux/ata.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/platform_device.h> -#include <linux/gpio.h> -#include <linux/mg_disk.h> -#include <linux/slab.h> - -#define MG_RES_SEC (CONFIG_MG_DISK_RES << 1) - -/* name for block device */ -#define MG_DISK_NAME "mgd" - -#define MG_DISK_MAJ 0 -#define MG_DISK_MAX_PART 16 -#define MG_SECTOR_SIZE 512 -#define MG_MAX_SECTS 256 - -/* Register offsets */ -#define MG_BUFF_OFFSET 0x8000 -#define MG_REG_OFFSET 0xC000 -#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ -#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ -#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) -#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) -#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) -#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) -#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) -#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ -#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ -#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) -#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) - -/* handy status */ -#define MG_STAT_READY (ATA_DRDY | ATA_DSC) -#define MG_READY_OK(s) (((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \ - ATA_ERR))) == MG_STAT_READY) - -/* error code for others */ -#define MG_ERR_NONE 0 -#define MG_ERR_TIMEOUT 0x100 -#define MG_ERR_INIT_STAT 0x101 -#define MG_ERR_TRANSLATION 0x102 -#define MG_ERR_CTRL_RST 0x103 -#define MG_ERR_INV_STAT 0x104 -#define MG_ERR_RSTOUT 0x105 - -#define MG_MAX_ERRORS 6 /* Max read/write errors */ - -/* command */ -#define MG_CMD_RD 0x20 -#define MG_CMD_WR 0x30 -#define MG_CMD_SLEEP 0x99 -#define MG_CMD_WAKEUP 0xC3 -#define MG_CMD_ID 0xEC -#define MG_CMD_WR_CONF 0x3C -#define MG_CMD_RD_CONF 0x40 - -/* operation mode */ -#define MG_OP_CASCADE (1 << 0) -#define MG_OP_CASCADE_SYNC_RD (1 << 1) -#define MG_OP_CASCADE_SYNC_WR (1 << 2) -#define MG_OP_INTERLEAVE (1 << 3) - -/* synchronous */ -#define MG_BURST_LAT_4 (3 << 4) -#define MG_BURST_LAT_5 (4 << 4) -#define MG_BURST_LAT_6 (5 << 4) -#define MG_BURST_LAT_7 (6 << 4) -#define MG_BURST_LAT_8 (7 << 4) -#define MG_BURST_LEN_4 (1 << 1) -#define MG_BURST_LEN_8 (2 << 1) -#define MG_BURST_LEN_16 (3 << 1) -#define MG_BURST_LEN_32 (4 << 1) -#define MG_BURST_LEN_CONT (0 << 1) - -/* timeout value (unit: ms) */ -#define MG_TMAX_CONF_TO_CMD 1 -#define MG_TMAX_WAIT_RD_DRQ 10 -#define MG_TMAX_WAIT_WR_DRQ 500 -#define MG_TMAX_RST_TO_BUSY 10 -#define MG_TMAX_HDRST_TO_RDY 500 -#define MG_TMAX_SWRST_TO_RDY 500 -#define MG_TMAX_RSTOUT 3000 - -#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) - -/* main structure for mflash driver */ -struct mg_host { - struct device *dev; - - struct request_queue *breq; - struct request *req; - spinlock_t lock; - struct gendisk *gd; - - struct timer_list timer; - void (*mg_do_intr) (struct mg_host *); - - u16 id[ATA_ID_WORDS]; - - u16 cyls; - u16 heads; - u16 sectors; - u32 n_sectors; - u32 nres_sectors; - - void __iomem *dev_base; - unsigned int irq; - unsigned int rst; - unsigned int rstout; - - u32 major; - u32 error; -}; - -/* - * Debugging macro and defines - */ -#undef DO_MG_DEBUG -#ifdef DO_MG_DEBUG -# define MG_DBG(fmt, args...) \ - printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) -#else /* CONFIG_MG_DEBUG */ -# define MG_DBG(fmt, args...) do { } while (0) -#endif /* CONFIG_MG_DEBUG */ - -static void mg_request(struct request_queue *); - -static bool mg_end_request(struct mg_host *host, int err, unsigned int nr_bytes) -{ - if (__blk_end_request(host->req, err, nr_bytes)) - return true; - - host->req = NULL; - return false; -} - -static bool mg_end_request_cur(struct mg_host *host, int err) -{ - return mg_end_request(host, err, blk_rq_cur_bytes(host->req)); -} - -static void mg_dump_status(const char *msg, unsigned int stat, - struct mg_host *host) -{ - char *name = MG_DISK_NAME; - - if (host->req) - name = host->req->rq_disk->disk_name; - - printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff); - if (stat & ATA_BUSY) - printk("Busy "); - if (stat & ATA_DRDY) - printk("DriveReady "); - if (stat & ATA_DF) - printk("WriteFault "); - if (stat & ATA_DSC) - printk("SeekComplete "); - if (stat & ATA_DRQ) - printk("DataRequest "); - if (stat & ATA_CORR) - printk("CorrectedError "); - if (stat & ATA_ERR) - printk("Error "); - printk("}\n"); - if ((stat & ATA_ERR) == 0) { - host->error = 0; - } else { - host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR); - printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg, - host->error & 0xff); - if (host->error & ATA_BBK) - printk("BadSector "); - if (host->error & ATA_UNC) - printk("UncorrectableError "); - if (host->error & ATA_IDNF) - printk("SectorIdNotFound "); - if (host->error & ATA_ABORTED) - printk("DriveStatusError "); - if (host->error & ATA_AMNF) - printk("AddrMarkNotFound "); - printk("}"); - if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) { - if (host->req) - printk(", sector=%u", - (unsigned int)blk_rq_pos(host->req)); - } - printk("\n"); - } -} - -static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) -{ - u8 status; - unsigned long expire, cur_jiffies; - struct mg_drv_data *prv_data = host->dev->platform_data; - - host->error = MG_ERR_NONE; - expire = jiffies + msecs_to_jiffies(msec); - - /* These 2 times dummy status read prevents reading invalid - * status. A very little time (3 times of mflash operating clk) - * is required for busy bit is set. Use dummy read instead of - * busy wait, because mflash's PLL is machine dependent. - */ - if (prv_data->use_polling) { - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - } - - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - - do { - cur_jiffies = jiffies; - if (status & ATA_BUSY) { - if (expect == ATA_BUSY) - break; - } else { - /* Check the error condition! */ - if (status & ATA_ERR) { - mg_dump_status("mg_wait", status, host); - break; - } - - if (expect == MG_STAT_READY) - if (MG_READY_OK(status)) - break; - - if (expect == ATA_DRQ) - if (status & ATA_DRQ) - break; - } - if (!msec) { - mg_dump_status("not ready", status, host); - return MG_ERR_INV_STAT; - } - - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - } while (time_before(cur_jiffies, expire)); - - if (time_after_eq(cur_jiffies, expire) && msec) - host->error = MG_ERR_TIMEOUT; - - return host->error; -} - -static unsigned int mg_wait_rstout(u32 rstout, u32 msec) -{ - unsigned long expire; - - expire = jiffies + msecs_to_jiffies(msec); - while (time_before(jiffies, expire)) { - if (gpio_get_value(rstout) == 1) - return MG_ERR_NONE; - msleep(10); - } - - return MG_ERR_RSTOUT; -} - -static void mg_unexpected_intr(struct mg_host *host) -{ - u32 status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - - mg_dump_status("mg_unexpected_intr", status, host); -} - -static irqreturn_t mg_irq(int irq, void *dev_id) -{ - struct mg_host *host = dev_id; - void (*handler)(struct mg_host *) = host->mg_do_intr; - - spin_lock(&host->lock); - - host->mg_do_intr = NULL; - del_timer(&host->timer); - if (!handler) - handler = mg_unexpected_intr; - handler(host); - - spin_unlock(&host->lock); - - return IRQ_HANDLED; -} - -/* local copy of ata_id_string() */ -static void mg_id_string(const u16 *id, unsigned char *s, - unsigned int ofs, unsigned int len) -{ - unsigned int c; - - BUG_ON(len & 1); - - while (len > 0) { - c = id[ofs] >> 8; - *s = c; - s++; - - c = id[ofs] & 0xff; - *s = c; - s++; - - ofs++; - len -= 2; - } -} - -/* local copy of ata_id_c_string() */ -static void mg_id_c_string(const u16 *id, unsigned char *s, - unsigned int ofs, unsigned int len) -{ - unsigned char *p; - - mg_id_string(id, s, ofs, len - 1); - - p = s + strnlen(s, len - 1); - while (p > s && p[-1] == ' ') - p--; - *p = '\0'; -} - -static int mg_get_disk_id(struct mg_host *host) -{ - u32 i; - s32 err; - const u16 *id = host->id; - struct mg_drv_data *prv_data = host->dev->platform_data; - char fwrev[ATA_ID_FW_REV_LEN + 1]; - char model[ATA_ID_PROD_LEN + 1]; - char serial[ATA_ID_SERNO_LEN + 1]; - - if (!prv_data->use_polling) - outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND); - err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ); - if (err) - return err; - - for (i = 0; i < (MG_SECTOR_SIZE >> 1); i++) - host->id[i] = le16_to_cpu(inw((unsigned long)host->dev_base + - MG_BUFF_OFFSET + i * 2)); - - outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - err = mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD); - if (err) - return err; - - if ((id[ATA_ID_FIELD_VALID] & 1) == 0) - return MG_ERR_TRANSLATION; - - host->n_sectors = ata_id_u32(id, ATA_ID_LBA_CAPACITY); - host->cyls = id[ATA_ID_CYLS]; - host->heads = id[ATA_ID_HEADS]; - host->sectors = id[ATA_ID_SECTORS]; - - if (MG_RES_SEC && host->heads && host->sectors) { - /* modify cyls, n_sectors */ - host->cyls = (host->n_sectors - MG_RES_SEC) / - host->heads / host->sectors; - host->nres_sectors = host->n_sectors - host->cyls * - host->heads * host->sectors; - host->n_sectors -= host->nres_sectors; - } - - mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev)); - mg_id_c_string(id, model, ATA_ID_PROD, sizeof(model)); - mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial)); - printk(KERN_INFO "mg_disk: model: %s\n", model); - printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev); - printk(KERN_INFO "mg_disk: serial: %s\n", serial); - printk(KERN_INFO "mg_disk: %d + reserved %d sectors\n", - host->n_sectors, host->nres_sectors); - - if (!prv_data->use_polling) - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - return err; -} - - -static int mg_disk_init(struct mg_host *host) -{ - struct mg_drv_data *prv_data = host->dev->platform_data; - s32 err; - u8 init_status; - - /* hdd rst low */ - gpio_set_value(host->rst, 0); - err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY); - if (err) - return err; - - /* hdd rst high */ - gpio_set_value(host->rst, 1); - err = mg_wait(host, MG_STAT_READY, MG_TMAX_HDRST_TO_RDY); - if (err) - return err; - - /* soft reset on */ - outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0), - (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY); - if (err) - return err; - - /* soft reset off */ - outb(prv_data->use_polling ? ATA_NIEN : 0, - (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY); - if (err) - return err; - - init_status = inb((unsigned long)host->dev_base + MG_REG_STATUS) & 0xf; - - if (init_status == 0xf) - return MG_ERR_INIT_STAT; - - return err; -} - -static void mg_bad_rw_intr(struct mg_host *host) -{ - if (host->req) - if (++host->req->errors >= MG_MAX_ERRORS || - host->error == MG_ERR_TIMEOUT) - mg_end_request_cur(host, -EIO); -} - -static unsigned int mg_out(struct mg_host *host, - unsigned int sect_num, - unsigned int sect_cnt, - unsigned int cmd, - void (*intr_addr)(struct mg_host *)) -{ - struct mg_drv_data *prv_data = host->dev->platform_data; - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return host->error; - - if (!prv_data->use_polling) { - host->mg_do_intr = intr_addr; - mod_timer(&host->timer, jiffies + 3 * HZ); - } - if (MG_RES_SEC) - sect_num += MG_RES_SEC; - outb((u8)sect_cnt, (unsigned long)host->dev_base + MG_REG_SECT_CNT); - outb((u8)sect_num, (unsigned long)host->dev_base + MG_REG_SECT_NUM); - outb((u8)(sect_num >> 8), (unsigned long)host->dev_base + - MG_REG_CYL_LOW); - outb((u8)(sect_num >> 16), (unsigned long)host->dev_base + - MG_REG_CYL_HIGH); - outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS), - (unsigned long)host->dev_base + MG_REG_DRV_HEAD); - outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND); - return MG_ERR_NONE; -} - -static void mg_read_one(struct mg_host *host, struct request *req) -{ - u16 *buff = (u16 *)bio_data(req->bio); - u32 i; - - for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) - *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + - (i << 1)); -} - -static void mg_read(struct request *req) -{ - struct mg_host *host = req->rq_disk->private_data; - - if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), - MG_CMD_RD, NULL) != MG_ERR_NONE) - mg_bad_rw_intr(host); - - MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - blk_rq_sectors(req), blk_rq_pos(req), bio_data(req->bio)); - - do { - if (mg_wait(host, ATA_DRQ, - MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - - mg_read_one(host, req); - - outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + - MG_REG_COMMAND); - } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); -} - -static void mg_write_one(struct mg_host *host, struct request *req) -{ - u16 *buff = (u16 *)bio_data(req->bio); - u32 i; - - for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) - outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET + - (i << 1)); -} - -static void mg_write(struct request *req) -{ - struct mg_host *host = req->rq_disk->private_data; - unsigned int rem = blk_rq_sectors(req); - - if (mg_out(host, blk_rq_pos(req), rem, - MG_CMD_WR, NULL) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - - MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - rem, blk_rq_pos(req), bio_data(req->bio)); - - if (mg_wait(host, ATA_DRQ, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - - do { - mg_write_one(host, req); - - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + - MG_REG_COMMAND); - - rem--; - if (rem > 1 && mg_wait(host, ATA_DRQ, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } else if (mg_wait(host, MG_STAT_READY, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); -} - -static void mg_read_intr(struct mg_host *host) -{ - struct request *req = host->req; - u32 i; - - /* check status */ - do { - i = inb((unsigned long)host->dev_base + MG_REG_STATUS); - if (i & ATA_BUSY) - break; - if (!MG_READY_OK(i)) - break; - if (i & ATA_DRQ) - goto ok_to_read; - } while (0); - mg_dump_status("mg_read_intr", i, host); - mg_bad_rw_intr(host); - mg_request(host->breq); - return; - -ok_to_read: - mg_read_one(host, req); - - MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - blk_rq_pos(req), blk_rq_sectors(req) - 1, bio_data(req->bio)); - - /* send read confirm */ - outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - - if (mg_end_request(host, 0, MG_SECTOR_SIZE)) { - /* set handler if read remains */ - host->mg_do_intr = mg_read_intr; - mod_timer(&host->timer, jiffies + 3 * HZ); - } else /* goto next request */ - mg_request(host->breq); -} - -static void mg_write_intr(struct mg_host *host) -{ - struct request *req = host->req; - u32 i; - bool rem; - - /* check status */ - do { - i = inb((unsigned long)host->dev_base + MG_REG_STATUS); - if (i & ATA_BUSY) - break; - if (!MG_READY_OK(i)) - break; - if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ)) - goto ok_to_write; - } while (0); - mg_dump_status("mg_write_intr", i, host); - mg_bad_rw_intr(host); - mg_request(host->breq); - return; - -ok_to_write: - if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) { - /* write 1 sector and set handler if remains */ - mg_write_one(host, req); - MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); - host->mg_do_intr = mg_write_intr; - mod_timer(&host->timer, jiffies + 3 * HZ); - } - - /* send write confirm */ - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - - if (!rem) - mg_request(host->breq); -} - -static void mg_times_out(unsigned long data) -{ - struct mg_host *host = (struct mg_host *)data; - char *name; - - spin_lock_irq(&host->lock); - - if (!host->req) - goto out_unlock; - - host->mg_do_intr = NULL; - - name = host->req->rq_disk->disk_name; - printk(KERN_DEBUG "%s: timeout\n", name); - - host->error = MG_ERR_TIMEOUT; - mg_bad_rw_intr(host); - -out_unlock: - mg_request(host->breq); - spin_unlock_irq(&host->lock); -} - -static void mg_request_poll(struct request_queue *q) -{ - struct mg_host *host = q->queuedata; - - while (1) { - if (!host->req) { - host->req = blk_fetch_request(q); - if (!host->req) - break; - } - - switch (req_op(host->req)) { - case REQ_OP_READ: - mg_read(host->req); - break; - case REQ_OP_WRITE: - mg_write(host->req); - break; - default: - mg_end_request_cur(host, -EIO); - break; - } - } -} - -static unsigned int mg_issue_req(struct request *req, - struct mg_host *host, - unsigned int sect_num, - unsigned int sect_cnt) -{ - switch (req_op(host->req)) { - case REQ_OP_READ: - if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) - != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return host->error; - } - break; - case REQ_OP_WRITE: - /* TODO : handler */ - outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) - != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return host->error; - } - del_timer(&host->timer); - mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ); - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - if (host->error) { - mg_bad_rw_intr(host); - return host->error; - } - mg_write_one(host, req); - mod_timer(&host->timer, jiffies + 3 * HZ); - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + - MG_REG_COMMAND); - break; - default: - mg_end_request_cur(host, -EIO); - break; - } - return MG_ERR_NONE; -} - -/* This function also called from IRQ context */ -static void mg_request(struct request_queue *q) -{ - struct mg_host *host = q->queuedata; - struct request *req; - u32 sect_num, sect_cnt; - - while (1) { - if (!host->req) { - host->req = blk_fetch_request(q); - if (!host->req) - break; - } - req = host->req; - - /* check unwanted request call */ - if (host->mg_do_intr) - return; - - del_timer(&host->timer); - - sect_num = blk_rq_pos(req); - /* deal whole segments */ - sect_cnt = blk_rq_sectors(req); - - /* sanity check */ - if (sect_num >= get_capacity(req->rq_disk) || - ((sect_num + sect_cnt) > - get_capacity(req->rq_disk))) { - printk(KERN_WARNING - "%s: bad access: sector=%d, count=%d\n", - req->rq_disk->disk_name, - sect_num, sect_cnt); - mg_end_request_cur(host, -EIO); - continue; - } - - if (!mg_issue_req(req, host, sect_num, sect_cnt)) - return; - } -} - -static int mg_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct mg_host *host = bdev->bd_disk->private_data; - - geo->cylinders = (unsigned short)host->cyls; - geo->heads = (unsigned char)host->heads; - geo->sectors = (unsigned char)host->sectors; - return 0; -} - -static const struct block_device_operations mg_disk_ops = { - .getgeo = mg_getgeo -}; - -#ifdef CONFIG_PM_SLEEP -static int mg_suspend(struct device *dev) -{ - struct mg_drv_data *prv_data = dev->platform_data; - struct mg_host *host = prv_data->host; - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return -EIO; - - if (!prv_data->use_polling) - outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND); - /* wait until mflash deep sleep */ - msleep(1); - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) { - if (!prv_data->use_polling) - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - return -EIO; - } - - return 0; -} - -static int mg_resume(struct device *dev) -{ - struct mg_drv_data *prv_data = dev->platform_data; - struct mg_host *host = prv_data->host; - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return -EIO; - - outb(MG_CMD_WAKEUP, (unsigned long)host->dev_base + MG_REG_COMMAND); - /* wait until mflash wakeup */ - msleep(1); - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return -EIO; - - if (!prv_data->use_polling) - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - return 0; -} -#endif - -static SIMPLE_DEV_PM_OPS(mg_pm, mg_suspend, mg_resume); - -static int mg_probe(struct platform_device *plat_dev) -{ - struct mg_host *host; - struct resource *rsc; - struct mg_drv_data *prv_data = plat_dev->dev.platform_data; - int err = 0; - - if (!prv_data) { - printk(KERN_ERR "%s:%d fail (no driver_data)\n", - __func__, __LINE__); - err = -EINVAL; - goto probe_err; - } - - /* alloc mg_host */ - host = kzalloc(sizeof(struct mg_host), GFP_KERNEL); - if (!host) { - printk(KERN_ERR "%s:%d fail (no memory for mg_host)\n", - __func__, __LINE__); - err = -ENOMEM; - goto probe_err; - } - host->major = MG_DISK_MAJ; - - /* link each other */ - prv_data->host = host; - host->dev = &plat_dev->dev; - - /* io remap */ - rsc = platform_get_resource(plat_dev, IORESOURCE_MEM, 0); - if (!rsc) { - printk(KERN_ERR "%s:%d platform_get_resource fail\n", - __func__, __LINE__); - err = -EINVAL; - goto probe_err_2; - } - host->dev_base = ioremap(rsc->start, resource_size(rsc)); - if (!host->dev_base) { - printk(KERN_ERR "%s:%d ioremap fail\n", - __func__, __LINE__); - err = -EIO; - goto probe_err_2; - } - MG_DBG("dev_base = 0x%x\n", (u32)host->dev_base); - - /* get reset pin */ - rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, - MG_RST_PIN); - if (!rsc) { - printk(KERN_ERR "%s:%d get reset pin fail\n", - __func__, __LINE__); - err = -EIO; - goto probe_err_3; - } - host->rst = rsc->start; - - /* init rst pin */ - err = gpio_request(host->rst, MG_RST_PIN); - if (err) - goto probe_err_3; - gpio_direction_output(host->rst, 1); - - /* reset out pin */ - if (!(prv_data->dev_attr & MG_DEV_MASK)) { - err = -EINVAL; - goto probe_err_3a; - } - - if (prv_data->dev_attr != MG_BOOT_DEV) { - rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, - MG_RSTOUT_PIN); - if (!rsc) { - printk(KERN_ERR "%s:%d get reset-out pin fail\n", - __func__, __LINE__); - err = -EIO; - goto probe_err_3a; - } - host->rstout = rsc->start; - err = gpio_request(host->rstout, MG_RSTOUT_PIN); - if (err) - goto probe_err_3a; - gpio_direction_input(host->rstout); - } - - /* disk reset */ - if (prv_data->dev_attr == MG_STORAGE_DEV) { - /* If POR seq. not yet finished, wait */ - err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT); - if (err) - goto probe_err_3b; - err = mg_disk_init(host); - if (err) { - printk(KERN_ERR "%s:%d fail (err code : %d)\n", - __func__, __LINE__, err); - err = -EIO; - goto probe_err_3b; - } - } - - /* get irq resource */ - if (!prv_data->use_polling) { - host->irq = platform_get_irq(plat_dev, 0); - if (host->irq == -ENXIO) { - err = host->irq; - goto probe_err_3b; - } - err = request_irq(host->irq, mg_irq, - IRQF_TRIGGER_RISING, - MG_DEV_NAME, host); - if (err) { - printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n", - __func__, __LINE__, err); - goto probe_err_3b; - } - - } - - /* get disk id */ - err = mg_get_disk_id(host); - if (err) { - printk(KERN_ERR "%s:%d fail (err code : %d)\n", - __func__, __LINE__, err); - err = -EIO; - goto probe_err_4; - } - - err = register_blkdev(host->major, MG_DISK_NAME); - if (err < 0) { - printk(KERN_ERR "%s:%d register_blkdev fail (err code : %d)\n", - __func__, __LINE__, err); - goto probe_err_4; - } - if (!host->major) - host->major = err; - - spin_lock_init(&host->lock); - - if (prv_data->use_polling) - host->breq = blk_init_queue(mg_request_poll, &host->lock); - else - host->breq = blk_init_queue(mg_request, &host->lock); - - if (!host->breq) { - err = -ENOMEM; - printk(KERN_ERR "%s:%d (blk_init_queue) fail\n", - __func__, __LINE__); - goto probe_err_5; - } - host->breq->queuedata = host; - - /* mflash is random device, thanx for the noop */ - err = elevator_change(host->breq, "noop"); - if (err) { - printk(KERN_ERR "%s:%d (elevator_init) fail\n", - __func__, __LINE__); - goto probe_err_6; - } - blk_queue_max_hw_sectors(host->breq, MG_MAX_SECTS); - blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE); - - init_timer(&host->timer); - host->timer.function = mg_times_out; - host->timer.data = (unsigned long)host; - - host->gd = alloc_disk(MG_DISK_MAX_PART); - if (!host->gd) { - printk(KERN_ERR "%s:%d (alloc_disk) fail\n", - __func__, __LINE__); - err = -ENOMEM; - goto probe_err_7; - } - host->gd->major = host->major; - host->gd->first_minor = 0; - host->gd->fops = &mg_disk_ops; - host->gd->queue = host->breq; - host->gd->private_data = host; - sprintf(host->gd->disk_name, MG_DISK_NAME"a"); - - set_capacity(host->gd, host->n_sectors); - - add_disk(host->gd); - - return err; - -probe_err_7: - del_timer_sync(&host->timer); -probe_err_6: - blk_cleanup_queue(host->breq); -probe_err_5: - unregister_blkdev(host->major, MG_DISK_NAME); -probe_err_4: - if (!prv_data->use_polling) - free_irq(host->irq, host); -probe_err_3b: - gpio_free(host->rstout); -probe_err_3a: - gpio_free(host->rst); -probe_err_3: - iounmap(host->dev_base); -probe_err_2: - kfree(host); -probe_err: - return err; -} - -static int mg_remove(struct platform_device *plat_dev) -{ - struct mg_drv_data *prv_data = plat_dev->dev.platform_data; - struct mg_host *host = prv_data->host; - int err = 0; - - /* delete timer */ - del_timer_sync(&host->timer); - - /* remove disk */ - if (host->gd) { - del_gendisk(host->gd); - put_disk(host->gd); - } - /* remove queue */ - if (host->breq) - blk_cleanup_queue(host->breq); - - /* unregister blk device */ - unregister_blkdev(host->major, MG_DISK_NAME); - - /* free irq */ - if (!prv_data->use_polling) - free_irq(host->irq, host); - - /* free reset-out pin */ - if (prv_data->dev_attr != MG_BOOT_DEV) - gpio_free(host->rstout); - - /* free rst pin */ - if (host->rst) - gpio_free(host->rst); - - /* unmap io */ - if (host->dev_base) - iounmap(host->dev_base); - - /* free mg_host */ - kfree(host); - - return err; -} - -static struct platform_driver mg_disk_driver = { - .probe = mg_probe, - .remove = mg_remove, - .driver = { - .name = MG_DEV_NAME, - .pm = &mg_pm, - } -}; - -/**************************************************************************** - * - * Module stuff - * - ****************************************************************************/ - -static int __init mg_init(void) -{ - printk(KERN_INFO "mGine mflash driver, (c) 2008 mGine Co.\n"); - return platform_driver_register(&mg_disk_driver); -} - -static void __exit mg_exit(void) -{ - printk(KERN_INFO "mflash driver : bye bye\n"); - platform_driver_unregister(&mg_disk_driver); -} - -module_init(mg_init); -module_exit(mg_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("unsik Kim <donari75@gmail.com>"); -MODULE_DESCRIPTION("mGine m[g]flash device driver"); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 1d1dc11aa5fa..02804cc79d82 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -169,6 +169,25 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) return false; /* device present */ } +/* we have to use runtime tag to setup command header */ +static void mtip_init_cmd_header(struct request *rq) +{ + struct driver_data *dd = rq->q->queuedata; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; + + /* Point the command headers at the command tables. */ + cmd->command_header = dd->port->command_list + + (sizeof(struct mtip_cmd_hdr) * rq->tag); + cmd->command_header_dma = dd->port->command_list_dma + + (sizeof(struct mtip_cmd_hdr) * rq->tag); + + if (host_cap_64) + cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); + + cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); +} + static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; @@ -180,6 +199,9 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) if (IS_ERR(rq)) return NULL; + /* Internal cmd isn't submitted via .queue_rq */ + mtip_init_cmd_header(rq); + return blk_mq_rq_to_pdu(rq); } @@ -241,7 +263,8 @@ static void mtip_async_complete(struct mtip_port *port, rq = mtip_rq_from_tag(dd, tag); - blk_mq_complete_request(rq, status); + cmd->status = status; + blk_mq_complete_request(rq); } /* @@ -2910,18 +2933,19 @@ static void mtip_softirq_done_fn(struct request *rq) if (unlikely(cmd->unaligned)) up(&dd->port->cmd_slot_unal); - blk_mq_end_request(rq, rq->errors); + blk_mq_end_request(rq, cmd->status); } static void mtip_abort_cmd(struct request *req, void *data, bool reserved) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); clear_bit(req->tag, dd->port->cmds_to_issue); - req->errors = -EIO; + cmd->status = -EIO; mtip_softirq_done_fn(req); } @@ -3807,6 +3831,8 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; int ret; + mtip_init_cmd_header(rq); + if (unlikely(mtip_check_unal_depth(hctx, rq))) return BLK_MQ_RQ_QUEUE_BUSY; @@ -3816,7 +3842,6 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, if (likely(!ret)) return BLK_MQ_RQ_QUEUE_OK; - rq->errors = ret; return BLK_MQ_RQ_QUEUE_ERROR; } @@ -3838,7 +3863,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, { struct driver_data *dd = data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; /* * For flush requests, request_idx starts at the end of the @@ -3855,17 +3879,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, memset(cmd->command, 0, CMD_DMA_ALLOC_SZ); - /* Point the command headers at the command tables. */ - cmd->command_header = dd->port->command_list + - (sizeof(struct mtip_cmd_hdr) * request_idx); - cmd->command_header_dma = dd->port->command_list_dma + - (sizeof(struct mtip_cmd_hdr) * request_idx); - - if (host_cap_64) - cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); - - cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); - sg_init_table(cmd->sg, MTIP_MAX_SG); return 0; } @@ -3889,7 +3902,7 @@ exit_handler: return BLK_EH_RESET_TIMER; } -static struct blk_mq_ops mtip_mq_ops = { +static const struct blk_mq_ops mtip_mq_ops = { .queue_rq = mtip_queue_rq, .init_request = mtip_init_cmd, .exit_request = mtip_free_cmd, @@ -4025,7 +4038,6 @@ skip_create_disk: dd->queue->limits.discard_granularity = 4096; blk_queue_max_discard_sectors(dd->queue, MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); - dd->queue->limits.discard_zeroes_data = 0; } /* Set the capacity of the device in 512 byte sectors. */ @@ -4107,9 +4119,11 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) struct driver_data *dd = (struct driver_data *)data; struct mtip_cmd *cmd; - if (likely(!reserv)) - blk_mq_complete_request(rq, -ENODEV); - else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { + if (likely(!reserv)) { + cmd = blk_mq_rq_to_pdu(rq); + cmd->status = -ENODEV; + blk_mq_complete_request(rq); + } else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); if (cmd->comp_func) @@ -4162,7 +4176,7 @@ static int mtip_block_remove(struct driver_data *dd) dev_info(&dd->pdev->dev, "device %s surprise removal\n", dd->disk->disk_name); - blk_mq_freeze_queue_start(dd->queue); + blk_freeze_queue_start(dd->queue); blk_mq_stop_hw_queues(dd->queue); blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 7617888f7944..57b41528a824 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -352,6 +352,7 @@ struct mtip_cmd { int retries; /* The number of retries left for this command. */ int direction; /* Data transfer direction */ + int status; }; /* Structure used to describe a port. */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d8a23561b4cb..ac376b9b852d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -40,49 +40,82 @@ #include <asm/types.h> #include <linux/nbd.h> +#include <linux/nbd-netlink.h> +#include <net/genetlink.h> static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); +static int nbd_total_devices = 0; struct nbd_sock { struct socket *sock; struct mutex tx_lock; struct request *pending; int sent; + bool dead; + int fallback_index; + int cookie; +}; + +struct recv_thread_args { + struct work_struct work; + struct nbd_device *nbd; + int index; +}; + +struct link_dead_args { + struct work_struct work; + int index; }; #define NBD_TIMEDOUT 0 #define NBD_DISCONNECT_REQUESTED 1 #define NBD_DISCONNECTED 2 -#define NBD_RUNNING 3 +#define NBD_HAS_PID_FILE 3 +#define NBD_HAS_CONFIG_REF 4 +#define NBD_BOUND 5 +#define NBD_DESTROY_ON_DISCONNECT 6 -struct nbd_device { +struct nbd_config { u32 flags; unsigned long runtime_flags; - struct nbd_sock **socks; - int magic; + u64 dead_conn_timeout; - struct blk_mq_tag_set tag_set; - - struct mutex config_lock; - struct gendisk *disk; + struct nbd_sock **socks; int num_connections; + atomic_t live_connections; + wait_queue_head_t conn_wait; + atomic_t recv_threads; wait_queue_head_t recv_wq; loff_t blksize; loff_t bytesize; - - struct task_struct *task_recv; - struct task_struct *task_setup; - #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbg_dir; #endif }; +struct nbd_device { + struct blk_mq_tag_set tag_set; + + int index; + refcount_t config_refs; + refcount_t refs; + struct nbd_config *config; + struct mutex config_lock; + struct gendisk *disk; + + struct list_head list; + struct task_struct *task_recv; + struct task_struct *task_setup; +}; + struct nbd_cmd { struct nbd_device *nbd; + int index; + int cookie; struct completion send_complete; + int status; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -100,18 +133,16 @@ static int part_shift; static int nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); - +static void nbd_config_put(struct nbd_device *nbd); +static void nbd_connect_reply(struct genl_info *info, int index); +static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); +static void nbd_dead_link_work(struct work_struct *work); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { return disk_to_dev(nbd->disk); } -static bool nbd_is_connected(struct nbd_device *nbd) -{ - return !!nbd->task_recv; -} - static const char *nbdcmd_to_ascii(int cmd) { switch (cmd) { @@ -124,44 +155,104 @@ static const char *nbdcmd_to_ascii(int cmd) return "invalid"; } -static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) +static ssize_t pid_show(struct device *dev, + struct device_attribute *attr, char *buf) { - if (bdev->bd_openers <= 1) - bd_set_size(bdev, 0); - set_capacity(nbd->disk, 0); - kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + struct gendisk *disk = dev_to_disk(dev); + struct nbd_device *nbd = (struct nbd_device *)disk->private_data; - return 0; + return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); +} + +static struct device_attribute pid_attr = { + .attr = { .name = "pid", .mode = S_IRUGO}, + .show = pid_show, +}; + +static void nbd_dev_remove(struct nbd_device *nbd) +{ + struct gendisk *disk = nbd->disk; + if (disk) { + del_gendisk(disk); + blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&nbd->tag_set); + disk->private_data = NULL; + put_disk(disk); + } + kfree(nbd); +} + +static void nbd_put(struct nbd_device *nbd) +{ + if (refcount_dec_and_mutex_lock(&nbd->refs, + &nbd_index_mutex)) { + idr_remove(&nbd_index_idr, nbd->index); + mutex_unlock(&nbd_index_mutex); + nbd_dev_remove(nbd); + } +} + +static int nbd_disconnected(struct nbd_config *config) +{ + return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || + test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); +} + +static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, + int notify) +{ + if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) { + struct link_dead_args *args; + args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO); + if (args) { + INIT_WORK(&args->work, nbd_dead_link_work); + args->index = nbd->index; + queue_work(system_wq, &args->work); + } + } + if (!nsock->dead) { + kernel_sock_shutdown(nsock->sock, SHUT_RDWR); + atomic_dec(&nbd->config->live_connections); + } + nsock->dead = true; + nsock->pending = NULL; + nsock->sent = 0; +} + +static void nbd_size_clear(struct nbd_device *nbd) +{ + if (nbd->config->bytesize) { + set_capacity(nbd->disk, 0); + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + } } -static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) +static void nbd_size_update(struct nbd_device *nbd) { - blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize); - blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize); - bd_set_size(bdev, nbd->bytesize); - set_capacity(nbd->disk, nbd->bytesize >> 9); + struct nbd_config *config = nbd->config; + blk_queue_logical_block_size(nbd->disk->queue, config->blksize); + blk_queue_physical_block_size(nbd->disk->queue, config->blksize); + set_capacity(nbd->disk, config->bytesize >> 9); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); } -static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, - loff_t blocksize, loff_t nr_blocks) +static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, + loff_t nr_blocks) { - nbd->blksize = blocksize; - nbd->bytesize = blocksize * nr_blocks; - if (nbd_is_connected(nbd)) - nbd_size_update(nbd, bdev); + struct nbd_config *config = nbd->config; + config->blksize = blocksize; + config->bytesize = blocksize * nr_blocks; + nbd_size_update(nbd); } -static void nbd_end_request(struct nbd_cmd *cmd) +static void nbd_complete_rq(struct request *req) { - struct nbd_device *nbd = cmd->nbd; - struct request *req = blk_mq_rq_from_pdu(cmd); - int error = req->errors ? -EIO : 0; + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); - dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd, - error ? "failed" : "done"); + dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd, + cmd->status ? "failed" : "done"); - blk_mq_complete_request(req, error); + blk_mq_end_request(req, cmd->status); } /* @@ -169,17 +260,18 @@ static void nbd_end_request(struct nbd_cmd *cmd) */ static void sock_shutdown(struct nbd_device *nbd) { + struct nbd_config *config = nbd->config; int i; - if (nbd->num_connections == 0) + if (config->num_connections == 0) return; - if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) + if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags)) return; - for (i = 0; i < nbd->num_connections; i++) { - struct nbd_sock *nsock = nbd->socks[i]; + for (i = 0; i < config->num_connections; i++) { + struct nbd_sock *nsock = config->socks[i]; mutex_lock(&nsock->tx_lock); - kernel_sock_shutdown(nsock->sock, SHUT_RDWR); + nbd_mark_nsock_dead(nbd, nsock, 0); mutex_unlock(&nsock->tx_lock); } dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); @@ -190,14 +282,58 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); struct nbd_device *nbd = cmd->nbd; + struct nbd_config *config; - dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); - set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); - req->errors = -EIO; + if (!refcount_inc_not_zero(&nbd->config_refs)) { + cmd->status = -EIO; + return BLK_EH_HANDLED; + } - mutex_lock(&nbd->config_lock); + /* If we are waiting on our dead timer then we could get timeout + * callbacks for our request. For this we just want to reset the timer + * and let the queue side take care of everything. + */ + if (!completion_done(&cmd->send_complete)) { + nbd_config_put(nbd); + return BLK_EH_RESET_TIMER; + } + config = nbd->config; + + if (config->num_connections > 1) { + dev_err_ratelimited(nbd_to_dev(nbd), + "Connection timed out, retrying\n"); + /* + * Hooray we have more connections, requeue this IO, the submit + * path will put it on a real connection. + */ + if (config->socks && config->num_connections > 1) { + if (cmd->index < config->num_connections) { + struct nbd_sock *nsock = + config->socks[cmd->index]; + mutex_lock(&nsock->tx_lock); + /* We can have multiple outstanding requests, so + * we don't want to mark the nsock dead if we've + * already reconnected with a new socket, so + * only mark it dead if its the same socket we + * were sent out on. + */ + if (cmd->cookie == nsock->cookie) + nbd_mark_nsock_dead(nbd, nsock, 1); + mutex_unlock(&nsock->tx_lock); + } + blk_mq_requeue_request(req, true); + nbd_config_put(nbd); + return BLK_EH_NOT_HANDLED; + } + } else { + dev_err_ratelimited(nbd_to_dev(nbd), + "Connection timed out\n"); + } + set_bit(NBD_TIMEDOUT, &config->runtime_flags); + cmd->status = -EIO; sock_shutdown(nbd); - mutex_unlock(&nbd->config_lock); + nbd_config_put(nbd); + return BLK_EH_HANDLED; } @@ -207,7 +343,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, static int sock_xmit(struct nbd_device *nbd, int index, int send, struct iov_iter *iter, int msg_flags, int *sent) { - struct socket *sock = nbd->socks[index]->sock; + struct nbd_config *config = nbd->config; + struct socket *sock = config->socks[index]->sock; int result; struct msghdr msg; unsigned long pflags = current->flags; @@ -253,7 +390,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); - struct nbd_sock *nsock = nbd->socks[index]; + struct nbd_config *config = nbd->config; + struct nbd_sock *nsock = config->socks[index]; int result; struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; @@ -284,7 +422,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } if (rq_data_dir(req) == WRITE && - (nbd->flags & NBD_FLAG_READ_ONLY)) { + (config->flags & NBD_FLAG_READ_ONLY)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Write on read-only\n"); return -EIO; @@ -301,6 +439,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } iov_iter_advance(&from, sent); } + cmd->index = index; + cmd->cookie = nsock->cookie; request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -328,7 +468,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); - return -EIO; + return -EAGAIN; } send_pages: if (type != NBD_CMD_WRITE) @@ -370,7 +510,7 @@ send_pages: dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); - return -EIO; + return -EAGAIN; } /* * The completion might already have come in, @@ -392,6 +532,7 @@ out: /* NULL returned = something went wrong, inform userspace */ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { + struct nbd_config *config = nbd->config; int result; struct nbd_reply reply; struct nbd_cmd *cmd; @@ -405,8 +546,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { - if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && - !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + if (!nbd_disconnected(config)) dev_err(disk_to_dev(nbd->disk), "Receive control failed (result %d)\n", result); return ERR_PTR(result); @@ -433,7 +573,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); - req->errors = -EIO; + cmd->status = -EIO; return cmd; } @@ -449,8 +589,19 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); - req->errors = -EIO; - return cmd; + /* + * If we've disconnected or we only have 1 + * connection then we need to make sure we + * complete this request, otherwise error out + * and let the timeout stuff handle resubmitting + * this request onto another connection. + */ + if (nbd_disconnected(config) || + config->num_connections <= 1) { + cmd->status = -EIO; + return cmd; + } + return ERR_PTR(-EIO); } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", cmd, bvec.bv_len); @@ -462,54 +613,34 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) return cmd; } -static ssize_t pid_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - struct nbd_device *nbd = (struct nbd_device *)disk->private_data; - - return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); -} - -static struct device_attribute pid_attr = { - .attr = { .name = "pid", .mode = S_IRUGO}, - .show = pid_show, -}; - -struct recv_thread_args { - struct work_struct work; - struct nbd_device *nbd; - int index; -}; - static void recv_work(struct work_struct *work) { struct recv_thread_args *args = container_of(work, struct recv_thread_args, work); struct nbd_device *nbd = args->nbd; + struct nbd_config *config = nbd->config; struct nbd_cmd *cmd; int ret = 0; - BUG_ON(nbd->magic != NBD_MAGIC); while (1) { cmd = nbd_read_stat(nbd, args->index); if (IS_ERR(cmd)) { + struct nbd_sock *nsock = config->socks[args->index]; + + mutex_lock(&nsock->tx_lock); + nbd_mark_nsock_dead(nbd, nsock, 1); + mutex_unlock(&nsock->tx_lock); ret = PTR_ERR(cmd); break; } - nbd_end_request(cmd); + blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); } - - /* - * We got an error, shut everybody down if this wasn't the result of a - * disconnect request. - */ - if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) - sock_shutdown(nbd); - atomic_dec(&nbd->recv_threads); - wake_up(&nbd->recv_wq); + atomic_dec(&config->recv_threads); + wake_up(&config->recv_wq); + nbd_config_put(nbd); + kfree(args); } static void nbd_clear_req(struct request *req, void *data, bool reserved) @@ -519,47 +650,119 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) if (!blk_mq_request_started(req)) return; cmd = blk_mq_rq_to_pdu(req); - req->errors = -EIO; - nbd_end_request(cmd); + cmd->status = -EIO; + blk_mq_complete_request(req); } static void nbd_clear_que(struct nbd_device *nbd) { - BUG_ON(nbd->magic != NBD_MAGIC); - + blk_mq_stop_hw_queues(nbd->disk->queue); blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); + blk_mq_start_hw_queues(nbd->disk->queue); dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); } +static int find_fallback(struct nbd_device *nbd, int index) +{ + struct nbd_config *config = nbd->config; + int new_index = -1; + struct nbd_sock *nsock = config->socks[index]; + int fallback = nsock->fallback_index; + + if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) + return new_index; + + if (config->num_connections <= 1) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on invalid socket\n"); + return new_index; + } + + if (fallback >= 0 && fallback < config->num_connections && + !config->socks[fallback]->dead) + return fallback; + + if (nsock->fallback_index < 0 || + nsock->fallback_index >= config->num_connections || + config->socks[nsock->fallback_index]->dead) { + int i; + for (i = 0; i < config->num_connections; i++) { + if (i == index) + continue; + if (!config->socks[i]->dead) { + new_index = i; + break; + } + } + nsock->fallback_index = new_index; + if (new_index < 0) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Dead connection, failed to find a fallback\n"); + return new_index; + } + } + new_index = nsock->fallback_index; + return new_index; +} + +static int wait_for_reconnect(struct nbd_device *nbd) +{ + struct nbd_config *config = nbd->config; + if (!config->dead_conn_timeout) + return 0; + if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) + return 0; + wait_event_interruptible_timeout(config->conn_wait, + atomic_read(&config->live_connections), + config->dead_conn_timeout); + return atomic_read(&config->live_connections); +} static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; + struct nbd_config *config; struct nbd_sock *nsock; int ret; - if (index >= nbd->num_connections) { + if (!refcount_inc_not_zero(&nbd->config_refs)) { dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on invalid socket\n"); + "Socks array is empty\n"); return -EINVAL; } + config = nbd->config; - if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { + if (index >= config->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); + "Attempted send on invalid socket\n"); + nbd_config_put(nbd); return -EINVAL; } - - req->errors = 0; - - nsock = nbd->socks[index]; + cmd->status = 0; +again: + nsock = config->socks[index]; mutex_lock(&nsock->tx_lock); - if (unlikely(!nsock->sock)) { + if (nsock->dead) { + int old_index = index; + index = find_fallback(nbd, index); mutex_unlock(&nsock->tx_lock); - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); - return -EINVAL; + if (index < 0) { + if (wait_for_reconnect(nbd)) { + index = old_index; + goto again; + } + /* All the sockets should already be down at this point, + * we just want to make sure that DISCONNECTED is set so + * any requests that come in that were queue'ed waiting + * for the reconnect timer don't trigger the timer again + * and instead just error out. + */ + sock_shutdown(nbd); + nbd_config_put(nbd); + return -EIO; + } + goto again; } /* Handle the case that we have a pending request that was partially @@ -572,9 +775,21 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) ret = 0; goto out; } + /* + * Some failures are related to the link going down, so anything that + * returns EAGAIN can be retried on a different socket. + */ ret = nbd_send_cmd(nbd, cmd, index); + if (ret == -EAGAIN) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Request send failed trying another connection\n"); + nbd_mark_nsock_dead(nbd, nsock, 1); + mutex_unlock(&nsock->tx_lock); + goto again; + } out: mutex_unlock(&nsock->tx_lock); + nbd_config_put(nbd); return ret; } @@ -611,9 +826,10 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } -static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, - unsigned long arg) +static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, + bool netlink) { + struct nbd_config *config = nbd->config; struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; @@ -623,43 +839,107 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, if (!sock) return err; - if (!nbd->task_setup) + if (!netlink && !nbd->task_setup && + !test_bit(NBD_BOUND, &config->runtime_flags)) nbd->task_setup = current; - if (nbd->task_setup != current) { + + if (!netlink && + (nbd->task_setup != current || + test_bit(NBD_BOUND, &config->runtime_flags))) { dev_err(disk_to_dev(nbd->disk), "Device being setup by another task"); - return -EINVAL; + sockfd_put(sock); + return -EBUSY; } - socks = krealloc(nbd->socks, (nbd->num_connections + 1) * + socks = krealloc(config->socks, (config->num_connections + 1) * sizeof(struct nbd_sock *), GFP_KERNEL); - if (!socks) + if (!socks) { + sockfd_put(sock); return -ENOMEM; + } nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); - if (!nsock) + if (!nsock) { + sockfd_put(sock); return -ENOMEM; + } - nbd->socks = socks; + config->socks = socks; + nsock->fallback_index = -1; + nsock->dead = false; mutex_init(&nsock->tx_lock); nsock->sock = sock; nsock->pending = NULL; nsock->sent = 0; - socks[nbd->num_connections++] = nsock; + nsock->cookie = 0; + socks[config->num_connections++] = nsock; + atomic_inc(&config->live_connections); - if (max_part) - bdev->bd_invalidated = 1; return 0; } +static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) +{ + struct nbd_config *config = nbd->config; + struct socket *sock, *old; + struct recv_thread_args *args; + int i; + int err; + + sock = sockfd_lookup(arg, &err); + if (!sock) + return err; + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) { + sockfd_put(sock); + return -ENOMEM; + } + + for (i = 0; i < config->num_connections; i++) { + struct nbd_sock *nsock = config->socks[i]; + + if (!nsock->dead) + continue; + + mutex_lock(&nsock->tx_lock); + if (!nsock->dead) { + mutex_unlock(&nsock->tx_lock); + continue; + } + sk_set_memalloc(sock->sk); + atomic_inc(&config->recv_threads); + refcount_inc(&nbd->config_refs); + old = nsock->sock; + nsock->fallback_index = -1; + nsock->sock = sock; + nsock->dead = false; + INIT_WORK(&args->work, recv_work); + args->index = i; + args->nbd = nbd; + nsock->cookie++; + mutex_unlock(&nsock->tx_lock); + sockfd_put(old); + + /* We take the tx_mutex in an error path in the recv_work, so we + * need to queue_work outside of the tx_mutex. + */ + queue_work(recv_workqueue, &args->work); + + atomic_inc(&config->live_connections); + wake_up(&config->conn_wait); + return 0; + } + sockfd_put(sock); + kfree(args); + return -ENOSPC; +} + /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { - nbd->runtime_flags = 0; - nbd->blksize = 1024; - nbd->bytesize = 0; - set_capacity(nbd->disk, 0); - nbd->flags = 0; + nbd->config = NULL; nbd->tag_set.timeout = 0; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); } @@ -668,21 +948,23 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - set_device_ro(bdev, false); - bdev->bd_inode->i_size = 0; + bd_set_size(bdev, 0); if (max_part > 0) { blkdev_reread_part(bdev); bdev->bd_invalidated = 1; } } -static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) +static void nbd_parse_flags(struct nbd_device *nbd) { - if (nbd->flags & NBD_FLAG_READ_ONLY) - set_device_ro(bdev, true); - if (nbd->flags & NBD_FLAG_SEND_TRIM) + struct nbd_config *config = nbd->config; + if (config->flags & NBD_FLAG_READ_ONLY) + set_disk_ro(nbd->disk, true); + else + set_disk_ro(nbd->disk, false); + if (config->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); - if (nbd->flags & NBD_FLAG_SEND_FLUSH) + if (config->flags & NBD_FLAG_SEND_FLUSH) blk_queue_write_cache(nbd->disk->queue, true, false); else blk_queue_write_cache(nbd->disk->queue, false, false); @@ -690,6 +972,7 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) static void send_disconnects(struct nbd_device *nbd) { + struct nbd_config *config = nbd->config; struct nbd_request request = { .magic = htonl(NBD_REQUEST_MAGIC), .type = htonl(NBD_CMD_DISC), @@ -698,7 +981,7 @@ static void send_disconnects(struct nbd_device *nbd) struct iov_iter from; int i, ret; - for (i = 0; i < nbd->num_connections; i++) { + for (i = 0; i < config->num_connections; i++) { iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret <= 0) @@ -707,145 +990,162 @@ static void send_disconnects(struct nbd_device *nbd) } } -static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev) +static int nbd_disconnect(struct nbd_device *nbd) { - dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); - if (!nbd->socks) - return -EINVAL; - - mutex_unlock(&nbd->config_lock); - fsync_bdev(bdev); - mutex_lock(&nbd->config_lock); - - /* Check again after getting mutex back. */ - if (!nbd->socks) - return -EINVAL; + struct nbd_config *config = nbd->config; + dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, - &nbd->runtime_flags)) + &config->runtime_flags)) send_disconnects(nbd); return 0; } -static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) +static void nbd_clear_sock(struct nbd_device *nbd) { sock_shutdown(nbd); nbd_clear_que(nbd); + nbd->task_setup = NULL; +} - __invalidate_device(bdev, true); - nbd_bdev_reset(bdev); - /* - * We want to give the run thread a chance to wait for everybody - * to clean up and then do it's own cleanup. - */ - if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) && - nbd->num_connections) { - int i; - - for (i = 0; i < nbd->num_connections; i++) { - sockfd_put(nbd->socks[i]->sock); - kfree(nbd->socks[i]); +static void nbd_config_put(struct nbd_device *nbd) +{ + if (refcount_dec_and_mutex_lock(&nbd->config_refs, + &nbd->config_lock)) { + struct nbd_config *config = nbd->config; + nbd_dev_dbg_close(nbd); + nbd_size_clear(nbd); + if (test_and_clear_bit(NBD_HAS_PID_FILE, + &config->runtime_flags)) + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); + nbd->task_recv = NULL; + nbd_clear_sock(nbd); + if (config->num_connections) { + int i; + for (i = 0; i < config->num_connections; i++) { + sockfd_put(config->socks[i]->sock); + kfree(config->socks[i]); + } + kfree(config->socks); } - kfree(nbd->socks); - nbd->socks = NULL; - nbd->num_connections = 0; - } - nbd->task_setup = NULL; + nbd_reset(nbd); - return 0; + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + module_put(THIS_MODULE); + } } -static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) +static int nbd_start_device(struct nbd_device *nbd) { - struct recv_thread_args *args; - int num_connections = nbd->num_connections; + struct nbd_config *config = nbd->config; + int num_connections = config->num_connections; int error = 0, i; if (nbd->task_recv) return -EBUSY; - if (!nbd->socks) + if (!config->socks) return -EINVAL; if (num_connections > 1 && - !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { + !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) { dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); - error = -EINVAL; - goto out_err; + return -EINVAL; } - set_bit(NBD_RUNNING, &nbd->runtime_flags); - blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); - args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); - if (!args) { - error = -ENOMEM; - goto out_err; - } + blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); nbd->task_recv = current; - mutex_unlock(&nbd->config_lock); - nbd_parse_flags(nbd, bdev); + nbd_parse_flags(nbd); error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (error) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - goto out_recv; + return error; } - - nbd_size_update(nbd, bdev); + set_bit(NBD_HAS_PID_FILE, &config->runtime_flags); nbd_dev_dbg_init(nbd); for (i = 0; i < num_connections; i++) { - sk_set_memalloc(nbd->socks[i]->sock->sk); - atomic_inc(&nbd->recv_threads); - INIT_WORK(&args[i].work, recv_work); - args[i].nbd = nbd; - args[i].index = i; - queue_work(recv_workqueue, &args[i].work); - } - wait_event_interruptible(nbd->recv_wq, - atomic_read(&nbd->recv_threads) == 0); - for (i = 0; i < num_connections; i++) - flush_work(&args[i].work); - nbd_dev_dbg_close(nbd); - nbd_size_clear(nbd, bdev); - device_remove_file(disk_to_dev(nbd->disk), &pid_attr); -out_recv: - mutex_lock(&nbd->config_lock); - nbd->task_recv = NULL; -out_err: - clear_bit(NBD_RUNNING, &nbd->runtime_flags); - nbd_clear_sock(nbd, bdev); + struct recv_thread_args *args; + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) { + sock_shutdown(nbd); + return -ENOMEM; + } + sk_set_memalloc(config->socks[i]->sock->sk); + atomic_inc(&config->recv_threads); + refcount_inc(&nbd->config_refs); + INIT_WORK(&args->work, recv_work); + args->nbd = nbd; + args->index = i; + queue_work(recv_workqueue, &args->work); + } + return error; +} + +static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) +{ + struct nbd_config *config = nbd->config; + int ret; + + ret = nbd_start_device(nbd); + if (ret) + return ret; + + bd_set_size(bdev, config->bytesize); + if (max_part) + bdev->bd_invalidated = 1; + mutex_unlock(&nbd->config_lock); + ret = wait_event_interruptible(config->recv_wq, + atomic_read(&config->recv_threads) == 0); + if (ret) + sock_shutdown(nbd); + mutex_lock(&nbd->config_lock); + bd_set_size(bdev, 0); /* user requested, ignore socket errors */ - if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) - error = 0; - if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) - error = -ETIMEDOUT; + if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) + ret = 0; + if (test_bit(NBD_TIMEDOUT, &config->runtime_flags)) + ret = -ETIMEDOUT; + return ret; +} - nbd_reset(nbd); - return error; +static void nbd_clear_sock_ioctl(struct nbd_device *nbd, + struct block_device *bdev) +{ + sock_shutdown(nbd); + kill_bdev(bdev); + nbd_bdev_reset(bdev); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); } /* Must be called with config_lock held */ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { + struct nbd_config *config = nbd->config; + switch (cmd) { case NBD_DISCONNECT: - return nbd_disconnect(nbd, bdev); + return nbd_disconnect(nbd); case NBD_CLEAR_SOCK: - return nbd_clear_sock(nbd, bdev); + nbd_clear_sock_ioctl(nbd, bdev); + return 0; case NBD_SET_SOCK: - return nbd_add_socket(nbd, bdev, arg); + return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: - nbd_size_set(nbd, bdev, arg, - div_s64(nbd->bytesize, arg)); + nbd_size_set(nbd, arg, + div_s64(config->bytesize, arg)); return 0; case NBD_SET_SIZE: - nbd_size_set(nbd, bdev, nbd->blksize, - div_s64(arg, nbd->blksize)); + nbd_size_set(nbd, config->blksize, + div_s64(arg, config->blksize)); return 0; case NBD_SET_SIZE_BLOCKS: - nbd_size_set(nbd, bdev, nbd->blksize, arg); + nbd_size_set(nbd, config->blksize, arg); return 0; case NBD_SET_TIMEOUT: if (arg) { @@ -855,10 +1155,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return 0; case NBD_SET_FLAGS: - nbd->flags = arg; + config->flags = arg; return 0; case NBD_DO_IT: - return nbd_start_device(nbd, bdev); + return nbd_start_device_ioctl(nbd, bdev); case NBD_CLEAR_QUE: /* * This is for compatibility only. The queue is always cleared @@ -879,23 +1179,92 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nbd_device *nbd = bdev->bd_disk->private_data; - int error; + struct nbd_config *config = nbd->config; + int error = -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - BUG_ON(nbd->magic != NBD_MAGIC); - mutex_lock(&nbd->config_lock); - error = __nbd_ioctl(bdev, nbd, cmd, arg); - mutex_unlock(&nbd->config_lock); + /* Don't allow ioctl operations on a nbd device that was created with + * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine. + */ + if (!test_bit(NBD_BOUND, &config->runtime_flags) || + (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK)) + error = __nbd_ioctl(bdev, nbd, cmd, arg); + else + dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n"); + mutex_unlock(&nbd->config_lock); return error; } +static struct nbd_config *nbd_alloc_config(void) +{ + struct nbd_config *config; + + config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); + if (!config) + return NULL; + atomic_set(&config->recv_threads, 0); + init_waitqueue_head(&config->recv_wq); + init_waitqueue_head(&config->conn_wait); + config->blksize = 1024; + atomic_set(&config->live_connections, 0); + try_module_get(THIS_MODULE); + return config; +} + +static int nbd_open(struct block_device *bdev, fmode_t mode) +{ + struct nbd_device *nbd; + int ret = 0; + + mutex_lock(&nbd_index_mutex); + nbd = bdev->bd_disk->private_data; + if (!nbd) { + ret = -ENXIO; + goto out; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + ret = -ENXIO; + goto out; + } + if (!refcount_inc_not_zero(&nbd->config_refs)) { + struct nbd_config *config; + + mutex_lock(&nbd->config_lock); + if (refcount_inc_not_zero(&nbd->config_refs)) { + mutex_unlock(&nbd->config_lock); + goto out; + } + config = nbd->config = nbd_alloc_config(); + if (!config) { + ret = -ENOMEM; + mutex_unlock(&nbd->config_lock); + goto out; + } + refcount_set(&nbd->config_refs, 1); + refcount_inc(&nbd->refs); + mutex_unlock(&nbd->config_lock); + } +out: + mutex_unlock(&nbd_index_mutex); + return ret; +} + +static void nbd_release(struct gendisk *disk, fmode_t mode) +{ + struct nbd_device *nbd = disk->private_data; + nbd_config_put(nbd); + nbd_put(nbd); +} + static const struct block_device_operations nbd_fops = { .owner = THIS_MODULE, + .open = nbd_open, + .release = nbd_release, .ioctl = nbd_ioctl, .compat_ioctl = nbd_ioctl, }; @@ -927,7 +1296,7 @@ static const struct file_operations nbd_dbg_tasks_ops = { static int nbd_dbg_flags_show(struct seq_file *s, void *unused) { struct nbd_device *nbd = s->private; - u32 flags = nbd->flags; + u32 flags = nbd->config->flags; seq_printf(s, "Hex: 0x%08x\n\n", flags); @@ -960,6 +1329,7 @@ static const struct file_operations nbd_dbg_flags_ops = { static int nbd_dev_dbg_init(struct nbd_device *nbd) { struct dentry *dir; + struct nbd_config *config = nbd->config; if (!nbd_dbg_dir) return -EIO; @@ -970,12 +1340,12 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd) nbd_name(nbd)); return -EIO; } - nbd->dbg_dir = dir; + config->dbg_dir = dir; debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); - debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); + debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize); debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); - debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize); + debugfs_create_u64("blocksize", 0444, dir, &config->blksize); debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); return 0; @@ -983,7 +1353,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd) static void nbd_dev_dbg_close(struct nbd_device *nbd) { - debugfs_remove_recursive(nbd->dbg_dir); + debugfs_remove_recursive(nbd->config->dbg_dir); } static int nbd_dbg_init(void) @@ -1035,25 +1405,13 @@ static int nbd_init_request(void *data, struct request *rq, return 0; } -static struct blk_mq_ops nbd_mq_ops = { +static const struct blk_mq_ops nbd_mq_ops = { .queue_rq = nbd_queue_rq, + .complete = nbd_complete_rq, .init_request = nbd_init_request, .timeout = nbd_xmit_timeout, }; -static void nbd_dev_remove(struct nbd_device *nbd) -{ - struct gendisk *disk = nbd->disk; - nbd->magic = 0; - if (disk) { - del_gendisk(disk); - blk_cleanup_queue(disk->queue); - blk_mq_free_tag_set(&nbd->tag_set); - put_disk(disk); - } - kfree(nbd); -} - static int nbd_dev_add(int index) { struct nbd_device *nbd; @@ -1082,6 +1440,7 @@ static int nbd_dev_add(int index) if (err < 0) goto out_free_disk; + nbd->index = index; nbd->disk = disk; nbd->tag_set.ops = &nbd_mq_ops; nbd->tag_set.nr_hw_queues = 1; @@ -1110,20 +1469,23 @@ static int nbd_dev_add(int index) queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 512; blk_queue_max_discard_sectors(disk->queue, UINT_MAX); - disk->queue->limits.discard_zeroes_data = 0; + blk_queue_max_segment_size(disk->queue, UINT_MAX); + blk_queue_max_segments(disk->queue, USHRT_MAX); blk_queue_max_hw_sectors(disk->queue, 65536); disk->queue->limits.max_sectors = 256; - nbd->magic = NBD_MAGIC; mutex_init(&nbd->config_lock); + refcount_set(&nbd->config_refs, 0); + refcount_set(&nbd->refs, 1); + INIT_LIST_HEAD(&nbd->list); disk->major = NBD_MAJOR; disk->first_minor = index << part_shift; disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); - init_waitqueue_head(&nbd->recv_wq); nbd_reset(nbd); add_disk(disk); + nbd_total_devices++; return index; out_free_tags: @@ -1138,10 +1500,535 @@ out: return err; } -/* - * And here should be modules and kernel interface - * (Just smiley confuses emacs :-) +static int find_free_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + struct nbd_device **found = data; + + if (!refcount_read(&nbd->config_refs)) { + *found = nbd; + return 1; + } + return 0; +} + +/* Netlink interface. */ +static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { + [NBD_ATTR_INDEX] = { .type = NLA_U32 }, + [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, + [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, +}; + +static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { + [NBD_SOCK_FD] = { .type = NLA_U32 }, +}; + +/* We don't use this right now since we don't parse the incoming list, but we + * still want it here so userspace knows what to expect. */ +static struct nla_policy __attribute__((unused)) +nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { + [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, + [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, +}; + +static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index = -1; + int ret; + bool put_dev = false; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + if (!info->attrs[NBD_ATTR_SOCKETS]) { + printk(KERN_ERR "nbd: must specify at least one socket\n"); + return -EINVAL; + } + if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { + printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); + return -EINVAL; + } +again: + mutex_lock(&nbd_index_mutex); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); + if (ret == 0) { + int new_index; + new_index = nbd_dev_add(-1); + if (new_index < 0) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: failed to add new device\n"); + return ret; + } + nbd = idr_find(&nbd_index_idr, new_index); + } + } else { + nbd = idr_find(&nbd_index_idr, index); + } + if (!nbd) { + printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + mutex_unlock(&nbd_index_mutex); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + if (index == -1) + goto again; + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + + mutex_lock(&nbd->config_lock); + if (refcount_read(&nbd->config_refs)) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + if (index == -1) + goto again; + printk(KERN_ERR "nbd: nbd%d already in use\n", index); + return -EBUSY; + } + if (WARN_ON(nbd->config)) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + return -EINVAL; + } + config = nbd->config = nbd_alloc_config(); + if (!nbd->config) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + printk(KERN_ERR "nbd: couldn't allocate config\n"); + return -ENOMEM; + } + refcount_set(&nbd->config_refs, 1); + set_bit(NBD_BOUND, &config->runtime_flags); + + if (info->attrs[NBD_ATTR_SIZE_BYTES]) { + u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); + nbd_size_set(nbd, config->blksize, + div64_u64(bytes, config->blksize)); + } + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + u64 bsize = + nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); + nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); + } + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } + if (info->attrs[NBD_ATTR_SERVER_FLAGS]) + config->flags = + nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags); + put_dev = true; + } + } + + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if (nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, + nbd_sock_policy); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock list\n"); + ret = -EINVAL; + goto out; + } + if (!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_add_socket(nbd, fd, true); + if (ret) + goto out; + } + } + ret = nbd_start_device(nbd); +out: + mutex_unlock(&nbd->config_lock); + if (!ret) { + set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); + refcount_inc(&nbd->config_refs); + nbd_connect_reply(info, nbd->index); + } + nbd_config_put(nbd); + if (put_dev) + nbd_put(nbd); + return ret; +} + +static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd; + int index; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify an index to disconnect\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + if (!nbd) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + if (!refcount_inc_not_zero(&nbd->config_refs)) { + nbd_put(nbd); + return 0; + } + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); + nbd_config_put(nbd); + nbd_put(nbd); + return 0; +} + +static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index; + int ret = -EINVAL; + bool put_dev = false; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + if (!nbd) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: couldn't find a device at index %d\n", + index); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + + if (!refcount_inc_not_zero(&nbd->config_refs)) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + nbd_put(nbd); + return -EINVAL; + } + + mutex_lock(&nbd->config_lock); + config = nbd->config; + if (!test_bit(NBD_BOUND, &config->runtime_flags) || + !nbd->task_recv) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + goto out; + } + + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + put_dev = true; + } else { + if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + refcount_inc(&nbd->refs); + } + } + + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if (nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, + nbd_sock_policy); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock list\n"); + ret = -EINVAL; + goto out; + } + if (!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_reconnect_socket(nbd, fd); + if (ret) { + if (ret == -ENOSPC) + ret = 0; + goto out; + } + dev_info(nbd_to_dev(nbd), "reconnected socket\n"); + } + } +out: + mutex_unlock(&nbd->config_lock); + nbd_config_put(nbd); + nbd_put(nbd); + if (put_dev) + nbd_put(nbd); + return ret; +} + +static const struct genl_ops nbd_connect_genl_ops[] = { + { + .cmd = NBD_CMD_CONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_connect, + }, + { + .cmd = NBD_CMD_DISCONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_disconnect, + }, + { + .cmd = NBD_CMD_RECONFIGURE, + .policy = nbd_attr_policy, + .doit = nbd_genl_reconfigure, + }, + { + .cmd = NBD_CMD_STATUS, + .policy = nbd_attr_policy, + .doit = nbd_genl_status, + }, +}; + +static const struct genl_multicast_group nbd_mcast_grps[] = { + { .name = NBD_GENL_MCAST_GROUP_NAME, }, +}; + +static struct genl_family nbd_genl_family __ro_after_init = { + .hdrsize = 0, + .name = NBD_GENL_FAMILY_NAME, + .version = NBD_GENL_VERSION, + .module = THIS_MODULE, + .ops = nbd_connect_genl_ops, + .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), + .maxattr = NBD_ATTR_MAX, + .mcgrps = nbd_mcast_grps, + .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), +}; + +static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) +{ + struct nlattr *dev_opt; + u8 connected = 0; + int ret; + + /* This is a little racey, but for status it's ok. The + * reason we don't take a ref here is because we can't + * take a ref in the index == -1 case as we would need + * to put under the nbd_index_mutex, which could + * deadlock if we are configured to remove ourselves + * once we're disconnected. + */ + if (refcount_read(&nbd->config_refs)) + connected = 1; + dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); + if (!dev_opt) + return -EMSGSIZE; + ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); + if (ret) + return -EMSGSIZE; + ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, + connected); + if (ret) + return -EMSGSIZE; + nla_nest_end(reply, dev_opt); + return 0; +} + +static int status_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + return populate_nbd_status(nbd, (struct sk_buff *)data); +} + +static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *dev_list; + struct sk_buff *reply; + void *reply_head; + size_t msg_size; + int index = -1; + int ret = -ENOMEM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + + mutex_lock(&nbd_index_mutex); + + msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + + nla_attr_size(sizeof(u8))); + msg_size *= (index == -1) ? nbd_total_devices : 1; + + reply = genlmsg_new(msg_size, GFP_KERNEL); + if (!reply) + goto out; + reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0, + NBD_CMD_STATUS); + if (!reply_head) { + nlmsg_free(reply); + goto out; + } + + dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &status_cb, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } else { + struct nbd_device *nbd; + nbd = idr_find(&nbd_index_idr, index); + if (nbd) { + ret = populate_nbd_status(nbd, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } + } + nla_nest_end(reply, dev_list); + genlmsg_end(reply, reply_head); + genlmsg_reply(reply, info); + ret = 0; +out: + mutex_unlock(&nbd_index_mutex); + return ret; +} + +static void nbd_connect_reply(struct genl_info *info, int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0, + NBD_CMD_CONNECT); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_reply(skb, info); +} + +static void nbd_mcast_index(int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0, + NBD_CMD_LINK_DEAD); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL); +} + +static void nbd_dead_link_work(struct work_struct *work) +{ + struct link_dead_args *args = container_of(work, struct link_dead_args, + work); + nbd_mcast_index(args->index); + kfree(args); +} static int __init nbd_init(void) { @@ -1184,6 +2071,11 @@ static int __init nbd_init(void) return -EIO; } + if (genl_register_family(&nbd_genl_family)) { + unregister_blkdev(NBD_MAJOR, "nbd"); + destroy_workqueue(recv_workqueue); + return -EINVAL; + } nbd_dbg_init(); mutex_lock(&nbd_index_mutex); @@ -1195,17 +2087,34 @@ static int __init nbd_init(void) static int nbd_exit_cb(int id, void *ptr, void *data) { + struct list_head *list = (struct list_head *)data; struct nbd_device *nbd = ptr; - nbd_dev_remove(nbd); + + list_add_tail(&nbd->list, list); return 0; } static void __exit nbd_cleanup(void) { + struct nbd_device *nbd; + LIST_HEAD(del_list); + nbd_dbg_close(); - idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL); + mutex_lock(&nbd_index_mutex); + idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list); + mutex_unlock(&nbd_index_mutex); + + while (!list_empty(&del_list)) { + nbd = list_first_entry(&del_list, struct nbd_device, list); + list_del_init(&nbd->list); + if (refcount_read(&nbd->refs) != 1) + printk(KERN_ERR "nbd: possibly leaking a device\n"); + nbd_put(nbd); + } + idr_destroy(&nbd_index_idr); + genl_unregister_family(&nbd_genl_family); destroy_workqueue(recv_workqueue); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 6f2e565bccc5..d946e1eeac8e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -117,6 +117,10 @@ static bool use_lightnvm; module_param(use_lightnvm, bool, S_IRUGO); MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device"); +static bool blocking; +module_param(blocking, bool, S_IRUGO); +MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); + static int irqmode = NULL_IRQ_SOFTIRQ; static int null_set_irqmode(const char *str, const struct kernel_param *kp) @@ -277,7 +281,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) case NULL_IRQ_SOFTIRQ: switch (queue_mode) { case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq, cmd->rq->errors); + blk_mq_complete_request(cmd->rq); break; case NULL_Q_RQ: blk_complete_request(cmd->rq); @@ -357,6 +361,8 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, { struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + if (irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; @@ -392,7 +398,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static struct blk_mq_ops null_mq_ops = { +static const struct blk_mq_ops null_mq_ops = { .queue_rq = null_queue_rq, .init_hctx = null_init_hctx, .complete = null_softirq_done_fn, @@ -437,14 +443,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) if (IS_ERR(rq)) return -ENOMEM; - rq->__sector = bio->bi_iter.bi_sector; - rq->ioprio = bio_prio(bio); - - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; + blk_init_request_from_bio(rq, bio); rq->end_io_data = rqd; @@ -724,6 +723,9 @@ static int null_add_dev(void) nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; nullb->tag_set.driver_data = nullb; + if (blocking) + nullb->tag_set.flags |= BLK_MQ_F_BLOCKING; + rv = blk_mq_alloc_tag_set(&nullb->tag_set); if (rv) goto out_cleanup_queues; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c deleted file mode 100644 index 8127b8201a01..000000000000 --- a/drivers/block/osdblk.c +++ /dev/null @@ -1,693 +0,0 @@ - -/* - osdblk.c -- Export a single SCSI OSD object as a Linux block device - - - Copyright 2009 Red Hat, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - - Instructions for use - -------------------- - - 1) Map a Linux block device to an existing OSD object. - - In this example, we will use partition id 1234, object id 5678, - OSD device /dev/osd1. - - $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add - - - 2) List all active blkdev<->object mappings. - - In this example, we have performed step #1 twice, creating two blkdevs, - mapped to two separate OSD objects. - - $ cat /sys/class/osdblk/list - 0 174 1234 5678 /dev/osd1 - 1 179 1994 897123 /dev/osd0 - - The columns, in order, are: - - blkdev unique id - - blkdev assigned major - - OSD object partition id - - OSD object id - - OSD device - - - 3) Remove an active blkdev<->object mapping. - - In this example, we remove the mapping with blkdev unique id 1. - - $ echo 1 > /sys/class/osdblk/remove - - - NOTE: The actual creation and deletion of OSD objects is outside the scope - of this driver. - - */ - -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <scsi/osd_initiator.h> -#include <scsi/osd_attributes.h> -#include <scsi/osd_sec.h> -#include <scsi/scsi_device.h> - -#define DRV_NAME "osdblk" -#define PFX DRV_NAME ": " - -/* #define _OSDBLK_DEBUG */ -#ifdef _OSDBLK_DEBUG -#define OSDBLK_DEBUG(fmt, a...) \ - printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) -#else -#define OSDBLK_DEBUG(fmt, a...) \ - do { if (0) printk(fmt, ##a); } while (0) -#endif - -MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); -MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); -MODULE_LICENSE("GPL"); - -struct osdblk_device; - -enum { - OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ - OSDBLK_MAX_REQ = 32, /* max parallel requests */ - OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ -}; - -struct osdblk_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct osdblk_device *osdev; /* associated blkdev */ -}; - -struct osdblk_device { - int id; /* blkdev unique id */ - - int major; /* blkdev assigned major */ - struct gendisk *disk; /* blkdev's gendisk and rq */ - struct request_queue *q; - - struct osd_dev *osd; /* associated OSD */ - - char name[32]; /* blkdev name, e.g. osdblk34 */ - - spinlock_t lock; /* queue lock */ - - struct osd_obj_id obj; /* OSD partition, obj id */ - uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ - - struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ - - struct list_head node; - - char osd_path[0]; /* OSD device path */ -}; - -static struct class *class_osdblk; /* /sys/class/osdblk */ -static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ -static LIST_HEAD(osdblkdev_list); - -static const struct block_device_operations osdblk_bd_ops = { - .owner = THIS_MODULE, -}; - -static const struct osd_attr g_attr_logical_length = ATTR_DEF( - OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); - -static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], - const struct osd_obj_id *obj) -{ - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); -} - -/* copied from exofs; move to libosd? */ -/* - * Perform a synchronous OSD operation. copied from exofs; move to libosd? - */ -static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) -{ - int ret; - - or->timeout = timeout; - ret = osd_finalize_request(or, 0, credential, NULL); - if (ret) - return ret; - - ret = osd_execute_request(or); - - /* osd_req_decode_sense(or, ret); */ - return ret; -} - -/* - * Perform an asynchronous OSD operation. copied from exofs; move to libosd? - */ -static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, - void *caller_context, u8 *cred) -{ - int ret; - - ret = osd_finalize_request(or, 0, cred, NULL); - if (ret) - return ret; - - ret = osd_execute_request_async(or, async_done, caller_context); - - return ret; -} - -/* copied from exofs; move to libosd? */ -static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) -{ - struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ - void *iter = NULL; - int nelem; - - do { - nelem = 1; - osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); - if ((cur_attr.attr_page == attr->attr_page) && - (cur_attr.attr_id == attr->attr_id)) { - attr->len = cur_attr.len; - attr->val_ptr = cur_attr.val_ptr; - return 0; - } - } while (iter); - - return -EIO; -} - -static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) -{ - struct osd_request *or; - struct osd_attr attr; - int ret; - - /* start request */ - or = osd_start_request(osdev->osd, GFP_KERNEL); - if (!or) - return -ENOMEM; - - /* create a get-attributes(length) request */ - osd_req_get_attributes(or, &osdev->obj); - - osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); - - /* execute op synchronously */ - ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); - if (ret) - goto out; - - /* extract length from returned attribute info */ - attr = g_attr_logical_length; - ret = extract_attr_from_req(or, &attr); - if (ret) - goto out; - - *size_out = get_unaligned_be64(attr.val_ptr); - -out: - osd_end_request(or); - return ret; - -} - -static void osdblk_osd_complete(struct osd_request *or, void *private) -{ - struct osdblk_request *orq = private; - struct osd_sense_info osi; - int ret = osd_req_decode_sense(or, &osi); - - if (ret) { - ret = -EIO; - OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); - } - - /* complete OSD request */ - osd_end_request(or); - - /* complete request passed to osdblk by block layer */ - __blk_end_request_all(orq->rq, ret); -} - -static void bio_chain_put(struct bio *chain) -{ - struct bio *tmp; - - while (chain) { - tmp = chain; - chain = chain->bi_next; - - bio_put(tmp); - } -} - -static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) -{ - struct bio *tmp, *new_chain = NULL, *tail = NULL; - - while (old_chain) { - tmp = bio_clone_kmalloc(old_chain, gfpmask); - if (!tmp) - goto err_out; - - tmp->bi_bdev = NULL; - gfpmask &= ~__GFP_DIRECT_RECLAIM; - tmp->bi_next = NULL; - - if (!new_chain) - new_chain = tail = tmp; - else { - tail->bi_next = tmp; - tail = tmp; - } - - old_chain = old_chain->bi_next; - } - - return new_chain; - -err_out: - OSDBLK_DEBUG("bio_chain_clone with err\n"); - bio_chain_put(new_chain); - return NULL; -} - -static void osdblk_rq_fn(struct request_queue *q) -{ - struct osdblk_device *osdev = q->queuedata; - - while (1) { - struct request *rq; - struct osdblk_request *orq; - struct osd_request *or; - struct bio *bio; - bool do_write, do_flush; - - /* peek at request from block layer */ - rq = blk_fetch_request(q); - if (!rq) - break; - - /* deduce our operation (read, write, flush) */ - /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] - * into a clearly defined set of RPC commands: - * read, write, flush, scsi command, power mgmt req, - * driver-specific, etc. - */ - - do_flush = (req_op(rq) == REQ_OP_FLUSH); - do_write = (rq_data_dir(rq) == WRITE); - - if (!do_flush) { /* osd_flush does not use a bio */ - /* a bio clone to be passed down to OSD request */ - bio = bio_chain_clone(rq->bio, GFP_ATOMIC); - if (!bio) - break; - } else - bio = NULL; - - /* alloc internal OSD request, for OSD command execution */ - or = osd_start_request(osdev->osd, GFP_ATOMIC); - if (!or) { - bio_chain_put(bio); - OSDBLK_DEBUG("osd_start_request with err\n"); - break; - } - - orq = &osdev->req[rq->tag]; - orq->rq = rq; - orq->bio = bio; - orq->osdev = osdev; - - /* init OSD command: flush, write or read */ - if (do_flush) - osd_req_flush_object(or, &osdev->obj, - OSD_CDB_FLUSH_ALL, 0, 0); - else if (do_write) - osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, - bio, blk_rq_bytes(rq)); - else - osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, - bio, blk_rq_bytes(rq)); - - OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", - do_flush ? "flush" : do_write ? - "write" : "read", blk_rq_bytes(rq), - blk_rq_pos(rq) * 512ULL); - - /* begin OSD command execution */ - if (osd_async_op(or, osdblk_osd_complete, orq, - osdev->obj_cred)) { - osd_end_request(or); - blk_requeue_request(q, rq); - bio_chain_put(bio); - OSDBLK_DEBUG("osd_execute_request_async with err\n"); - break; - } - - /* remove the special 'flush' marker, now that the command - * is executing - */ - rq->special = NULL; - } -} - -static void osdblk_free_disk(struct osdblk_device *osdev) -{ - struct gendisk *disk = osdev->disk; - - if (!disk) - return; - - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); - put_disk(disk); -} - -static int osdblk_init_disk(struct osdblk_device *osdev) -{ - struct gendisk *disk; - struct request_queue *q; - int rc; - u64 obj_size = 0; - - /* contact OSD, request size info about the object being mapped */ - rc = osdblk_get_obj_size(osdev, &obj_size); - if (rc) - return rc; - - /* create gendisk info */ - disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); - if (!disk) - return -ENOMEM; - - sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); - disk->major = osdev->major; - disk->first_minor = 0; - disk->fops = &osdblk_bd_ops; - disk->private_data = osdev; - - /* init rq */ - q = blk_init_queue(osdblk_rq_fn, &osdev->lock); - if (!q) { - put_disk(disk); - return -ENOMEM; - } - - /* switch queue to TCQ mode; allocate tag map */ - rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO); - if (rc) { - blk_cleanup_queue(q); - put_disk(disk); - return rc; - } - - /* Set our limits to the lower device limits, because osdblk cannot - * sleep when allocating a lower-request and therefore cannot be - * bouncing. - */ - blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); - - blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_write_cache(q, true, false); - - disk->queue = q; - - q->queuedata = osdev; - - osdev->disk = disk; - osdev->q = q; - - /* finally, announce the disk to the world */ - set_capacity(disk, obj_size / 512ULL); - add_disk(disk); - - printk(KERN_INFO "%s: Added of size 0x%llx\n", - disk->disk_name, (unsigned long long)obj_size); - - return 0; -} - -/******************************************************************** - * /sys/class/osdblk/ - * add map OSD object to blkdev - * remove unmap OSD object - * list show mappings - *******************************************************************/ - -static void class_osdblk_release(struct class *cls) -{ - kfree(cls); -} - -static ssize_t class_osdblk_list(struct class *c, - struct class_attribute *attr, - char *data) -{ - int n = 0; - struct list_head *tmp; - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - struct osdblk_device *osdev; - - osdev = list_entry(tmp, struct osdblk_device, node); - - n += sprintf(data+n, "%d %d %llu %llu %s\n", - osdev->id, - osdev->major, - osdev->obj.partition, - osdev->obj.id, - osdev->osd_path); - } - - mutex_unlock(&ctl_mutex); - return n; -} - -static ssize_t class_osdblk_add(struct class *c, - struct class_attribute *attr, - const char *buf, size_t count) -{ - struct osdblk_device *osdev; - ssize_t rc; - int irc, new_id = 0; - struct list_head *tmp; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - /* new osdblk_device object */ - osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); - if (!osdev) { - rc = -ENOMEM; - goto err_out_mod; - } - - /* static osdblk_device initialization */ - spin_lock_init(&osdev->lock); - INIT_LIST_HEAD(&osdev->node); - - /* generate unique id: find highest unique id, add one */ - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - struct osdblk_device *osdev; - - osdev = list_entry(tmp, struct osdblk_device, node); - if (osdev->id > new_id) - new_id = osdev->id + 1; - } - - osdev->id = new_id; - - /* add to global list */ - list_add_tail(&osdev->node, &osdblkdev_list); - - mutex_unlock(&ctl_mutex); - - /* parse add command */ - if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, - osdev->osd_path) != 3) { - rc = -EINVAL; - goto err_out_slot; - } - - /* initialize rest of new object */ - sprintf(osdev->name, DRV_NAME "%d", osdev->id); - - /* contact requested OSD */ - osdev->osd = osduld_path_lookup(osdev->osd_path); - if (IS_ERR(osdev->osd)) { - rc = PTR_ERR(osdev->osd); - goto err_out_slot; - } - - /* build OSD credential */ - osdblk_make_credential(osdev->obj_cred, &osdev->obj); - - /* register our block device */ - irc = register_blkdev(0, osdev->name); - if (irc < 0) { - rc = irc; - goto err_out_osd; - } - - osdev->major = irc; - - /* set up and announce blkdev mapping */ - rc = osdblk_init_disk(osdev); - if (rc) - goto err_out_blkdev; - - return count; - -err_out_blkdev: - unregister_blkdev(osdev->major, osdev->name); -err_out_osd: - osduld_put_device(osdev->osd); -err_out_slot: - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - list_del_init(&osdev->node); - mutex_unlock(&ctl_mutex); - - kfree(osdev); -err_out_mod: - OSDBLK_DEBUG("Error adding device %s\n", buf); - module_put(THIS_MODULE); - return rc; -} - -static ssize_t class_osdblk_remove(struct class *c, - struct class_attribute *attr, - const char *buf, - size_t count) -{ - struct osdblk_device *osdev = NULL; - int target_id, rc; - unsigned long ul; - struct list_head *tmp; - - rc = kstrtoul(buf, 10, &ul); - if (rc) - return rc; - - /* convert to int; abort if we lost anything in the conversion */ - target_id = (int) ul; - if (target_id != ul) - return -EINVAL; - - /* remove object from list immediately */ - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - osdev = list_entry(tmp, struct osdblk_device, node); - if (osdev->id == target_id) { - list_del_init(&osdev->node); - break; - } - osdev = NULL; - } - - mutex_unlock(&ctl_mutex); - - if (!osdev) - return -ENOENT; - - /* clean up and free blkdev and associated OSD connection */ - osdblk_free_disk(osdev); - unregister_blkdev(osdev->major, osdev->name); - osduld_put_device(osdev->osd); - kfree(osdev); - - /* release module ref */ - module_put(THIS_MODULE); - - return count; -} - -static struct class_attribute class_osdblk_attrs[] = { - __ATTR(add, 0200, NULL, class_osdblk_add), - __ATTR(remove, 0200, NULL, class_osdblk_remove), - __ATTR(list, 0444, class_osdblk_list, NULL), - __ATTR_NULL -}; - -static int osdblk_sysfs_init(void) -{ - int ret = 0; - - /* - * create control files in sysfs - * /sys/class/osdblk/... - */ - class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); - if (!class_osdblk) - return -ENOMEM; - - class_osdblk->name = DRV_NAME; - class_osdblk->owner = THIS_MODULE; - class_osdblk->class_release = class_osdblk_release; - class_osdblk->class_attrs = class_osdblk_attrs; - - ret = class_register(class_osdblk); - if (ret) { - kfree(class_osdblk); - class_osdblk = NULL; - printk(PFX "failed to create class osdblk\n"); - return ret; - } - - return 0; -} - -static void osdblk_sysfs_cleanup(void) -{ - if (class_osdblk) - class_destroy(class_osdblk); - class_osdblk = NULL; -} - -static int __init osdblk_init(void) -{ - int rc; - - rc = osdblk_sysfs_init(); - if (rc) - return rc; - - return 0; -} - -static void __exit osdblk_exit(void) -{ - osdblk_sysfs_cleanup(); -} - -module_init(osdblk_init); -module_exit(osdblk_exit); - diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 939641d6e262..b1267ef34d5a 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -300,6 +300,11 @@ static void pcd_init_units(void) struct gendisk *disk = alloc_disk(1); if (!disk) continue; + disk->queue = blk_init_queue(do_pcd_request, &pcd_lock); + if (!disk->queue) { + put_disk(disk); + continue; + } cd->disk = disk; cd->pi = &cd->pia; cd->present = 0; @@ -735,18 +740,36 @@ static int pcd_detect(void) } /* I/O request processing */ -static struct request_queue *pcd_queue; +static int pcd_queue; + +static int set_next_request(void) +{ + struct pcd_unit *cd; + struct request_queue *q; + int old_pos = pcd_queue; + + do { + cd = &pcd[pcd_queue]; + q = cd->present ? cd->disk->queue : NULL; + if (++pcd_queue == PCD_UNITS) + pcd_queue = 0; + if (q) { + pcd_req = blk_fetch_request(q); + if (pcd_req) + break; + } + } while (pcd_queue != old_pos); + + return pcd_req != NULL; +} -static void do_pcd_request(struct request_queue * q) +static void pcd_request(void) { if (pcd_busy) return; while (1) { - if (!pcd_req) { - pcd_req = blk_fetch_request(q); - if (!pcd_req) - return; - } + if (!pcd_req && !set_next_request()) + return; if (rq_data_dir(pcd_req) == READ) { struct pcd_unit *cd = pcd_req->rq_disk->private_data; @@ -766,6 +789,11 @@ static void do_pcd_request(struct request_queue * q) } } +static void do_pcd_request(struct request_queue *q) +{ + pcd_request(); +} + static inline void next_request(int err) { unsigned long saved_flags; @@ -774,7 +802,7 @@ static inline void next_request(int err) if (!__blk_end_request_cur(pcd_req, err)) pcd_req = NULL; pcd_busy = 0; - do_pcd_request(pcd_queue); + pcd_request(); spin_unlock_irqrestore(&pcd_lock, saved_flags); } @@ -849,7 +877,7 @@ static void do_pcd_read_drq(void) do_pcd_read(); spin_lock_irqsave(&pcd_lock, saved_flags); - do_pcd_request(pcd_queue); + pcd_request(); spin_unlock_irqrestore(&pcd_lock, saved_flags); } @@ -957,19 +985,10 @@ static int __init pcd_init(void) return -EBUSY; } - pcd_queue = blk_init_queue(do_pcd_request, &pcd_lock); - if (!pcd_queue) { - unregister_blkdev(major, name); - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) - put_disk(cd->disk); - return -ENOMEM; - } - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { if (cd->present) { register_cdrom(&cd->info); cd->disk->private_data = cd; - cd->disk->queue = pcd_queue; add_disk(cd->disk); } } @@ -988,9 +1007,9 @@ static void __exit pcd_exit(void) pi_release(cd->pi); unregister_cdrom(&cd->info); } + blk_cleanup_queue(cd->disk->queue); put_disk(cd->disk); } - blk_cleanup_queue(pcd_queue); unregister_blkdev(major, name); pi_unregister_driver(par_drv); } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 9cfd2e06a649..7d2402f90978 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -381,12 +381,33 @@ static enum action do_pd_write_start(void); static enum action do_pd_read_drq(void); static enum action do_pd_write_done(void); -static struct request_queue *pd_queue; +static int pd_queue; static int pd_claimed; static struct pd_unit *pd_current; /* current request's drive */ static PIA *pi_current; /* current request's PIA */ +static int set_next_request(void) +{ + struct gendisk *disk; + struct request_queue *q; + int old_pos = pd_queue; + + do { + disk = pd[pd_queue].gd; + q = disk ? disk->queue : NULL; + if (++pd_queue == PD_UNITS) + pd_queue = 0; + if (q) { + pd_req = blk_fetch_request(q); + if (pd_req) + break; + } + } while (pd_queue != old_pos); + + return pd_req != NULL; +} + static void run_fsm(void) { while (1) { @@ -418,8 +439,7 @@ static void run_fsm(void) spin_lock_irqsave(&pd_lock, saved_flags); if (!__blk_end_request_cur(pd_req, res == Ok ? 0 : -EIO)) { - pd_req = blk_fetch_request(pd_queue); - if (!pd_req) + if (!set_next_request()) stop = 1; } spin_unlock_irqrestore(&pd_lock, saved_flags); @@ -719,18 +739,15 @@ static int pd_special_command(struct pd_unit *disk, enum action (*func)(struct pd_unit *disk)) { struct request *rq; - int err = 0; rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); rq->special = func; - - err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); - + blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); blk_put_request(rq); - return err; + return 0; } /* kernel glue structures */ @@ -839,7 +856,13 @@ static void pd_probe_drive(struct pd_unit *disk) p->first_minor = (disk - pd) << PD_BITS; disk->gd = p; p->private_data = disk; - p->queue = pd_queue; + p->queue = blk_init_queue(do_pd_request, &pd_lock); + if (!p->queue) { + disk->gd = NULL; + put_disk(p); + return; + } + blk_queue_max_hw_sectors(p->queue, cluster); if (disk->drive == -1) { for (disk->drive = 0; disk->drive <= 1; disk->drive++) @@ -919,26 +942,18 @@ static int __init pd_init(void) if (disable) goto out1; - pd_queue = blk_init_queue(do_pd_request, &pd_lock); - if (!pd_queue) - goto out1; - - blk_queue_max_hw_sectors(pd_queue, cluster); - if (register_blkdev(major, name)) - goto out2; + goto out1; printk("%s: %s version %s, major %d, cluster %d, nice %d\n", name, name, PD_VERSION, major, cluster, nice); if (!pd_detect()) - goto out3; + goto out2; return 0; -out3: - unregister_blkdev(major, name); out2: - blk_cleanup_queue(pd_queue); + unregister_blkdev(major, name); out1: return -ENODEV; } @@ -953,11 +968,11 @@ static void __exit pd_exit(void) if (p) { disk->gd = NULL; del_gendisk(p); + blk_cleanup_queue(p->queue); put_disk(p); pi_release(disk->pi); } } - blk_cleanup_queue(pd_queue); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 14c5d32f5d8b..f24ca7315ddc 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -287,6 +287,12 @@ static void __init pf_init_units(void) struct gendisk *disk = alloc_disk(1); if (!disk) continue; + disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock); + if (!disk->queue) { + put_disk(disk); + return; + } + blk_queue_max_segments(disk->queue, cluster); pf->disk = disk; pf->pi = &pf->pia; pf->media_status = PF_NM; @@ -772,7 +778,28 @@ static int pf_ready(void) return (((status_reg(pf_current) & (STAT_BUSY | pf_mask)) == pf_mask)); } -static struct request_queue *pf_queue; +static int pf_queue; + +static int set_next_request(void) +{ + struct pf_unit *pf; + struct request_queue *q; + int old_pos = pf_queue; + + do { + pf = &units[pf_queue]; + q = pf->present ? pf->disk->queue : NULL; + if (++pf_queue == PF_UNITS) + pf_queue = 0; + if (q) { + pf_req = blk_fetch_request(q); + if (pf_req) + break; + } + } while (pf_queue != old_pos); + + return pf_req != NULL; +} static void pf_end_request(int err) { @@ -780,16 +807,13 @@ static void pf_end_request(int err) pf_req = NULL; } -static void do_pf_request(struct request_queue * q) +static void pf_request(void) { if (pf_busy) return; repeat: - if (!pf_req) { - pf_req = blk_fetch_request(q); - if (!pf_req) - return; - } + if (!pf_req && !set_next_request()) + return; pf_current = pf_req->rq_disk->private_data; pf_block = blk_rq_pos(pf_req); @@ -817,6 +841,11 @@ repeat: } } +static void do_pf_request(struct request_queue *q) +{ + pf_request(); +} + static int pf_next_buf(void) { unsigned long saved_flags; @@ -846,7 +875,7 @@ static inline void next_request(int err) spin_lock_irqsave(&pf_spin_lock, saved_flags); pf_end_request(err); pf_busy = 0; - do_pf_request(pf_queue); + pf_request(); spin_unlock_irqrestore(&pf_spin_lock, saved_flags); } @@ -972,15 +1001,6 @@ static int __init pf_init(void) put_disk(pf->disk); return -EBUSY; } - pf_queue = blk_init_queue(do_pf_request, &pf_spin_lock); - if (!pf_queue) { - unregister_blkdev(major, name); - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) - put_disk(pf->disk); - return -ENOMEM; - } - - blk_queue_max_segments(pf_queue, cluster); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { struct gendisk *disk = pf->disk; @@ -988,7 +1008,6 @@ static int __init pf_init(void) if (!pf->present) continue; disk->private_data = pf; - disk->queue = pf_queue; add_disk(disk); } return 0; @@ -1003,10 +1022,10 @@ static void __exit pf_exit(void) if (!pf->present) continue; del_gendisk(pf->disk); + blk_cleanup_queue(pf->disk->queue); put_disk(pf->disk); pi_release(pf->pi); } - blk_cleanup_queue(pf_queue); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 66d846ba85a9..205b865ebeb9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -724,7 +724,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->rq_flags |= RQF_QUIET; blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0); - if (rq->errors) + if (scsi_req(rq)->result) ret = -EIO; out: blk_put_request(rq); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b65964..089ac4179919 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4317,7 +4317,7 @@ static int rbd_init_request(void *data, struct request *rq, return 0; } -static struct blk_mq_ops rbd_mq_ops = { +static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, .init_request = rbd_init_request, }; @@ -4380,7 +4380,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_granularity = segment_size; q->limits.discard_alignment = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); - q->limits.discard_zeroes_data = 1; if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index f81d70b39d10..9c566364ac9c 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -300,7 +300,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) RSXX_HW_BLK_SIZE >> 9); card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; - card->queue->limits.discard_zeroes_data = 1; } card->queue->queuedata = card; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index b5afd495d482..3064be6cf375 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -211,7 +211,7 @@ enum head { struct swim_priv { struct swim __iomem *base; spinlock_t lock; - struct request_queue *queue; + int fdc_queue; int floppy_count; struct floppy_state unit[FD_MAX_UNIT]; }; @@ -525,12 +525,33 @@ static int floppy_read_sectors(struct floppy_state *fs, return 0; } -static void redo_fd_request(struct request_queue *q) +static struct request *swim_next_request(struct swim_priv *swd) { + struct request_queue *q; + struct request *rq; + int old_pos = swd->fdc_queue; + + do { + q = swd->unit[swd->fdc_queue].disk->queue; + if (++swd->fdc_queue == swd->floppy_count) + swd->fdc_queue = 0; + if (q) { + rq = blk_fetch_request(q); + if (rq) + return rq; + } + } while (swd->fdc_queue != old_pos); + + return NULL; +} + +static void do_fd_request(struct request_queue *q) +{ + struct swim_priv *swd = q->queuedata; struct request *req; struct floppy_state *fs; - req = blk_fetch_request(q); + req = swim_next_request(swd); while (req) { int err = -EIO; @@ -554,15 +575,10 @@ static void redo_fd_request(struct request_queue *q) } done: if (!__blk_end_request_cur(req, err)) - req = blk_fetch_request(q); + req = swim_next_request(swd); } } -static void do_fd_request(struct request_queue *q) -{ - redo_fd_request(q); -} - static struct floppy_struct floppy_type[4] = { { 0, 0, 0, 0, 0, 0x00, 0x00, 0x00, 0x00, NULL }, /* no testing */ { 720, 9, 1, 80, 0, 0x2A, 0x02, 0xDF, 0x50, NULL }, /* 360KB SS 3.5"*/ @@ -833,22 +849,25 @@ static int swim_floppy_init(struct swim_priv *swd) return -EBUSY; } + spin_lock_init(&swd->lock); + for (drive = 0; drive < swd->floppy_count; drive++) { swd->unit[drive].disk = alloc_disk(1); if (swd->unit[drive].disk == NULL) { err = -ENOMEM; goto exit_put_disks; } + swd->unit[drive].disk->queue = blk_init_queue(do_fd_request, + &swd->lock); + if (!swd->unit[drive].disk->queue) { + err = -ENOMEM; + put_disk(swd->unit[drive].disk); + goto exit_put_disks; + } + swd->unit[drive].disk->queue->queuedata = swd; swd->unit[drive].swd = swd; } - spin_lock_init(&swd->lock); - swd->queue = blk_init_queue(do_fd_request, &swd->lock); - if (!swd->queue) { - err = -ENOMEM; - goto exit_put_disks; - } - for (drive = 0; drive < swd->floppy_count; drive++) { swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE; swd->unit[drive].disk->major = FLOPPY_MAJOR; @@ -856,7 +875,6 @@ static int swim_floppy_init(struct swim_priv *swd) sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive); swd->unit[drive].disk->fops = &floppy_fops; swd->unit[drive].disk->private_data = &swd->unit[drive]; - swd->unit[drive].disk->queue = swd->queue; set_capacity(swd->unit[drive].disk, 2880); add_disk(swd->unit[drive].disk); } @@ -943,13 +961,12 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); + blk_cleanup_queue(swd->unit[drive].disk->queue); put_disk(swd->unit[drive].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); - blk_cleanup_queue(swd->queue); - /* eject floppies */ for (drive = 0; drive < swd->floppy_count; drive++) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 61b3ffa4f458..ba4809c9bdba 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -343,8 +343,8 @@ static void start_request(struct floppy_state *fs) req->rq_disk->disk_name, req->cmd, (long)blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); - swim3_dbg(" errors=%d current_nr_sectors=%u\n", - req->errors, blk_rq_cur_sectors(req)); + swim3_dbg(" current_nr_sectors=%u\n", + blk_rq_cur_sectors(req)); #endif if (blk_rq_pos(req) >= fs->total_secs) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1d4c9f8bc1e1..f94614257462 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -111,7 +111,7 @@ static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr, return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } -static inline void virtblk_scsi_reques_done(struct request *req) +static inline void virtblk_scsi_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); struct virtio_blk *vblk = req->q->queuedata; @@ -119,7 +119,7 @@ static inline void virtblk_scsi_reques_done(struct request *req) sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); - req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); + sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); } static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, @@ -144,7 +144,7 @@ static inline int virtblk_add_req_scsi(struct virtqueue *vq, { return -EIO; } -static inline void virtblk_scsi_reques_done(struct request *req) +static inline void virtblk_scsi_request_done(struct request *req) { } #define virtblk_ioctl NULL @@ -175,19 +175,15 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - int error = virtblk_result(vbr); switch (req_op(req)) { case REQ_OP_SCSI_IN: case REQ_OP_SCSI_OUT: - virtblk_scsi_reques_done(req); - break; - case REQ_OP_DRV_IN: - req->errors = (error != 0); + virtblk_scsi_request_done(req); break; } - blk_mq_end_request(req, error); + blk_mq_end_request(req, virtblk_result(vbr)); } static void virtblk_done(struct virtqueue *vq) @@ -205,7 +201,7 @@ static void virtblk_done(struct virtqueue *vq) while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { struct request *req = blk_mq_rq_from_pdu(vbr); - blk_mq_complete_request(req, req->errors); + blk_mq_complete_request(req); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) @@ -310,7 +306,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) if (err) goto out; - err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + err = virtblk_result(blk_mq_rq_to_pdu(req)); out: blk_put_request(req); return err; @@ -597,7 +594,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set) return blk_mq_virtio_map_queues(set, vblk->vdev, 0); } -static struct blk_mq_ops virtio_mq_ops = { +static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .complete = virtblk_request_done, .init_request = virtblk_init_request, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 5067a0a952cb..39459631667c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -115,6 +115,15 @@ struct split_bio { atomic_t pending; }; +struct blkif_req { + int error; +}; + +static inline struct blkif_req *blkif_req(struct request *rq) +{ + return blk_mq_rq_to_pdu(rq); +} + static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; @@ -907,8 +916,14 @@ out_busy: return BLK_MQ_RQ_QUEUE_BUSY; } -static struct blk_mq_ops blkfront_mq_ops = { +static void blkif_complete_rq(struct request *rq) +{ + blk_mq_end_request(rq, blkif_req(rq)->error); +} + +static const struct blk_mq_ops blkfront_mq_ops = { .queue_rq = blkif_queue_rq, + .complete = blkif_complete_rq, }; static void blkif_set_queue_limits(struct blkfront_info *info) @@ -969,7 +984,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, info->tag_set.queue_depth = BLK_RING_SIZE(info); info->tag_set.numa_node = NUMA_NO_NODE; info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; - info->tag_set.cmd_size = 0; + info->tag_set.cmd_size = sizeof(struct blkif_req); info->tag_set.driver_data = info; if (blk_mq_alloc_tag_set(&info->tag_set)) @@ -1543,7 +1558,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; - int error; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return IRQ_HANDLED; @@ -1587,37 +1601,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) continue; } - error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; + blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } - blk_mq_complete_request(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; } - if (unlikely(error)) { - if (error == -EOPNOTSUPP) - error = 0; + if (unlikely(blkif_req(req)->error)) { + if (blkif_req(req)->error == -EOPNOTSUPP) + blkif_req(req)->error = 0; info->feature_fua = 0; info->feature_flush = 0; xlvbd_flush(info); @@ -1629,11 +1642,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - blk_mq_complete_request(req, error); break; default: BUG(); } + + blk_mq_complete_request(req); } rinfo->ring.rsp_cons = i; @@ -2345,6 +2359,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int physical_sector_size; unsigned int binfo; + char *envp[] = { "RESIZE=1", NULL }; int err, i; switch (info->connected) { @@ -2361,6 +2376,8 @@ static void blkfront_connect(struct blkfront_info *info) sectors); set_capacity(info->gd, sectors); revalidate_disk(info->gd); + kobject_uevent_env(&disk_to_dev(info->gd)->kobj, + KOBJ_CHANGE, envp); return; case BLKIF_STATE_SUSPENDED: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0c09d4256108..6fac5fedd610 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -829,10 +829,14 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: zram_bio_discard(zram, index, offset, bio); bio_endio(bio); return; + default: + break; } bio_for_each_segment(bvec, bio, iter) { @@ -1192,6 +1196,8 @@ static int zram_add(void) zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); + /* * zram_bio_discard() will clear all logical blocks if logical block * size is identical with physical block size(PAGE_SIZE). But if it is @@ -1201,10 +1207,7 @@ static int zram_add(void) * zeroed. */ if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) - zram->disk->queue->limits.discard_zeroes_data = 1; - else - zram->disk->queue->limits.discard_zeroes_data = 0; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); + blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); add_disk(zram->disk); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 87739649eac2..76c952fd9ab9 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2218,7 +2218,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, rq->timeout = 60 * HZ; bio = rq->bio; - if (blk_execute_rq(q, cdi->disk, rq, 0)) { + blk_execute_rq(q, cdi->disk, rq, 0); + if (scsi_req(rq)->result) { struct request_sense *s = req->sense; ret = -EIO; cdi->last_sense = s->sense_key; diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index d6f5d9eb102d..70d434bc1cbf 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -523,6 +523,7 @@ static int bt_bmc_remove(struct platform_device *pdev) static const struct of_device_id bt_bmc_match[] = { { .compatible = "aspeed,ast2400-ibt-bmc" }, + { .compatible = "aspeed,ast2500-ibt-bmc" }, { }, }; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 2a7c425ddfa7..b2b618f066e0 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1954,7 +1954,9 @@ static int hotmod_handler(const char *val, struct kernel_param *kp) kfree(info); goto out; } + mutex_lock(&smi_infos_lock); rv = try_smi_init(info); + mutex_unlock(&smi_infos_lock); if (rv) { cleanup_one_si(info); goto out; @@ -2042,8 +2044,10 @@ static int hardcode_find_bmc(void) info->slave_addr = slave_addrs[i]; if (!add_smi(info)) { + mutex_lock(&smi_infos_lock); if (try_smi_init(info)) cleanup_one_si(info); + mutex_unlock(&smi_infos_lock); ret = 0; } else { kfree(info); @@ -3492,6 +3496,11 @@ out_err: return rv; } +/* + * Try to start up an interface. Must be called with smi_infos_lock + * held, primarily to keep smi_num consistent, we only one to do these + * one at a time. + */ static int try_smi_init(struct smi_info *new_smi) { int rv = 0; @@ -3524,9 +3533,12 @@ static int try_smi_init(struct smi_info *new_smi) goto out_err; } + new_smi->intf_num = smi_num; + /* Do this early so it's available for logs. */ if (!new_smi->dev) { - init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d", 0); + init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d", + new_smi->intf_num); /* * If we don't already have a device from something @@ -3593,8 +3605,6 @@ static int try_smi_init(struct smi_info *new_smi) new_smi->interrupt_disabled = true; atomic_set(&new_smi->need_watch, 0); - new_smi->intf_num = smi_num; - smi_num++; rv = try_enable_event_buffer(new_smi); if (rv == 0) @@ -3661,6 +3671,9 @@ static int try_smi_init(struct smi_info *new_smi) goto out_err_stop_timer; } + /* Don't increment till we know we have succeeded. */ + smi_num++; + dev_info(new_smi->dev, "IPMI %s interface initialized\n", si_to_str[new_smi->si_type]); diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index cca6e5bc1cea..0b22a9be5029 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -891,6 +891,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, * for details on the intricacies of this. */ int left; + unsigned char *data_to_send; ssif_inc_stat(ssif_info, sent_messages_parts); @@ -899,6 +900,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, left = 32; /* Length byte. */ ssif_info->multi_data[ssif_info->multi_pos] = left; + data_to_send = ssif_info->multi_data + ssif_info->multi_pos; ssif_info->multi_pos += left; if (left < 32) /* @@ -912,7 +914,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, rv = ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE, SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE, - ssif_info->multi_data + ssif_info->multi_pos, + data_to_send, I2C_SMBUS_BLOCK_DATA); if (rv < 0) { /* request failed, just return the error. */ @@ -1642,9 +1644,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) spin_lock_init(&ssif_info->lock); ssif_info->ssif_state = SSIF_NORMAL; - init_timer(&ssif_info->retry_timer); - ssif_info->retry_timer.data = (unsigned long) ssif_info; - ssif_info->retry_timer.function = retry_timeout; + setup_timer(&ssif_info->retry_timer, retry_timeout, + (unsigned long)ssif_info); for (i = 0; i < SSIF_NUM_STATS; i++) atomic_set(&ssif_info->stats[i], 0); diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 5ca24d9b101b..d165af8abe36 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -516,7 +516,7 @@ static void panic_halt_ipmi_heartbeat(void) msg.cmd = IPMI_WDOG_RESET_TIMER; msg.data = NULL; msg.data_len = 0; - atomic_add(2, &panic_done_count); + atomic_add(1, &panic_done_count); rv = ipmi_request_supply_msgs(watchdog_user, (struct ipmi_addr *) &addr, 0, @@ -526,7 +526,7 @@ static void panic_halt_ipmi_heartbeat(void) &panic_halt_heartbeat_recv_msg, 1); if (rv) - atomic_sub(2, &panic_done_count); + atomic_sub(1, &panic_done_count); } static struct ipmi_smi_msg panic_halt_smi_msg = { @@ -550,12 +550,12 @@ static void panic_halt_ipmi_set_timeout(void) /* Wait for the messages to be free. */ while (atomic_read(&panic_done_count) != 0) ipmi_poll_interface(watchdog_user); - atomic_add(2, &panic_done_count); + atomic_add(1, &panic_done_count); rv = i_ipmi_set_timeout(&panic_halt_smi_msg, &panic_halt_recv_msg, &send_heartbeat_now); if (rv) { - atomic_sub(2, &panic_done_count); + atomic_sub(1, &panic_done_count); printk(KERN_WARNING PFX "Unable to extend the watchdog timeout."); } else { diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig index 1c2357301017..a077ab6edffa 100644 --- a/drivers/clk/sunxi-ng/Kconfig +++ b/drivers/clk/sunxi-ng/Kconfig @@ -16,7 +16,7 @@ config SUNXI_CCU_FRAC bool config SUNXI_CCU_GATE - bool + def_bool y config SUNXI_CCU_MUX bool diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 4773f2867234..96afb2aeed18 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -10,26 +10,16 @@ config EDAC_SUPPORT bool menuconfig EDAC - bool "EDAC (Error Detection And Correction) reporting" - depends on HAS_IOMEM && EDAC_SUPPORT + tristate "EDAC (Error Detection And Correction) reporting" + depends on HAS_IOMEM && EDAC_SUPPORT && RAS help - EDAC is designed to report errors in the core system. - These are low-level errors that are reported in the CPU or - supporting chipset or other subsystems: + EDAC is a subsystem along with hardware-specific drivers designed to + report hardware errors. These are low-level errors that are reported + in the CPU or supporting chipset or other subsystems: memory errors, cache errors, PCI errors, thermal throttling, etc.. If unsure, select 'Y'. - If this code is reporting problems on your system, please - see the EDAC project web pages for more information at: - - <http://bluesmoke.sourceforge.net/> - - and: - - <http://buttersideup.com/edacwiki> - - There is also a mailing list for the EDAC project, which can - be found via the sourceforge page. + The mailing list for the EDAC project is linux-edac@vger.kernel.org. if EDAC @@ -62,21 +52,9 @@ config EDAC_DECODE_MCE which occur really early upon boot, before the module infrastructure has been initialized. -config EDAC_MM_EDAC - tristate "Main Memory EDAC (Error Detection And Correction) reporting" - select RAS - help - Some systems are able to detect and correct errors in main - memory. EDAC can report statistics on memory error - detection and correction (EDAC - or commonly referred to ECC - errors). EDAC will also try to decode where these errors - occurred so that a particular failing memory module can be - replaced. If unsure, select 'Y'. - config EDAC_GHES bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" - depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) - default y + depends on ACPI_APEI_GHES && (EDAC=y) help Not all machines support hardware-driven error report. Some of those provide a BIOS-driven error report mechanism via ACPI, using the @@ -98,7 +76,7 @@ config EDAC_GHES config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" - depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE + depends on AMD_NB && EDAC_DECODE_MCE help Support for error detection and correction of DRAM ECC errors on the AMD64 families (>= K8) of memory controllers. @@ -124,28 +102,28 @@ config EDAC_AMD64_ERROR_INJECTION config EDAC_AMD76X tristate "AMD 76x (760, 762, 768)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the AMD 76x series of chipsets used with the Athlon processor. config EDAC_E7XXX tristate "Intel e7xxx (e7205, e7500, e7501, e7505)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel E7205, E7500, E7501 and E7505 server chipsets. config EDAC_E752X tristate "Intel e752x (e7520, e7525, e7320) and 3100" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. config EDAC_I82443BXGX tristate "Intel 82443BX/GX (440BX/GX)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 depends on BROKEN help Support for error detection and correction on the Intel @@ -153,56 +131,56 @@ config EDAC_I82443BXGX config EDAC_I82875P tristate "Intel 82875p (D82875P, E7210)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel DP82785P and E7210 server chipsets. config EDAC_I82975X tristate "Intel 82975x (D82975x)" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel DP82975x server chipsets. config EDAC_I3000 tristate "Intel 3000/3010" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3000 and 3010 server chipsets. config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. config EDAC_IE31200 tristate "Intel e312xx" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. config EDAC_X38 tristate "Intel X38" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel X38 server chipsets. config EDAC_I5400 tristate "Intel 5400 (Seaburg) chipsets" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction the Intel i5400 MCH chipset (Seaburg). config EDAC_I7CORE tristate "Intel i7 Core (Nehalem) processors" - depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction the Intel i7 Core (Nehalem) Integrated Memory Controller that exists on @@ -211,58 +189,56 @@ config EDAC_I7CORE config EDAC_I82860 tristate "Intel 82860" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel 82860 chipset. config EDAC_R82600 tristate "Radisys 82600 embedded chipset" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Radisys 82600 embedded chipset. config EDAC_I5000 tristate "Intel Greencreek/Blackford chipset" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Greekcreek/Blackford chipsets. config EDAC_I5100 tristate "Intel San Clemente MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel San Clemente MCH. config EDAC_I7300 tristate "Intel Clarksboro MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Clarksboro MCH (Intel 7300 chipset). config EDAC_SBRIDGE tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers. config EDAC_SKX tristate "Intel Skylake server Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. config EDAC_PND2 tristate "Intel Pondicherry2" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + depends on PCI && X86_64 && X86_MCE_INTEL help Support for error detection and correction on the Intel Pondicherry2 Integrated Memory Controller. This SoC IP is @@ -271,36 +247,35 @@ config EDAC_PND2 config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC + depends on FSL_SOC help Support for error detection and correction on the Freescale MPC8349, MPC8560, MPC8540, MPC8548, T4240 config EDAC_LAYERSCAPE tristate "Freescale Layerscape DDR" - depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE + depends on ARCH_LAYERSCAPE help Support for error detection and correction on Freescale memory controllers on Layerscape SoCs. config EDAC_MV64X60 tristate "Marvell MV64x60" - depends on EDAC_MM_EDAC && MV64X60 + depends on MV64X60 help Support for error detection and correction on the Marvell MV64360 and MV64460 chipsets. config EDAC_PASEMI tristate "PA Semi PWRficient" - depends on EDAC_MM_EDAC && PCI - depends on PPC_PASEMI + depends on PPC_PASEMI && PCI help Support for error detection and correction on PA Semi PWRficient. config EDAC_CELL tristate "Cell Broadband Engine memory controller" - depends on EDAC_MM_EDAC && PPC_CELL_COMMON + depends on PPC_CELL_COMMON help Support for error detection and correction on the Cell Broadband Engine internal memory controller @@ -308,7 +283,7 @@ config EDAC_CELL config EDAC_PPC4XX tristate "PPC4xx IBM DDR2 Memory Controller" - depends on EDAC_MM_EDAC && 4xx + depends on 4xx help This enables support for EDAC on the ECC memory used with the IBM DDR2 memory controller found in various @@ -317,7 +292,7 @@ config EDAC_PPC4XX config EDAC_AMD8131 tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8131 HyperTransport PCI-X Tunnel chip. @@ -326,7 +301,7 @@ config EDAC_AMD8131 config EDAC_AMD8111 tristate "AMD8111 HyperTransport I/O Hub" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8111 HyperTransport I/O Hub chip. @@ -335,7 +310,7 @@ config EDAC_AMD8111 config EDAC_CPC925 tristate "IBM CPC925 Memory Controller (PPC970FX)" - depends on EDAC_MM_EDAC && PPC64 + depends on PPC64 help Support for error detection and correction on the IBM CPC925 Bridge and Memory Controller, which is @@ -344,7 +319,7 @@ config EDAC_CPC925 config EDAC_TILE tristate "Tilera Memory Controller" - depends on EDAC_MM_EDAC && TILE + depends on TILE default y help Support for error detection and correction on the @@ -352,49 +327,59 @@ config EDAC_TILE config EDAC_HIGHBANK_MC tristate "Highbank Memory Controller" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_HIGHBANK_L2 tristate "Highbank L2 Cache" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_OCTEON_PC tristate "Cavium Octeon Primary Caches" - depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + depends on CPU_CAVIUM_OCTEON help Support for error detection and correction on the primary caches of the cnMIPS cores of Cavium Octeon family SOCs. config EDAC_OCTEON_L2C tristate "Cavium Octeon Secondary Caches (L2C)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_LMC tristate "Cavium Octeon DRAM Memory Controller (LMC)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_PCI tristate "Cavium Octeon PCI Controller" - depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC + depends on PCI && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. +config EDAC_THUNDERX + tristate "Cavium ThunderX EDAC" + depends on ARM64 + depends on PCI + help + Support for error detection and correction on the + Cavium ThunderX memory controllers (LMC), Cache + Coherent Processor Interconnect (CCPI) and L2 cache + blocks (TAD, CBC, MCI). + config EDAC_ALTERA bool "Altera SOCFPGA ECC" - depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA + depends on EDAC=y && ARCH_SOCFPGA help Support for error detection and correction on the Altera SOCs. This must be selected for SDRAM ECC. @@ -460,14 +445,14 @@ config EDAC_ALTERA_SDMMC config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" - depends on EDAC_MM_EDAC && ARCH_ZYNQ + depends on ARCH_ZYNQ help Support for error detection and correction on the Synopsys DDR memory controller. config EDAC_XGENE tristate "APM X-Gene SoC" - depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST) + depends on (ARM64 || COMPILE_TEST) help Support for error detection and correction on the APM X-Gene family of SOCs. diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 587107e90996..0fd9ffa63299 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,8 +6,7 @@ # GNU General Public License. # -obj-$(CONFIG_EDAC) := edac_stub.o -obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o +obj-$(CONFIG_EDAC) := edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o @@ -67,13 +66,14 @@ obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o obj-$(CONFIG_EDAC_TILE) += tile_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o +obj-$(CONFIG_EDAC_THUNDERX) += thunderx_edac.o obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index c5a5b91f37f0..7717b094fabb 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1023,13 +1023,23 @@ out: return ret; } +static int socfpga_is_a10(void) +{ + return of_machine_is_compatible("altr,socfpga-arria10"); +} + static int validate_parent_available(struct device_node *np); static const struct of_device_id altr_edac_a10_device_of_match[]; static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) { int irq; - struct device_node *child, *np = of_find_compatible_node(NULL, NULL, - "altr,socfpga-a10-ecc-manager"); + struct device_node *child, *np; + + if (!socfpga_is_a10()) + return -ENODEV; + + np = of_find_compatible_node(NULL, NULL, + "altr,socfpga-a10-ecc-manager"); if (!np) { edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n"); return -ENODEV; @@ -1545,8 +1555,12 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = { static int __init socfpga_init_sdmmc_ecc(void) { int rc = -ENODEV; - struct device_node *child = of_find_compatible_node(NULL, NULL, - "altr,socfpga-sdmmc-ecc"); + struct device_node *child; + + if (!socfpga_is_a10()) + return -ENODEV; + + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); if (!child) { edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); return -ENODEV; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index e5573c56b15e..480072139b7a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -40,6 +40,11 @@ #define edac_atomic_scrub(va, size) do { } while (0) #endif +int edac_op_state = EDAC_OPSTATE_INVAL; +EXPORT_SYMBOL_GPL(edac_op_state); + +static int edac_report = EDAC_REPORTING_ENABLED; + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -52,6 +57,65 @@ static void const *edac_mc_owner; static struct bus_type mc_bus[EDAC_MAX_MCS]; +int edac_get_report_status(void) +{ + return edac_report; +} +EXPORT_SYMBOL_GPL(edac_get_report_status); + +void edac_set_report_status(int new) +{ + if (new == EDAC_REPORTING_ENABLED || + new == EDAC_REPORTING_DISABLED || + new == EDAC_REPORTING_FORCE) + edac_report = new; +} +EXPORT_SYMBOL_GPL(edac_set_report_status); + +static int edac_report_set(const char *str, const struct kernel_param *kp) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + edac_report = EDAC_REPORTING_ENABLED; + else if (!strncmp(str, "off", 3)) + edac_report = EDAC_REPORTING_DISABLED; + else if (!strncmp(str, "force", 5)) + edac_report = EDAC_REPORTING_FORCE; + + return 0; +} + +static int edac_report_get(char *buffer, const struct kernel_param *kp) +{ + int ret = 0; + + switch (edac_report) { + case EDAC_REPORTING_ENABLED: + ret = sprintf(buffer, "on"); + break; + case EDAC_REPORTING_DISABLED: + ret = sprintf(buffer, "off"); + break; + case EDAC_REPORTING_FORCE: + ret = sprintf(buffer, "force"); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct kernel_param_ops edac_report_ops = { + .set = edac_report_set, + .get = edac_report_get, +}; + +module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -505,22 +569,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) EXPORT_SYMBOL_GPL(find_mci_by_dev); /* - * handler for EDAC to check if NMI type handler has asserted interrupt - */ -static int edac_mc_assert_error_check_and_clear(void) -{ - int old_state; - - if (edac_op_state == EDAC_OPSTATE_POLL) - return 1; - - old_state = edac_err_assert; - edac_err_assert = 0; - - return old_state; -} - -/* * edac_mc_workq_function * performs the operation scheduled by a workq request */ @@ -536,7 +584,7 @@ static void edac_mc_workq_function(struct work_struct *work_req) return; } - if (edac_mc_assert_error_check_and_clear()) + if (edac_op_state == EDAC_OPSTATE_POLL) mci->edac_check(mci); mutex_unlock(&mem_ctls_mutex); @@ -601,7 +649,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); - atomic_inc(&edac_handlers); return 0; fail0: @@ -619,7 +666,6 @@ fail1: static int del_mc_from_global_list(struct mem_ctl_info *mci) { - int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -628,7 +674,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) synchronize_rcu(); INIT_LIST_HEAD(&mci->link); - return handlers; + return list_empty(&mc_devices); } struct mem_ctl_info *edac_mc_find(int idx) @@ -763,7 +809,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) /* mark MCI offline: */ mci->op_state = OP_OFFLINE; - if (!del_mc_from_global_list(mci)) + if (del_mc_from_global_list(mci)) edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); @@ -1195,10 +1241,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, /* Report the error via the trace interface */ grain_bits = fls_long(e->grain) + 1; - trace_mc_event(type, e->msg, e->label, e->error_count, - mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, - (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, - grain_bits, e->syndrome, e->other_detail); + + if (IS_ENABLED(CONFIG_RAS)) + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, + e->low_layer, + (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); edac_raw_mc_handle_error(type, mci, e); } diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c deleted file mode 100644 index 952e411f01f2..000000000000 --- a/drivers/edac/edac_stub.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * common EDAC components that must be in kernel - * - * Author: Dave Jiang <djiang@mvista.com> - * - * 2007 (c) MontaVista Software, Inc. - * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov <bp@alien8.de> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - */ -#include <linux/module.h> -#include <linux/edac.h> -#include <linux/atomic.h> -#include <linux/device.h> - -int edac_op_state = EDAC_OPSTATE_INVAL; -EXPORT_SYMBOL_GPL(edac_op_state); - -atomic_t edac_handlers = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(edac_handlers); - -int edac_err_assert = 0; -EXPORT_SYMBOL_GPL(edac_err_assert); - -int edac_report_status = EDAC_REPORTING_ENABLED; -EXPORT_SYMBOL_GPL(edac_report_status); - -static int __init edac_report_setup(char *str) -{ - if (!str) - return -EINVAL; - - if (!strncmp(str, "on", 2)) - set_edac_report_status(EDAC_REPORTING_ENABLED); - else if (!strncmp(str, "off", 3)) - set_edac_report_status(EDAC_REPORTING_DISABLED); - else if (!strncmp(str, "force", 5)) - set_edac_report_status(EDAC_REPORTING_FORCE); - - return 0; -} -__setup("edac_report=", edac_report_setup); - -/* - * called to determine if there is an EDAC driver interested in - * knowing an event (such as NMI) occurred - */ -int edac_handler_set(void) -{ - if (edac_op_state == EDAC_OPSTATE_POLL) - return 0; - - return atomic_read(&edac_handlers); -} -EXPORT_SYMBOL_GPL(edac_handler_set); - -/* - * handler for NMI type of interrupts to assert error - */ -void edac_atomic_assert_error(void) -{ - edac_err_assert++; -} -EXPORT_SYMBOL_GPL(edac_atomic_assert_error); diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 928e0dba41fc..1cad5a9af8d0 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1349,7 +1349,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo struct dram_addr daddr; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = pnd2_mci; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index a65ea44e3b0b..ea21cb651b3c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct sbridge_pvt *pvt; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = get_mci_for_node_id(mce->socketid); @@ -3441,7 +3441,7 @@ static int __init sbridge_init(void) if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 1159dba4671f..64bef6c9cfb4 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; /* ignore unless this is memory related with an address */ diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c new file mode 100644 index 000000000000..86d585cb6d32 --- /dev/null +++ b/drivers/edac/thunderx_edac.c @@ -0,0 +1,2174 @@ +/* + * Cavium ThunderX memory controller kernel module + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved. + * + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/string.h> +#include <linux/stop_machine.h> +#include <linux/delay.h> +#include <linux/sizes.h> +#include <linux/atomic.h> +#include <linux/bitfield.h> +#include <linux/circ_buf.h> + +#include <asm/page.h> + +#include "edac_module.h" + +#define phys_to_pfn(phys) (PFN_DOWN(phys)) + +#define THUNDERX_NODE GENMASK(45, 44) + +enum { + ERR_CORRECTED = 1, + ERR_UNCORRECTED = 2, + ERR_UNKNOWN = 3, +}; + +#define MAX_SYNDROME_REGS 4 + +struct error_syndrome { + u64 reg[MAX_SYNDROME_REGS]; +}; + +struct error_descr { + int type; + u64 mask; + char *descr; +}; + +static void decode_register(char *str, size_t size, + const struct error_descr *descr, + const uint64_t reg) +{ + int ret = 0; + + while (descr->type && descr->mask && descr->descr) { + if (reg & descr->mask) { + ret = snprintf(str, size, "\n\t%s, %s", + descr->type == ERR_CORRECTED ? + "Corrected" : "Uncorrected", + descr->descr); + str += ret; + size -= ret; + } + descr++; + } +} + +static unsigned long get_bits(unsigned long data, int pos, int width) +{ + return (data >> pos) & ((1 << width) - 1); +} + +#define L2C_CTL 0x87E080800000 +#define L2C_CTL_DISIDXALIAS BIT(0) + +#define PCI_DEVICE_ID_THUNDER_LMC 0xa022 + +#define LMC_FADR 0x20 +#define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1) +#define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1) +#define LMC_FADR_FBANK(x) ((x >> 32) & 0xf) +#define LMC_FADR_FROW(x) ((x >> 14) & 0xffff) +#define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff) + +#define LMC_NXM_FADR 0x28 +#define LMC_ECC_SYND 0x38 + +#define LMC_ECC_PARITY_TEST 0x108 + +#define LMC_INT_W1S 0x150 + +#define LMC_INT_ENA_W1C 0x158 +#define LMC_INT_ENA_W1S 0x160 + +#define LMC_CONFIG 0x188 + +#define LMC_CONFIG_BG2 BIT(62) +#define LMC_CONFIG_RANK_ENA BIT(42) +#define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF) +#define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7) + +#define LMC_CONTROL 0x190 +#define LMC_CONTROL_XOR_BANK BIT(16) + +#define LMC_INT 0x1F0 + +#define LMC_INT_DDR_ERR BIT(11) +#define LMC_INT_DED_ERR (0xFUL << 5) +#define LMC_INT_SEC_ERR (0xFUL << 1) +#define LMC_INT_NXM_WR_MASK BIT(0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_FADR_SCRAMBLED 0x330 + +#define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \ + LMC_INT_NXM_WR_MASK) + +#define LMC_INT_CE (LMC_INT_SEC_ERR) + +static const struct error_descr lmc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = LMC_INT_SEC_ERR, + .descr = "Single-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DDR_ERR, + .descr = "DDR chip error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DED_ERR, + .descr = "Double-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_NXM_WR_MASK, + .descr = "Non-existent memory write", + }, + {0, 0, NULL}, +}; + +#define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5) +#define LMC_INT_EN_DLCRAM_DED_ERR BIT(4) +#define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3) +#define LMC_INT_INTR_DED_ENA BIT(2) +#define LMC_INT_INTR_SEC_ENA BIT(1) +#define LMC_INT_INTR_NXM_WR_ENA BIT(0) + +#define LMC_INT_ENA_ALL GENMASK(5, 0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_CONTROL 0x190 +#define LMC_CONTROL_RDIMM BIT(0) + +#define LMC_SCRAM_FADR 0x330 + +#define LMC_CHAR_MASK0 0x228 +#define LMC_CHAR_MASK2 0x238 + +#define RING_ENTRIES 8 + +struct debugfs_entry { + const char *name; + umode_t mode; + const struct file_operations fops; +}; + +struct lmc_err_ctx { + u64 reg_int; + u64 reg_fadr; + u64 reg_nxm_fadr; + u64 reg_scram_fadr; + u64 reg_ecc_synd; +}; + +struct thunderx_lmc { + void __iomem *regs; + struct pci_dev *pdev; + struct msix_entry msix_ent; + + atomic_t ecc_int; + + u64 mask0; + u64 mask2; + u64 parity_test; + u64 node; + + int xbits; + int bank_width; + int pbank_lsb; + int dimm_lsb; + int rank_lsb; + int bank_lsb; + int row_lsb; + int col_hi_lsb; + + int xor_bank; + int l2c_alias; + + struct page *mem; + + struct lmc_err_ctx err_ctx[RING_ENTRIES]; + unsigned long ring_head; + unsigned long ring_tail; +}; + +#define ring_pos(pos, size) ((pos) & (size - 1)) + +#define DEBUGFS_STRUCT(_name, _mode, _write, _read) \ +static struct debugfs_entry debugfs_##_name = { \ + .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ + .fops = { \ + .open = simple_open, \ + .write = _write, \ + .read = _read, \ + .llseek = generic_file_llseek, \ + }, \ +} + +#define DEBUGFS_FIELD_ATTR(_type, _field) \ +static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \ + char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + char buf[20]; \ + \ + snprintf(buf, count, "0x%016llx", pdata->_field); \ + return simple_read_from_buffer(data, count, ppos, \ + buf, sizeof(buf)); \ +} \ + \ +static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \ + const char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + int res; \ + \ + res = kstrtoull_from_user(data, count, 0, &pdata->_field); \ + \ + return res ? res : count; \ +} \ + \ +DEBUGFS_STRUCT(_field, 0600, \ + thunderx_##_type##_##_field##_write, \ + thunderx_##_type##_##_field##_read) \ + +#define DEBUGFS_REG_ATTR(_type, _name, _reg) \ +static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \ + char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + char buf[20]; \ + \ + sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \ + return simple_read_from_buffer(data, count, ppos, \ + buf, sizeof(buf)); \ +} \ + \ +static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \ + const char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + u64 val; \ + int res; \ + \ + res = kstrtoull_from_user(data, count, 0, &val); \ + \ + if (!res) { \ + writeq(val, pdata->regs + _reg); \ + res = count; \ + } \ + \ + return res; \ +} \ + \ +DEBUGFS_STRUCT(_name, 0600, \ + thunderx_##_type##_##_name##_write, \ + thunderx_##_type##_##_name##_read) + +#define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field) + +/* + * To get an ECC error injected, the following steps are needed: + * - Setup the ECC injection by writing the appropriate parameters: + * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0 + * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2 + * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test + * - Do the actual injection: + * echo 1 > /sys/kernel/debug/<device number>/inject_ecc + */ +static ssize_t thunderx_lmc_inject_int_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + u64 val; + int res; + + res = kstrtoull_from_user(data, count, 0, &val); + + if (!res) { + /* Trigger the interrupt */ + writeq(val, lmc->regs + LMC_INT_W1S); + res = count; + } + + return res; +} + +static ssize_t thunderx_lmc_int_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + char buf[20]; + u64 lmc_int = readq(lmc->regs + LMC_INT); + + snprintf(buf, sizeof(buf), "0x%016llx", lmc_int); + return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf)); +} + +#define TEST_PATTERN 0xa5 + +static int inject_ecc_fn(void *arg) +{ + struct thunderx_lmc *lmc = arg; + uintptr_t addr, phys; + unsigned int cline_size = cache_line_size(); + const unsigned int lines = PAGE_SIZE / cline_size; + unsigned int i, cl_idx; + + addr = (uintptr_t)page_address(lmc->mem); + phys = (uintptr_t)page_to_phys(lmc->mem); + + cl_idx = (phys & 0x7f) >> 4; + lmc->parity_test &= ~(7ULL << 8); + lmc->parity_test |= (cl_idx << 8); + + writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0); + writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2); + writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST); + + readq(lmc->regs + LMC_CHAR_MASK0); + readq(lmc->regs + LMC_CHAR_MASK2); + readq(lmc->regs + LMC_ECC_PARITY_TEST); + + for (i = 0; i < lines; i++) { + memset((void *)addr, TEST_PATTERN, cline_size); + barrier(); + + /* + * Flush L1 cachelines to the PoC (L2). + * This will cause cacheline eviction to the L2. + */ + asm volatile("dc civac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Flush L2 cachelines to the DRAM. + * This will cause cacheline eviction to the DRAM + * and ECC corruption according to the masks set. + */ + __asm__ volatile("sys #0,c11,C1,#2, %0\n" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L2 cachelines. + * The subsequent load will cause cacheline fetch + * from the DRAM and an error interrupt + */ + __asm__ volatile("sys #0,c11,C1,#1, %0" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L1 cachelines. + * The subsequent load will cause cacheline fetch + * from the L2 and/or DRAM + */ + asm volatile("dc ivac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + return 0; +} + +static ssize_t thunderx_lmc_inject_ecc_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + + unsigned int cline_size = cache_line_size(); + + u8 tmp[cline_size]; + void __iomem *addr; + unsigned int offs, timeout = 100000; + + atomic_set(&lmc->ecc_int, 0); + + lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0); + + if (!lmc->mem) + return -ENOMEM; + + addr = page_address(lmc->mem); + + while (!atomic_read(&lmc->ecc_int) && timeout--) { + stop_machine(inject_ecc_fn, lmc, NULL); + + for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) { + /* + * Do a load from the previously rigged location + * This should generate an error interrupt. + */ + memcpy(tmp, addr + offs, cline_size); + asm volatile("dsb ld\n"); + } + } + + __free_pages(lmc->mem, 0); + + return count; +} + +LMC_DEBUGFS_ENT(mask0); +LMC_DEBUGFS_ENT(mask2); +LMC_DEBUGFS_ENT(parity_test); + +DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL); +DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL); +DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read); + +struct debugfs_entry *lmc_dfs_ents[] = { + &debugfs_mask0, + &debugfs_mask2, + &debugfs_parity_test, + &debugfs_inject_ecc, + &debugfs_inject_int, + &debugfs_int_w1c, +}; + +static int thunderx_create_debugfs_nodes(struct dentry *parent, + struct debugfs_entry *attrs[], + void *data, + size_t num) +{ + int i; + struct dentry *ent; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return 0; + + if (!parent) + return -ENOENT; + + for (i = 0; i < num; i++) { + ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode, + parent, data, &attrs[i]->fops); + + if (!ent) + break; + } + + return i; +} + +static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc) +{ + phys_addr_t addr = 0; + int bank, xbits; + + addr |= lmc->node << 40; + addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb; + addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb; + addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb; + addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb; + + bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb; + + if (lmc->xor_bank) + bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width); + + addr |= bank << lmc->bank_lsb; + + xbits = PCI_FUNC(lmc->pdev->devfn); + + if (lmc->l2c_alias) + xbits ^= get_bits(addr, 20, lmc->xbits) ^ + get_bits(addr, 12, lmc->xbits); + + addr |= xbits << 7; + + return addr; +} + +static unsigned int thunderx_get_num_lmcs(unsigned int node) +{ + unsigned int number = 0; + struct pci_dev *pdev = NULL; + + do { + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_LMC, + pdev); + if (pdev) { +#ifdef CONFIG_NUMA + if (pdev->dev.numa_node == node) + number++; +#else + number++; +#endif + } + } while (pdev); + + return number; +} + +#define LMC_MESSAGE_SIZE 120 +#define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors)) + +static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + + unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx)); + struct lmc_err_ctx *ctx = &lmc->err_ctx[head]; + + writeq(0, lmc->regs + LMC_CHAR_MASK0); + writeq(0, lmc->regs + LMC_CHAR_MASK2); + writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST); + + ctx->reg_int = readq(lmc->regs + LMC_INT); + ctx->reg_fadr = readq(lmc->regs + LMC_FADR); + ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR); + ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR); + ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND); + + lmc->ring_head++; + + atomic_set(&lmc->ecc_int, 1); + + /* Clear the interrupt */ + writeq(ctx->reg_int, lmc->regs + LMC_INT); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + phys_addr_t phys_addr; + + unsigned long tail; + struct lmc_err_ctx *ctx; + + irqreturn_t ret = IRQ_NONE; + + char *msg; + char *other; + + msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(lmc->ring_head, lmc->ring_tail, + ARRAY_SIZE(lmc->err_ctx))) { + tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx)); + + ctx = &lmc->err_ctx[tail]; + + dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n", + ctx->reg_int); + dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n", + ctx->reg_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n", + ctx->reg_nxm_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n", + ctx->reg_scram_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n", + ctx->reg_ecc_synd); + + snprintf(msg, LMC_MESSAGE_SIZE, + "DIMM %lld rank %lld bank %lld row %lld col %lld", + LMC_FADR_FDIMM(ctx->reg_scram_fadr), + LMC_FADR_FBUNK(ctx->reg_scram_fadr), + LMC_FADR_FBANK(ctx->reg_scram_fadr), + LMC_FADR_FROW(ctx->reg_scram_fadr), + LMC_FADR_FCOL(ctx->reg_scram_fadr)); + + decode_register(other, LMC_OTHER_SIZE, lmc_errors, + ctx->reg_int); + + phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc); + + if (ctx->reg_int & LMC_INT_UE) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + else if (ctx->reg_int & LMC_INT_CE) + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + + lmc->ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(msg); + kfree(other); + + return ret; +} + +#ifdef CONFIG_PM +static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state) +{ + pci_save_state(pdev); + pci_disable_device(pdev); + + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int thunderx_lmc_resume(struct pci_dev *pdev) +{ + pci_set_power_state(pdev, PCI_D0); + pci_enable_wake(pdev, PCI_D0, 0); + pci_restore_state(pdev); + + return 0; +} +#endif + +static const struct pci_device_id thunderx_lmc_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) }, + { 0, }, +}; + +static inline int pci_dev_to_mc_idx(struct pci_dev *pdev) +{ + int node = dev_to_node(&pdev->dev); + int ret = PCI_FUNC(pdev->devfn); + + ret += max(node, 0) << 3; + + return ret; +} + +static int thunderx_lmc_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_lmc *lmc; + struct edac_mc_layer layer; + struct mem_ctl_info *mci; + u64 lmc_control, lmc_ddr_pll_ctl, lmc_config; + int ret; + u64 lmc_int; + void *l2c_ioaddr; + + layer.type = EDAC_MC_LAYER_SLOT; + layer.size = 2; + layer.is_virt_csrow = false; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer, + sizeof(struct thunderx_lmc)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + lmc = mci->pvt_info; + + pci_set_drvdata(pdev, mci); + + lmc->regs = pcim_iomap_table(pdev)[0]; + + lmc_control = readq(lmc->regs + LMC_CONTROL); + lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL); + lmc_config = readq(lmc->regs + LMC_CONFIG); + + if (lmc_control & LMC_CONTROL_RDIMM) { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? + MEM_RDDR4 : MEM_RDDR3; + } else { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? + MEM_DDR4 : MEM_DDR3; + } + + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + + mci->mod_name = "thunderx-lmc"; + mci->mod_ver = "1"; + mci->ctl_name = "thunderx-lmc"; + mci->dev_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_NONE; + + lmc->pdev = pdev; + lmc->msix_ent.entry = 0; + + lmc->ring_head = 0; + lmc->ring_tail = 0; + + ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector, + thunderx_lmc_err_isr, + thunderx_lmc_threaded_isr, 0, + "[EDAC] ThunderX LMC", mci); + if (ret) { + dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret); + goto err_free; + } + + lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0)); + + lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1; + lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) && + FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3; + + lmc->pbank_lsb = (lmc_config >> 5) & 0xf; + lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits; + lmc->rank_lsb = lmc->dimm_lsb; + lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0; + lmc->bank_lsb = 7 + lmc->xbits; + lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits; + + lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width; + + lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK; + + l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), + PAGE_SIZE); + + if (!l2c_ioaddr) { + dev_err(&pdev->dev, "Cannot map L2C_CTL\n"); + goto err_free; + } + + lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS); + + iounmap(l2c_ioaddr); + + ret = edac_mc_add_mc(mci); + if (ret) { + dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret); + goto err_free; + } + + lmc_int = readq(lmc->regs + LMC_INT); + writeq(lmc_int, lmc->regs + LMC_INT); + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S); + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ret = thunderx_create_debugfs_nodes(mci->debugfs, + lmc_dfs_ents, + lmc, + ARRAY_SIZE(lmc_dfs_ents)); + + if (ret != ARRAY_SIZE(lmc_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? " created" : ""); + } + } + + return 0; + +err_free: + pci_set_drvdata(pdev, NULL); + edac_mc_free(mci); + + return ret; +} + +static void thunderx_lmc_remove(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci = pci_get_drvdata(pdev); + struct thunderx_lmc *lmc = mci->pvt_info; + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl); + +static struct pci_driver thunderx_lmc_driver = { + .name = "thunderx_lmc_edac", + .probe = thunderx_lmc_probe, + .remove = thunderx_lmc_remove, +#ifdef CONFIG_PM + .suspend = thunderx_lmc_suspend, + .resume = thunderx_lmc_resume, +#endif + .id_table = thunderx_lmc_pci_tbl, +}; + +/*---------------------- OCX driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_OCX 0xa013 + +#define OCX_LINK_INTS 3 +#define OCX_INTS (OCX_LINK_INTS + 1) +#define OCX_RX_LANES 24 +#define OCX_RX_LANE_STATS 15 + +#define OCX_COM_INT 0x100 +#define OCX_COM_INT_W1S 0x108 +#define OCX_COM_INT_ENA_W1S 0x110 +#define OCX_COM_INT_ENA_W1C 0x118 + +#define OCX_COM_IO_BADID BIT(54) +#define OCX_COM_MEM_BADID BIT(53) +#define OCX_COM_COPR_BADID BIT(52) +#define OCX_COM_WIN_REQ_BADID BIT(51) +#define OCX_COM_WIN_REQ_TOUT BIT(50) +#define OCX_COM_RX_LANE GENMASK(23, 0) + +#define OCX_COM_INT_CE (OCX_COM_IO_BADID | \ + OCX_COM_MEM_BADID | \ + OCX_COM_COPR_BADID | \ + OCX_COM_WIN_REQ_BADID | \ + OCX_COM_WIN_REQ_TOUT) + +static const struct error_descr ocx_com_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_IO_BADID, + .descr = "Invalid IO transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_MEM_BADID, + .descr = "Invalid memory transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_COPR_BADID, + .descr = "Invalid coprocessor transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_BADID, + .descr = "Invalid SLI transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_TOUT, + .descr = "Window/core request timeout", + }, + {0, 0, NULL}, +}; + +#define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8) +#define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8) + +#define OCX_COM_LINK_BAD_WORD BIT(13) +#define OCX_COM_LINK_ALIGN_FAIL BIT(12) +#define OCX_COM_LINK_ALIGN_DONE BIT(11) +#define OCX_COM_LINK_UP BIT(10) +#define OCX_COM_LINK_STOP BIT(9) +#define OCX_COM_LINK_BLK_ERR BIT(8) +#define OCX_COM_LINK_REINIT BIT(7) +#define OCX_COM_LINK_LNK_DATA BIT(6) +#define OCX_COM_LINK_RXFIFO_DBE BIT(5) +#define OCX_COM_LINK_RXFIFO_SBE BIT(4) +#define OCX_COM_LINK_TXFIFO_DBE BIT(3) +#define OCX_COM_LINK_TXFIFO_SBE BIT(2) +#define OCX_COM_LINK_REPLAY_DBE BIT(1) +#define OCX_COM_LINK_REPLAY_SBE BIT(0) + +static const struct error_descr ocx_com_link_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_REPLAY_SBE, + .descr = "Replay buffer single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_TXFIFO_SBE, + .descr = "TX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_RXFIFO_SBE, + .descr = "RX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BLK_ERR, + .descr = "Block code error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_ALIGN_FAIL, + .descr = "Link alignment failure", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BAD_WORD, + .descr = "Bad code word", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_REPLAY_DBE, + .descr = "Replay buffer double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_TXFIFO_DBE, + .descr = "TX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_RXFIFO_DBE, + .descr = "RX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_STOP, + .descr = "Link stopped", + }, + {0, 0, NULL}, +}; + +#define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \ + OCX_COM_LINK_TXFIFO_DBE | \ + OCX_COM_LINK_RXFIFO_DBE | \ + OCX_COM_LINK_STOP) + +#define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \ + OCX_COM_LINK_TXFIFO_SBE | \ + OCX_COM_LINK_RXFIFO_SBE | \ + OCX_COM_LINK_BLK_ERR | \ + OCX_COM_LINK_ALIGN_FAIL | \ + OCX_COM_LINK_BAD_WORD) + +#define OCX_LNE_INT(x) (0x8018 + (x) * 0x100) +#define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100) +#define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100) +#define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100) +#define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8) + +#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8) +#define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2) +#define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1) +#define OCX_LNE_CFG_RX_STAT_ENA BIT(0) + + +#define OCX_LANE_BAD_64B67B BIT(8) +#define OCX_LANE_DSKEW_FIFO_OVFL BIT(5) +#define OCX_LANE_SCRM_SYNC_LOSS BIT(4) +#define OCX_LANE_UKWN_CNTL_WORD BIT(3) +#define OCX_LANE_CRC32_ERR BIT(2) +#define OCX_LANE_BDRY_SYNC_LOSS BIT(1) +#define OCX_LANE_SERDES_LOCK_LOSS BIT(0) + +#define OCX_COM_LANE_INT_UE (0) +#define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \ + OCX_LANE_BDRY_SYNC_LOSS | \ + OCX_LANE_CRC32_ERR | \ + OCX_LANE_UKWN_CNTL_WORD | \ + OCX_LANE_SCRM_SYNC_LOSS | \ + OCX_LANE_DSKEW_FIFO_OVFL | \ + OCX_LANE_BAD_64B67B) + +static const struct error_descr ocx_lane_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SERDES_LOCK_LOSS, + .descr = "RX SerDes lock lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BDRY_SYNC_LOSS, + .descr = "RX word boundary lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_CRC32_ERR, + .descr = "CRC32 error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_UKWN_CNTL_WORD, + .descr = "Unknown control word", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SCRM_SYNC_LOSS, + .descr = "Scrambler synchronization lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_DSKEW_FIFO_OVFL, + .descr = "RX deskew FIFO overflow", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BAD_64B67B, + .descr = "Bad 64B/67B codeword", + }, + {0, 0, NULL}, +}; + +#define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0)) +#define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0)) +#define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \ + GENMASK(9, 7) | GENMASK(5, 0)) + +#define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000) +#define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000) + +struct ocx_com_err_ctx { + u64 reg_com_int; + u64 reg_lane_int[OCX_RX_LANES]; + u64 reg_lane_stat11[OCX_RX_LANES]; +}; + +struct ocx_link_err_ctx { + u64 reg_com_link_int; + int link; +}; + +struct thunderx_ocx { + void __iomem *regs; + int com_link; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + struct msix_entry msix_ent[OCX_INTS]; + + struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES]; + struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES]; + + unsigned long com_ring_head; + unsigned long com_ring_tail; + + unsigned long link_ring_head; + unsigned long link_ring_tail; +}; + +#define OCX_MESSAGE_SIZE SZ_1K +#define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors)) + +/* This handler is threaded */ +static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + int lane; + unsigned long head = ring_pos(ocx->com_ring_head, + ARRAY_SIZE(ocx->com_err_ctx)); + struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head]; + + ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT); + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + ctx->reg_lane_int[lane] = + readq(ocx->regs + OCX_LNE_INT(lane)); + ctx->reg_lane_stat11[lane] = + readq(ocx->regs + OCX_LNE_STAT(lane, 11)); + + writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane)); + } + + writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT); + + ocx->com_ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + irqreturn_t ret = IRQ_NONE; + + unsigned long tail; + struct ocx_com_err_ctx *ctx; + int lane; + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx))) { + tail = ring_pos(ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx)); + ctx = &ocx->com_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx", + ocx->edac_dev->ctl_name, ctx->reg_com_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_errors, ctx->reg_com_int); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + for (lane = 0; lane < OCX_RX_LANES; lane++) + if (ctx->reg_com_int & BIT(lane)) { + snprintf(other, OCX_OTHER_SIZE, + "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx", + lane, ctx->reg_lane_int[lane], + lane, ctx->reg_lane_stat11[lane]); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + decode_register(other, OCX_OTHER_SIZE, + ocx_lane_errors, + ctx->reg_lane_int[lane]); + strncat(msg, other, OCX_MESSAGE_SIZE); + } + + if (ctx->reg_com_int & OCX_COM_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->com_ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + unsigned long head = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head]; + + ctx->link = msix->entry; + ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link)); + + writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link)); + + ocx->link_ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + irqreturn_t ret = IRQ_NONE; + unsigned long tail; + struct ocx_link_err_ctx *ctx; + + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail, + ARRAY_SIZE(ocx->link_err_ctx))) { + tail = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + + ctx = &ocx->link_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, + "%s: OCX_COM_LINK_INT[%d]: %016llx", + ocx->edac_dev->ctl_name, + ctx->link, ctx->reg_com_link_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_link_errors, ctx->reg_com_link_int); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE) + edac_device_handle_ue(ocx->edac_dev, 0, 0, msg); + else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->link_ring_tail++; + } + + ret = IRQ_HANDLED; +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define OCX_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(ocx, _name, _reg) + +OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0)); +OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1)); +OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2)); + +OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0)); +OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1)); +OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2)); +OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3)); +OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4)); +OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5)); +OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6)); +OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7)); + +OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8)); +OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9)); +OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10)); +OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11)); +OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12)); +OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13)); +OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14)); +OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15)); + +OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16)); +OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17)); +OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18)); +OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19)); +OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20)); +OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21)); +OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22)); +OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23)); + +OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S); + +struct debugfs_entry *ocx_dfs_ents[] = { + &debugfs_tlk0_ecc_ctl, + &debugfs_tlk1_ecc_ctl, + &debugfs_tlk2_ecc_ctl, + + &debugfs_rlk0_ecc_ctl, + &debugfs_rlk1_ecc_ctl, + &debugfs_rlk2_ecc_ctl, + + &debugfs_com_link0_int, + &debugfs_com_link1_int, + &debugfs_com_link2_int, + + &debugfs_lne00_badcnt, + &debugfs_lne01_badcnt, + &debugfs_lne02_badcnt, + &debugfs_lne03_badcnt, + &debugfs_lne04_badcnt, + &debugfs_lne05_badcnt, + &debugfs_lne06_badcnt, + &debugfs_lne07_badcnt, + &debugfs_lne08_badcnt, + &debugfs_lne09_badcnt, + &debugfs_lne10_badcnt, + &debugfs_lne11_badcnt, + &debugfs_lne12_badcnt, + &debugfs_lne13_badcnt, + &debugfs_lne14_badcnt, + &debugfs_lne15_badcnt, + &debugfs_lne16_badcnt, + &debugfs_lne17_badcnt, + &debugfs_lne18_badcnt, + &debugfs_lne19_badcnt, + &debugfs_lne20_badcnt, + &debugfs_lne21_badcnt, + &debugfs_lne22_badcnt, + &debugfs_lne23_badcnt, + + &debugfs_com_int, +}; + +static const struct pci_device_id thunderx_ocx_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) }, + { 0, }, +}; + +static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx) +{ + int lane, stat, cfg; + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + cfg = readq(ocx->regs + OCX_LNE_CFG(lane)); + cfg |= OCX_LNE_CFG_RX_STAT_RDCLR; + cfg &= ~OCX_LNE_CFG_RX_STAT_ENA; + writeq(cfg, ocx->regs + OCX_LNE_CFG(lane)); + + for (stat = 0; stat < OCX_RX_LANE_STATS; stat++) + readq(ocx->regs + OCX_LNE_STAT(lane, stat)); + } +} + +static int thunderx_ocx_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_ocx *ocx; + struct edac_device_ctl_info *edac_dev; + char name[32]; + int idx; + int i; + int ret; + u64 reg; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), "OCX%d", idx); + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx), + name, 1, "CCPI", 1, + 0, NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret); + return -ENOMEM; + } + ocx = edac_dev->pvt_info; + ocx->edac_dev = edac_dev; + ocx->com_ring_head = 0; + ocx->com_ring_tail = 0; + ocx->link_ring_head = 0; + ocx->link_ring_tail = 0; + + ocx->regs = pcim_iomap_table(pdev)[0]; + if (!ocx->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + ret = -ENODEV; + goto err_free; + } + + ocx->pdev = pdev; + + for (i = 0; i < OCX_INTS; i++) { + ocx->msix_ent[i].entry = i; + ocx->msix_ent[i].vector = 0; + } + + ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + for (i = 0; i < OCX_INTS; i++) { + ret = devm_request_threaded_irq(&pdev->dev, + ocx->msix_ent[i].vector, + (i == 3) ? + thunderx_ocx_com_isr : + thunderx_ocx_lnk_isr, + (i == 3) ? + thunderx_ocx_com_threaded_isr : + thunderx_ocx_lnk_threaded_isr, + 0, "[EDAC] ThunderX OCX", + &ocx->msix_ent[i]); + if (ret) + goto err_free; + } + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-ocx"; + edac_dev->ctl_name = "thunderx-ocx"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + ret = thunderx_create_debugfs_nodes(ocx->debugfs, + ocx_dfs_ents, + ocx, + ARRAY_SIZE(ocx_dfs_ents)); + if (ret != ARRAY_SIZE(ocx_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? " created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + thunderx_ocx_clearstats(ocx); + + for (i = 0; i < OCX_RX_LANES; i++) { + writeq(OCX_LNE_INT_ENA_ALL, + ocx->regs + OCX_LNE_INT_EN(i)); + + reg = readq(ocx->regs + OCX_LNE_INT(i)); + writeq(reg, ocx->regs + OCX_LNE_INT(i)); + + } + + for (i = 0; i < OCX_LINK_INTS; i++) { + reg = readq(ocx->regs + OCX_COM_LINKX_INT(i)); + writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i)); + + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i)); + } + + reg = readq(ocx->regs + OCX_COM_INT); + writeq(reg, ocx->regs + OCX_COM_INT); + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S); + + return 0; +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_ocx_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_ocx *ocx = edac_dev->pvt_info; + int i; + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C); + + for (i = 0; i < OCX_INTS; i++) { + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i)); + } + + edac_debugfs_remove_recursive(ocx->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl); + +static struct pci_driver thunderx_ocx_driver = { + .name = "thunderx_ocx_edac", + .probe = thunderx_ocx_probe, + .remove = thunderx_ocx_remove, + .id_table = thunderx_ocx_pci_tbl, +}; + +/*---------------------- L2C driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e +#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f +#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030 + +#define L2C_TAD_INT_W1C 0x40000 +#define L2C_TAD_INT_W1S 0x40008 + +#define L2C_TAD_INT_ENA_W1C 0x40020 +#define L2C_TAD_INT_ENA_W1S 0x40028 + + +#define L2C_TAD_INT_L2DDBE BIT(1) +#define L2C_TAD_INT_SBFSBE BIT(2) +#define L2C_TAD_INT_SBFDBE BIT(3) +#define L2C_TAD_INT_FBFSBE BIT(4) +#define L2C_TAD_INT_FBFDBE BIT(5) +#define L2C_TAD_INT_TAGDBE BIT(9) +#define L2C_TAD_INT_RDDISLMC BIT(15) +#define L2C_TAD_INT_WRDISLMC BIT(16) +#define L2C_TAD_INT_LFBTO BIT(17) +#define L2C_TAD_INT_GSYNCTO BIT(18) +#define L2C_TAD_INT_RTGSBE BIT(32) +#define L2C_TAD_INT_RTGDBE BIT(33) +#define L2C_TAD_INT_RDDISOCI BIT(34) +#define L2C_TAD_INT_WRDISOCI BIT(35) + +#define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE) + +#define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \ + L2C_TAD_INT_FBFSBE) + +#define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFDBE | \ + L2C_TAD_INT_TAGDBE | \ + L2C_TAD_INT_RTGDBE | \ + L2C_TAD_INT_WRDISOCI | \ + L2C_TAD_INT_RDDISOCI | \ + L2C_TAD_INT_WRDISLMC | \ + L2C_TAD_INT_RDDISLMC | \ + L2C_TAD_INT_LFBTO | \ + L2C_TAD_INT_GSYNCTO) + +static const struct error_descr l2_tad_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_SBFSBE, + .descr = "SBF single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_FBFSBE, + .descr = "FBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_L2DDBE, + .descr = "L2D double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_SBFDBE, + .descr = "SBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_FBFDBE, + .descr = "FBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_TAGDBE, + .descr = "TAG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RTGDBE, + .descr = "RTG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISOCI, + .descr = "Write to a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISLMC, + .descr = "Write to a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISLMC, + .descr = "Read from a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_LFBTO, + .descr = "LFB entry timeout", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_GSYNCTO, + .descr = "Global sync CCPI timeout", + }, + {0, 0, NULL}, +}; + +#define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE) + +#define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE) + +#define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC) + +#define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI) + +#define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \ + L2C_TAD_INT_RTG | \ + L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \ + L2C_TAD_INT_LFBTO) + +#define L2C_TAD_TIMETWO 0x50000 +#define L2C_TAD_TIMEOUT 0x50100 +#define L2C_TAD_ERR 0x60000 +#define L2C_TAD_TQD_ERR 0x60100 +#define L2C_TAD_TTG_ERR 0x60200 + + +#define L2C_CBC_INT_W1C 0x60000 + +#define L2C_CBC_INT_RSDSBE BIT(0) +#define L2C_CBC_INT_RSDDBE BIT(1) + +#define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE) + +#define L2C_CBC_INT_MIBSBE BIT(4) +#define L2C_CBC_INT_MIBDBE BIT(5) + +#define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE) + +#define L2C_CBC_INT_IORDDISOCI BIT(6) +#define L2C_CBC_INT_IOWRDISOCI BIT(7) + +#define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \ + L2C_CBC_INT_IOWRDISOCI) + +#define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE) +#define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE) + + +static const struct error_descr l2_cbc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_RSDSBE, + .descr = "RSD single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_MIBSBE, + .descr = "MIB single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_RSDDBE, + .descr = "RSD double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_MIBDBE, + .descr = "MIB double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IORDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IOWRDISOCI, + .descr = "Write to a disabled CCPI", + }, + {0, 0, NULL}, +}; + +#define L2C_CBC_INT_W1S 0x60008 +#define L2C_CBC_INT_ENA_W1C 0x60020 + +#define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \ + L2C_CBC_INT_IODISOCI) + +#define L2C_CBC_INT_ENA_W1S 0x60028 + +#define L2C_CBC_IODISOCIERR 0x80008 +#define L2C_CBC_IOCERR 0x80010 +#define L2C_CBC_RSDERR 0x80018 +#define L2C_CBC_MIBERR 0x80020 + + +#define L2C_MCI_INT_W1C 0x0 + +#define L2C_MCI_INT_VBFSBE BIT(0) +#define L2C_MCI_INT_VBFDBE BIT(1) + +static const struct error_descr l2_mci_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_MCI_INT_VBFSBE, + .descr = "VBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_MCI_INT_VBFDBE, + .descr = "VBF double-bit error", + }, + {0, 0, NULL}, +}; + +#define L2C_MCI_INT_W1S 0x8 +#define L2C_MCI_INT_ENA_W1C 0x20 + +#define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE) + +#define L2C_MCI_INT_ENA_W1S 0x28 + +#define L2C_MCI_ERR 0x10000 + +#define L2C_MESSAGE_SIZE SZ_1K +#define L2C_OTHER_SIZE (50 * ARRAY_SIZE(l2_tad_errors)) + +struct l2c_err_ctx { + char *reg_ext_name; + u64 reg_int; + u64 reg_ext; +}; + +struct thunderx_l2c { + void __iomem *regs; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + + int index; + + struct msix_entry msix_ent; + + struct l2c_err_ctx err_ctx[RING_ENTRIES]; + unsigned long ring_head; + unsigned long ring_tail; +}; + +static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx)); + struct l2c_err_ctx *ctx = &tad->err_ctx[head]; + + ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C); + + if (ctx->reg_int & L2C_TAD_INT_ECC) { + ctx->reg_ext_name = "TQD_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_TAG) { + ctx->reg_ext_name = "TTG_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) { + ctx->reg_ext_name = "TIMEOUT"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT); + } else if (ctx->reg_int & L2C_TAD_INT_DISOCI) { + ctx->reg_ext_name = "ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR); + } + + writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C); + + tad->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx)); + struct l2c_err_ctx *ctx = &cbc->err_ctx[head]; + + ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C); + + if (ctx->reg_int & L2C_CBC_INT_RSD) { + ctx->reg_ext_name = "RSDERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR); + } else if (ctx->reg_int & L2C_CBC_INT_MIB) { + ctx->reg_ext_name = "MIBERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR); + } else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) { + ctx->reg_ext_name = "IODISOCIERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR); + } + + writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C); + + cbc->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx)); + struct l2c_err_ctx *ctx = &mci->err_ctx[head]; + + ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C); + ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR); + + writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C); + + ctx->reg_ext_name = "ERR"; + + mci->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx)); + struct l2c_err_ctx *ctx = &l2c->err_ctx[tail]; + irqreturn_t ret = IRQ_NONE; + + u64 mask_ue, mask_ce; + const struct error_descr *l2_errors; + char *reg_int_name; + + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + switch (l2c->pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + reg_int_name = "L2C_TAD_INT"; + mask_ue = L2C_TAD_INT_UE; + mask_ce = L2C_TAD_INT_CE; + l2_errors = l2_tad_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + reg_int_name = "L2C_CBC_INT"; + mask_ue = L2C_CBC_INT_UE; + mask_ce = L2C_CBC_INT_CE; + l2_errors = l2_cbc_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + reg_int_name = "L2C_MCI_INT"; + mask_ue = L2C_MCI_INT_VBFDBE; + mask_ce = L2C_MCI_INT_VBFSBE; + l2_errors = l2_mci_errors; + break; + default: + dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", + l2c->pdev->device); + return IRQ_NONE; + } + + while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, + ARRAY_SIZE(l2c->err_ctx))) { + snprintf(msg, L2C_MESSAGE_SIZE, + "%s: %s: %016llx, %s: %016llx", + l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int, + ctx->reg_ext_name, ctx->reg_ext); + + decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int); + + strncat(msg, other, L2C_MESSAGE_SIZE); + + if (ctx->reg_int & mask_ue) + edac_device_handle_ue(l2c->edac_dev, 0, 0, msg); + else if (ctx->reg_int & mask_ce) + edac_device_handle_ce(l2c->edac_dev, 0, 0, msg); + + l2c->ring_tail++; + } + + return IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg) + +L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S); + +struct debugfs_entry *l2c_tad_dfs_ents[] = { + &debugfs_tad_int, +}; + +L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S); + +struct debugfs_entry *l2c_cbc_dfs_ents[] = { + &debugfs_cbc_int, +}; + +L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S); + +struct debugfs_entry *l2c_mci_dfs_ents[] = { + &debugfs_mci_int, +}; + +static const struct pci_device_id thunderx_l2c_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), }, + { 0, }, +}; + +static int thunderx_l2c_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_l2c *l2c; + struct edac_device_ctl_info *edac_dev; + struct debugfs_entry **l2c_devattr; + size_t dfs_entries; + irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL; + char name[32]; + const char *fmt; + u64 reg_en_offs, reg_en_mask; + int idx; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + thunderx_l2c_isr = thunderx_l2c_tad_isr; + l2c_devattr = l2c_tad_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents); + fmt = "L2C-TAD%d"; + reg_en_offs = L2C_TAD_INT_ENA_W1S; + reg_en_mask = L2C_TAD_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + thunderx_l2c_isr = thunderx_l2c_cbc_isr; + l2c_devattr = l2c_cbc_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents); + fmt = "L2C-CBC%d"; + reg_en_offs = L2C_CBC_INT_ENA_W1S; + reg_en_mask = L2C_CBC_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + thunderx_l2c_isr = thunderx_l2c_mci_isr; + l2c_devattr = l2c_mci_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents); + fmt = "L2C-MCI%d"; + reg_en_offs = L2C_MCI_INT_ENA_W1S; + reg_en_mask = L2C_MCI_INT_ENA_ALL; + break; + default: + //Should never ever get here + dev_err(&pdev->dev, "Unsupported PCI device: %04x\n", + pdev->device); + return -EINVAL; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), fmt, idx); + + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c), + name, 1, "L2C", 1, 0, + NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); + return -ENOMEM; + } + + l2c = edac_dev->pvt_info; + l2c->edac_dev = edac_dev; + + l2c->regs = pcim_iomap_table(pdev)[0]; + if (!l2c->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources\n"); + ret = -ENODEV; + goto err_free; + } + + l2c->pdev = pdev; + + l2c->ring_head = 0; + l2c->ring_tail = 0; + + l2c->msix_ent.entry = 0; + l2c->msix_ent.vector = 0; + + ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector, + thunderx_l2c_isr, + thunderx_l2c_threaded_isr, + 0, "[EDAC] ThunderX L2C", + &l2c->msix_ent); + if (ret) + goto err_free; + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-l2c"; + edac_dev->ctl_name = "thunderx-l2c"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr, + l2c, dfs_entries); + + if (ret != dfs_entries) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? " created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + writeq(reg_en_mask, l2c->regs + reg_en_offs); + + return 0; + +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_l2c_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_l2c *l2c = edac_dev->pvt_info; + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); + break; + } + + edac_debugfs_remove_recursive(l2c->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl); + +static struct pci_driver thunderx_l2c_driver = { + .name = "thunderx_l2c_edac", + .probe = thunderx_l2c_probe, + .remove = thunderx_l2c_remove, + .id_table = thunderx_l2c_pci_tbl, +}; + +static int __init thunderx_edac_init(void) +{ + int rc = 0; + + rc = pci_register_driver(&thunderx_lmc_driver); + if (rc) + return rc; + + rc = pci_register_driver(&thunderx_ocx_driver); + if (rc) + goto err_lmc; + + rc = pci_register_driver(&thunderx_l2c_driver); + if (rc) + goto err_ocx; + + return rc; +err_ocx: + pci_unregister_driver(&thunderx_ocx_driver); +err_lmc: + pci_unregister_driver(&thunderx_lmc_driver); + + return rc; +} + +static void __exit thunderx_edac_exit(void) +{ + pci_unregister_driver(&thunderx_l2c_driver); + pci_unregister_driver(&thunderx_ocx_driver); + pci_unregister_driver(&thunderx_lmc_driver); + +} + +module_init(thunderx_edac_init); +module_exit(thunderx_edac_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cavium, Inc."); +MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX"); diff --git a/drivers/extcon/devres.c b/drivers/extcon/devres.c index b40eb1805927..186fd735eb28 100644 --- a/drivers/extcon/devres.c +++ b/drivers/extcon/devres.c @@ -50,6 +50,13 @@ static void devm_extcon_dev_notifier_unreg(struct device *dev, void *res) extcon_unregister_notifier(this->edev, this->id, this->nb); } +static void devm_extcon_dev_notifier_all_unreg(struct device *dev, void *res) +{ + struct extcon_dev_notifier_devres *this = res; + + extcon_unregister_notifier_all(this->edev, this->nb); +} + /** * devm_extcon_dev_allocate - Allocate managed extcon device * @dev: device owning the extcon device being created @@ -214,3 +221,57 @@ void devm_extcon_unregister_notifier(struct device *dev, devm_extcon_dev_match, edev)); } EXPORT_SYMBOL(devm_extcon_unregister_notifier); + +/** + * devm_extcon_register_notifier_all() + * - Resource-managed extcon_register_notifier_all() + * @dev: device to allocate extcon device + * @edev: the extcon device that has the external connecotr. + * @nb: a notifier block to be registered. + * + * This function manages automatically the notifier of extcon device using + * device resource management and simplify the control of unregistering + * the notifier of extcon device. To get more information, refer that function. + * + * Returns 0 if success or negaive error number if failure. + */ +int devm_extcon_register_notifier_all(struct device *dev, struct extcon_dev *edev, + struct notifier_block *nb) +{ + struct extcon_dev_notifier_devres *ptr; + int ret; + + ptr = devres_alloc(devm_extcon_dev_notifier_all_unreg, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = extcon_register_notifier_all(edev, nb); + if (ret) { + devres_free(ptr); + return ret; + } + + ptr->edev = edev; + ptr->nb = nb; + devres_add(dev, ptr); + + return 0; +} +EXPORT_SYMBOL(devm_extcon_register_notifier_all); + +/** + * devm_extcon_unregister_notifier_all() + * - Resource-managed extcon_unregister_notifier_all() + * @dev: device to allocate extcon device + * @edev: the extcon device that has the external connecotr. + * @nb: a notifier block to be registered. + */ +void devm_extcon_unregister_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb) +{ + WARN_ON(devres_release(dev, devm_extcon_dev_notifier_all_unreg, + devm_extcon_dev_match, edev)); +} +EXPORT_SYMBOL(devm_extcon_unregister_notifier_all); diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 09ac5e70c2f3..e7750545469f 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -448,8 +448,19 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id) spin_lock_irqsave(&edev->lock, flags); state = !!(edev->state & BIT(index)); + + /* + * Call functions in a raw notifier chain for the specific one + * external connector. + */ raw_notifier_call_chain(&edev->nh[index], state, edev); + /* + * Call functions in a raw notifier chain for the all supported + * external connectors. + */ + raw_notifier_call_chain(&edev->nh_all, state, edev); + /* This could be in interrupt handler */ prop_buf = (char *)get_zeroed_page(GFP_ATOMIC); if (!prop_buf) { @@ -954,6 +965,59 @@ int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id, } EXPORT_SYMBOL_GPL(extcon_unregister_notifier); +/** + * extcon_register_notifier_all() - Register a notifier block for all connectors + * @edev: the extcon device that has the external connecotr. + * @nb: a notifier block to be registered. + * + * This fucntion registers a notifier block in order to receive the state + * change of all supported external connectors from extcon device. + * And The second parameter given to the callback of nb (val) is + * the current state and third parameter is the edev pointer. + * + * Returns 0 if success or error number if fail + */ +int extcon_register_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb) +{ + unsigned long flags; + int ret; + + if (!edev || !nb) + return -EINVAL; + + spin_lock_irqsave(&edev->lock, flags); + ret = raw_notifier_chain_register(&edev->nh_all, nb); + spin_unlock_irqrestore(&edev->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(extcon_register_notifier_all); + +/** + * extcon_unregister_notifier_all() - Unregister a notifier block from extcon. + * @edev: the extcon device that has the external connecotr. + * @nb: a notifier block to be registered. + * + * Returns 0 if success or error number if fail + */ +int extcon_unregister_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb) +{ + unsigned long flags; + int ret; + + if (!edev || !nb) + return -EINVAL; + + spin_lock_irqsave(&edev->lock, flags); + ret = raw_notifier_chain_unregister(&edev->nh_all, nb); + spin_unlock_irqrestore(&edev->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(extcon_unregister_notifier_all); + static struct attribute *extcon_attrs[] = { &dev_attr_state.attr, &dev_attr_name.attr, @@ -1212,6 +1276,8 @@ int extcon_dev_register(struct extcon_dev *edev) for (index = 0; index < edev->max_supported; index++) RAW_INIT_NOTIFIER_HEAD(&edev->nh[index]); + RAW_INIT_NOTIFIER_HEAD(&edev->nh_all); + dev_set_drvdata(&edev->dev, edev); edev->state = 0; diff --git a/drivers/extcon/extcon.h b/drivers/extcon/extcon.h index 993ddccafe11..dddddcfa0587 100644 --- a/drivers/extcon/extcon.h +++ b/drivers/extcon/extcon.h @@ -21,6 +21,8 @@ * @dev: Device of this extcon. * @state: Attach/detach state of this extcon. Do not provide at * register-time. + * @nh_all: Notifier for the state change events for all supported + * external connectors from this extcon. * @nh: Notifier for the state change events from this extcon * @entry: To support list of extcon devices so that users can * search for extcon devices based on the extcon name. @@ -43,6 +45,7 @@ struct extcon_dev { /* Internal data. Please do not set. */ struct device dev; + struct raw_notifier_head nh_all; struct raw_notifier_head *nh; struct list_head entry; int max_supported; diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index 7ef819680acd..26b05106f0d3 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -980,7 +980,7 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; /* Pad to 32-bits - FIXME: Revisit*/ if ((skb->len & 3) && skb_pad(skb, 4 - (skb->len & 3))) - goto drop; + goto inc_dropped; /* * Modem sends Phonet messages over SSI with its own endianess... @@ -1032,8 +1032,9 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev) drop2: hsi_free_msg(msg); drop: - dev->stats.tx_dropped++; dev_kfree_skb(skb); +inc_dropped: + dev->stats.tx_dropped++; return 0; } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 0649d53f3d16..22d5eafd6815 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -341,6 +341,15 @@ config SENSORS_ASB100 This driver can also be built as a module. If so, the module will be called asb100. +config SENSORS_ASPEED + tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver" + help + This driver provides support for ASPEED AST2400/AST2500 PWM + and Fan Tacho controllers. + + This driver can also be built as a module. If so, the module + will be called aspeed_pwm_tacho. + config SENSORS_ATXP1 tristate "Attansic ATXP1 VID controller" depends on I2C @@ -1643,16 +1652,6 @@ config SENSORS_TMP421 This driver can also be built as a module. If so, the module will be called tmp421. -config SENSORS_TWL4030_MADC - tristate "Texas Instruments TWL4030 MADC Hwmon" - depends on TWL4030_MADC - help - If you say yes here you get hwmon support for triton - TWL4030-MADC. - - This driver can also be built as a module. If so it will be called - twl4030-madc-hwmon. - config SENSORS_VEXPRESS tristate "Versatile Express" depends on VEXPRESS_CONFIG diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 5509edf6186a..d4641a9f16c1 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_SENSORS_ADT7475) += adt7475.o obj-$(CONFIG_SENSORS_APPLESMC) += applesmc.o obj-$(CONFIG_SENSORS_ARM_SCPI) += scpi-hwmon.o obj-$(CONFIG_SENSORS_ASC7621) += asc7621.o +obj-$(CONFIG_SENSORS_ASPEED) += aspeed-pwm-tacho.o obj-$(CONFIG_SENSORS_ATXP1) += atxp1.o obj-$(CONFIG_SENSORS_CORETEMP) += coretemp.o obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o @@ -157,7 +158,6 @@ obj-$(CONFIG_SENSORS_TMP103) += tmp103.o obj-$(CONFIG_SENSORS_TMP108) += tmp108.o obj-$(CONFIG_SENSORS_TMP401) += tmp401.o obj-$(CONFIG_SENSORS_TMP421) += tmp421.o -obj-$(CONFIG_SENSORS_TWL4030_MADC)+= twl4030-madc-hwmon.o obj-$(CONFIG_SENSORS_VEXPRESS) += vexpress-hwmon.o obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o obj-$(CONFIG_SENSORS_VIA686A) += via686a.o diff --git a/drivers/hwmon/ad7414.c b/drivers/hwmon/ad7414.c index 763490acc0df..cec227f13874 100644 --- a/drivers/hwmon/ad7414.c +++ b/drivers/hwmon/ad7414.c @@ -217,9 +217,16 @@ static const struct i2c_device_id ad7414_id[] = { }; MODULE_DEVICE_TABLE(i2c, ad7414_id); +static const struct of_device_id ad7414_of_match[] = { + { .compatible = "ad,ad7414" }, + { }, +}; +MODULE_DEVICE_TABLE(of, ad7414_of_match); + static struct i2c_driver ad7414_driver = { .driver = { .name = "ad7414", + .of_match_table = of_match_ptr(ad7414_of_match), }, .probe = ad7414_probe, .id_table = ad7414_id, diff --git a/drivers/hwmon/adc128d818.c b/drivers/hwmon/adc128d818.c index bbe3a5c5b3f5..a557b46dbe8e 100644 --- a/drivers/hwmon/adc128d818.c +++ b/drivers/hwmon/adc128d818.c @@ -546,10 +546,17 @@ static const struct i2c_device_id adc128_id[] = { }; MODULE_DEVICE_TABLE(i2c, adc128_id); +static const struct of_device_id adc128_of_match[] = { + { .compatible = "ti,adc128d818" }, + { }, +}; +MODULE_DEVICE_TABLE(of, adc128_of_match); + static struct i2c_driver adc128_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "adc128d818", + .of_match_table = of_match_ptr(adc128_of_match), }, .probe = adc128_probe, .remove = adc128_remove, diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c index 2b3105c8aed3..5140c27d16dd 100644 --- a/drivers/hwmon/ads1015.c +++ b/drivers/hwmon/ads1015.c @@ -31,6 +31,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/of.h> #include <linux/i2c/ads1015.h> @@ -268,7 +269,12 @@ static int ads1015_probe(struct i2c_client *client, GFP_KERNEL); if (!data) return -ENOMEM; - data->id = id->driver_data; + + if (client->dev.of_node) + data->id = (enum ads1015_chips) + of_device_get_match_data(&client->dev); + else + data->id = id->driver_data; i2c_set_clientdata(client, data); mutex_init(&data->update_lock); @@ -303,9 +309,23 @@ static const struct i2c_device_id ads1015_id[] = { }; MODULE_DEVICE_TABLE(i2c, ads1015_id); +static const struct of_device_id ads1015_of_match[] = { + { + .compatible = "ti,ads1015", + .data = (void *)ads1015 + }, + { + .compatible = "ti,ads1115", + .data = (void *)ads1115 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ads1015_of_match); + static struct i2c_driver ads1015_driver = { .driver = { .name = "ads1015", + .of_match_table = of_match_ptr(ads1015_of_match), }, .probe = ads1015_probe, .remove = ads1015_remove, diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c index ee396ff167d9..898607bf682b 100644 --- a/drivers/hwmon/ads7828.c +++ b/drivers/hwmon/ads7828.c @@ -31,9 +31,11 @@ #include <linux/i2c.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/platform_data/ads7828.h> #include <linux/regmap.h> #include <linux/slab.h> +#include <linux/regulator/consumer.h> /* The ADS7828 registers */ #define ADS7828_CMD_SD_SE 0x80 /* Single ended inputs */ @@ -118,9 +120,12 @@ static int ads7828_probe(struct i2c_client *client, struct ads7828_data *data; struct device *hwmon_dev; unsigned int vref_mv = ADS7828_INT_VREF_MV; + unsigned int vref_uv; bool diff_input = false; bool ext_vref = false; unsigned int regval; + enum ads7828_chips chip; + struct regulator *reg; data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL); if (!data) @@ -131,14 +136,32 @@ static int ads7828_probe(struct i2c_client *client, ext_vref = pdata->ext_vref; if (ext_vref && pdata->vref_mv) vref_mv = pdata->vref_mv; + } else if (dev->of_node) { + diff_input = of_property_read_bool(dev->of_node, + "ti,differential-input"); + reg = devm_regulator_get_optional(dev, "vref"); + if (!IS_ERR(reg)) { + vref_uv = regulator_get_voltage(reg); + vref_mv = DIV_ROUND_CLOSEST(vref_uv, 1000); + if (vref_mv < ADS7828_EXT_VREF_MV_MIN || + vref_mv > ADS7828_EXT_VREF_MV_MAX) + return -EINVAL; + ext_vref = true; + } } + if (client->dev.of_node) + chip = (enum ads7828_chips) + of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + /* Bound Vref with min/max values */ vref_mv = clamp_val(vref_mv, ADS7828_EXT_VREF_MV_MIN, ADS7828_EXT_VREF_MV_MAX); /* ADS7828 uses 12-bit samples, while ADS7830 is 8-bit */ - if (id->driver_data == ads7828) { + if (chip == ads7828) { data->lsb_resol = DIV_ROUND_CLOSEST(vref_mv * 1000, 4096); data->regmap = devm_regmap_init_i2c(client, &ads2828_regmap_config); @@ -177,9 +200,23 @@ static const struct i2c_device_id ads7828_device_ids[] = { }; MODULE_DEVICE_TABLE(i2c, ads7828_device_ids); +static const struct of_device_id ads7828_of_match[] = { + { + .compatible = "ti,ads7828", + .data = (void *)ads7828 + }, + { + .compatible = "ti,ads7830", + .data = (void *)ads7830 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ads7828_of_match); + static struct i2c_driver ads7828_driver = { .driver = { .name = "ads7828", + .of_match_table = of_match_ptr(ads7828_of_match), }, .id_table = ads7828_device_ids, diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index c646670b9ea9..c803e3c5fcd4 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -13,6 +13,7 @@ */ #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/i2c.h> @@ -58,6 +59,8 @@ #define REG_VENDID 0x3E #define REG_DEVID2 0x3F +#define REG_CONFIG1 0x40 + #define REG_STATUS1 0x41 #define REG_STATUS2 0x42 @@ -161,6 +164,27 @@ static const struct i2c_device_id adt7475_id[] = { }; MODULE_DEVICE_TABLE(i2c, adt7475_id); +static const struct of_device_id adt7475_of_match[] = { + { + .compatible = "adi,adt7473", + .data = (void *)adt7473 + }, + { + .compatible = "adi,adt7475", + .data = (void *)adt7475 + }, + { + .compatible = "adi,adt7476", + .data = (void *)adt7476 + }, + { + .compatible = "adi,adt7490", + .data = (void *)adt7490 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, adt7475_of_match); + struct adt7475_data { struct device *hwmon_dev; struct mutex lock; @@ -1250,6 +1274,7 @@ static void adt7475_remove_files(struct i2c_client *client, static int adt7475_probe(struct i2c_client *client, const struct i2c_device_id *id) { + enum chips chip; static const char * const names[] = { [adt7473] = "ADT7473", [adt7475] = "ADT7475", @@ -1268,8 +1293,13 @@ static int adt7475_probe(struct i2c_client *client, mutex_init(&data->lock); i2c_set_clientdata(client, data); + if (client->dev.of_node) + chip = (enum chips)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + /* Initialize device-specific values */ - switch (id->driver_data) { + switch (chip) { case adt7476: data->has_voltage = 0x0e; /* in1 to in3 */ revision = adt7475_read(REG_DEVID2) & 0x07; @@ -1343,6 +1373,17 @@ static int adt7475_probe(struct i2c_client *client, for (i = 0; i < ADT7475_PWM_COUNT; i++) adt7475_read_pwm(client, i); + /* Start monitoring */ + switch (chip) { + case adt7475: + case adt7476: + i2c_smbus_write_byte_data(client, REG_CONFIG1, + adt7475_read(REG_CONFIG1) | 0x01); + break; + default: + break; + } + ret = sysfs_create_group(&client->dev.kobj, &adt7475_attr_group); if (ret) return ret; @@ -1428,6 +1469,7 @@ static struct i2c_driver adt7475_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "adt7475", + .of_match_table = of_match_ptr(adt7475_of_match), }, .probe = adt7475_probe, .remove = adt7475_remove, diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c new file mode 100644 index 000000000000..48403a2115be --- /dev/null +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -0,0 +1,835 @@ +/* + * Copyright (c) 2016 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/gpio/consumer.h> +#include <linux/delay.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/sysfs.h> +#include <linux/regmap.h> + +/* ASPEED PWM & FAN Tach Register Definition */ +#define ASPEED_PTCR_CTRL 0x00 +#define ASPEED_PTCR_CLK_CTRL 0x04 +#define ASPEED_PTCR_DUTY0_CTRL 0x08 +#define ASPEED_PTCR_DUTY1_CTRL 0x0c +#define ASPEED_PTCR_TYPEM_CTRL 0x10 +#define ASPEED_PTCR_TYPEM_CTRL1 0x14 +#define ASPEED_PTCR_TYPEN_CTRL 0x18 +#define ASPEED_PTCR_TYPEN_CTRL1 0x1c +#define ASPEED_PTCR_TACH_SOURCE 0x20 +#define ASPEED_PTCR_TRIGGER 0x28 +#define ASPEED_PTCR_RESULT 0x2c +#define ASPEED_PTCR_INTR_CTRL 0x30 +#define ASPEED_PTCR_INTR_STS 0x34 +#define ASPEED_PTCR_TYPEM_LIMIT 0x38 +#define ASPEED_PTCR_TYPEN_LIMIT 0x3C +#define ASPEED_PTCR_CTRL_EXT 0x40 +#define ASPEED_PTCR_CLK_CTRL_EXT 0x44 +#define ASPEED_PTCR_DUTY2_CTRL 0x48 +#define ASPEED_PTCR_DUTY3_CTRL 0x4c +#define ASPEED_PTCR_TYPEO_CTRL 0x50 +#define ASPEED_PTCR_TYPEO_CTRL1 0x54 +#define ASPEED_PTCR_TACH_SOURCE_EXT 0x60 +#define ASPEED_PTCR_TYPEO_LIMIT 0x78 + +/* ASPEED_PTCR_CTRL : 0x00 - General Control Register */ +#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1 15 +#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2 6 +#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK (BIT(7) | BIT(15)) + +#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1 14 +#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2 5 +#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK (BIT(6) | BIT(14)) + +#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1 13 +#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2 4 +#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK (BIT(5) | BIT(13)) + +#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1 12 +#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2 3 +#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK (BIT(4) | BIT(12)) + +#define ASPEED_PTCR_CTRL_FAN_NUM_EN(x) BIT(16 + (x)) + +#define ASPEED_PTCR_CTRL_PWMD_EN BIT(11) +#define ASPEED_PTCR_CTRL_PWMC_EN BIT(10) +#define ASPEED_PTCR_CTRL_PWMB_EN BIT(9) +#define ASPEED_PTCR_CTRL_PWMA_EN BIT(8) + +#define ASPEED_PTCR_CTRL_CLK_SRC BIT(1) +#define ASPEED_PTCR_CTRL_CLK_EN BIT(0) + +/* ASPEED_PTCR_CLK_CTRL : 0x04 - Clock Control Register */ +/* TYPE N */ +#define ASPEED_PTCR_CLK_CTRL_TYPEN_MASK GENMASK(31, 16) +#define ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT 24 +#define ASPEED_PTCR_CLK_CTRL_TYPEN_H 20 +#define ASPEED_PTCR_CLK_CTRL_TYPEN_L 16 +/* TYPE M */ +#define ASPEED_PTCR_CLK_CTRL_TYPEM_MASK GENMASK(15, 0) +#define ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT 8 +#define ASPEED_PTCR_CLK_CTRL_TYPEM_H 4 +#define ASPEED_PTCR_CLK_CTRL_TYPEM_L 0 + +/* + * ASPEED_PTCR_DUTY_CTRL/1/2/3 : 0x08/0x0C/0x48/0x4C - PWM-FAN duty control + * 0/1/2/3 register + */ +#define DUTY_CTRL_PWM2_FALL_POINT 24 +#define DUTY_CTRL_PWM2_RISE_POINT 16 +#define DUTY_CTRL_PWM2_RISE_FALL_MASK GENMASK(31, 16) +#define DUTY_CTRL_PWM1_FALL_POINT 8 +#define DUTY_CTRL_PWM1_RISE_POINT 0 +#define DUTY_CTRL_PWM1_RISE_FALL_MASK GENMASK(15, 0) + +/* ASPEED_PTCR_TYPEM_CTRL : 0x10/0x18/0x50 - Type M/N/O Ctrl 0 Register */ +#define TYPE_CTRL_FAN_MASK (GENMASK(5, 1) | GENMASK(31, 16)) +#define TYPE_CTRL_FAN1_MASK GENMASK(31, 0) +#define TYPE_CTRL_FAN_PERIOD 16 +#define TYPE_CTRL_FAN_MODE 4 +#define TYPE_CTRL_FAN_DIVISION 1 +#define TYPE_CTRL_FAN_TYPE_EN 1 + +/* ASPEED_PTCR_TACH_SOURCE : 0x20/0x60 - Tach Source Register */ +/* bit [0,1] at 0x20, bit [2] at 0x60 */ +#define TACH_PWM_SOURCE_BIT01(x) ((x) * 2) +#define TACH_PWM_SOURCE_BIT2(x) ((x) * 2) +#define TACH_PWM_SOURCE_MASK_BIT01(x) (0x3 << ((x) * 2)) +#define TACH_PWM_SOURCE_MASK_BIT2(x) BIT((x) * 2) + +/* ASPEED_PTCR_RESULT : 0x2c - Result Register */ +#define RESULT_STATUS_MASK BIT(31) +#define RESULT_VALUE_MASK 0xfffff + +/* ASPEED_PTCR_CTRL_EXT : 0x40 - General Control Extension #1 Register */ +#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1 15 +#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2 6 +#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK (BIT(7) | BIT(15)) + +#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1 14 +#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2 5 +#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK (BIT(6) | BIT(14)) + +#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1 13 +#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2 4 +#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK (BIT(5) | BIT(13)) + +#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1 12 +#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2 3 +#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK (BIT(4) | BIT(12)) + +#define ASPEED_PTCR_CTRL_PWMH_EN BIT(11) +#define ASPEED_PTCR_CTRL_PWMG_EN BIT(10) +#define ASPEED_PTCR_CTRL_PWMF_EN BIT(9) +#define ASPEED_PTCR_CTRL_PWME_EN BIT(8) + +/* ASPEED_PTCR_CLK_EXT_CTRL : 0x44 - Clock Control Extension #1 Register */ +/* TYPE O */ +#define ASPEED_PTCR_CLK_CTRL_TYPEO_MASK GENMASK(15, 0) +#define ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT 8 +#define ASPEED_PTCR_CLK_CTRL_TYPEO_H 4 +#define ASPEED_PTCR_CLK_CTRL_TYPEO_L 0 + +#define PWM_MAX 255 + +#define M_PWM_DIV_H 0x00 +#define M_PWM_DIV_L 0x05 +#define M_PWM_PERIOD 0x5F +#define M_TACH_CLK_DIV 0x00 +#define M_TACH_MODE 0x00 +#define M_TACH_UNIT 0x1000 +#define INIT_FAN_CTRL 0xFF + +struct aspeed_pwm_tacho_data { + struct regmap *regmap; + unsigned long clk_freq; + bool pwm_present[8]; + bool fan_tach_present[16]; + u8 type_pwm_clock_unit[3]; + u8 type_pwm_clock_division_h[3]; + u8 type_pwm_clock_division_l[3]; + u8 type_fan_tach_clock_division[3]; + u16 type_fan_tach_unit[3]; + u8 pwm_port_type[8]; + u8 pwm_port_fan_ctrl[8]; + u8 fan_tach_ch_source[16]; + const struct attribute_group *groups[3]; +}; + +enum type { TYPEM, TYPEN, TYPEO }; + +struct type_params { + u32 l_value; + u32 h_value; + u32 unit_value; + u32 clk_ctrl_mask; + u32 clk_ctrl_reg; + u32 ctrl_reg; + u32 ctrl_reg1; +}; + +static const struct type_params type_params[] = { + [TYPEM] = { + .l_value = ASPEED_PTCR_CLK_CTRL_TYPEM_L, + .h_value = ASPEED_PTCR_CLK_CTRL_TYPEM_H, + .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT, + .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEM_MASK, + .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL, + .ctrl_reg = ASPEED_PTCR_TYPEM_CTRL, + .ctrl_reg1 = ASPEED_PTCR_TYPEM_CTRL1, + }, + [TYPEN] = { + .l_value = ASPEED_PTCR_CLK_CTRL_TYPEN_L, + .h_value = ASPEED_PTCR_CLK_CTRL_TYPEN_H, + .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT, + .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEN_MASK, + .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL, + .ctrl_reg = ASPEED_PTCR_TYPEN_CTRL, + .ctrl_reg1 = ASPEED_PTCR_TYPEN_CTRL1, + }, + [TYPEO] = { + .l_value = ASPEED_PTCR_CLK_CTRL_TYPEO_L, + .h_value = ASPEED_PTCR_CLK_CTRL_TYPEO_H, + .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT, + .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEO_MASK, + .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL_EXT, + .ctrl_reg = ASPEED_PTCR_TYPEO_CTRL, + .ctrl_reg1 = ASPEED_PTCR_TYPEO_CTRL1, + } +}; + +enum pwm_port { PWMA, PWMB, PWMC, PWMD, PWME, PWMF, PWMG, PWMH }; + +struct pwm_port_params { + u32 pwm_en; + u32 ctrl_reg; + u32 type_part1; + u32 type_part2; + u32 type_mask; + u32 duty_ctrl_rise_point; + u32 duty_ctrl_fall_point; + u32 duty_ctrl_reg; + u32 duty_ctrl_rise_fall_mask; +}; + +static const struct pwm_port_params pwm_port_params[] = { + [PWMA] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMA_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMB] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMB_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK, + }, + [PWMC] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMC_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMD] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMD_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK, + }, + [PWME] = { + .pwm_en = ASPEED_PTCR_CTRL_PWME_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMF] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMF_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK, + }, + [PWMG] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMG_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMH] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMH_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK, + } +}; + +static int regmap_aspeed_pwm_tacho_reg_write(void *context, unsigned int reg, + unsigned int val) +{ + void __iomem *regs = (void __iomem *)context; + + writel(val, regs + reg); + return 0; +} + +static int regmap_aspeed_pwm_tacho_reg_read(void *context, unsigned int reg, + unsigned int *val) +{ + void __iomem *regs = (void __iomem *)context; + + *val = readl(regs + reg); + return 0; +} + +static const struct regmap_config aspeed_pwm_tacho_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = ASPEED_PTCR_TYPEO_LIMIT, + .reg_write = regmap_aspeed_pwm_tacho_reg_write, + .reg_read = regmap_aspeed_pwm_tacho_reg_read, + .fast_io = true, +}; + +static void aspeed_set_clock_enable(struct regmap *regmap, bool val) +{ + regmap_update_bits(regmap, ASPEED_PTCR_CTRL, + ASPEED_PTCR_CTRL_CLK_EN, + val ? ASPEED_PTCR_CTRL_CLK_EN : 0); +} + +static void aspeed_set_clock_source(struct regmap *regmap, int val) +{ + regmap_update_bits(regmap, ASPEED_PTCR_CTRL, + ASPEED_PTCR_CTRL_CLK_SRC, + val ? ASPEED_PTCR_CTRL_CLK_SRC : 0); +} + +static void aspeed_set_pwm_clock_values(struct regmap *regmap, u8 type, + u8 div_high, u8 div_low, u8 unit) +{ + u32 reg_value = ((div_high << type_params[type].h_value) | + (div_low << type_params[type].l_value) | + (unit << type_params[type].unit_value)); + + regmap_update_bits(regmap, type_params[type].clk_ctrl_reg, + type_params[type].clk_ctrl_mask, reg_value); +} + +static void aspeed_set_pwm_port_enable(struct regmap *regmap, u8 pwm_port, + bool enable) +{ + regmap_update_bits(regmap, pwm_port_params[pwm_port].ctrl_reg, + pwm_port_params[pwm_port].pwm_en, + enable ? pwm_port_params[pwm_port].pwm_en : 0); +} + +static void aspeed_set_pwm_port_type(struct regmap *regmap, + u8 pwm_port, u8 type) +{ + u32 reg_value = (type & 0x1) << pwm_port_params[pwm_port].type_part1; + + reg_value |= (type & 0x2) << pwm_port_params[pwm_port].type_part2; + + regmap_update_bits(regmap, pwm_port_params[pwm_port].ctrl_reg, + pwm_port_params[pwm_port].type_mask, reg_value); +} + +static void aspeed_set_pwm_port_duty_rising_falling(struct regmap *regmap, + u8 pwm_port, u8 rising, + u8 falling) +{ + u32 reg_value = (rising << + pwm_port_params[pwm_port].duty_ctrl_rise_point); + reg_value |= (falling << + pwm_port_params[pwm_port].duty_ctrl_fall_point); + + regmap_update_bits(regmap, pwm_port_params[pwm_port].duty_ctrl_reg, + pwm_port_params[pwm_port].duty_ctrl_rise_fall_mask, + reg_value); +} + +static void aspeed_set_tacho_type_enable(struct regmap *regmap, u8 type, + bool enable) +{ + regmap_update_bits(regmap, type_params[type].ctrl_reg, + TYPE_CTRL_FAN_TYPE_EN, + enable ? TYPE_CTRL_FAN_TYPE_EN : 0); +} + +static void aspeed_set_tacho_type_values(struct regmap *regmap, u8 type, + u8 mode, u16 unit, u8 division) +{ + u32 reg_value = ((mode << TYPE_CTRL_FAN_MODE) | + (unit << TYPE_CTRL_FAN_PERIOD) | + (division << TYPE_CTRL_FAN_DIVISION)); + + regmap_update_bits(regmap, type_params[type].ctrl_reg, + TYPE_CTRL_FAN_MASK, reg_value); + regmap_update_bits(regmap, type_params[type].ctrl_reg1, + TYPE_CTRL_FAN1_MASK, unit << 16); +} + +static void aspeed_set_fan_tach_ch_enable(struct regmap *regmap, u8 fan_tach_ch, + bool enable) +{ + regmap_update_bits(regmap, ASPEED_PTCR_CTRL, + ASPEED_PTCR_CTRL_FAN_NUM_EN(fan_tach_ch), + enable ? + ASPEED_PTCR_CTRL_FAN_NUM_EN(fan_tach_ch) : 0); +} + +static void aspeed_set_fan_tach_ch_source(struct regmap *regmap, u8 fan_tach_ch, + u8 fan_tach_ch_source) +{ + u32 reg_value1 = ((fan_tach_ch_source & 0x3) << + TACH_PWM_SOURCE_BIT01(fan_tach_ch)); + u32 reg_value2 = (((fan_tach_ch_source & 0x4) >> 2) << + TACH_PWM_SOURCE_BIT2(fan_tach_ch)); + + regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE, + TACH_PWM_SOURCE_MASK_BIT01(fan_tach_ch), + reg_value1); + + regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE_EXT, + TACH_PWM_SOURCE_MASK_BIT2(fan_tach_ch), + reg_value2); +} + +static void aspeed_set_pwm_port_fan_ctrl(struct aspeed_pwm_tacho_data *priv, + u8 index, u8 fan_ctrl) +{ + u16 period, dc_time_on; + + period = priv->type_pwm_clock_unit[priv->pwm_port_type[index]]; + period += 1; + dc_time_on = (fan_ctrl * period) / PWM_MAX; + + if (dc_time_on == 0) { + aspeed_set_pwm_port_enable(priv->regmap, index, false); + } else { + if (dc_time_on == period) + dc_time_on = 0; + + aspeed_set_pwm_port_duty_rising_falling(priv->regmap, index, 0, + dc_time_on); + aspeed_set_pwm_port_enable(priv->regmap, index, true); + } +} + +static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data + *priv, u8 type) +{ + u32 clk; + u16 tacho_unit; + u8 clk_unit, div_h, div_l, tacho_div; + + clk = priv->clk_freq; + clk_unit = priv->type_pwm_clock_unit[type]; + div_h = priv->type_pwm_clock_division_h[type]; + div_h = 0x1 << div_h; + div_l = priv->type_pwm_clock_division_l[type]; + if (div_l == 0) + div_l = 1; + else + div_l = div_l * 2; + + tacho_unit = priv->type_fan_tach_unit[type]; + tacho_div = priv->type_fan_tach_clock_division[type]; + + tacho_div = 0x4 << (tacho_div * 2); + return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit); +} + +static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, + u8 fan_tach_ch) +{ + u32 raw_data, tach_div, clk_source, sec, val; + u8 fan_tach_ch_source, type; + + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0); + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch); + + fan_tach_ch_source = priv->fan_tach_ch_source[fan_tach_ch]; + type = priv->pwm_port_type[fan_tach_ch_source]; + + sec = (1000 / aspeed_get_fan_tach_ch_measure_period(priv, type)); + msleep(sec); + + regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val); + raw_data = val & RESULT_VALUE_MASK; + tach_div = priv->type_fan_tach_clock_division[type]; + tach_div = 0x4 << (tach_div * 2); + clk_source = priv->clk_freq; + + if (raw_data == 0) + return 0; + + return (clk_source * 60) / (2 * raw_data * tach_div); +} + +static ssize_t set_pwm(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int index = sensor_attr->index; + int ret; + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + long fan_ctrl; + + ret = kstrtol(buf, 10, &fan_ctrl); + if (ret != 0) + return ret; + + if (fan_ctrl < 0 || fan_ctrl > PWM_MAX) + return -EINVAL; + + if (priv->pwm_port_fan_ctrl[index] == fan_ctrl) + return count; + + priv->pwm_port_fan_ctrl[index] = fan_ctrl; + aspeed_set_pwm_port_fan_ctrl(priv, index, fan_ctrl); + + return count; +} + +static ssize_t show_pwm(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int index = sensor_attr->index; + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", priv->pwm_port_fan_ctrl[index]); +} + +static ssize_t show_rpm(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int index = sensor_attr->index; + u32 rpm; + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + rpm = aspeed_get_fan_tach_ch_rpm(priv, index); + + return sprintf(buf, "%u\n", rpm); +} + +static umode_t pwm_is_visible(struct kobject *kobj, + struct attribute *a, int index) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + if (!priv->pwm_present[index]) + return 0; + return a->mode; +} + +static umode_t fan_dev_is_visible(struct kobject *kobj, + struct attribute *a, int index) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + if (!priv->fan_tach_present[index]) + return 0; + return a->mode; +} + +static SENSOR_DEVICE_ATTR(pwm0, 0644, + show_pwm, set_pwm, 0); +static SENSOR_DEVICE_ATTR(pwm1, 0644, + show_pwm, set_pwm, 1); +static SENSOR_DEVICE_ATTR(pwm2, 0644, + show_pwm, set_pwm, 2); +static SENSOR_DEVICE_ATTR(pwm3, 0644, + show_pwm, set_pwm, 3); +static SENSOR_DEVICE_ATTR(pwm4, 0644, + show_pwm, set_pwm, 4); +static SENSOR_DEVICE_ATTR(pwm5, 0644, + show_pwm, set_pwm, 5); +static SENSOR_DEVICE_ATTR(pwm6, 0644, + show_pwm, set_pwm, 6); +static SENSOR_DEVICE_ATTR(pwm7, 0644, + show_pwm, set_pwm, 7); +static struct attribute *pwm_dev_attrs[] = { + &sensor_dev_attr_pwm0.dev_attr.attr, + &sensor_dev_attr_pwm1.dev_attr.attr, + &sensor_dev_attr_pwm2.dev_attr.attr, + &sensor_dev_attr_pwm3.dev_attr.attr, + &sensor_dev_attr_pwm4.dev_attr.attr, + &sensor_dev_attr_pwm5.dev_attr.attr, + &sensor_dev_attr_pwm6.dev_attr.attr, + &sensor_dev_attr_pwm7.dev_attr.attr, + NULL, +}; + +static const struct attribute_group pwm_dev_group = { + .attrs = pwm_dev_attrs, + .is_visible = pwm_is_visible, +}; + +static SENSOR_DEVICE_ATTR(fan0_input, 0444, + show_rpm, NULL, 0); +static SENSOR_DEVICE_ATTR(fan1_input, 0444, + show_rpm, NULL, 1); +static SENSOR_DEVICE_ATTR(fan2_input, 0444, + show_rpm, NULL, 2); +static SENSOR_DEVICE_ATTR(fan3_input, 0444, + show_rpm, NULL, 3); +static SENSOR_DEVICE_ATTR(fan4_input, 0444, + show_rpm, NULL, 4); +static SENSOR_DEVICE_ATTR(fan5_input, 0444, + show_rpm, NULL, 5); +static SENSOR_DEVICE_ATTR(fan6_input, 0444, + show_rpm, NULL, 6); +static SENSOR_DEVICE_ATTR(fan7_input, 0444, + show_rpm, NULL, 7); +static SENSOR_DEVICE_ATTR(fan8_input, 0444, + show_rpm, NULL, 8); +static SENSOR_DEVICE_ATTR(fan9_input, 0444, + show_rpm, NULL, 9); +static SENSOR_DEVICE_ATTR(fan10_input, 0444, + show_rpm, NULL, 10); +static SENSOR_DEVICE_ATTR(fan11_input, 0444, + show_rpm, NULL, 11); +static SENSOR_DEVICE_ATTR(fan12_input, 0444, + show_rpm, NULL, 12); +static SENSOR_DEVICE_ATTR(fan13_input, 0444, + show_rpm, NULL, 13); +static SENSOR_DEVICE_ATTR(fan14_input, 0444, + show_rpm, NULL, 14); +static SENSOR_DEVICE_ATTR(fan15_input, 0444, + show_rpm, NULL, 15); +static struct attribute *fan_dev_attrs[] = { + &sensor_dev_attr_fan0_input.dev_attr.attr, + &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan2_input.dev_attr.attr, + &sensor_dev_attr_fan3_input.dev_attr.attr, + &sensor_dev_attr_fan4_input.dev_attr.attr, + &sensor_dev_attr_fan5_input.dev_attr.attr, + &sensor_dev_attr_fan6_input.dev_attr.attr, + &sensor_dev_attr_fan7_input.dev_attr.attr, + &sensor_dev_attr_fan8_input.dev_attr.attr, + &sensor_dev_attr_fan9_input.dev_attr.attr, + &sensor_dev_attr_fan10_input.dev_attr.attr, + &sensor_dev_attr_fan11_input.dev_attr.attr, + &sensor_dev_attr_fan12_input.dev_attr.attr, + &sensor_dev_attr_fan13_input.dev_attr.attr, + &sensor_dev_attr_fan14_input.dev_attr.attr, + &sensor_dev_attr_fan15_input.dev_attr.attr, + NULL +}; + +static const struct attribute_group fan_dev_group = { + .attrs = fan_dev_attrs, + .is_visible = fan_dev_is_visible, +}; + +/* + * The clock type is type M : + * The PWM frequency = 24MHz / (type M clock division L bit * + * type M clock division H bit * (type M PWM period bit + 1)) + */ +static void aspeed_create_type(struct aspeed_pwm_tacho_data *priv) +{ + priv->type_pwm_clock_division_h[TYPEM] = M_PWM_DIV_H; + priv->type_pwm_clock_division_l[TYPEM] = M_PWM_DIV_L; + priv->type_pwm_clock_unit[TYPEM] = M_PWM_PERIOD; + aspeed_set_pwm_clock_values(priv->regmap, TYPEM, M_PWM_DIV_H, + M_PWM_DIV_L, M_PWM_PERIOD); + aspeed_set_tacho_type_enable(priv->regmap, TYPEM, true); + priv->type_fan_tach_clock_division[TYPEM] = M_TACH_CLK_DIV; + priv->type_fan_tach_unit[TYPEM] = M_TACH_UNIT; + aspeed_set_tacho_type_values(priv->regmap, TYPEM, M_TACH_MODE, + M_TACH_UNIT, M_TACH_CLK_DIV); +} + +static void aspeed_create_pwm_port(struct aspeed_pwm_tacho_data *priv, + u8 pwm_port) +{ + aspeed_set_pwm_port_enable(priv->regmap, pwm_port, true); + priv->pwm_present[pwm_port] = true; + + priv->pwm_port_type[pwm_port] = TYPEM; + aspeed_set_pwm_port_type(priv->regmap, pwm_port, TYPEM); + + priv->pwm_port_fan_ctrl[pwm_port] = INIT_FAN_CTRL; + aspeed_set_pwm_port_fan_ctrl(priv, pwm_port, INIT_FAN_CTRL); +} + +static void aspeed_create_fan_tach_channel(struct aspeed_pwm_tacho_data *priv, + u8 *fan_tach_ch, + int count, + u8 pwm_source) +{ + u8 val, index; + + for (val = 0; val < count; val++) { + index = fan_tach_ch[val]; + aspeed_set_fan_tach_ch_enable(priv->regmap, index, true); + priv->fan_tach_present[index] = true; + priv->fan_tach_ch_source[index] = pwm_source; + aspeed_set_fan_tach_ch_source(priv->regmap, index, pwm_source); + } +} + +static int aspeed_create_fan(struct device *dev, + struct device_node *child, + struct aspeed_pwm_tacho_data *priv) +{ + u8 *fan_tach_ch; + u32 pwm_port; + int ret, count; + + ret = of_property_read_u32(child, "reg", &pwm_port); + if (ret) + return ret; + aspeed_create_pwm_port(priv, (u8)pwm_port); + + count = of_property_count_u8_elems(child, "aspeed,fan-tach-ch"); + if (count < 1) + return -EINVAL; + fan_tach_ch = devm_kzalloc(dev, sizeof(*fan_tach_ch) * count, + GFP_KERNEL); + if (!fan_tach_ch) + return -ENOMEM; + ret = of_property_read_u8_array(child, "aspeed,fan-tach-ch", + fan_tach_ch, count); + if (ret) + return ret; + aspeed_create_fan_tach_channel(priv, fan_tach_ch, count, pwm_port); + + return 0; +} + +static int aspeed_pwm_tacho_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np, *child; + struct aspeed_pwm_tacho_data *priv; + void __iomem *regs; + struct resource *res; + struct device *hwmon; + struct clk *clk; + int ret; + + np = dev->of_node; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; + regs = devm_ioremap_resource(dev, res); + if (IS_ERR(regs)) + return PTR_ERR(regs); + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs, + &aspeed_pwm_tacho_regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE, 0); + regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE_EXT, 0); + + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) + return -ENODEV; + priv->clk_freq = clk_get_rate(clk); + aspeed_set_clock_enable(priv->regmap, true); + aspeed_set_clock_source(priv->regmap, 0); + + aspeed_create_type(priv); + + for_each_child_of_node(np, child) { + ret = aspeed_create_fan(dev, child, priv); + of_node_put(child); + if (ret) + return ret; + } + of_node_put(np); + + priv->groups[0] = &pwm_dev_group; + priv->groups[1] = &fan_dev_group; + priv->groups[2] = NULL; + hwmon = devm_hwmon_device_register_with_groups(dev, + "aspeed_pwm_tacho", + priv, priv->groups); + return PTR_ERR_OR_ZERO(hwmon); +} + +static const struct of_device_id of_pwm_tacho_match_table[] = { + { .compatible = "aspeed,ast2400-pwm-tacho", }, + { .compatible = "aspeed,ast2500-pwm-tacho", }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_pwm_tacho_match_table); + +static struct platform_driver aspeed_pwm_tacho_driver = { + .probe = aspeed_pwm_tacho_probe, + .driver = { + .name = "aspeed_pwm_tacho", + .of_match_table = of_pwm_tacho_match_table, + }, +}; + +module_platform_driver(aspeed_pwm_tacho_driver); + +MODULE_AUTHOR("Jaghathiswari Rankappagounder Natarajan <jaghu@google.com>"); +MODULE_DESCRIPTION("ASPEED PWM and Fan Tacho device driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 34704b0451b4..3189246302a6 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -995,6 +995,13 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = { }, .driver_data = (void *)&i8k_config_data[DELL_XPS], }, + { + .ident = "Dell XPS 15 9560", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 15 9560"), + }, + }, { } }; diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 28375d59cc36..dd6e17c1076b 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -186,7 +186,7 @@ static ssize_t hwmon_attr_show_string(struct device *dev, char *buf) { struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr); - char *s; + const char *s; int ret; ret = hattr->ops->read_string(dev, hattr->type, hattr->attr, diff --git a/drivers/hwmon/ina209.c b/drivers/hwmon/ina209.c index 5378fdefc1f7..aa0768ce8aea 100644 --- a/drivers/hwmon/ina209.c +++ b/drivers/hwmon/ina209.c @@ -117,7 +117,7 @@ static long ina209_from_reg(const u8 reg, const u16 val) case INA209_SHUNT_VOLTAGE_POS_WARN: case INA209_SHUNT_VOLTAGE_NEG_WARN: /* LSB=10 uV. Convert to mV. */ - return DIV_ROUND_CLOSEST(val, 100); + return DIV_ROUND_CLOSEST((s16)val, 100); case INA209_BUS_VOLTAGE: case INA209_BUS_VOLTAGE_MAX_PEAK: @@ -146,7 +146,7 @@ static long ina209_from_reg(const u8 reg, const u16 val) case INA209_CURRENT: /* LSB=1 mA (selected). Is in mA */ - return val; + return (s16)val; } /* programmer goofed */ @@ -608,11 +608,18 @@ static const struct i2c_device_id ina209_id[] = { }; MODULE_DEVICE_TABLE(i2c, ina209_id); +static const struct of_device_id ina209_of_match[] = { + { .compatible = "ti,ina209" }, + { }, +}; +MODULE_DEVICE_TABLE(of, ina209_of_match); + /* This is the driver that will be inserted */ static struct i2c_driver ina209_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "ina209", + .of_match_table = of_match_ptr(ina209_of_match), }, .probe = ina209_probe, .remove = ina209_remove, diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index b24f1d3045f0..62e38fa8cda2 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -34,6 +34,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/jiffies.h> +#include <linux/of_device.h> #include <linux/of.h> #include <linux/delay.h> #include <linux/util_macros.h> @@ -424,13 +425,19 @@ static int ina2xx_probe(struct i2c_client *client, struct device *hwmon_dev; u32 val; int ret, group = 0; + enum ina2xx_ids chip; + + if (client->dev.of_node) + chip = (enum ina2xx_ids)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; /* set the device type */ - data->config = &ina2xx_config[id->driver_data]; + data->config = &ina2xx_config[chip]; if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) { struct ina2xx_platform_data *pdata = dev_get_platdata(dev); @@ -487,9 +494,35 @@ static const struct i2c_device_id ina2xx_id[] = { }; MODULE_DEVICE_TABLE(i2c, ina2xx_id); +static const struct of_device_id ina2xx_of_match[] = { + { + .compatible = "ti,ina219", + .data = (void *)ina219 + }, + { + .compatible = "ti,ina220", + .data = (void *)ina219 + }, + { + .compatible = "ti,ina226", + .data = (void *)ina226 + }, + { + .compatible = "ti,ina230", + .data = (void *)ina226 + }, + { + .compatible = "ti,ina231", + .data = (void *)ina226 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ina2xx_of_match); + static struct i2c_driver ina2xx_driver = { .driver = { .name = "ina2xx", + .of_match_table = of_match_ptr(ina2xx_of_match), }, .probe = ina2xx_probe, .id_table = ina2xx_id, diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c index 2e1948699114..4c1770920d29 100644 --- a/drivers/hwmon/lm63.c +++ b/drivers/hwmon/lm63.c @@ -46,6 +46,7 @@ #include <linux/hwmon.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/sysfs.h> #include <linux/types.h> @@ -1115,6 +1116,10 @@ static int lm63_probe(struct i2c_client *client, mutex_init(&data->update_lock); /* Set the device type */ + if (client->dev.of_node) + data->kind = (enum chips)of_device_get_match_data(&client->dev); + else + data->kind = id->driver_data; data->kind = id->driver_data; if (data->kind == lm64) data->temp2_offset = 16000; @@ -1149,10 +1154,28 @@ static const struct i2c_device_id lm63_id[] = { }; MODULE_DEVICE_TABLE(i2c, lm63_id); +static const struct of_device_id lm63_of_match[] = { + { + .compatible = "national,lm63", + .data = (void *)lm63 + }, + { + .compatible = "national,lm64", + .data = (void *)lm64 + }, + { + .compatible = "national,lm96163", + .data = (void *)lm96163 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm63_of_match); + static struct i2c_driver lm63_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm63", + .of_match_table = of_match_ptr(lm63_of_match), }, .probe = lm63_probe, .id_table = lm63_id, diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index eff3b24d8473..005ffb5ffa92 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -26,6 +26,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/err.h> +#include <linux/of_device.h> #include <linux/of.h> #include <linux/regmap.h> #include "lm75.h" @@ -273,7 +274,12 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) int status, err; u8 set_mask, clr_mask; int new; - enum lm75_type kind = id->driver_data; + enum lm75_type kind; + + if (client->dev.of_node) + kind = (enum lm75_type)of_device_get_match_data(&client->dev); + else + kind = id->driver_data; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) @@ -424,6 +430,95 @@ static const struct i2c_device_id lm75_ids[] = { }; MODULE_DEVICE_TABLE(i2c, lm75_ids); +static const struct of_device_id lm75_of_match[] = { + { + .compatible = "adi,adt75", + .data = (void *)adt75 + }, + { + .compatible = "dallas,ds1775", + .data = (void *)ds1775 + }, + { + .compatible = "dallas,ds75", + .data = (void *)ds75 + }, + { + .compatible = "dallas,ds7505", + .data = (void *)ds7505 + }, + { + .compatible = "gmt,g751", + .data = (void *)g751 + }, + { + .compatible = "national,lm75", + .data = (void *)lm75 + }, + { + .compatible = "national,lm75a", + .data = (void *)lm75a + }, + { + .compatible = "national,lm75b", + .data = (void *)lm75b + }, + { + .compatible = "maxim,max6625", + .data = (void *)max6625 + }, + { + .compatible = "maxim,max6626", + .data = (void *)max6626 + }, + { + .compatible = "maxim,mcp980x", + .data = (void *)mcp980x + }, + { + .compatible = "st,stds75", + .data = (void *)stds75 + }, + { + .compatible = "microchip,tcn75", + .data = (void *)tcn75 + }, + { + .compatible = "ti,tmp100", + .data = (void *)tmp100 + }, + { + .compatible = "ti,tmp101", + .data = (void *)tmp101 + }, + { + .compatible = "ti,tmp105", + .data = (void *)tmp105 + }, + { + .compatible = "ti,tmp112", + .data = (void *)tmp112 + }, + { + .compatible = "ti,tmp175", + .data = (void *)tmp175 + }, + { + .compatible = "ti,tmp275", + .data = (void *)tmp275 + }, + { + .compatible = "ti,tmp75", + .data = (void *)tmp75 + }, + { + .compatible = "ti,tmp75c", + .data = (void *)tmp75c + }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm75_of_match); + #define LM75A_ID 0xA1 /* Return 0 if detection is successful, -ENODEV otherwise */ @@ -560,6 +655,7 @@ static struct i2c_driver lm75_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm75", + .of_match_table = of_match_ptr(lm75_of_match), .pm = LM75_DEV_PM_OPS, }, .probe = lm75_probe, diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index 691469ffa24e..0a325878e8f5 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -25,6 +25,7 @@ */ #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/jiffies.h> @@ -1552,7 +1553,10 @@ static int lm85_probe(struct i2c_client *client, const struct i2c_device_id *id) return -ENOMEM; data->client = client; - data->type = id->driver_data; + if (client->dev.of_node) + data->type = (enum chips)of_device_get_match_data(&client->dev); + else + data->type = id->driver_data; mutex_init(&data->update_lock); /* Fill in the chip specific driver values */ @@ -1623,10 +1627,60 @@ static const struct i2c_device_id lm85_id[] = { }; MODULE_DEVICE_TABLE(i2c, lm85_id); +static const struct of_device_id lm85_of_match[] = { + { + .compatible = "adi,adm1027", + .data = (void *)adm1027 + }, + { + .compatible = "adi,adt7463", + .data = (void *)adt7463 + }, + { + .compatible = "adi,adt7468", + .data = (void *)adt7468 + }, + { + .compatible = "national,lm85", + .data = (void *)lm85 + }, + { + .compatible = "national,lm85b", + .data = (void *)lm85 + }, + { + .compatible = "national,lm85c", + .data = (void *)lm85 + }, + { + .compatible = "smsc,emc6d100", + .data = (void *)emc6d100 + }, + { + .compatible = "smsc,emc6d101", + .data = (void *)emc6d100 + }, + { + .compatible = "smsc,emc6d102", + .data = (void *)emc6d102 + }, + { + .compatible = "smsc,emc6d103", + .data = (void *)emc6d103 + }, + { + .compatible = "smsc,emc6d103s", + .data = (void *)emc6d103s + }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm85_of_match); + static struct i2c_driver lm85_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm85", + .of_match_table = of_match_ptr(lm85_of_match), }, .probe = lm85_probe, .id_table = lm85_id, diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index e06faf9d3f0f..b48d30760388 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -66,6 +66,7 @@ #include <linux/hwmon-vid.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/regulator/consumer.h> /* * Addresses to scan @@ -74,8 +75,6 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; -enum chips { lm87, adm1024 }; - /* * The LM87 registers */ @@ -855,8 +854,26 @@ static int lm87_init_client(struct i2c_client *client) { struct lm87_data *data = i2c_get_clientdata(client); int rc; - - if (dev_get_platdata(&client->dev)) { + struct device_node *of_node = client->dev.of_node; + u8 val = 0; + struct regulator *vcc = NULL; + + if (of_node) { + if (of_property_read_bool(of_node, "has-temp3")) + val |= CHAN_TEMP3; + if (of_property_read_bool(of_node, "has-in6")) + val |= CHAN_NO_FAN(0); + if (of_property_read_bool(of_node, "has-in7")) + val |= CHAN_NO_FAN(1); + vcc = devm_regulator_get_optional(&client->dev, "vcc"); + if (!IS_ERR(vcc)) { + if (regulator_get_voltage(vcc) == 5000000) + val |= CHAN_VCC_5V; + } + data->channel = val; + lm87_write_value(client, + LM87_REG_CHANNEL_MODE, data->channel); + } else if (dev_get_platdata(&client->dev)) { data->channel = *(u8 *)dev_get_platdata(&client->dev); lm87_write_value(client, LM87_REG_CHANNEL_MODE, data->channel); @@ -962,16 +979,24 @@ static int lm87_probe(struct i2c_client *client, const struct i2c_device_id *id) */ static const struct i2c_device_id lm87_id[] = { - { "lm87", lm87 }, - { "adm1024", adm1024 }, + { "lm87", 0 }, + { "adm1024", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, lm87_id); +static const struct of_device_id lm87_of_match[] = { + { .compatible = "ti,lm87" }, + { .compatible = "adi,adm1024" }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm87_of_match); + static struct i2c_driver lm87_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm87", + .of_match_table = lm87_of_match, }, .probe = lm87_probe, .id_table = lm87_id, diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index aff5297bc2bc..c2f411c290bf 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -92,6 +92,7 @@ #include <linux/hwmon.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/sysfs.h> #include <linux/interrupt.h> #include <linux/regulator/consumer.h> @@ -235,6 +236,99 @@ static const struct i2c_device_id lm90_id[] = { }; MODULE_DEVICE_TABLE(i2c, lm90_id); +static const struct of_device_id lm90_of_match[] = { + { + .compatible = "adi,adm1032", + .data = (void *)adm1032 + }, + { + .compatible = "adi,adt7461", + .data = (void *)adt7461 + }, + { + .compatible = "adi,adt7461a", + .data = (void *)adt7461 + }, + { + .compatible = "gmt,g781", + .data = (void *)g781 + }, + { + .compatible = "national,lm90", + .data = (void *)lm90 + }, + { + .compatible = "national,lm86", + .data = (void *)lm86 + }, + { + .compatible = "national,lm89", + .data = (void *)lm86 + }, + { + .compatible = "national,lm99", + .data = (void *)lm99 + }, + { + .compatible = "dallas,max6646", + .data = (void *)max6646 + }, + { + .compatible = "dallas,max6647", + .data = (void *)max6646 + }, + { + .compatible = "dallas,max6649", + .data = (void *)max6646 + }, + { + .compatible = "dallas,max6657", + .data = (void *)max6657 + }, + { + .compatible = "dallas,max6658", + .data = (void *)max6657 + }, + { + .compatible = "dallas,max6659", + .data = (void *)max6659 + }, + { + .compatible = "dallas,max6680", + .data = (void *)max6680 + }, + { + .compatible = "dallas,max6681", + .data = (void *)max6680 + }, + { + .compatible = "dallas,max6695", + .data = (void *)max6696 + }, + { + .compatible = "dallas,max6696", + .data = (void *)max6696 + }, + { + .compatible = "onnn,nct1008", + .data = (void *)adt7461 + }, + { + .compatible = "winbond,w83l771", + .data = (void *)w83l771 + }, + { + .compatible = "nxp,sa56004", + .data = (void *)sa56004 + }, + { + .compatible = "ti,tmp451", + .data = (void *)tmp451 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm90_of_match); + /* * chip type specific parameters */ @@ -1677,7 +1771,10 @@ static int lm90_probe(struct i2c_client *client, mutex_init(&data->update_lock); /* Set the device type */ - data->kind = id->driver_data; + if (client->dev.of_node) + data->kind = (enum chips)of_device_get_match_data(&client->dev); + else + data->kind = id->driver_data; if (data->kind == adm1032) { if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) client->flags &= ~I2C_CLIENT_PEC; @@ -1816,6 +1913,7 @@ static struct i2c_driver lm90_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm90", + .of_match_table = of_match_ptr(lm90_of_match), }, .probe = lm90_probe, .alert = lm90_alert, diff --git a/drivers/hwmon/lm95245.c b/drivers/hwmon/lm95245.c index a3bfd88752ca..27cb06d65594 100644 --- a/drivers/hwmon/lm95245.c +++ b/drivers/hwmon/lm95245.c @@ -622,10 +622,18 @@ static const struct i2c_device_id lm95245_id[] = { }; MODULE_DEVICE_TABLE(i2c, lm95245_id); +static const struct of_device_id lm95245_of_match[] = { + { .compatible = "national,lm95235" }, + { .compatible = "national,lm95245" }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm95245_of_match); + static struct i2c_driver lm95245_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm95245", + .of_match_table = of_match_ptr(lm95245_of_match), }, .probe = lm95245_probe, .id_table = lm95245_id, diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c index f03a71722849..221fd1492057 100644 --- a/drivers/hwmon/max6697.c +++ b/drivers/hwmon/max6697.c @@ -24,6 +24,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/of.h> #include <linux/platform_data/max6697.h> @@ -632,7 +633,10 @@ static int max6697_probe(struct i2c_client *client, if (!data) return -ENOMEM; - data->type = id->driver_data; + if (client->dev.of_node) + data->type = (enum chips)of_device_get_match_data(&client->dev); + else + data->type = id->driver_data; data->chip = &max6697_chip_data[data->type]; data->client = client; mutex_init(&data->update_lock); @@ -662,10 +666,56 @@ static const struct i2c_device_id max6697_id[] = { }; MODULE_DEVICE_TABLE(i2c, max6697_id); +static const struct of_device_id max6697_of_match[] = { + { + .compatible = "maxim,max6581", + .data = (void *)max6581 + }, + { + .compatible = "maxim,max6602", + .data = (void *)max6602 + }, + { + .compatible = "maxim,max6622", + .data = (void *)max6622 + }, + { + .compatible = "maxim,max6636", + .data = (void *)max6636 + }, + { + .compatible = "maxim,max6689", + .data = (void *)max6689 + }, + { + .compatible = "maxim,max6693", + .data = (void *)max6693 + }, + { + .compatible = "maxim,max6694", + .data = (void *)max6694 + }, + { + .compatible = "maxim,max6697", + .data = (void *)max6697 + }, + { + .compatible = "maxim,max6698", + .data = (void *)max6698 + }, + { + .compatible = "maxim,max6699", + .data = (void *)max6699 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, max6697_of_match); + static struct i2c_driver max6697_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "max6697", + .of_match_table = of_match_ptr(max6697_of_match), }, .probe = max6697_probe, .id_table = max6697_id, diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c index 4ab5293c7bf0..00d6995af4c2 100644 --- a/drivers/hwmon/pmbus/adm1275.c +++ b/drivers/hwmon/pmbus/adm1275.c @@ -101,8 +101,8 @@ static const struct coefficients adm1075_coefficients[] = { [0] = { 27169, 0, -1 }, /* voltage */ [1] = { 806, 20475, -1 }, /* current, irange25 */ [2] = { 404, 20475, -1 }, /* current, irange50 */ - [3] = { 0, -1, 8549 }, /* power, irange25 */ - [4] = { 0, -1, 4279 }, /* power, irange50 */ + [3] = { 8549, 0, -1 }, /* power, irange25 */ + [4] = { 4279, 0, -1 }, /* power, irange50 */ }; static const struct coefficients adm1275_coefficients[] = { diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 3e3aa950277f..3518f0c08934 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/err.h> #include <linux/slab.h> @@ -119,6 +120,35 @@ static const struct i2c_device_id ucd9000_id[] = { }; MODULE_DEVICE_TABLE(i2c, ucd9000_id); +static const struct of_device_id ucd9000_of_match[] = { + { + .compatible = "ti,ucd9000", + .data = (void *)ucd9000 + }, + { + .compatible = "ti,ucd90120", + .data = (void *)ucd90120 + }, + { + .compatible = "ti,ucd90124", + .data = (void *)ucd90124 + }, + { + .compatible = "ti,ucd90160", + .data = (void *)ucd90160 + }, + { + .compatible = "ti,ucd9090", + .data = (void *)ucd9090 + }, + { + .compatible = "ti,ucd90910", + .data = (void *)ucd90910 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ucd9000_of_match); + static int ucd9000_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -126,6 +156,7 @@ static int ucd9000_probe(struct i2c_client *client, struct ucd9000_data *data; struct pmbus_driver_info *info; const struct i2c_device_id *mid; + enum chips chip; int i, ret; if (!i2c_check_functionality(client->adapter, @@ -151,7 +182,12 @@ static int ucd9000_probe(struct i2c_client *client, return -ENODEV; } - if (id->driver_data != ucd9000 && id->driver_data != mid->driver_data) + if (client->dev.of_node) + chip = (enum chips)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + + if (chip != ucd9000 && chip != mid->driver_data) dev_notice(&client->dev, "Device mismatch: Configured %s, detected %s\n", id->name, mid->name); @@ -234,6 +270,7 @@ static int ucd9000_probe(struct i2c_client *client, static struct i2c_driver ucd9000_driver = { .driver = { .name = "ucd9000", + .of_match_table = of_match_ptr(ucd9000_of_match), }, .probe = ucd9000_probe, .remove = pmbus_do_remove, diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c index 033d6aca47d3..a8712c5ded4e 100644 --- a/drivers/hwmon/pmbus/ucd9200.c +++ b/drivers/hwmon/pmbus/ucd9200.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/err.h> #include <linux/slab.h> @@ -46,12 +47,50 @@ static const struct i2c_device_id ucd9200_id[] = { }; MODULE_DEVICE_TABLE(i2c, ucd9200_id); +static const struct of_device_id ucd9200_of_match[] = { + { + .compatible = "ti,cd9200", + .data = (void *)ucd9200 + }, + { + .compatible = "ti,cd9220", + .data = (void *)ucd9220 + }, + { + .compatible = "ti,cd9222", + .data = (void *)ucd9222 + }, + { + .compatible = "ti,cd9224", + .data = (void *)ucd9224 + }, + { + .compatible = "ti,cd9240", + .data = (void *)ucd9240 + }, + { + .compatible = "ti,cd9244", + .data = (void *)ucd9244 + }, + { + .compatible = "ti,cd9246", + .data = (void *)ucd9246 + }, + { + .compatible = "ti,cd9248", + .data = (void *)ucd9248 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ucd9200_of_match); + static int ucd9200_probe(struct i2c_client *client, const struct i2c_device_id *id) { u8 block_buffer[I2C_SMBUS_BLOCK_MAX + 1]; struct pmbus_driver_info *info; const struct i2c_device_id *mid; + enum chips chip; int i, j, ret; if (!i2c_check_functionality(client->adapter, @@ -76,7 +115,13 @@ static int ucd9200_probe(struct i2c_client *client, dev_err(&client->dev, "Unsupported device\n"); return -ENODEV; } - if (id->driver_data != ucd9200 && id->driver_data != mid->driver_data) + + if (client->dev.of_node) + chip = (enum chips)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + + if (chip != ucd9200 && chip != mid->driver_data) dev_notice(&client->dev, "Device mismatch: Configured %s, detected %s\n", id->name, mid->name); @@ -167,6 +212,7 @@ static int ucd9200_probe(struct i2c_client *client, static struct i2c_driver ucd9200_driver = { .driver = { .name = "ucd9200", + .of_match_table = of_match_ptr(ucd9200_of_match), }, .probe = ucd9200_probe, .remove = pmbus_do_remove, diff --git a/drivers/hwmon/stts751.c b/drivers/hwmon/stts751.c index 55450680fb58..d56251d6eec2 100644 --- a/drivers/hwmon/stts751.c +++ b/drivers/hwmon/stts751.c @@ -85,6 +85,12 @@ static const struct i2c_device_id stts751_id[] = { { } }; +static const struct of_device_id stts751_of_match[] = { + { .compatible = "stts751" }, + { }, +}; +MODULE_DEVICE_TABLE(of, stts751_of_match); + struct stts751_priv { struct device *dev; struct i2c_client *client; @@ -819,6 +825,7 @@ static struct i2c_driver stts751_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = DEVNAME, + .of_match_table = of_match_ptr(stts751_of_match), }, .probe = stts751_probe, .id_table = stts751_id, diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 36bba2a816a4..5eafbaada795 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -323,8 +323,15 @@ static const struct i2c_device_id tmp102_id[] = { }; MODULE_DEVICE_TABLE(i2c, tmp102_id); +static const struct of_device_id tmp102_of_match[] = { + { .compatible = "ti,tmp102" }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp102_of_match); + static struct i2c_driver tmp102_driver = { .driver.name = DRIVER_NAME, + .driver.of_match_table = of_match_ptr(tmp102_of_match), .driver.pm = &tmp102_dev_pm_ops, .probe = tmp102_probe, .id_table = tmp102_id, diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c index ad571ec795a3..7f85b14544df 100644 --- a/drivers/hwmon/tmp103.c +++ b/drivers/hwmon/tmp103.c @@ -150,8 +150,7 @@ static int tmp103_probe(struct i2c_client *client, return PTR_ERR_OR_ZERO(hwmon_dev); } -#ifdef CONFIG_PM -static int tmp103_suspend(struct device *dev) +static int __maybe_unused tmp103_suspend(struct device *dev) { struct regmap *regmap = dev_get_drvdata(dev); @@ -159,7 +158,7 @@ static int tmp103_suspend(struct device *dev) TMP103_CONF_SD_MASK, 0); } -static int tmp103_resume(struct device *dev) +static int __maybe_unused tmp103_resume(struct device *dev) { struct regmap *regmap = dev_get_drvdata(dev); @@ -167,15 +166,7 @@ static int tmp103_resume(struct device *dev) TMP103_CONF_SD_MASK, TMP103_CONF_SD); } -static const struct dev_pm_ops tmp103_dev_pm_ops = { - .suspend = tmp103_suspend, - .resume = tmp103_resume, -}; - -#define TMP103_DEV_PM_OPS (&tmp103_dev_pm_ops) -#else -#define TMP103_DEV_PM_OPS NULL -#endif /* CONFIG_PM */ +static SIMPLE_DEV_PM_OPS(tmp103_dev_pm_ops, tmp103_suspend, tmp103_resume); static const struct i2c_device_id tmp103_id[] = { { "tmp103", 0 }, @@ -183,10 +174,17 @@ static const struct i2c_device_id tmp103_id[] = { }; MODULE_DEVICE_TABLE(i2c, tmp103_id); +static const struct of_device_id tmp103_of_match[] = { + { .compatible = "ti,tmp103" }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp103_of_match); + static struct i2c_driver tmp103_driver = { .driver = { .name = "tmp103", - .pm = TMP103_DEV_PM_OPS, + .of_match_table = of_match_ptr(tmp103_of_match), + .pm = &tmp103_dev_pm_ops, }, .probe = tmp103_probe, .id_table = tmp103_id, diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index bfb98b96c781..e36399213324 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -29,6 +29,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/sysfs.h> /* Addresses to scan */ @@ -69,6 +70,31 @@ static const struct i2c_device_id tmp421_id[] = { }; MODULE_DEVICE_TABLE(i2c, tmp421_id); +static const struct of_device_id tmp421_of_match[] = { + { + .compatible = "ti,tmp421", + .data = (void *)2 + }, + { + .compatible = "ti,tmp422", + .data = (void *)3 + }, + { + .compatible = "ti,tmp423", + .data = (void *)4 + }, + { + .compatible = "ti,tmp441", + .data = (void *)2 + }, + { + .compatible = "ti,tmp422", + .data = (void *)3 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp421_of_match); + struct tmp421_data { struct i2c_client *client; struct mutex update_lock; @@ -78,7 +104,7 @@ struct tmp421_data { struct hwmon_chip_info chip; char valid; unsigned long last_updated; - int channels; + unsigned long channels; u8 config; s16 temp[4]; }; @@ -272,7 +298,11 @@ static int tmp421_probe(struct i2c_client *client, return -ENOMEM; mutex_init(&data->update_lock); - data->channels = id->driver_data; + if (client->dev.of_node) + data->channels = (unsigned long) + of_device_get_match_data(&client->dev); + else + data->channels = id->driver_data; data->client = client; err = tmp421_init_client(client); @@ -301,6 +331,7 @@ static struct i2c_driver tmp421_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "tmp421", + .of_match_table = of_match_ptr(tmp421_of_match), }, .probe = tmp421_probe, .id_table = tmp421_id, diff --git a/drivers/hwmon/twl4030-madc-hwmon.c b/drivers/hwmon/twl4030-madc-hwmon.c deleted file mode 100644 index b5caf7fdb487..000000000000 --- a/drivers/hwmon/twl4030-madc-hwmon.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * - * TWL4030 MADC Hwmon driver-This driver monitors the real time - * conversion of analog signals like battery temperature, - * battery type, battery level etc. User can ask for the conversion on a - * particular channel using the sysfs nodes. - * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ - * J Keerthy <j-keerthy@ti.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/i2c/twl.h> -#include <linux/device.h> -#include <linux/platform_device.h> -#include <linux/i2c/twl4030-madc.h> -#include <linux/hwmon.h> -#include <linux/hwmon-sysfs.h> -#include <linux/stddef.h> -#include <linux/sysfs.h> -#include <linux/err.h> -#include <linux/types.h> - -/* - * sysfs hook function - */ -static ssize_t madc_read(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct twl4030_madc_request req = { - .channels = 1 << attr->index, - .method = TWL4030_MADC_SW2, - .type = TWL4030_MADC_WAIT, - }; - long val; - - val = twl4030_madc_conversion(&req); - if (val < 0) - return val; - - return sprintf(buf, "%d\n", req.rbuf[attr->index]); -} - -/* sysfs nodes to read individual channels from user side */ -static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, madc_read, NULL, 0); -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, madc_read, NULL, 1); -static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, madc_read, NULL, 2); -static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, madc_read, NULL, 3); -static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, madc_read, NULL, 4); -static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, madc_read, NULL, 5); -static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, madc_read, NULL, 6); -static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, madc_read, NULL, 7); -static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, madc_read, NULL, 8); -static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, madc_read, NULL, 9); -static SENSOR_DEVICE_ATTR(curr10_input, S_IRUGO, madc_read, NULL, 10); -static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, madc_read, NULL, 11); -static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, madc_read, NULL, 12); -static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, madc_read, NULL, 15); - -static struct attribute *twl4030_madc_attrs[] = { - &sensor_dev_attr_in0_input.dev_attr.attr, - &sensor_dev_attr_temp1_input.dev_attr.attr, - &sensor_dev_attr_in2_input.dev_attr.attr, - &sensor_dev_attr_in3_input.dev_attr.attr, - &sensor_dev_attr_in4_input.dev_attr.attr, - &sensor_dev_attr_in5_input.dev_attr.attr, - &sensor_dev_attr_in6_input.dev_attr.attr, - &sensor_dev_attr_in7_input.dev_attr.attr, - &sensor_dev_attr_in8_input.dev_attr.attr, - &sensor_dev_attr_in9_input.dev_attr.attr, - &sensor_dev_attr_curr10_input.dev_attr.attr, - &sensor_dev_attr_in11_input.dev_attr.attr, - &sensor_dev_attr_in12_input.dev_attr.attr, - &sensor_dev_attr_in15_input.dev_attr.attr, - NULL -}; -ATTRIBUTE_GROUPS(twl4030_madc); - -static int twl4030_madc_hwmon_probe(struct platform_device *pdev) -{ - struct device *hwmon; - - hwmon = devm_hwmon_device_register_with_groups(&pdev->dev, - "twl4030_madc", NULL, - twl4030_madc_groups); - return PTR_ERR_OR_ZERO(hwmon); -} - -static struct platform_driver twl4030_madc_hwmon_driver = { - .probe = twl4030_madc_hwmon_probe, - .driver = { - .name = "twl4030_madc_hwmon", - }, -}; - -module_platform_driver(twl4030_madc_hwmon_driver); - -MODULE_DESCRIPTION("TWL4030 ADC Hwmon driver"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("J Keerthy"); -MODULE_ALIAS("platform:twl4030_madc_hwmon"); diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index ab346ed142de..ad68b6d9ff17 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -135,11 +135,16 @@ superio_select(int ioreg, int ld) outb(ld, ioreg + 1); } -static inline void +static inline int superio_enter(int ioreg) { + if (!request_muxed_region(ioreg, 2, DRVNAME)) + return -EBUSY; + outb(0x87, ioreg); outb(0x87, ioreg); + + return 0; } static inline void @@ -148,6 +153,7 @@ superio_exit(int ioreg) outb(0xaa, ioreg); outb(0x02, ioreg); outb(0x02, ioreg + 1); + release_region(ioreg, 2); } /* @@ -1970,8 +1976,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data, return; } - superio_enter(sio_data->sioreg); - /* fan4 and fan5 share some pins with the GPIO and serial flash */ if (sio_data->kind == nct6775) { /* On NCT6775, fan4 shares pins with the fdc interface */ @@ -2013,8 +2017,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data, fan4min = fan4pin; } - superio_exit(sio_data->sioreg); - data->has_fan = data->has_fan_min = 0x03; /* fan1 and fan2 */ data->has_fan |= (fan3pin << 2); data->has_fan_min |= (fan3pin << 2); @@ -2352,7 +2354,11 @@ static int w83627ehf_probe(struct platform_device *pdev) w83627ehf_init_device(data, sio_data->kind); data->vrm = vid_which_vrm(); - superio_enter(sio_data->sioreg); + + err = superio_enter(sio_data->sioreg); + if (err) + goto exit_release; + /* Read VID value */ if (sio_data->kind == w83667hg || sio_data->kind == w83667hg_b || sio_data->kind == nct6775 || sio_data->kind == nct6776) { @@ -2364,8 +2370,10 @@ static int w83627ehf_probe(struct platform_device *pdev) superio_select(sio_data->sioreg, W83667HG_LD_VID); data->vid = superio_inb(sio_data->sioreg, 0xe3); err = device_create_file(dev, &dev_attr_cpu0_vid); - if (err) + if (err) { + superio_exit(sio_data->sioreg); goto exit_release; + } } else if (sio_data->kind != w83627uhg) { superio_select(sio_data->sioreg, W83627EHF_LD_HWM); if (superio_inb(sio_data->sioreg, SIO_REG_VID_CTRL) & 0x80) { @@ -2401,8 +2409,10 @@ static int w83627ehf_probe(struct platform_device *pdev) data->vid &= 0x3f; err = device_create_file(dev, &dev_attr_cpu0_vid); - if (err) + if (err) { + superio_exit(sio_data->sioreg); goto exit_release; + } } else { dev_info(dev, "VID pins in output mode, CPU VID not available\n"); @@ -2424,10 +2434,10 @@ static int w83627ehf_probe(struct platform_device *pdev) pr_info("Enabled fan debounce for chip %s\n", data->name); } - superio_exit(sio_data->sioreg); - w83627ehf_check_fan_inputs(sio_data, data); + superio_exit(sio_data->sioreg); + /* Read fan clock dividers immediately */ w83627ehf_update_fan_div_common(dev, data); @@ -2712,8 +2722,11 @@ static int __init w83627ehf_find(int sioaddr, unsigned short *addr, u16 val; const char *sio_name; + int err; - superio_enter(sioaddr); + err = superio_enter(sioaddr); + if (err) + return err; if (force_id) val = force_id; diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index feb30061123b..5901937284e7 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -107,7 +107,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, memcpy(scsi_req(rq)->cmd, pc->c, 12); if (drive->media == ide_tape) scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1; - error = blk_execute_rq(drive->queue, disk, rq, 0); + blk_execute_rq(drive->queue, disk, rq, 0); + error = scsi_req(rq)->result ? -EIO : 0; put_req: blk_put_request(rq); return error; @@ -454,7 +455,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("%s: I/O error\n", drive->name); if (drive->media != ide_tape) - pc->rq->errors++; + scsi_req(pc->rq)->result++; if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) { printk(KERN_ERR PFX "%s: I/O error in request " @@ -488,13 +489,13 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->failed_pc = NULL; if (ata_misc_request(rq)) { - rq->errors = 0; + scsi_req(rq)->result = 0; error = 0; } else { if (blk_rq_is_passthrough(rq) && uptodate <= 0) { - if (rq->errors == 0) - rq->errors = -EIO; + if (scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; } error = uptodate ? 0 : -EIO; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 74f1b7dc03f7..07e5ff3a64c3 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -247,10 +247,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq) struct cdrom_info *info = drive->driver_data; - if (!rq->errors) + if (!scsi_req(rq)->result) info->write_timeout = jiffies + ATAPI_WAIT_WRITE_BUSY; - rq->errors = 1; + scsi_req(rq)->result = 1; if (time_after(jiffies, info->write_timeout)) return 0; @@ -294,8 +294,8 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) } /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */ - if (blk_rq_is_scsi(rq) && !rq->errors) - rq->errors = SAM_STAT_CHECK_CONDITION; + if (blk_rq_is_scsi(rq) && !scsi_req(rq)->result) + scsi_req(rq)->result = SAM_STAT_CHECK_CONDITION; if (blk_noretry_request(rq)) do_end_request = 1; @@ -325,7 +325,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * Arrange to retry the request but be sure to give up if we've * retried too many times. */ - if (++rq->errors > ERROR_MAX) + if (++scsi_req(rq)->result > ERROR_MAX) do_end_request = 1; break; case ILLEGAL_REQUEST: @@ -372,7 +372,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* go to the default handler for other errors */ ide_error(drive, "cdrom_decode_status", stat); return 1; - } else if (++rq->errors > ERROR_MAX) + } else if (++scsi_req(rq)->result > ERROR_MAX) /* we've racked up too many retries, abort */ do_end_request = 1; } @@ -452,7 +452,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, } } - error = blk_execute_rq(drive->queue, info->disk, rq, 0); + blk_execute_rq(drive->queue, info->disk, rq, 0); + error = scsi_req(rq)->result ? -EIO : 0; if (buffer) *bufflen = scsi_req(rq)->resid_len; @@ -683,8 +684,8 @@ out_end: if (cmd->nleft == 0) uptodate = 1; } else { - if (uptodate <= 0 && rq->errors == 0) - rq->errors = -EIO; + if (uptodate <= 0 && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; } if (uptodate == 0 && rq->bio) @@ -1379,7 +1380,7 @@ static int ide_cdrom_prep_pc(struct request *rq) * appropriate action */ if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { - rq->errors = ILLEGAL_REQUEST; + scsi_req(rq)->result = ILLEGAL_REQUEST; return BLKPREP_KILL; } diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 9fcefbc8425e..55cd736c39c6 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -307,7 +307,8 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_MISC; rq->rq_flags = RQF_QUIET; - ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); + blk_execute_rq(drive->queue, cd->disk, rq, 0); + ret = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); /* * A reset will unlock the door. If it was previously locked, diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index a45dda5386e4..9b69c32ee560 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -173,8 +173,8 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, *(int *)&scsi_req(rq)->cmd[1] = arg; rq->special = setting->set; - if (blk_execute_rq(q, NULL, rq, 0)) - ret = rq->errors; + blk_execute_rq(q, NULL, rq, 0); + ret = scsi_req(rq)->result; blk_put_request(rq); return ret; @@ -186,7 +186,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]); if (err) - rq->errors = err; - ide_complete_rq(drive, err, blk_rq_bytes(rq)); + scsi_req(rq)->result = err; + ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 186159715b71..7c06237f3479 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -470,7 +470,6 @@ ide_devset_get(multcount, mult_count); static int set_multcount(ide_drive_t *drive, int arg) { struct request *rq; - int error; if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) return -EINVAL; @@ -484,7 +483,7 @@ static int set_multcount(ide_drive_t *drive, int arg) drive->mult_req = arg; drive->special_flags |= IDE_SFLAG_SET_MULTMODE; - error = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); return (drive->mult_count == arg) ? 0 : -EIO; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 17a65ac56491..51c81223e56d 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -490,7 +490,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) * make sure request is sane */ if (hwif->rq) - hwif->rq->errors = 0; + scsi_req(hwif->rq)->result = 0; return ret; } diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index cf3af6840368..4b7ffd7d158d 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -12,7 +12,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, if ((stat & ATA_BUSY) || ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) { /* other bits are useless when BUSY */ - rq->errors |= ERROR_RESET; + scsi_req(rq)->result |= ERROR_RESET; } else if (stat & ATA_ERR) { /* err has different meaning on cdrom and tape */ if (err == ATA_ABORTED) { @@ -25,10 +25,10 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, drive->crc_count++; } else if (err & (ATA_BBK | ATA_UNC)) { /* retries won't help these */ - rq->errors = ERROR_MAX; + scsi_req(rq)->result = ERROR_MAX; } else if (err & ATA_TRK0NF) { /* help it find track zero */ - rq->errors |= ERROR_RECAL; + scsi_req(rq)->result |= ERROR_RECAL; } } @@ -39,23 +39,23 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE); } - if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) { + if (scsi_req(rq)->result >= ERROR_MAX || blk_noretry_request(rq)) { ide_kill_rq(drive, rq); return ide_stopped; } if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) - rq->errors |= ERROR_RESET; + scsi_req(rq)->result |= ERROR_RESET; - if ((rq->errors & ERROR_RESET) == ERROR_RESET) { - ++rq->errors; + if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) { + ++scsi_req(rq)->result; return ide_do_reset(drive); } - if ((rq->errors & ERROR_RECAL) == ERROR_RECAL) + if ((scsi_req(rq)->result & ERROR_RECAL) == ERROR_RECAL) drive->special_flags |= IDE_SFLAG_RECALIBRATE; - ++rq->errors; + ++scsi_req(rq)->result; return ide_stopped; } @@ -68,7 +68,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, if ((stat & ATA_BUSY) || ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) { /* other bits are useless when BUSY */ - rq->errors |= ERROR_RESET; + scsi_req(rq)->result |= ERROR_RESET; } else { /* add decoding error stuff */ } @@ -77,14 +77,14 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, /* force an abort */ hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); - if (rq->errors >= ERROR_MAX) { + if (scsi_req(rq)->result >= ERROR_MAX) { ide_kill_rq(drive, rq); } else { - if ((rq->errors & ERROR_RESET) == ERROR_RESET) { - ++rq->errors; + if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) { + ++scsi_req(rq)->result; return ide_do_reset(drive); } - ++rq->errors; + ++scsi_req(rq)->result; } return ide_stopped; @@ -130,11 +130,11 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) if (cmd) ide_complete_cmd(drive, cmd, stat, err); } else if (ata_pm_request(rq)) { - rq->errors = 1; + scsi_req(rq)->result = 1; ide_complete_pm_rq(drive, rq); return ide_stopped; } - rq->errors = err; + scsi_req(rq)->result = err; ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); return ide_stopped; } @@ -149,8 +149,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) if (rq && ata_misc_request(rq) && scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) { - if (err <= 0 && rq->errors == 0) - rq->errors = -EIO; + if (err <= 0 && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq)); } } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index a69e8013f1df..8ac6048cd2df 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -98,7 +98,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc) } if (ata_misc_request(rq)) - rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; + scsi_req(rq)->result = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; return uptodate; } @@ -239,7 +239,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, ? rq->rq_disk->disk_name : "dev?")); - if (rq->errors >= ERROR_MAX) { + if (scsi_req(rq)->result >= ERROR_MAX) { if (drive->failed_pc) { ide_floppy_report_error(floppy, drive->failed_pc); drive->failed_pc = NULL; @@ -247,7 +247,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, printk(KERN_ERR PFX "%s: I/O error\n", drive->name); if (ata_misc_request(rq)) { - rq->errors = 0; + scsi_req(rq)->result = 0; ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; } else @@ -301,8 +301,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, return ide_floppy_issue_pc(drive, &cmd, pc); out_end: drive->failed_pc = NULL; - if (blk_rq_is_passthrough(rq) && rq->errors == 0) - rq->errors = -EIO; + if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 043b1fb963cb..45b3f41a43d4 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -141,12 +141,12 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq) drive->failed_pc = NULL; if ((media == ide_floppy || media == ide_tape) && drv_req) { - rq->errors = 0; + scsi_req(rq)->result = 0; } else { if (media == ide_tape) - rq->errors = IDE_DRV_ERROR_GENERAL; - else if (blk_rq_is_passthrough(rq) && rq->errors == 0) - rq->errors = -EIO; + scsi_req(rq)->result = IDE_DRV_ERROR_GENERAL; + else if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; } ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); @@ -271,7 +271,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, #ifdef DEBUG printk("%s: DRIVE_CMD (null)\n", drive->name); #endif - rq->errors = 0; + scsi_req(rq)->result = 0; ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 248a3e0ceb46..8c0d17297a7a 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -128,7 +128,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_TASKFILE; - err = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_execute_rq(drive->queue, NULL, rq, 0); + err = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); return err; @@ -227,8 +228,8 @@ static int generic_drive_reset(ide_drive_t *drive) ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd_len = 1; scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET; - if (blk_execute_rq(drive->queue, NULL, rq, 1)) - ret = rq->errors; + blk_execute_rq(drive->queue, NULL, rq, 1); + ret = scsi_req(rq)->result; blk_put_request(rq); return ret; } diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 101aed9a61ca..94e3107f59b9 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -37,7 +37,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; rq->special = &timeout; - rc = blk_execute_rq(q, NULL, rq, 1); + blk_execute_rq(q, NULL, rq, 1); + rc = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); if (rc) goto out; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index ec951be4b0c8..0977fc1f40ce 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -27,7 +27,8 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) mesg.event = PM_EVENT_FREEZE; rqpm.pm_state = mesg.event; - ret = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_execute_rq(drive->queue, NULL, rq, 0); + ret = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); if (ret == 0 && ide_port_acpi(hwif)) { @@ -55,8 +56,8 @@ static int ide_pm_execute_rq(struct request *rq) spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; - rq->errors = -ENXIO; - __blk_end_request_all(rq, rq->errors); + scsi_req(rq)->result = -ENXIO; + __blk_end_request_all(rq, 0); spin_unlock_irq(q->queue_lock); return -ENXIO; } @@ -66,7 +67,7 @@ static int ide_pm_execute_rq(struct request *rq) wait_for_completion_io(&wait); - return rq->errors ? -EIO : 0; + return scsi_req(rq)->result ? -EIO : 0; } int generic_ide_resume(struct device *dev) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d8a552b47718..a0651f948b76 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -366,7 +366,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) err = pc->error; } } - rq->errors = err; + scsi_req(rq)->result = err; return uptodate; } @@ -879,7 +879,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) tape->valid = 0; ret = size; - if (rq->errors == IDE_DRV_ERROR_GENERAL) + if (scsi_req(rq)->result == IDE_DRV_ERROR_GENERAL) ret = -EIO; out_put: blk_put_request(rq); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 4c0007cb74e3..d71199d23c9e 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -287,7 +287,7 @@ static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd, u8 saved_io_32bit = drive->io_32bit; if (cmd->tf_flags & IDE_TFLAG_FS) - cmd->rq->errors = 0; + scsi_req(cmd->rq)->result = 0; if (cmd->tf_flags & IDE_TFLAG_IO_16BIT) drive->io_32bit = 0; @@ -329,7 +329,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat) u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER); ide_complete_cmd(drive, cmd, stat, err); - rq->errors = err; + scsi_req(rq)->result = err; if (err == 0 && set_xfer) { ide_set_xfer_rate(drive, nsect); @@ -452,8 +452,8 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq->special = cmd; cmd->rq = rq; - error = blk_execute_rq(drive->queue, NULL, rq, 0); - + blk_execute_rq(drive->queue, NULL, rq, 0); + error = scsi_req(rq)->result ? -EIO : 0; put_req: blk_put_request(rq); return error; diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 312bd6ca9198..09720d950686 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -620,6 +620,13 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "20046"), }, }, + { + /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), + }, + }, { } }; diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 275f467956ee..6c2999872090 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -76,6 +76,15 @@ config LEDS_BCM6358 This option enables support for LEDs connected to the BCM6358 LED HW controller accessed via MMIO registers. +config LEDS_CPCAP + tristate "LED Support for Motorola CPCAP" + depends on LEDS_CLASS + depends on MFD_CPCAP + depends on OF + help + This option enables support for LEDs offered by Motorola's + CPCAP PMIC. + config LEDS_LM3530 tristate "LCD Backlight driver for LM3530" depends on LEDS_CLASS @@ -126,6 +135,14 @@ config LEDS_MIKROTIK_RB532 This option enables support for the so called "User LED" of Mikrotik's Routerboard 532. +config LEDS_MT6323 + tristate "LED Support for Mediatek MT6323 PMIC" + depends on LEDS_CLASS + depends on MFD_MT6397 + help + This option enables support for on-chip LED drivers found on + Mediatek MT6323 PMIC. + config LEDS_S3C24XX tristate "LED Support for Samsung S3C24XX GPIO LEDs" depends on LEDS_CLASS @@ -241,7 +258,6 @@ config LEDS_LP3952 tristate "LED Support for TI LP3952 2 channel LED driver" depends on LEDS_CLASS depends on I2C - depends on ACPI depends on GPIOLIB select REGMAP_I2C help @@ -463,15 +479,6 @@ config LEDS_ADP5520 To compile this driver as a module, choose M here: the module will be called leds-adp5520. -config LEDS_DELL_NETBOOKS - tristate "External LED on Dell Business Netbooks" - depends on LEDS_CLASS - depends on X86 && ACPI_WMI - depends on DELL_SMBIOS - help - This adds support for the Latitude 2100 and similar - notebooks that have an external LED. - config LEDS_MC13783 tristate "LED Support for MC13XXX PMIC" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 6b8273736478..45f133962ed8 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_LEDS_AAT1290) += leds-aat1290.o obj-$(CONFIG_LEDS_BCM6328) += leds-bcm6328.o obj-$(CONFIG_LEDS_BCM6358) += leds-bcm6358.o obj-$(CONFIG_LEDS_BD2802) += leds-bd2802.o +obj-$(CONFIG_LEDS_CPCAP) += leds-cpcap.o obj-$(CONFIG_LEDS_LOCOMO) += leds-locomo.o obj-$(CONFIG_LEDS_LM3530) += leds-lm3530.o obj-$(CONFIG_LEDS_LM3533) += leds-lm3533.o @@ -52,7 +53,6 @@ obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o obj-$(CONFIG_LEDS_INTEL_SS4200) += leds-ss4200.o obj-$(CONFIG_LEDS_LT3593) += leds-lt3593.o obj-$(CONFIG_LEDS_ADP5520) += leds-adp5520.o -obj-$(CONFIG_LEDS_DELL_NETBOOKS) += dell-led.o obj-$(CONFIG_LEDS_MC13783) += leds-mc13783.o obj-$(CONFIG_LEDS_NS2) += leds-ns2.o obj-$(CONFIG_LEDS_NETXBIG) += leds-netxbig.o @@ -72,6 +72,7 @@ obj-$(CONFIG_LEDS_IS31FL32XX) += leds-is31fl32xx.o obj-$(CONFIG_LEDS_PM8058) += leds-pm8058.o obj-$(CONFIG_LEDS_MLXCPLD) += leds-mlxcpld.o obj-$(CONFIG_LEDS_NIC78BX) += leds-nic78bx.o +obj-$(CONFIG_LEDS_MT6323) += leds-mt6323.o # LED SPI Drivers obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index f2b0a80a62b4..b0e2d55acbd6 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -244,11 +244,14 @@ static int led_classdev_next_name(const char *init_name, char *name, } /** - * led_classdev_register - register a new object of led_classdev class. - * @parent: The device to register. + * of_led_classdev_register - register a new object of led_classdev class. + * + * @parent: parent of LED device * @led_cdev: the led_classdev structure for this device. + * @np: DT node describing this LED */ -int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) +int of_led_classdev_register(struct device *parent, struct device_node *np, + struct led_classdev *led_cdev) { char name[LED_MAX_NAME_SIZE]; int ret; @@ -261,6 +264,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) led_cdev, led_cdev->groups, "%s", name); if (IS_ERR(led_cdev->dev)) return PTR_ERR(led_cdev->dev); + led_cdev->dev->of_node = np; if (ret) dev_warn(parent, "Led %s renamed to %s due to name collision", @@ -303,7 +307,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) return 0; } -EXPORT_SYMBOL_GPL(led_classdev_register); +EXPORT_SYMBOL_GPL(of_led_classdev_register); /** * led_classdev_unregister - unregisters a object of led_properties class. @@ -348,12 +352,14 @@ static void devm_led_classdev_release(struct device *dev, void *res) } /** - * devm_led_classdev_register - resource managed led_classdev_register() - * @parent: The device to register. + * devm_of_led_classdev_register - resource managed led_classdev_register() + * + * @parent: parent of LED device * @led_cdev: the led_classdev structure for this device. */ -int devm_led_classdev_register(struct device *parent, - struct led_classdev *led_cdev) +int devm_of_led_classdev_register(struct device *parent, + struct device_node *np, + struct led_classdev *led_cdev) { struct led_classdev **dr; int rc; @@ -362,7 +368,7 @@ int devm_led_classdev_register(struct device *parent, if (!dr) return -ENOMEM; - rc = led_classdev_register(parent, led_cdev); + rc = of_led_classdev_register(parent, np, led_cdev); if (rc) { devres_free(dr); return rc; @@ -373,7 +379,7 @@ int devm_led_classdev_register(struct device *parent, return 0; } -EXPORT_SYMBOL_GPL(devm_led_classdev_register); +EXPORT_SYMBOL_GPL(devm_of_led_classdev_register); static int devm_led_classdev_match(struct device *dev, void *res, void *data) { diff --git a/drivers/leds/leds-cpcap.c b/drivers/leds/leds-cpcap.c new file mode 100644 index 000000000000..f0f28c442807 --- /dev/null +++ b/drivers/leds/leds-cpcap.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2017 Sebastian Reichel <sre@kernel.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * later as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/leds.h> +#include <linux/mfd/motorola-cpcap.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/regulator/consumer.h> + +#define CPCAP_LED_NO_CURRENT 0x0001 + +struct cpcap_led_info { + u16 reg; + u16 mask; + u16 limit; + u16 init_mask; + u16 init_val; +}; + +static const struct cpcap_led_info cpcap_led_red = { + .reg = CPCAP_REG_REDC, + .mask = 0x03FF, + .limit = 31, +}; + +static const struct cpcap_led_info cpcap_led_green = { + .reg = CPCAP_REG_GREENC, + .mask = 0x03FF, + .limit = 31, +}; + +static const struct cpcap_led_info cpcap_led_blue = { + .reg = CPCAP_REG_BLUEC, + .mask = 0x03FF, + .limit = 31, +}; + +/* aux display light */ +static const struct cpcap_led_info cpcap_led_adl = { + .reg = CPCAP_REG_ADLC, + .mask = 0x000F, + .limit = 1, + .init_mask = 0x7FFF, + .init_val = 0x5FF0, +}; + +/* camera privacy led */ +static const struct cpcap_led_info cpcap_led_cp = { + .reg = CPCAP_REG_CLEDC, + .mask = 0x0007, + .limit = 1, + .init_mask = 0x03FF, + .init_val = 0x0008, +}; + +struct cpcap_led { + struct led_classdev led; + const struct cpcap_led_info *info; + struct device *dev; + struct regmap *regmap; + struct mutex update_lock; + struct regulator *vdd; + bool powered; + + u32 current_limit; +}; + +static u16 cpcap_led_val(u8 current_limit, u8 duty_cycle) +{ + current_limit &= 0x1f; /* 5 bit */ + duty_cycle &= 0x0f; /* 4 bit */ + + return current_limit << 4 | duty_cycle; +} + +static int cpcap_led_set_power(struct cpcap_led *led, bool status) +{ + int err; + + if (status == led->powered) + return 0; + + if (status) + err = regulator_enable(led->vdd); + else + err = regulator_disable(led->vdd); + + if (err) { + dev_err(led->dev, "regulator failure: %d", err); + return err; + } + + led->powered = status; + + return 0; +} + +static int cpcap_led_set(struct led_classdev *ledc, enum led_brightness value) +{ + struct cpcap_led *led = container_of(ledc, struct cpcap_led, led); + int brightness; + int err; + + mutex_lock(&led->update_lock); + + if (value > LED_OFF) { + err = cpcap_led_set_power(led, true); + if (err) + goto exit; + } + + if (value == LED_OFF) { + /* Avoid HW issue by turning off current before duty cycle */ + err = regmap_update_bits(led->regmap, + led->info->reg, led->info->mask, CPCAP_LED_NO_CURRENT); + if (err) { + dev_err(led->dev, "regmap failed: %d", err); + goto exit; + } + + brightness = cpcap_led_val(value, LED_OFF); + } else { + brightness = cpcap_led_val(value, LED_ON); + } + + err = regmap_update_bits(led->regmap, led->info->reg, led->info->mask, + brightness); + if (err) { + dev_err(led->dev, "regmap failed: %d", err); + goto exit; + } + + if (value == LED_OFF) { + err = cpcap_led_set_power(led, false); + if (err) + goto exit; + } + +exit: + mutex_unlock(&led->update_lock); + return err; +} + +static const struct of_device_id cpcap_led_of_match[] = { + { .compatible = "motorola,cpcap-led-red", .data = &cpcap_led_red }, + { .compatible = "motorola,cpcap-led-green", .data = &cpcap_led_green }, + { .compatible = "motorola,cpcap-led-blue", .data = &cpcap_led_blue }, + { .compatible = "motorola,cpcap-led-adl", .data = &cpcap_led_adl }, + { .compatible = "motorola,cpcap-led-cp", .data = &cpcap_led_cp }, + {}, +}; +MODULE_DEVICE_TABLE(of, cpcap_led_of_match); + +static int cpcap_led_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct cpcap_led *led; + int err; + + match = of_match_device(of_match_ptr(cpcap_led_of_match), &pdev->dev); + if (!match || !match->data) + return -EINVAL; + + led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL); + if (!led) + return -ENOMEM; + platform_set_drvdata(pdev, led); + led->info = match->data; + led->dev = &pdev->dev; + + if (led->info->reg == 0x0000) { + dev_err(led->dev, "Unsupported LED"); + return -ENODEV; + } + + led->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!led->regmap) + return -ENODEV; + + led->vdd = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(led->vdd)) { + err = PTR_ERR(led->vdd); + dev_err(led->dev, "Couldn't get regulator: %d", err); + return err; + } + + err = device_property_read_string(&pdev->dev, "label", &led->led.name); + if (err) { + dev_err(led->dev, "Couldn't read LED label: %d", err); + return err; + } + + if (led->info->init_mask) { + err = regmap_update_bits(led->regmap, led->info->reg, + led->info->init_mask, led->info->init_val); + if (err) { + dev_err(led->dev, "regmap failed: %d", err); + return err; + } + } + + mutex_init(&led->update_lock); + + led->led.max_brightness = led->info->limit; + led->led.brightness_set_blocking = cpcap_led_set; + err = devm_led_classdev_register(&pdev->dev, &led->led); + if (err) { + dev_err(led->dev, "Couldn't register LED: %d", err); + return err; + } + + return 0; +} + +static struct platform_driver cpcap_led_driver = { + .probe = cpcap_led_probe, + .driver = { + .name = "cpcap-led", + .of_match_table = cpcap_led_of_match, + }, +}; +module_platform_driver(cpcap_led_driver); + +MODULE_DESCRIPTION("CPCAP LED driver"); +MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 066fc7590729..e753ba93ba1e 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -77,7 +77,7 @@ static int gpio_blink_set(struct led_classdev *led_cdev, static int create_gpio_led(const struct gpio_led *template, struct gpio_led_data *led_dat, struct device *parent, - gpio_blink_set_t blink_set) + struct device_node *np, gpio_blink_set_t blink_set) { int ret, state; @@ -139,7 +139,7 @@ static int create_gpio_led(const struct gpio_led *template, if (ret < 0) return ret; - return devm_led_classdev_register(parent, &led_dat->cdev); + return devm_of_led_classdev_register(parent, np, &led_dat->cdev); } struct gpio_leds_priv { @@ -208,7 +208,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev) if (fwnode_property_present(child, "panic-indicator")) led.panic_indicator = 1; - ret = create_gpio_led(&led, led_dat, dev, NULL); + ret = create_gpio_led(&led, led_dat, dev, np, NULL); if (ret < 0) { fwnode_handle_put(child); return ERR_PTR(ret); @@ -242,9 +242,9 @@ static int gpio_led_probe(struct platform_device *pdev) priv->num_leds = pdata->num_leds; for (i = 0; i < priv->num_leds; i++) { - ret = create_gpio_led(&pdata->leds[i], - &priv->leds[i], - &pdev->dev, pdata->gpio_blink_set); + ret = create_gpio_led(&pdata->leds[i], &priv->leds[i], + &pdev->dev, NULL, + pdata->gpio_blink_set); if (ret < 0) return ret; } diff --git a/drivers/leds/leds-lp3952.c b/drivers/leds/leds-lp3952.c index 4847e89883a7..847f7f282126 100644 --- a/drivers/leds/leds-lp3952.c +++ b/drivers/leds/leds-lp3952.c @@ -10,7 +10,6 @@ * */ -#include <linux/acpi.h> #include <linux/delay.h> #include <linux/gpio.h> #include <linux/i2c.h> @@ -103,10 +102,11 @@ static int lp3952_get_label(struct device *dev, const char *label, char *dest) const char *str; ret = device_property_read_string(dev, label, &str); - if (!ret) - strncpy(dest, str, LP3952_LABEL_MAX_LEN); + if (ret) + return ret; - return ret; + strncpy(dest, str, LP3952_LABEL_MAX_LEN); + return 0; } static int lp3952_register_led_classdev(struct lp3952_led_array *priv) @@ -276,19 +276,9 @@ static const struct i2c_device_id lp3952_id[] = { }; MODULE_DEVICE_TABLE(i2c, lp3952_id); -#ifdef CONFIG_ACPI -static const struct acpi_device_id lp3952_acpi_match[] = { - {"TXNW3952", 0}, - {} -}; - -MODULE_DEVICE_TABLE(acpi, lp3952_acpi_match); -#endif - static struct i2c_driver lp3952_i2c_driver = { .driver = { .name = LP3952_NAME, - .acpi_match_table = ACPI_PTR(lp3952_acpi_match), }, .probe = lp3952_probe, .remove = lp3952_remove, diff --git a/drivers/leds/leds-mt6323.c b/drivers/leds/leds-mt6323.c new file mode 100644 index 000000000000..8893c74e9a1f --- /dev/null +++ b/drivers/leds/leds-mt6323.c @@ -0,0 +1,502 @@ +/* + * LED driver for Mediatek MT6323 PMIC + * + * Copyright (C) 2017 Sean Wang <sean.wang@mediatek.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/kernel.h> +#include <linux/leds.h> +#include <linux/mfd/mt6323/registers.h> +#include <linux/mfd/mt6397/core.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> + +/* + * Register field for MT6323_TOP_CKPDN0 to enable + * 32K clock common for LED device. + */ +#define MT6323_RG_DRV_32K_CK_PDN BIT(11) +#define MT6323_RG_DRV_32K_CK_PDN_MASK BIT(11) + +/* + * Register field for MT6323_TOP_CKPDN2 to enable + * individual clock for LED device. + */ +#define MT6323_RG_ISINK_CK_PDN(i) BIT(i) +#define MT6323_RG_ISINK_CK_PDN_MASK(i) BIT(i) + +/* + * Register field for MT6323_TOP_CKCON1 to select + * clock source. + */ +#define MT6323_RG_ISINK_CK_SEL_MASK(i) (BIT(10) << (i)) + +/* + * Register for MT6323_ISINK_CON0 to setup the + * duty cycle of the blink. + */ +#define MT6323_ISINK_CON0(i) (MT6323_ISINK0_CON0 + 0x8 * (i)) +#define MT6323_ISINK_DIM_DUTY_MASK (0x1f << 8) +#define MT6323_ISINK_DIM_DUTY(i) (((i) << 8) & \ + MT6323_ISINK_DIM_DUTY_MASK) + +/* Register to setup the period of the blink. */ +#define MT6323_ISINK_CON1(i) (MT6323_ISINK0_CON1 + 0x8 * (i)) +#define MT6323_ISINK_DIM_FSEL_MASK (0xffff) +#define MT6323_ISINK_DIM_FSEL(i) ((i) & MT6323_ISINK_DIM_FSEL_MASK) + +/* Register to control the brightness. */ +#define MT6323_ISINK_CON2(i) (MT6323_ISINK0_CON2 + 0x8 * (i)) +#define MT6323_ISINK_CH_STEP_SHIFT 12 +#define MT6323_ISINK_CH_STEP_MASK (0x7 << 12) +#define MT6323_ISINK_CH_STEP(i) (((i) << 12) & \ + MT6323_ISINK_CH_STEP_MASK) +#define MT6323_ISINK_SFSTR0_TC_MASK (0x3 << 1) +#define MT6323_ISINK_SFSTR0_TC(i) (((i) << 1) & \ + MT6323_ISINK_SFSTR0_TC_MASK) +#define MT6323_ISINK_SFSTR0_EN_MASK BIT(0) +#define MT6323_ISINK_SFSTR0_EN BIT(0) + +/* Register to LED channel enablement. */ +#define MT6323_ISINK_CH_EN_MASK(i) BIT(i) +#define MT6323_ISINK_CH_EN(i) BIT(i) + +#define MT6323_MAX_PERIOD 10000 +#define MT6323_MAX_LEDS 4 +#define MT6323_MAX_BRIGHTNESS 6 +#define MT6323_UNIT_DUTY 3125 +#define MT6323_CAL_HW_DUTY(o, p) DIV_ROUND_CLOSEST((o) * 100000ul,\ + (p) * MT6323_UNIT_DUTY) + +struct mt6323_leds; + +/** + * struct mt6323_led - state container for the LED device + * @id: the identifier in MT6323 LED device + * @parent: the pointer to MT6323 LED controller + * @cdev: LED class device for this LED device + * @current_brightness: current state of the LED device + */ +struct mt6323_led { + int id; + struct mt6323_leds *parent; + struct led_classdev cdev; + enum led_brightness current_brightness; +}; + +/** + * struct mt6323_leds - state container for holding LED controller + * of the driver + * @dev: the device pointer + * @hw: the underlying hardware providing shared + * bus for the register operations + * @lock: the lock among process context + * @led: the array that contains the state of individual + * LED device + */ +struct mt6323_leds { + struct device *dev; + struct mt6397_chip *hw; + /* protect among process context */ + struct mutex lock; + struct mt6323_led *led[MT6323_MAX_LEDS]; +}; + +static int mt6323_led_hw_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + u32 con2_mask = 0, con2_val = 0; + int ret; + + /* + * Setup current output for the corresponding + * brightness level. + */ + con2_mask |= MT6323_ISINK_CH_STEP_MASK | + MT6323_ISINK_SFSTR0_TC_MASK | + MT6323_ISINK_SFSTR0_EN_MASK; + con2_val |= MT6323_ISINK_CH_STEP(brightness - 1) | + MT6323_ISINK_SFSTR0_TC(2) | + MT6323_ISINK_SFSTR0_EN; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON2(led->id), + con2_mask, con2_val); + return ret; +} + +static int mt6323_led_hw_off(struct led_classdev *cdev) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned int status; + int ret; + + status = MT6323_ISINK_CH_EN(led->id); + ret = regmap_update_bits(regmap, MT6323_ISINK_EN_CTRL, + MT6323_ISINK_CH_EN_MASK(led->id), ~status); + if (ret < 0) + return ret; + + usleep_range(100, 300); + ret = regmap_update_bits(regmap, MT6323_TOP_CKPDN2, + MT6323_RG_ISINK_CK_PDN_MASK(led->id), + MT6323_RG_ISINK_CK_PDN(led->id)); + if (ret < 0) + return ret; + + return 0; +} + +static enum led_brightness +mt6323_get_led_hw_brightness(struct led_classdev *cdev) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned int status; + int ret; + + ret = regmap_read(regmap, MT6323_TOP_CKPDN2, &status); + if (ret < 0) + return ret; + + if (status & MT6323_RG_ISINK_CK_PDN_MASK(led->id)) + return 0; + + ret = regmap_read(regmap, MT6323_ISINK_EN_CTRL, &status); + if (ret < 0) + return ret; + + if (!(status & MT6323_ISINK_CH_EN(led->id))) + return 0; + + ret = regmap_read(regmap, MT6323_ISINK_CON2(led->id), &status); + if (ret < 0) + return ret; + + return ((status & MT6323_ISINK_CH_STEP_MASK) + >> MT6323_ISINK_CH_STEP_SHIFT) + 1; +} + +static int mt6323_led_hw_on(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned int status; + int ret; + + /* + * Setup required clock source, enable the corresponding + * clock and channel and let work with continuous blink as + * the default. + */ + ret = regmap_update_bits(regmap, MT6323_TOP_CKCON1, + MT6323_RG_ISINK_CK_SEL_MASK(led->id), 0); + if (ret < 0) + return ret; + + status = MT6323_RG_ISINK_CK_PDN(led->id); + ret = regmap_update_bits(regmap, MT6323_TOP_CKPDN2, + MT6323_RG_ISINK_CK_PDN_MASK(led->id), + ~status); + if (ret < 0) + return ret; + + usleep_range(100, 300); + + ret = regmap_update_bits(regmap, MT6323_ISINK_EN_CTRL, + MT6323_ISINK_CH_EN_MASK(led->id), + MT6323_ISINK_CH_EN(led->id)); + if (ret < 0) + return ret; + + ret = mt6323_led_hw_brightness(cdev, brightness); + if (ret < 0) + return ret; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON0(led->id), + MT6323_ISINK_DIM_DUTY_MASK, + MT6323_ISINK_DIM_DUTY(31)); + if (ret < 0) + return ret; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON1(led->id), + MT6323_ISINK_DIM_FSEL_MASK, + MT6323_ISINK_DIM_FSEL(1000)); + if (ret < 0) + return ret; + + return 0; +} + +static int mt6323_led_set_blink(struct led_classdev *cdev, + unsigned long *delay_on, + unsigned long *delay_off) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned long period; + u8 duty_hw; + int ret; + + /* + * Units are in ms, if over the hardware able + * to support, fallback into software blink + */ + period = *delay_on + *delay_off; + + if (period > MT6323_MAX_PERIOD) + return -EINVAL; + + /* + * LED subsystem requires a default user + * friendly blink pattern for the LED so using + * 1Hz duty cycle 50% here if without specific + * value delay_on and delay off being assigned. + */ + if (!*delay_on && !*delay_off) { + *delay_on = 500; + *delay_off = 500; + } + + /* + * Calculate duty_hw based on the percentage of period during + * which the led is ON. + */ + duty_hw = MT6323_CAL_HW_DUTY(*delay_on, period); + + /* hardware doesn't support zero duty cycle. */ + if (!duty_hw) + return -EINVAL; + + mutex_lock(&leds->lock); + /* + * Set max_brightness as the software blink behavior + * when no blink brightness. + */ + if (!led->current_brightness) { + ret = mt6323_led_hw_on(cdev, cdev->max_brightness); + if (ret < 0) + goto out; + led->current_brightness = cdev->max_brightness; + } + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON0(led->id), + MT6323_ISINK_DIM_DUTY_MASK, + MT6323_ISINK_DIM_DUTY(duty_hw - 1)); + if (ret < 0) + goto out; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON1(led->id), + MT6323_ISINK_DIM_FSEL_MASK, + MT6323_ISINK_DIM_FSEL(period - 1)); +out: + mutex_unlock(&leds->lock); + + return ret; +} + +static int mt6323_led_set_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + int ret; + + mutex_lock(&leds->lock); + + if (!led->current_brightness && brightness) { + ret = mt6323_led_hw_on(cdev, brightness); + if (ret < 0) + goto out; + } else if (brightness) { + ret = mt6323_led_hw_brightness(cdev, brightness); + if (ret < 0) + goto out; + } else { + ret = mt6323_led_hw_off(cdev); + if (ret < 0) + goto out; + } + + led->current_brightness = brightness; +out: + mutex_unlock(&leds->lock); + + return ret; +} + +static int mt6323_led_set_dt_default(struct led_classdev *cdev, + struct device_node *np) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + const char *state; + int ret = 0; + + led->cdev.name = of_get_property(np, "label", NULL) ? : np->name; + led->cdev.default_trigger = of_get_property(np, + "linux,default-trigger", + NULL); + + state = of_get_property(np, "default-state", NULL); + if (state) { + if (!strcmp(state, "keep")) { + ret = mt6323_get_led_hw_brightness(cdev); + if (ret < 0) + return ret; + led->current_brightness = ret; + ret = 0; + } else if (!strcmp(state, "on")) { + ret = + mt6323_led_set_brightness(cdev, cdev->max_brightness); + } else { + ret = mt6323_led_set_brightness(cdev, LED_OFF); + } + } + + return ret; +} + +static int mt6323_led_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = pdev->dev.of_node; + struct device_node *child; + struct mt6397_chip *hw = dev_get_drvdata(pdev->dev.parent); + struct mt6323_leds *leds; + struct mt6323_led *led; + int ret; + unsigned int status; + u32 reg; + + leds = devm_kzalloc(dev, sizeof(*leds), GFP_KERNEL); + if (!leds) + return -ENOMEM; + + platform_set_drvdata(pdev, leds); + leds->dev = dev; + + /* + * leds->hw points to the underlying bus for the register + * controlled. + */ + leds->hw = hw; + mutex_init(&leds->lock); + + status = MT6323_RG_DRV_32K_CK_PDN; + ret = regmap_update_bits(leds->hw->regmap, MT6323_TOP_CKPDN0, + MT6323_RG_DRV_32K_CK_PDN_MASK, ~status); + if (ret < 0) { + dev_err(leds->dev, + "Failed to update MT6323_TOP_CKPDN0 Register\n"); + return ret; + } + + for_each_available_child_of_node(np, child) { + ret = of_property_read_u32(child, "reg", ®); + if (ret) { + dev_err(dev, "Failed to read led 'reg' property\n"); + goto put_child_node; + } + + if (reg >= MT6323_MAX_LEDS || leds->led[reg]) { + dev_err(dev, "Invalid led reg %u\n", reg); + ret = -EINVAL; + goto put_child_node; + } + + led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL); + if (!led) { + ret = -ENOMEM; + goto put_child_node; + } + + leds->led[reg] = led; + leds->led[reg]->id = reg; + leds->led[reg]->cdev.max_brightness = MT6323_MAX_BRIGHTNESS; + leds->led[reg]->cdev.brightness_set_blocking = + mt6323_led_set_brightness; + leds->led[reg]->cdev.blink_set = mt6323_led_set_blink; + leds->led[reg]->cdev.brightness_get = + mt6323_get_led_hw_brightness; + leds->led[reg]->parent = leds; + + ret = mt6323_led_set_dt_default(&leds->led[reg]->cdev, child); + if (ret < 0) { + dev_err(leds->dev, + "Failed to LED set default from devicetree\n"); + goto put_child_node; + } + + ret = devm_led_classdev_register(dev, &leds->led[reg]->cdev); + if (ret) { + dev_err(&pdev->dev, "Failed to register LED: %d\n", + ret); + goto put_child_node; + } + leds->led[reg]->cdev.dev->of_node = child; + } + + return 0; + +put_child_node: + of_node_put(child); + return ret; +} + +static int mt6323_led_remove(struct platform_device *pdev) +{ + struct mt6323_leds *leds = platform_get_drvdata(pdev); + int i; + + /* Turn the LEDs off on driver removal. */ + for (i = 0 ; leds->led[i] ; i++) + mt6323_led_hw_off(&leds->led[i]->cdev); + + regmap_update_bits(leds->hw->regmap, MT6323_TOP_CKPDN0, + MT6323_RG_DRV_32K_CK_PDN_MASK, + MT6323_RG_DRV_32K_CK_PDN); + + mutex_destroy(&leds->lock); + + return 0; +} + +static const struct of_device_id mt6323_led_dt_match[] = { + { .compatible = "mediatek,mt6323-led" }, + {}, +}; +MODULE_DEVICE_TABLE(of, mt6323_led_dt_match); + +static struct platform_driver mt6323_led_driver = { + .probe = mt6323_led_probe, + .remove = mt6323_led_remove, + .driver = { + .name = "mt6323-led", + .of_match_table = mt6323_led_dt_match, + }, +}; + +module_platform_driver(mt6323_led_driver); + +MODULE_DESCRIPTION("LED driver for Mediatek MT6323 PMIC"); +MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 06e63106ae1e..7fea18b0c15d 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -254,6 +254,21 @@ static void pca9532_input_work(struct work_struct *work) mutex_unlock(&data->update_lock); } +static enum pca9532_state pca9532_getled(struct pca9532_led *led) +{ + struct i2c_client *client = led->client; + struct pca9532_data *data = i2c_get_clientdata(client); + u8 maxleds = data->chip_info->num_leds; + char reg; + enum pca9532_state ret; + + mutex_lock(&data->update_lock); + reg = i2c_smbus_read_byte_data(client, LED_REG(maxleds, led->id)); + ret = reg >> LED_NUM(led->id)/2; + mutex_unlock(&data->update_lock); + return ret; +} + #ifdef CONFIG_LEDS_PCA9532_GPIO static int pca9532_gpio_request_pin(struct gpio_chip *gc, unsigned offset) { @@ -366,7 +381,10 @@ static int pca9532_configure(struct i2c_client *client, gpios++; break; case PCA9532_TYPE_LED: - led->state = pled->state; + if (pled->state == PCA9532_KEEP) + led->state = pca9532_getled(led); + else + led->state = pled->state; led->name = pled->name; led->ldev.name = led->name; led->ldev.default_trigger = pled->default_trigger; @@ -456,6 +474,7 @@ pca9532_of_populate_pdata(struct device *dev, struct device_node *np) const struct of_device_id *match; int devid, maxleds; int i = 0; + const char *state; match = of_match_device(of_pca9532_leds_match, dev); if (!match) @@ -475,6 +494,12 @@ pca9532_of_populate_pdata(struct device *dev, struct device_node *np) of_property_read_u32(child, "type", &pdata->leds[i].type); of_property_read_string(child, "linux,default-trigger", &pdata->leds[i].default_trigger); + if (!of_property_read_string(child, "default-state", &state)) { + if (!strcmp(state, "on")) + pdata->leds[i].state = PCA9532_ON; + else if (!strcmp(state, "keep")) + pdata->leds[i].state = PCA9532_KEEP; + } if (++i >= maxleds) { of_node_put(child); break; diff --git a/drivers/leds/trigger/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c index a41896468cb3..66a626091936 100644 --- a/drivers/leds/trigger/ledtrig-cpu.c +++ b/drivers/leds/trigger/ledtrig-cpu.c @@ -31,12 +31,16 @@ #define MAX_NAME_LEN 8 struct led_trigger_cpu { + bool is_active; char name[MAX_NAME_LEN]; struct led_trigger *_trig; }; static DEFINE_PER_CPU(struct led_trigger_cpu, cpu_trig); +static struct led_trigger *trig_cpu_all; +static atomic_t num_active_cpus = ATOMIC_INIT(0); + /** * ledtrig_cpu - emit a CPU event as a trigger * @evt: CPU event to be emitted @@ -47,26 +51,46 @@ static DEFINE_PER_CPU(struct led_trigger_cpu, cpu_trig); void ledtrig_cpu(enum cpu_led_event ledevt) { struct led_trigger_cpu *trig = this_cpu_ptr(&cpu_trig); + bool is_active = trig->is_active; /* Locate the correct CPU LED */ switch (ledevt) { case CPU_LED_IDLE_END: case CPU_LED_START: /* Will turn the LED on, max brightness */ - led_trigger_event(trig->_trig, LED_FULL); + is_active = true; break; case CPU_LED_IDLE_START: case CPU_LED_STOP: case CPU_LED_HALTED: /* Will turn the LED off */ - led_trigger_event(trig->_trig, LED_OFF); + is_active = false; break; default: /* Will leave the LED as it is */ break; } + + if (is_active != trig->is_active) { + unsigned int active_cpus; + unsigned int total_cpus; + + /* Update trigger state */ + trig->is_active = is_active; + atomic_add(is_active ? 1 : -1, &num_active_cpus); + active_cpus = atomic_read(&num_active_cpus); + total_cpus = num_present_cpus(); + + led_trigger_event(trig->_trig, + is_active ? LED_FULL : LED_OFF); + + + led_trigger_event(trig_cpu_all, + DIV_ROUND_UP(LED_FULL * active_cpus, total_cpus)); + + } } EXPORT_SYMBOL(ledtrig_cpu); @@ -113,6 +137,11 @@ static int __init ledtrig_cpu_init(void) BUILD_BUG_ON(CONFIG_NR_CPUS > 9999); /* + * Registering a trigger for all CPUs. + */ + led_trigger_register_simple("cpu", &trig_cpu_all); + + /* * Registering CPU led trigger for each CPU core here * ignores CPU hotplug, but after this CPU hotplug works * fine with this trigger. diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 052714106b7b..ead61a93cb4e 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -33,4 +33,13 @@ config NVM_RRPC host. The target is implemented using a linear mapping table and cost-based garbage collection. It is optimized for 4K IO sizes. +config NVM_PBLK + tristate "Physical Block Device Open-Channel SSD target" + ---help--- + Allows an open-channel SSD to be exposed as a block device to the + host. The target assumes the device exposes raw flash and must be + explicitly managed by the host. + + Please note the disk format is considered EXPERIMENTAL for now. + endif # NVM diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index b2a39e2d2895..82d1a117fb27 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile @@ -4,3 +4,8 @@ obj-$(CONFIG_NVM) := core.o obj-$(CONFIG_NVM_RRPC) += rrpc.o +obj-$(CONFIG_NVM_PBLK) += pblk.o +pblk-y := pblk-init.o pblk-core.o pblk-rb.o \ + pblk-write.o pblk-cache.o pblk-read.o \ + pblk-gc.o pblk-recovery.o pblk-map.o \ + pblk-rl.o pblk-sysfs.o diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 5262ba66a7a7..54a06c3a2b8c 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -89,7 +89,7 @@ static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin, WARN_ON(!test_and_clear_bit(i, dev->lun_map)); } -static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) +static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) { struct nvm_dev *dev = tgt_dev->parent; struct nvm_dev_map *dev_map = tgt_dev->map; @@ -100,11 +100,14 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) int *lun_offs = ch_map->lun_offs; int ch = i + ch_map->ch_off; - for (j = 0; j < ch_map->nr_luns; j++) { - int lun = j + lun_offs[j]; - int lunid = (ch * dev->geo.luns_per_chnl) + lun; + if (clear) { + for (j = 0; j < ch_map->nr_luns; j++) { + int lun = j + lun_offs[j]; + int lunid = (ch * dev->geo.luns_per_chnl) + lun; - WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); + WARN_ON(!test_and_clear_bit(lunid, + dev->lun_map)); + } } kfree(ch_map->lun_offs); @@ -232,6 +235,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) struct nvm_target *t; struct nvm_tgt_dev *tgt_dev; void *targetdata; + int ret; tt = nvm_find_target_type(create->tgttype, 1); if (!tt) { @@ -252,34 +256,43 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -ENOMEM; t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); - if (!t) + if (!t) { + ret = -ENOMEM; goto err_reserve; + } tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end); if (!tgt_dev) { pr_err("nvm: could not create target device\n"); + ret = -ENOMEM; goto err_t; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); - if (!tqueue) + tdisk = alloc_disk(0); + if (!tdisk) { + ret = -ENOMEM; goto err_dev; - blk_queue_make_request(tqueue, tt->make_rq); + } - tdisk = alloc_disk(0); - if (!tdisk) - goto err_queue; + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + if (!tqueue) { + ret = -ENOMEM; + goto err_disk; + } + blk_queue_make_request(tqueue, tt->make_rq); - sprintf(tdisk->disk_name, "%s", create->tgtname); + strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name)); tdisk->flags = GENHD_FL_EXT_DEVT; tdisk->major = 0; tdisk->first_minor = 0; tdisk->fops = &nvm_fops; tdisk->queue = tqueue; - targetdata = tt->init(tgt_dev, tdisk); - if (IS_ERR(targetdata)) + targetdata = tt->init(tgt_dev, tdisk, create->flags); + if (IS_ERR(targetdata)) { + ret = PTR_ERR(targetdata); goto err_init; + } tdisk->private_data = targetdata; tqueue->queuedata = targetdata; @@ -289,8 +302,10 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) set_capacity(tdisk, tt->capacity(targetdata)); add_disk(tdisk); - if (tt->sysfs_init && tt->sysfs_init(tdisk)) + if (tt->sysfs_init && tt->sysfs_init(tdisk)) { + ret = -ENOMEM; goto err_sysfs; + } t->type = tt; t->disk = tdisk; @@ -305,16 +320,17 @@ err_sysfs: if (tt->exit) tt->exit(targetdata); err_init: - put_disk(tdisk); -err_queue: blk_cleanup_queue(tqueue); + tdisk->queue = NULL; +err_disk: + put_disk(tdisk); err_dev: - nvm_remove_tgt_dev(tgt_dev); + nvm_remove_tgt_dev(tgt_dev, 0); err_t: kfree(t); err_reserve: nvm_release_luns_err(dev, s->lun_begin, s->lun_end); - return -ENOMEM; + return ret; } static void __nvm_remove_target(struct nvm_target *t) @@ -332,7 +348,7 @@ static void __nvm_remove_target(struct nvm_target *t) if (tt->exit) tt->exit(tdisk->private_data); - nvm_remove_tgt_dev(t->dev); + nvm_remove_tgt_dev(t->dev, 1); put_disk(tdisk); list_del(&t->list); @@ -411,6 +427,18 @@ err_rmap: return -ENOMEM; } +static void nvm_unregister_map(struct nvm_dev *dev) +{ + struct nvm_dev_map *rmap = dev->rmap; + int i; + + for (i = 0; i < dev->geo.nr_chnls; i++) + kfree(rmap->chnls[i].lun_offs); + + kfree(rmap->chnls); + kfree(rmap); +} + static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) { struct nvm_dev_map *dev_map = tgt_dev->map; @@ -486,7 +514,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, int *lun_roffs; struct ppa_addr gaddr; u64 pba = le64_to_cpu(entries[i]); - int off; u64 diff; if (!pba) @@ -496,8 +523,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, ch_rmap = &dev_rmap->chnls[gaddr.g.ch]; lun_roffs = ch_rmap->lun_offs; - off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun; - diff = ((ch_rmap->ch_off * geo->luns_per_chnl) + (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun; @@ -590,11 +615,11 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, memset(&rqd, 0, sizeof(struct nvm_rq)); - nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); nvm_rq_tgt_to_dev(tgt_dev, &rqd); ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(dev, &rqd); + nvm_free_rqd_ppalist(tgt_dev, &rqd); if (ret) { pr_err("nvm: failed bb mark\n"); return -EINVAL; @@ -626,34 +651,45 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_submit_io); -int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags) +static void nvm_end_io_sync(struct nvm_rq *rqd) { - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; + struct completion *waiting = rqd->private; - if (!dev->ops->erase_block) - return 0; + complete(waiting); +} - nvm_map_to_dev(tgt_dev, ppas); +int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, + int nr_ppas) +{ + struct nvm_geo *geo = &tgt_dev->geo; + struct nvm_rq rqd; + int ret; + DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); - ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1); + rqd.opcode = NVM_OP_ERASE; + rqd.end_io = nvm_end_io_sync; + rqd.private = &wait; + rqd.flags = geo->plane_mode >> 1; + + ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); if (ret) return ret; - nvm_rq_tgt_to_dev(tgt_dev, &rqd); - - rqd.flags = flags; - - ret = dev->ops->erase_block(dev, &rqd); + ret = nvm_submit_io(tgt_dev, &rqd); + if (ret) { + pr_err("rrpr: erase I/O submission failed: %d\n", ret); + goto free_ppa_list; + } + wait_for_completion_io(&wait); - nvm_free_rqd_ppalist(dev, &rqd); +free_ppa_list: + nvm_free_rqd_ppalist(tgt_dev, &rqd); return ret; } -EXPORT_SYMBOL(nvm_erase_blk); +EXPORT_SYMBOL(nvm_erase_sync); int nvm_get_l2p_tbl(struct nvm_tgt_dev *tgt_dev, u64 slba, u32 nlb, nvm_l2p_update_fn *update_l2p, void *priv) @@ -732,10 +768,11 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) } EXPORT_SYMBOL(nvm_put_area); -int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, +int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, const struct ppa_addr *ppas, int nr_ppas, int vblk) { - struct nvm_geo *geo = &dev->geo; + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_geo *geo = &tgt_dev->geo; int i, plane_cnt, pl_idx; struct ppa_addr ppa; @@ -773,12 +810,12 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, } EXPORT_SYMBOL(nvm_set_rqd_ppalist); -void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd) +void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { if (!rqd->ppa_list) return; - nvm_dev_dma_free(dev, rqd->ppa_list, rqd->dma_ppa_list); + nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); } EXPORT_SYMBOL(nvm_free_rqd_ppalist); @@ -972,7 +1009,7 @@ err_fmtype: return ret; } -void nvm_free(struct nvm_dev *dev) +static void nvm_free(struct nvm_dev *dev) { if (!dev) return; @@ -980,7 +1017,7 @@ void nvm_free(struct nvm_dev *dev) if (dev->dma_pool) dev->ops->destroy_dma_pool(dev->dma_pool); - kfree(dev->rmap); + nvm_unregister_map(dev); kfree(dev->lptbl); kfree(dev->lun_map); kfree(dev); @@ -1174,13 +1211,13 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg) list_for_each_entry(dev, &nvm_devices, devices) { struct nvm_ioctl_device_info *info = &devices->info[i]; - sprintf(info->devname, "%s", dev->name); + strlcpy(info->devname, dev->name, sizeof(info->devname)); /* kept for compatibility */ info->bmversion[0] = 1; info->bmversion[1] = 0; info->bmversion[2] = 0; - sprintf(info->bmname, "%s", "gennvm"); + strlcpy(info->bmname, "gennvm", sizeof(info->bmname)); i++; if (i > 31) { @@ -1217,8 +1254,16 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) create.tgtname[DISK_NAME_LEN - 1] = '\0'; if (create.flags != 0) { - pr_err("nvm: no flags supported\n"); - return -EINVAL; + __u32 flags = create.flags; + + /* Check for valid flags */ + if (flags & NVM_TARGET_FACTORY) + flags &= ~NVM_TARGET_FACTORY; + + if (flags) { + pr_err("nvm: flag not supported\n"); + return -EINVAL; + } } return __nvm_configure_create(&create); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c new file mode 100644 index 000000000000..59bcea88db84 --- /dev/null +++ b/drivers/lightnvm/pblk-cache.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-cache.c - pblk's write cache + */ + +#include "pblk.h" + +int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags) +{ + struct pblk_w_ctx w_ctx; + sector_t lba = pblk_get_lba(bio); + unsigned int bpos, pos; + int nr_entries = pblk_get_secs(bio); + int i, ret; + + /* Update the write buffer head (mem) with the entries that we can + * write. The write in itself cannot fail, so there is no need to + * rollback from here on. + */ +retry: + ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos); + if (ret == NVM_IO_REQUEUE) { + io_schedule(); + goto retry; + } + + if (unlikely(!bio_has_data(bio))) + goto out; + + w_ctx.flags = flags; + pblk_ppa_set_empty(&w_ctx.ppa); + + for (i = 0; i < nr_entries; i++) { + void *data = bio_data(bio); + + w_ctx.lba = lba + i; + + pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i); + pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos); + + bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(nr_entries, &pblk->inflight_writes); + atomic_long_add(nr_entries, &pblk->req_writes); +#endif + +out: + pblk_write_should_kick(pblk); + return ret; +} + +/* + * On GC the incoming lbas are not necessarily sequential. Also, some of the + * lbas might not be valid entries, which are marked as empty by the GC thread + */ +int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, + unsigned int nr_entries, unsigned int nr_rec_entries, + struct pblk_line *gc_line, unsigned long flags) +{ + struct pblk_w_ctx w_ctx; + unsigned int bpos, pos; + int i, valid_entries; + + /* Update the write buffer head (mem) with the entries that we can + * write. The write in itself cannot fail, so there is no need to + * rollback from here on. + */ +retry: + if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) { + io_schedule(); + goto retry; + } + + w_ctx.flags = flags; + pblk_ppa_set_empty(&w_ctx.ppa); + + for (i = 0, valid_entries = 0; i < nr_entries; i++) { + if (lba_list[i] == ADDR_EMPTY) + continue; + + w_ctx.lba = lba_list[i]; + + pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries); + pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos); + + data += PBLK_EXPOSED_PAGE_SIZE; + valid_entries++; + } + + WARN_ONCE(nr_rec_entries != valid_entries, + "pblk: inconsistent GC write\n"); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(valid_entries, &pblk->inflight_writes); + atomic_long_add(valid_entries, &pblk->recov_gc_writes); +#endif + + pblk_write_should_kick(pblk); + return NVM_IO_OK; +} diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c new file mode 100644 index 000000000000..5e44768ccffa --- /dev/null +++ b/drivers/lightnvm/pblk-core.c @@ -0,0 +1,1667 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-core.c - pblk's core functionality + * + */ + +#include "pblk.h" +#include <linux/time.h> + +static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, + struct ppa_addr *ppa) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int pos = pblk_dev_ppa_to_pos(geo, *ppa); + + pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos); + atomic_long_inc(&pblk->erase_failed); + + atomic_dec(&line->blk_in_line); + if (test_and_set_bit(pos, line->blk_bitmap)) + pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", + line->id, pos); + + pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb); +} + +static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct pblk_line *line; + + line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)]; + atomic_dec(&line->left_seblks); + + if (rqd->error) { + struct ppa_addr *ppa; + + ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC); + if (!ppa) + return; + + *ppa = rqd->ppa_addr; + pblk_mark_bb(pblk, line, ppa); + } +} + +/* Erase completion assumes that only one block is erased at the time */ +static void pblk_end_io_erase(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + + up(&pblk->erase_sem); + __pblk_end_io_erase(pblk, rqd); + mempool_free(rqd, pblk->r_rq_pool); +} + +static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, + u64 paddr) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct list_head *move_list = NULL; + + /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P + * table is modified with reclaimed sectors, a check is done to endure + * that newer updates are not overwritten. + */ + spin_lock(&line->lock); + if (line->state == PBLK_LINESTATE_GC || + line->state == PBLK_LINESTATE_FREE) { + spin_unlock(&line->lock); + return; + } + + if (test_and_set_bit(paddr, line->invalid_bitmap)) { + WARN_ONCE(1, "pblk: double invalidate\n"); + spin_unlock(&line->lock); + return; + } + line->vsc--; + + if (line->state == PBLK_LINESTATE_CLOSED) + move_list = pblk_line_gc_list(pblk, line); + spin_unlock(&line->lock); + + if (move_list) { + spin_lock(&l_mg->gc_lock); + spin_lock(&line->lock); + /* Prevent moving a line that has just been chosen for GC */ + if (line->state == PBLK_LINESTATE_GC || + line->state == PBLK_LINESTATE_FREE) { + spin_unlock(&line->lock); + spin_unlock(&l_mg->gc_lock); + return; + } + spin_unlock(&line->lock); + + list_move_tail(&line->list, move_list); + spin_unlock(&l_mg->gc_lock); + } +} + +void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) +{ + struct pblk_line *line; + u64 paddr; + int line_id; + +#ifdef CONFIG_NVM_DEBUG + /* Callers must ensure that the ppa points to a device address */ + BUG_ON(pblk_addr_in_cache(ppa)); + BUG_ON(pblk_ppa_empty(ppa)); +#endif + + line_id = pblk_tgt_ppa_to_line(ppa); + line = &pblk->lines[line_id]; + paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); + + __pblk_map_invalidate(pblk, line, paddr); +} + +void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line, + u64 paddr) +{ + __pblk_map_invalidate(pblk, line, paddr); + + pblk_rb_sync_init(&pblk->rwb, NULL); + line->left_ssecs--; + if (!line->left_ssecs) + pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws); + pblk_rb_sync_end(&pblk->rwb, NULL); +} + +static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, + unsigned int nr_secs) +{ + sector_t lba; + + spin_lock(&pblk->trans_lock); + for (lba = slba; lba < slba + nr_secs; lba++) { + struct ppa_addr ppa; + + ppa = pblk_trans_map_get(pblk, lba); + + if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa)) + pblk_map_invalidate(pblk, ppa); + + pblk_ppa_set_empty(&ppa); + pblk_trans_map_set(pblk, lba, ppa); + } + spin_unlock(&pblk->trans_lock); +} + +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) +{ + mempool_t *pool; + struct nvm_rq *rqd; + int rq_size; + + if (rw == WRITE) { + pool = pblk->w_rq_pool; + rq_size = pblk_w_rq_size; + } else { + pool = pblk->r_rq_pool; + rq_size = pblk_r_rq_size; + } + + rqd = mempool_alloc(pool, GFP_KERNEL); + memset(rqd, 0, rq_size); + + return rqd; +} + +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw) +{ + mempool_t *pool; + + if (rw == WRITE) + pool = pblk->w_rq_pool; + else + pool = pblk->r_rq_pool; + + mempool_free(rqd, pool); +} + +void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, + int nr_pages) +{ + struct bio_vec bv; + int i; + + WARN_ON(off + nr_pages != bio->bi_vcnt); + + bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE); + for (i = off; i < nr_pages + off; i++) { + bv = bio->bi_io_vec[i]; + mempool_free(bv.bv_page, pblk->page_pool); + } +} + +int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, + int nr_pages) +{ + struct request_queue *q = pblk->dev->q; + struct page *page; + int i, ret; + + for (i = 0; i < nr_pages; i++) { + page = mempool_alloc(pblk->page_pool, flags); + if (!page) + goto err; + + ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); + if (ret != PBLK_EXPOSED_PAGE_SIZE) { + pr_err("pblk: could not add page to bio\n"); + mempool_free(page, pblk->page_pool); + goto err; + } + } + + return 0; +err: + pblk_bio_free_pages(pblk, bio, 0, i - 1); + return -1; +} + +static void pblk_write_kick(struct pblk *pblk) +{ + wake_up_process(pblk->writer_ts); + mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000)); +} + +void pblk_write_timer_fn(unsigned long data) +{ + struct pblk *pblk = (struct pblk *)data; + + /* kick the write thread every tick to flush outstanding data */ + pblk_write_kick(pblk); +} + +void pblk_write_should_kick(struct pblk *pblk) +{ + unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb); + + if (secs_avail >= pblk->min_write_pgs) + pblk_write_kick(pblk); +} + +void pblk_end_bio_sync(struct bio *bio) +{ + struct completion *waiting = bio->bi_private; + + complete(waiting); +} + +void pblk_end_io_sync(struct nvm_rq *rqd) +{ + struct completion *waiting = rqd->private; + + complete(waiting); +} + +void pblk_flush_writer(struct pblk *pblk) +{ + struct bio *bio; + int ret; + DECLARE_COMPLETION_ONSTACK(wait); + + bio = bio_alloc(GFP_KERNEL, 1); + if (!bio) + return; + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH); + bio->bi_private = &wait; + bio->bi_end_io = pblk_end_bio_sync; + + ret = pblk_write_to_cache(pblk, bio, 0); + if (ret == NVM_IO_OK) { + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: flush cache timed out\n"); + } + } else if (ret != NVM_IO_DONE) { + pr_err("pblk: tear down bio failed\n"); + } + + if (bio->bi_error) + pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error); + + bio_put(bio); +} + +struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct list_head *move_list = NULL; + + if (!line->vsc) { + if (line->gc_group != PBLK_LINEGC_FULL) { + line->gc_group = PBLK_LINEGC_FULL; + move_list = &l_mg->gc_full_list; + } + } else if (line->vsc < lm->mid_thrs) { + if (line->gc_group != PBLK_LINEGC_HIGH) { + line->gc_group = PBLK_LINEGC_HIGH; + move_list = &l_mg->gc_high_list; + } + } else if (line->vsc < lm->high_thrs) { + if (line->gc_group != PBLK_LINEGC_MID) { + line->gc_group = PBLK_LINEGC_MID; + move_list = &l_mg->gc_mid_list; + } + } else if (line->vsc < line->sec_in_line) { + if (line->gc_group != PBLK_LINEGC_LOW) { + line->gc_group = PBLK_LINEGC_LOW; + move_list = &l_mg->gc_low_list; + } + } else if (line->vsc == line->sec_in_line) { + if (line->gc_group != PBLK_LINEGC_EMPTY) { + line->gc_group = PBLK_LINEGC_EMPTY; + move_list = &l_mg->gc_empty_list; + } + } else { + line->state = PBLK_LINESTATE_CORRUPT; + line->gc_group = PBLK_LINEGC_NONE; + move_list = &l_mg->corrupt_list; + pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", + line->id, line->vsc, + line->sec_in_line, + lm->high_thrs, lm->mid_thrs); + } + + return move_list; +} + +void pblk_discard(struct pblk *pblk, struct bio *bio) +{ + sector_t slba = pblk_get_lba(bio); + sector_t nr_secs = pblk_get_secs(bio); + + pblk_invalidate_range(pblk, slba, nr_secs); +} + +struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba) +{ + struct ppa_addr ppa; + + spin_lock(&pblk->trans_lock); + ppa = pblk_trans_map_get(pblk, lba); + spin_unlock(&pblk->trans_lock); + + return ppa; +} + +void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd) +{ + atomic_long_inc(&pblk->write_failed); +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, rqd, rqd->error); +#endif +} + +void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd) +{ + /* Empty page read is not necessarily an error (e.g., L2P recovery) */ + if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { + atomic_long_inc(&pblk->read_empty); + return; + } + + switch (rqd->error) { + case NVM_RSP_WARN_HIGHECC: + atomic_long_inc(&pblk->read_high_ecc); + break; + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_ERR_FAILCRC: + atomic_long_inc(&pblk->read_failed); + break; + default: + pr_err("pblk: unknown read error:%d\n", rqd->error); + } +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, rqd, rqd->error); +#endif +} + +int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + +#ifdef CONFIG_NVM_DEBUG + struct ppa_addr *ppa_list; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { + WARN_ON(1); + return -EINVAL; + } + + if (rqd->opcode == NVM_OP_PWRITE) { + struct pblk_line *line; + struct ppa_addr ppa; + int i; + + for (i = 0; i < rqd->nr_ppas; i++) { + ppa = ppa_list[i]; + line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + + spin_lock(&line->lock); + if (line->state != PBLK_LINESTATE_OPEN) { + pr_err("pblk: bad ppa: line:%d,state:%d\n", + line->id, line->state); + WARN_ON(1); + spin_unlock(&line->lock); + return -EINVAL; + } + spin_unlock(&line->lock); + } + } +#endif + return nvm_submit_io(dev, rqd); +} + +struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, + unsigned int nr_secs, unsigned int len, + gfp_t gfp_mask) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + void *kaddr = data; + struct page *page; + struct bio *bio; + int i, ret; + + if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META) + return bio_map_kern(dev->q, kaddr, len, gfp_mask); + + bio = bio_kmalloc(gfp_mask, nr_secs); + if (!bio) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nr_secs; i++) { + page = vmalloc_to_page(kaddr); + if (!page) { + pr_err("pblk: could not map vmalloc bio\n"); + bio_put(bio); + bio = ERR_PTR(-ENOMEM); + goto out; + } + + ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0); + if (ret != PAGE_SIZE) { + pr_err("pblk: could not add page to bio\n"); + bio_put(bio); + bio = ERR_PTR(-ENOMEM); + goto out; + } + + kaddr += PAGE_SIZE; + } +out: + return bio; +} + +int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, + unsigned long secs_to_flush) +{ + int max = pblk->max_write_pgs; + int min = pblk->min_write_pgs; + int secs_to_sync = 0; + + if (secs_avail >= max) + secs_to_sync = max; + else if (secs_avail >= min) + secs_to_sync = min * (secs_avail / min); + else if (secs_to_flush) + secs_to_sync = min; + + return secs_to_sync; +} + +static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, + int nr_secs) +{ + u64 addr; + int i; + + /* logic error: ppa out-of-bounds. Prevent generating bad address */ + if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) { + WARN(1, "pblk: page allocation out of bounds\n"); + nr_secs = pblk->lm.sec_per_line - line->cur_sec; + } + + line->cur_sec = addr = find_next_zero_bit(line->map_bitmap, + pblk->lm.sec_per_line, line->cur_sec); + for (i = 0; i < nr_secs; i++, line->cur_sec++) + WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap)); + + return addr; +} + +u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) +{ + u64 addr; + + /* Lock needed in case a write fails and a recovery needs to remap + * failed write buffer entries + */ + spin_lock(&line->lock); + addr = __pblk_alloc_page(pblk, line, nr_secs); + line->left_msecs -= nr_secs; + WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n"); + spin_unlock(&line->lock); + + return addr; +} + +/* + * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when + * taking the per LUN semaphore. + */ +static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, + u64 paddr, int dir) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct bio *bio; + struct nvm_rq rqd; + struct ppa_addr *ppa_list; + dma_addr_t dma_ppa_list; + void *emeta = line->emeta; + int min = pblk->min_write_pgs; + int left_ppas = lm->emeta_sec; + int id = line->id; + int rq_ppas, rq_len; + int cmd_op, bio_op; + int flags; + int i, j; + int ret; + DECLARE_COMPLETION_ONSTACK(wait); + + if (dir == WRITE) { + bio_op = REQ_OP_WRITE; + cmd_op = NVM_OP_PWRITE; + flags = pblk_set_progr_mode(pblk, WRITE); + } else if (dir == READ) { + bio_op = REQ_OP_READ; + cmd_op = NVM_OP_PREAD; + flags = pblk_set_read_mode(pblk); + } else + return -EINVAL; + + ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list); + if (!ppa_list) + return -ENOMEM; + +next_rq: + memset(&rqd, 0, sizeof(struct nvm_rq)); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_len = rq_ppas * geo->sec_size; + + bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + goto free_rqd_dma; + } + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, bio_op, 0); + + rqd.bio = bio; + rqd.opcode = cmd_op; + rqd.flags = flags; + rqd.nr_ppas = rq_ppas; + rqd.ppa_list = ppa_list; + rqd.dma_ppa_list = dma_ppa_list; + rqd.end_io = pblk_end_io_sync; + rqd.private = &wait; + + if (dir == WRITE) { + for (i = 0; i < rqd.nr_ppas; ) { + spin_lock(&line->lock); + paddr = __pblk_alloc_page(pblk, line, min); + spin_unlock(&line->lock); + for (j = 0; j < min; j++, i++, paddr++) + rqd.ppa_list[i] = + addr_to_gen_ppa(pblk, paddr, id); + } + } else { + for (i = 0; i < rqd.nr_ppas; ) { + struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); + int pos = pblk_dev_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + paddr += min; + if (pblk_boundary_paddr_checks(pblk, paddr)) { + pr_err("pblk: corrupt emeta line:%d\n", + line->id); + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + ppa = addr_to_gen_ppa(pblk, paddr, id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + } + + if (pblk_boundary_paddr_checks(pblk, paddr + min)) { + pr_err("pblk: corrupt emeta line:%d\n", + line->id); + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + for (j = 0; j < min; j++, i++, paddr++) + rqd.ppa_list[i] = + addr_to_gen_ppa(pblk, paddr, line->id); + } + } + + ret = pblk_submit_io(pblk, &rqd); + if (ret) { + pr_err("pblk: emeta I/O submission failed: %d\n", ret); + bio_put(bio); + goto free_rqd_dma; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: emeta I/O timed out\n"); + } + reinit_completion(&wait); + + bio_put(bio); + + if (rqd.error) { + if (dir == WRITE) + pblk_log_write_err(pblk, &rqd); + else + pblk_log_read_err(pblk, &rqd); + } + + emeta += rq_len; + left_ppas -= rq_ppas; + if (left_ppas) + goto next_rq; +free_rqd_dma: + nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list); + return ret; +} + +u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + int bit; + + /* This usually only happens on bad lines */ + bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + if (bit >= lm->blk_per_line) + return -1; + + return bit * geo->sec_per_pl; +} + +static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, + u64 paddr, int dir) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_meta *lm = &pblk->lm; + struct bio *bio; + struct nvm_rq rqd; + __le64 *lba_list = NULL; + int i, ret; + int cmd_op, bio_op; + int flags; + DECLARE_COMPLETION_ONSTACK(wait); + + if (dir == WRITE) { + bio_op = REQ_OP_WRITE; + cmd_op = NVM_OP_PWRITE; + flags = pblk_set_progr_mode(pblk, WRITE); + lba_list = pblk_line_emeta_to_lbas(line->emeta); + } else if (dir == READ) { + bio_op = REQ_OP_READ; + cmd_op = NVM_OP_PREAD; + flags = pblk_set_read_mode(pblk); + } else + return -EINVAL; + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd.dma_ppa_list); + if (!rqd.ppa_list) + return -ENOMEM; + + bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + goto free_ppa_list; + } + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, bio_op, 0); + + rqd.bio = bio; + rqd.opcode = cmd_op; + rqd.flags = flags; + rqd.nr_ppas = lm->smeta_sec; + rqd.end_io = pblk_end_io_sync; + rqd.private = &wait; + + for (i = 0; i < lm->smeta_sec; i++, paddr++) { + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); + if (dir == WRITE) + lba_list[paddr] = cpu_to_le64(ADDR_EMPTY); + } + + /* + * This I/O is sent by the write thread when a line is replace. Since + * the write thread is the only one sending write and erase commands, + * there is no need to take the LUN semaphore. + */ + ret = pblk_submit_io(pblk, &rqd); + if (ret) { + pr_err("pblk: smeta I/O submission failed: %d\n", ret); + bio_put(bio); + goto free_ppa_list; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: smeta I/O timed out\n"); + } + + if (rqd.error) { + if (dir == WRITE) + pblk_log_write_err(pblk, &rqd); + else + pblk_log_read_err(pblk, &rqd); + } + +free_ppa_list: + nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list); + + return ret; +} + +int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line) +{ + u64 bpaddr = pblk_line_smeta_start(pblk, line); + + return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ); +} + +int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line) +{ + return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ); +} + +static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, + struct ppa_addr ppa) +{ + rqd->opcode = NVM_OP_ERASE; + rqd->ppa_addr = ppa; + rqd->nr_ppas = 1; + rqd->flags = pblk_set_progr_mode(pblk, ERASE); + rqd->bio = NULL; +} + +static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) +{ + struct nvm_rq rqd; + int ret; + DECLARE_COMPLETION_ONSTACK(wait); + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + pblk_setup_e_rq(pblk, &rqd, ppa); + + rqd.end_io = pblk_end_io_sync; + rqd.private = &wait; + + /* The write thread schedules erases so that it minimizes disturbances + * with writes. Thus, there is no need to take the LUN semaphore. + */ + ret = pblk_submit_io(pblk, &rqd); + if (ret) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + + pr_err("pblk: could not sync erase line:%d,blk:%d\n", + pblk_dev_ppa_to_line(ppa), + pblk_dev_ppa_to_pos(geo, ppa)); + + rqd.error = ret; + goto out; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: sync erase timed out\n"); + } + +out: + rqd.private = pblk; + __pblk_end_io_erase(pblk, &rqd); + + return 0; +} + +int pblk_line_erase(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct ppa_addr ppa; + int bit = -1; + + /* Erase only good blocks, one at a time */ + do { + spin_lock(&line->lock); + bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line, + bit + 1); + if (bit >= lm->blk_per_line) { + spin_unlock(&line->lock); + break; + } + + ppa = pblk->luns[bit].bppa; /* set ch and lun */ + ppa.g.blk = line->id; + + atomic_dec(&line->left_eblks); + WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); + spin_unlock(&line->lock); + + if (pblk_blk_erase_sync(pblk, ppa)) { + pr_err("pblk: failed to erase line %d\n", line->id); + return -ENOMEM; + } + } while (1); + + return 0; +} + +/* For now lines are always assumed full lines. Thus, smeta former and current + * lun bitmaps are omitted. + */ +static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line, + struct pblk_line *cur) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct line_smeta *smeta = line->smeta; + struct line_emeta *emeta = line->emeta; + int nr_blk_line; + + /* After erasing the line, new bad blocks might appear and we risk + * having an invalid line + */ + nr_blk_line = lm->blk_per_line - + bitmap_weight(line->blk_bitmap, lm->blk_per_line); + if (nr_blk_line < lm->min_blk_line) { + spin_lock(&l_mg->free_lock); + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_BAD; + spin_unlock(&line->lock); + + list_add_tail(&line->list, &l_mg->bad_list); + spin_unlock(&l_mg->free_lock); + + pr_debug("pblk: line %d is bad\n", line->id); + + return 0; + } + + /* Run-time metadata */ + line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta); + + /* Mark LUNs allocated in this line (all for now) */ + bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len); + + smeta->header.identifier = cpu_to_le32(PBLK_MAGIC); + memcpy(smeta->header.uuid, pblk->instance_uuid, 16); + smeta->header.id = cpu_to_le32(line->id); + smeta->header.type = cpu_to_le16(line->type); + smeta->header.version = cpu_to_le16(1); + + /* Start metadata */ + smeta->seq_nr = cpu_to_le64(line->seq_nr); + smeta->window_wr_lun = cpu_to_le32(geo->nr_luns); + + /* Fill metadata among lines */ + if (cur) { + memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len); + smeta->prev_id = cpu_to_le32(cur->id); + cur->emeta->next_id = cpu_to_le32(line->id); + } else { + smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY); + } + + /* All smeta must be set at this point */ + smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta)); + smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta)); + + /* End metadata */ + memcpy(&emeta->header, &smeta->header, sizeof(struct line_header)); + emeta->seq_nr = cpu_to_le64(line->seq_nr); + emeta->nr_lbas = cpu_to_le64(line->sec_in_line); + emeta->nr_valid_lbas = cpu_to_le64(0); + emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY); + emeta->crc = cpu_to_le32(0); + emeta->prev_id = smeta->prev_id; + + return 1; +} + +/* For now lines are always assumed full lines. Thus, smeta former and current + * lun bitmaps are omitted. + */ +static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, + int init) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + int nr_bb = 0; + u64 off; + int bit = -1; + + line->sec_in_line = lm->sec_per_line; + + /* Capture bad block information on line mapping bitmaps */ + while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line, + bit + 1)) < lm->blk_per_line) { + off = bit * geo->sec_per_pl; + bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off, + lm->sec_per_line); + bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux, + lm->sec_per_line); + line->sec_in_line -= geo->sec_per_blk; + if (bit >= lm->emeta_bb) + nr_bb++; + } + + /* Mark smeta metadata sectors as bad sectors */ + bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + off = bit * geo->sec_per_pl; +retry_smeta: + bitmap_set(line->map_bitmap, off, lm->smeta_sec); + line->sec_in_line -= lm->smeta_sec; + line->smeta_ssec = off; + line->cur_sec = off + lm->smeta_sec; + + if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) { + pr_debug("pblk: line smeta I/O failed. Retry\n"); + off += geo->sec_per_pl; + goto retry_smeta; + } + + bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); + + /* Mark emeta metadata sectors as bad sectors. We need to consider bad + * blocks to make sure that there are enough sectors to store emeta + */ + bit = lm->sec_per_line; + off = lm->sec_per_line - lm->emeta_sec; + bitmap_set(line->invalid_bitmap, off, lm->emeta_sec); + while (nr_bb) { + off -= geo->sec_per_pl; + if (!test_bit(off, line->invalid_bitmap)) { + bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl); + nr_bb--; + } + } + + line->sec_in_line -= lm->emeta_sec; + line->emeta_ssec = off; + line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line; + + if (lm->sec_per_line - line->sec_in_line != + bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_BAD; + spin_unlock(&line->lock); + + list_add_tail(&line->list, &l_mg->bad_list); + pr_err("pblk: unexpected line %d is bad\n", line->id); + + return 0; + } + + return 1; +} + +static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + int blk_in_line = atomic_read(&line->blk_in_line); + + line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC); + if (!line->map_bitmap) + return -ENOMEM; + memset(line->map_bitmap, 0, lm->sec_bitmap_len); + + /* invalid_bitmap is special since it is used when line is closed. No + * need to zeroized; it will be initialized using bb info form + * map_bitmap + */ + line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC); + if (!line->invalid_bitmap) { + mempool_free(line->map_bitmap, pblk->line_meta_pool); + return -ENOMEM; + } + + spin_lock(&line->lock); + if (line->state != PBLK_LINESTATE_FREE) { + spin_unlock(&line->lock); + WARN(1, "pblk: corrupted line state\n"); + return -EINTR; + } + line->state = PBLK_LINESTATE_OPEN; + + atomic_set(&line->left_eblks, blk_in_line); + atomic_set(&line->left_seblks, blk_in_line); + spin_unlock(&line->lock); + + /* Bad blocks do not need to be erased */ + bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line); + + kref_init(&line->ref); + + return 0; +} + +int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + int ret; + + spin_lock(&l_mg->free_lock); + l_mg->data_line = line; + list_del(&line->list); + + ret = pblk_line_prepare(pblk, line); + if (ret) { + list_add(&line->list, &l_mg->free_list); + spin_unlock(&l_mg->free_lock); + return ret; + } + spin_unlock(&l_mg->free_lock); + + pblk_rl_free_lines_dec(&pblk->rl, line); + + if (!pblk_line_init_bb(pblk, line, 0)) { + list_add(&line->list, &l_mg->free_list); + return -EINTR; + } + + return 0; +} + +void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) +{ + mempool_free(line->map_bitmap, pblk->line_meta_pool); + line->map_bitmap = NULL; + line->smeta = NULL; + line->emeta = NULL; +} + +struct pblk_line *pblk_line_get(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line *line = NULL; + int bit; + + lockdep_assert_held(&l_mg->free_lock); + +retry_get: + if (list_empty(&l_mg->free_list)) { + pr_err("pblk: no free lines\n"); + goto out; + } + + line = list_first_entry(&l_mg->free_list, struct pblk_line, list); + list_del(&line->list); + l_mg->nr_free_lines--; + + bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + if (unlikely(bit >= lm->blk_per_line)) { + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_BAD; + spin_unlock(&line->lock); + + list_add_tail(&line->list, &l_mg->bad_list); + + pr_debug("pblk: line %d is bad\n", line->id); + goto retry_get; + } + + if (pblk_line_prepare(pblk, line)) { + pr_err("pblk: failed to prepare line %d\n", line->id); + list_add(&line->list, &l_mg->free_list); + return NULL; + } + +out: + return line; +} + +static struct pblk_line *pblk_line_retry(struct pblk *pblk, + struct pblk_line *line) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *retry_line; + + spin_lock(&l_mg->free_lock); + retry_line = pblk_line_get(pblk); + if (!retry_line) { + l_mg->data_line = NULL; + spin_unlock(&l_mg->free_lock); + return NULL; + } + + retry_line->smeta = line->smeta; + retry_line->emeta = line->emeta; + retry_line->meta_line = line->meta_line; + + pblk_line_free(pblk, line); + l_mg->data_line = retry_line; + spin_unlock(&l_mg->free_lock); + + if (pblk_line_erase(pblk, retry_line)) { + spin_lock(&l_mg->free_lock); + l_mg->data_line = NULL; + spin_unlock(&l_mg->free_lock); + return NULL; + } + + pblk_rl_free_lines_dec(&pblk->rl, retry_line); + + return retry_line; +} + +struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line; + int meta_line; + int is_next = 0; + + spin_lock(&l_mg->free_lock); + line = pblk_line_get(pblk); + if (!line) { + spin_unlock(&l_mg->free_lock); + return NULL; + } + + line->seq_nr = l_mg->d_seq_nr++; + line->type = PBLK_LINETYPE_DATA; + l_mg->data_line = line; + + meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); + set_bit(meta_line, &l_mg->meta_bitmap); + line->smeta = l_mg->sline_meta[meta_line].meta; + line->emeta = l_mg->eline_meta[meta_line].meta; + line->meta_line = meta_line; + + /* Allocate next line for preparation */ + l_mg->data_next = pblk_line_get(pblk); + if (l_mg->data_next) { + l_mg->data_next->seq_nr = l_mg->d_seq_nr++; + l_mg->data_next->type = PBLK_LINETYPE_DATA; + is_next = 1; + } + spin_unlock(&l_mg->free_lock); + + pblk_rl_free_lines_dec(&pblk->rl, line); + if (is_next) + pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); + + if (pblk_line_erase(pblk, line)) + return NULL; + +retry_setup: + if (!pblk_line_set_metadata(pblk, line, NULL)) { + line = pblk_line_retry(pblk, line); + if (!line) + return NULL; + + goto retry_setup; + } + + if (!pblk_line_init_bb(pblk, line, 1)) { + line = pblk_line_retry(pblk, line); + if (!line) + return NULL; + + goto retry_setup; + } + + return line; +} + +struct pblk_line *pblk_line_replace_data(struct pblk *pblk) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *cur, *new; + unsigned int left_seblks; + int meta_line; + int is_next = 0; + + cur = l_mg->data_line; + new = l_mg->data_next; + if (!new) + return NULL; + l_mg->data_line = new; + +retry_line: + left_seblks = atomic_read(&new->left_seblks); + if (left_seblks) { + /* If line is not fully erased, erase it */ + if (atomic_read(&new->left_eblks)) { + if (pblk_line_erase(pblk, new)) + return NULL; + } else { + io_schedule(); + } + goto retry_line; + } + + spin_lock(&l_mg->free_lock); + /* Allocate next line for preparation */ + l_mg->data_next = pblk_line_get(pblk); + if (l_mg->data_next) { + l_mg->data_next->seq_nr = l_mg->d_seq_nr++; + l_mg->data_next->type = PBLK_LINETYPE_DATA; + is_next = 1; + } + +retry_meta: + meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); + if (meta_line == PBLK_DATA_LINES) { + spin_unlock(&l_mg->free_lock); + io_schedule(); + spin_lock(&l_mg->free_lock); + goto retry_meta; + } + + set_bit(meta_line, &l_mg->meta_bitmap); + new->smeta = l_mg->sline_meta[meta_line].meta; + new->emeta = l_mg->eline_meta[meta_line].meta; + new->meta_line = meta_line; + + memset(new->smeta, 0, lm->smeta_len); + memset(new->emeta, 0, lm->emeta_len); + spin_unlock(&l_mg->free_lock); + + if (is_next) + pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); + +retry_setup: + if (!pblk_line_set_metadata(pblk, new, cur)) { + new = pblk_line_retry(pblk, new); + if (!new) + return NULL; + + goto retry_setup; + } + + if (!pblk_line_init_bb(pblk, new, 1)) { + new = pblk_line_retry(pblk, new); + if (!new) + return NULL; + + goto retry_setup; + } + + return new; +} + +void pblk_line_free(struct pblk *pblk, struct pblk_line *line) +{ + if (line->map_bitmap) + mempool_free(line->map_bitmap, pblk->line_meta_pool); + if (line->invalid_bitmap) + mempool_free(line->invalid_bitmap, pblk->line_meta_pool); + + line->map_bitmap = NULL; + line->invalid_bitmap = NULL; + line->smeta = NULL; + line->emeta = NULL; +} + +void pblk_line_put(struct kref *ref) +{ + struct pblk_line *line = container_of(ref, struct pblk_line, ref); + struct pblk *pblk = line->pblk; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_GC); + line->state = PBLK_LINESTATE_FREE; + line->gc_group = PBLK_LINEGC_NONE; + pblk_line_free(pblk, line); + spin_unlock(&line->lock); + + spin_lock(&l_mg->free_lock); + list_add_tail(&line->list, &l_mg->free_list); + l_mg->nr_free_lines++; + spin_unlock(&l_mg->free_lock); + + pblk_rl_free_lines_inc(&pblk->rl, line); +} + +int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) +{ + struct nvm_rq *rqd; + int err; + + rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL); + memset(rqd, 0, pblk_r_rq_size); + + pblk_setup_e_rq(pblk, rqd, ppa); + + rqd->end_io = pblk_end_io_erase; + rqd->private = pblk; + + /* The write thread schedules erases so that it minimizes disturbances + * with writes. Thus, there is no need to take the LUN semaphore. + */ + err = pblk_submit_io(pblk, rqd); + if (err) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + + pr_err("pblk: could not async erase line:%d,blk:%d\n", + pblk_dev_ppa_to_line(ppa), + pblk_dev_ppa_to_pos(geo, ppa)); + } + + return err; +} + +struct pblk_line *pblk_line_get_data(struct pblk *pblk) +{ + return pblk->l_mg.data_line; +} + +struct pblk_line *pblk_line_get_data_next(struct pblk *pblk) +{ + return pblk->l_mg.data_next; +} + +int pblk_line_is_full(struct pblk_line *line) +{ + return (line->left_msecs == 0); +} + +void pblk_line_close(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct list_head *move_list; + + line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta)); + + if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE)) + pr_err("pblk: line %d close I/O failed\n", line->id); + + WARN(!bitmap_full(line->map_bitmap, line->sec_in_line), + "pblk: corrupt closed line %d\n", line->id); + + spin_lock(&l_mg->free_lock); + WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); + + spin_lock(&l_mg->gc_lock); + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_OPEN); + line->state = PBLK_LINESTATE_CLOSED; + move_list = pblk_line_gc_list(pblk, line); + + list_add_tail(&line->list, move_list); + + mempool_free(line->map_bitmap, pblk->line_meta_pool); + line->map_bitmap = NULL; + line->smeta = NULL; + line->emeta = NULL; + + spin_unlock(&line->lock); + spin_unlock(&l_mg->gc_lock); +} + +void pblk_line_close_ws(struct work_struct *work) +{ + struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, + ws); + struct pblk *pblk = line_ws->pblk; + struct pblk_line *line = line_ws->line; + + pblk_line_close(pblk, line); + mempool_free(line_ws, pblk->line_ws_pool); +} + +void pblk_line_mark_bb(struct work_struct *work) +{ + struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, + ws); + struct pblk *pblk = line_ws->pblk; + struct nvm_tgt_dev *dev = pblk->dev; + struct ppa_addr *ppa = line_ws->priv; + int ret; + + ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); + if (ret) { + struct pblk_line *line; + int pos; + + line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)]; + pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa); + + pr_err("pblk: failed to mark bb, line:%d, pos:%d\n", + line->id, pos); + } + + kfree(ppa); + mempool_free(line_ws, pblk->line_ws_pool); +} + +void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, + void (*work)(struct work_struct *)) +{ + struct pblk_line_ws *line_ws; + + line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC); + if (!line_ws) + return; + + line_ws->pblk = pblk; + line_ws->line = line; + line_ws->priv = priv; + + INIT_WORK(&line_ws->ws, work); + queue_work(pblk->kw_wq, &line_ws->ws); +} + +void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, + unsigned long *lun_bitmap) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_lun *rlun; + int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun; + int ret; + + /* + * Only send one inflight I/O per LUN. Since we map at a page + * granurality, all ppas in the I/O will map to the same LUN + */ +#ifdef CONFIG_NVM_DEBUG + int i; + + for (i = 1; i < nr_ppas; i++) + WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun || + ppa_list[0].g.ch != ppa_list[i].g.ch); +#endif + /* If the LUN has been locked for this same request, do no attempt to + * lock it again + */ + if (test_and_set_bit(lun_id, lun_bitmap)) + return; + + rlun = &pblk->luns[lun_id]; + ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000)); + if (ret) { + switch (ret) { + case -ETIME: + pr_err("pblk: lun semaphore timed out\n"); + break; + case -EINTR: + pr_err("pblk: lun semaphore timed out\n"); + break; + } + } +} + +void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, + unsigned long *lun_bitmap) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_lun *rlun; + int nr_luns = geo->nr_luns; + int bit = -1; + + while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) { + rlun = &pblk->luns[bit]; + up(&rlun->wr_sem); + } + + kfree(lun_bitmap); +} + +void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) +{ + struct ppa_addr l2p_ppa; + + /* logic error: lba out-of-bounds. Ignore update */ + if (!(lba < pblk->rl.nr_secs)) { + WARN(1, "pblk: corrupted L2P map request\n"); + return; + } + + spin_lock(&pblk->trans_lock); + l2p_ppa = pblk_trans_map_get(pblk, lba); + + if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa)) + pblk_map_invalidate(pblk, l2p_ppa); + + pblk_trans_map_set(pblk, lba, ppa); + spin_unlock(&pblk->trans_lock); +} + +void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) +{ +#ifdef CONFIG_NVM_DEBUG + /* Callers must ensure that the ppa points to a cache address */ + BUG_ON(!pblk_addr_in_cache(ppa)); + BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa))); +#endif + + pblk_update_map(pblk, lba, ppa); +} + +int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, + struct pblk_line *gc_line) +{ + struct ppa_addr l2p_ppa; + int ret = 1; + +#ifdef CONFIG_NVM_DEBUG + /* Callers must ensure that the ppa points to a cache address */ + BUG_ON(!pblk_addr_in_cache(ppa)); + BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa))); +#endif + + /* logic error: lba out-of-bounds. Ignore update */ + if (!(lba < pblk->rl.nr_secs)) { + WARN(1, "pblk: corrupted L2P map request\n"); + return 0; + } + + spin_lock(&pblk->trans_lock); + l2p_ppa = pblk_trans_map_get(pblk, lba); + + /* Prevent updated entries to be overwritten by GC */ + if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) || + pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) { + ret = 0; + goto out; + } + + pblk_trans_map_set(pblk, lba, ppa); +out: + spin_unlock(&pblk->trans_lock); + return ret; +} + +void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, + struct ppa_addr entry_line) +{ + struct ppa_addr l2p_line; + +#ifdef CONFIG_NVM_DEBUG + /* Callers must ensure that the ppa points to a device address */ + BUG_ON(pblk_addr_in_cache(ppa)); +#endif + /* Invalidate and discard padded entries */ + if (lba == ADDR_EMPTY) { +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->padded_wb); +#endif + pblk_map_invalidate(pblk, ppa); + return; + } + + /* logic error: lba out-of-bounds. Ignore update */ + if (!(lba < pblk->rl.nr_secs)) { + WARN(1, "pblk: corrupted L2P map request\n"); + return; + } + + spin_lock(&pblk->trans_lock); + l2p_line = pblk_trans_map_get(pblk, lba); + + /* Do not update L2P if the cacheline has been updated. In this case, + * the mapped ppa must be invalidated + */ + if (l2p_line.ppa != entry_line.ppa) { + if (!pblk_ppa_empty(ppa)) + pblk_map_invalidate(pblk, ppa); + goto out; + } + +#ifdef CONFIG_NVM_DEBUG + WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line)); +#endif + + pblk_trans_map_set(pblk, lba, ppa); +out: + spin_unlock(&pblk->trans_lock); +} + +void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, + sector_t blba, int nr_secs) +{ + int i; + + spin_lock(&pblk->trans_lock); + for (i = 0; i < nr_secs; i++) + ppas[i] = pblk_trans_map_get(pblk, blba + i); + spin_unlock(&pblk->trans_lock); +} + +void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, + u64 *lba_list, int nr_secs) +{ + sector_t lba; + int i; + + spin_lock(&pblk->trans_lock); + for (i = 0; i < nr_secs; i++) { + lba = lba_list[i]; + if (lba == ADDR_EMPTY) { + ppas[i].ppa = ADDR_EMPTY; + } else { + /* logic error: lba out-of-bounds. Ignore update */ + if (!(lba < pblk->rl.nr_secs)) { + WARN(1, "pblk: corrupted L2P map request\n"); + continue; + } + ppas[i] = pblk_trans_map_get(pblk, lba); + } + } + spin_unlock(&pblk->trans_lock); +} diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c new file mode 100644 index 000000000000..eaf479c6b63c --- /dev/null +++ b/drivers/lightnvm/pblk-gc.c @@ -0,0 +1,555 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-gc.c - pblk's garbage collector + */ + +#include "pblk.h" +#include <linux/delay.h> + +static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) +{ + kfree(gc_rq->data); + kfree(gc_rq->lba_list); + kfree(gc_rq); +} + +static int pblk_gc_write(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + struct pblk_gc_rq *gc_rq, *tgc_rq; + LIST_HEAD(w_list); + + spin_lock(&gc->w_lock); + if (list_empty(&gc->w_list)) { + spin_unlock(&gc->w_lock); + return 1; + } + + list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) { + list_move_tail(&gc_rq->list, &w_list); + gc->w_entries--; + } + spin_unlock(&gc->w_lock); + + list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) { + pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list, + gc_rq->nr_secs, gc_rq->secs_to_gc, + gc_rq->line, PBLK_IOTYPE_GC); + + kref_put(&gc_rq->line->ref, pblk_line_put); + + list_del(&gc_rq->list); + pblk_gc_free_gc_rq(gc_rq); + } + + return 0; +} + +static void pblk_gc_writer_kick(struct pblk_gc *gc) +{ + wake_up_process(gc->gc_writer_ts); +} + +/* + * Responsible for managing all memory related to a gc request. Also in case of + * failure + */ +static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line, + u64 *lba_list, unsigned int nr_secs) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_gc *gc = &pblk->gc; + struct pblk_gc_rq *gc_rq; + void *data; + unsigned int secs_to_gc; + int ret = NVM_IO_OK; + + data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL); + if (!data) { + ret = NVM_IO_ERR; + goto free_lba_list; + } + + /* Read from GC victim block */ + if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs, + &secs_to_gc, line)) { + ret = NVM_IO_ERR; + goto free_data; + } + + if (!secs_to_gc) + goto free_data; + + gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL); + if (!gc_rq) { + ret = NVM_IO_ERR; + goto free_data; + } + + gc_rq->line = line; + gc_rq->data = data; + gc_rq->lba_list = lba_list; + gc_rq->nr_secs = nr_secs; + gc_rq->secs_to_gc = secs_to_gc; + + kref_get(&line->ref); + +retry: + spin_lock(&gc->w_lock); + if (gc->w_entries > 256) { + spin_unlock(&gc->w_lock); + usleep_range(256, 1024); + goto retry; + } + gc->w_entries++; + list_add_tail(&gc_rq->list, &gc->w_list); + spin_unlock(&gc->w_lock); + + pblk_gc_writer_kick(&pblk->gc); + + return NVM_IO_OK; + +free_data: + kfree(data); +free_lba_list: + kfree(lba_list); + + return ret; +} + +static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct list_head *move_list; + + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_GC); + line->state = PBLK_LINESTATE_CLOSED; + move_list = pblk_line_gc_list(pblk, line); + spin_unlock(&line->lock); + + if (move_list) { + spin_lock(&l_mg->gc_lock); + list_add_tail(&line->list, move_list); + spin_unlock(&l_mg->gc_lock); + } +} + +static void pblk_gc_line_ws(struct work_struct *work) +{ + struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, + ws); + struct pblk *pblk = line_ws->pblk; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line = line_ws->line; + struct pblk_line_meta *lm = &pblk->lm; + __le64 *lba_list = line_ws->priv; + u64 *gc_list; + int sec_left; + int nr_ppas, bit; + int put_line = 1; + + pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id); + + spin_lock(&line->lock); + sec_left = line->vsc; + if (!sec_left) { + /* Lines are erased before being used (l_mg->data_/log_next) */ + spin_unlock(&line->lock); + goto out; + } + spin_unlock(&line->lock); + + if (sec_left < 0) { + pr_err("pblk: corrupted GC line (%d)\n", line->id); + put_line = 0; + pblk_put_line_back(pblk, line); + goto out; + } + + bit = -1; +next_rq: + gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL); + if (!gc_list) { + put_line = 0; + pblk_put_line_back(pblk, line); + goto out; + } + + nr_ppas = 0; + do { + bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line, + bit + 1); + if (bit > line->emeta_ssec) + break; + + gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]); + } while (nr_ppas < pblk->max_write_pgs); + + if (unlikely(!nr_ppas)) { + kfree(gc_list); + goto out; + } + + if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) { + pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n", + line->id, line->vsc, + nr_ppas, nr_ppas); + put_line = 0; + pblk_put_line_back(pblk, line); + goto out; + } + + sec_left -= nr_ppas; + if (sec_left > 0) + goto next_rq; + +out: + pblk_mfree(line->emeta, l_mg->emeta_alloc_type); + mempool_free(line_ws, pblk->line_ws_pool); + atomic_dec(&pblk->gc.inflight_gc); + if (put_line) + kref_put(&line->ref, pblk_line_put); +} + +static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_ws *line_ws; + __le64 *lba_list; + int ret; + + line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL); + line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type, + GFP_KERNEL); + if (!line->emeta) { + pr_err("pblk: cannot use GC emeta\n"); + goto fail_free_ws; + } + + ret = pblk_line_read_emeta(pblk, line); + if (ret) { + pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret); + goto fail_free_emeta; + } + + /* If this read fails, it means that emeta is corrupted. For now, leave + * the line untouched. TODO: Implement a recovery routine that scans and + * moves all sectors on the line. + */ + lba_list = pblk_recov_get_lba_list(pblk, line->emeta); + if (!lba_list) { + pr_err("pblk: could not interpret emeta (line %d)\n", line->id); + goto fail_free_emeta; + } + + line_ws->pblk = pblk; + line_ws->line = line; + line_ws->priv = lba_list; + + INIT_WORK(&line_ws->ws, pblk_gc_line_ws); + queue_work(pblk->gc.gc_reader_wq, &line_ws->ws); + + return 0; + +fail_free_emeta: + pblk_mfree(line->emeta, l_mg->emeta_alloc_type); +fail_free_ws: + mempool_free(line_ws, pblk->line_ws_pool); + pblk_put_line_back(pblk, line); + + return 1; +} + +static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list) +{ + struct pblk_line *line, *tline; + + list_for_each_entry_safe(line, tline, gc_list, list) { + if (pblk_gc_line(pblk, line)) + pr_err("pblk: failed to GC line %d\n", line->id); + list_del(&line->list); + } +} + +/* + * Lines with no valid sectors will be returned to the free list immediately. If + * GC is activated - either because the free block count is under the determined + * threshold, or because it is being forced from user space - only lines with a + * high count of invalid sectors will be recycled. + */ +static void pblk_gc_run(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_gc *gc = &pblk->gc; + struct pblk_line *line, *tline; + unsigned int nr_blocks_free, nr_blocks_need; + struct list_head *group_list; + int run_gc, gc_group = 0; + int prev_gc = 0; + int inflight_gc = atomic_read(&gc->inflight_gc); + LIST_HEAD(gc_list); + + spin_lock(&l_mg->gc_lock); + list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) { + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_CLOSED); + line->state = PBLK_LINESTATE_GC; + spin_unlock(&line->lock); + + list_del(&line->list); + kref_put(&line->ref, pblk_line_put); + } + spin_unlock(&l_mg->gc_lock); + + nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl); + nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl); + run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); + +next_gc_group: + group_list = l_mg->gc_lists[gc_group++]; + spin_lock(&l_mg->gc_lock); + while (run_gc && !list_empty(group_list)) { + /* No need to queue up more GC lines than we can handle */ + if (!run_gc || inflight_gc > gc->gc_jobs_active) { + spin_unlock(&l_mg->gc_lock); + pblk_gc_lines(pblk, &gc_list); + return; + } + + line = list_first_entry(group_list, struct pblk_line, list); + nr_blocks_free += atomic_read(&line->blk_in_line); + + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_CLOSED); + line->state = PBLK_LINESTATE_GC; + list_move_tail(&line->list, &gc_list); + atomic_inc(&gc->inflight_gc); + inflight_gc++; + spin_unlock(&line->lock); + + prev_gc = 1; + run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); + } + spin_unlock(&l_mg->gc_lock); + + pblk_gc_lines(pblk, &gc_list); + + if (!prev_gc && pblk->rl.rb_state > gc_group && + gc_group < PBLK_NR_GC_LISTS) + goto next_gc_group; +} + + +static void pblk_gc_kick(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + wake_up_process(gc->gc_ts); + pblk_gc_writer_kick(gc); + mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); +} + +static void pblk_gc_timer(unsigned long data) +{ + struct pblk *pblk = (struct pblk *)data; + + pblk_gc_kick(pblk); +} + +static int pblk_gc_ts(void *data) +{ + struct pblk *pblk = data; + + while (!kthread_should_stop()) { + pblk_gc_run(pblk); + set_current_state(TASK_INTERRUPTIBLE); + io_schedule(); + } + + return 0; +} + +static int pblk_gc_writer_ts(void *data) +{ + struct pblk *pblk = data; + + while (!kthread_should_stop()) { + if (!pblk_gc_write(pblk)) + continue; + set_current_state(TASK_INTERRUPTIBLE); + io_schedule(); + } + + return 0; +} + +static void pblk_gc_start(struct pblk *pblk) +{ + pblk->gc.gc_active = 1; + + pr_debug("pblk: gc start\n"); +} + +int pblk_gc_status(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + int ret; + + spin_lock(&gc->lock); + ret = gc->gc_active; + spin_unlock(&gc->lock); + + return ret; +} + +static void __pblk_gc_should_start(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + lockdep_assert_held(&gc->lock); + + if (gc->gc_enabled && !gc->gc_active) + pblk_gc_start(pblk); +} + +void pblk_gc_should_start(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + spin_lock(&gc->lock); + __pblk_gc_should_start(pblk); + spin_unlock(&gc->lock); +} + +/* + * If flush_wq == 1 then no lock should be held by the caller since + * flush_workqueue can sleep + */ +static void pblk_gc_stop(struct pblk *pblk, int flush_wq) +{ + spin_lock(&pblk->gc.lock); + pblk->gc.gc_active = 0; + spin_unlock(&pblk->gc.lock); + + pr_debug("pblk: gc stop\n"); +} + +void pblk_gc_should_stop(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + if (gc->gc_active && !gc->gc_forced) + pblk_gc_stop(pblk, 0); +} + +void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, + int *gc_active) +{ + struct pblk_gc *gc = &pblk->gc; + + spin_lock(&gc->lock); + *gc_enabled = gc->gc_enabled; + *gc_active = gc->gc_active; + spin_unlock(&gc->lock); +} + +void pblk_gc_sysfs_force(struct pblk *pblk, int force) +{ + struct pblk_gc *gc = &pblk->gc; + int rsv = 0; + + spin_lock(&gc->lock); + if (force) { + gc->gc_enabled = 1; + rsv = 64; + } + pblk_rl_set_gc_rsc(&pblk->rl, rsv); + gc->gc_forced = force; + __pblk_gc_should_start(pblk); + spin_unlock(&gc->lock); +} + +int pblk_gc_init(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + int ret; + + gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts"); + if (IS_ERR(gc->gc_ts)) { + pr_err("pblk: could not allocate GC main kthread\n"); + return PTR_ERR(gc->gc_ts); + } + + gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk, + "pblk-gc-writer-ts"); + if (IS_ERR(gc->gc_writer_ts)) { + pr_err("pblk: could not allocate GC writer kthread\n"); + ret = PTR_ERR(gc->gc_writer_ts); + goto fail_free_main_kthread; + } + + setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk); + mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); + + gc->gc_active = 0; + gc->gc_forced = 0; + gc->gc_enabled = 1; + gc->gc_jobs_active = 8; + gc->w_entries = 0; + atomic_set(&gc->inflight_gc, 0); + + gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active); + if (!gc->gc_reader_wq) { + pr_err("pblk: could not allocate GC reader workqueue\n"); + ret = -ENOMEM; + goto fail_free_writer_kthread; + } + + spin_lock_init(&gc->lock); + spin_lock_init(&gc->w_lock); + INIT_LIST_HEAD(&gc->w_list); + + return 0; + +fail_free_writer_kthread: + kthread_stop(gc->gc_writer_ts); +fail_free_main_kthread: + kthread_stop(gc->gc_ts); + + return ret; +} + +void pblk_gc_exit(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + flush_workqueue(gc->gc_reader_wq); + + del_timer(&gc->gc_timer); + pblk_gc_stop(pblk, 1); + + if (gc->gc_ts) + kthread_stop(gc->gc_ts); + + if (pblk->gc.gc_reader_wq) + destroy_workqueue(pblk->gc.gc_reader_wq); + + if (gc->gc_writer_ts) + kthread_stop(gc->gc_writer_ts); +} diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c new file mode 100644 index 000000000000..ae8cd6d5af8b --- /dev/null +++ b/drivers/lightnvm/pblk-init.c @@ -0,0 +1,962 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Implementation of a physical block-device target for Open-channel SSDs. + * + * pblk-init.c - pblk's initialization. + */ + +#include "pblk.h" + +static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache, + *pblk_w_rq_cache, *pblk_line_meta_cache; +static DECLARE_RWSEM(pblk_lock); + +static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, + struct bio *bio) +{ + int ret; + + /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap + * constraint. Writes can be of arbitrary size. + */ + if (bio_data_dir(bio) == READ) { + blk_queue_split(q, &bio, q->bio_split); + ret = pblk_submit_read(pblk, bio); + if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) + bio_put(bio); + + return ret; + } + + /* Prevent deadlock in the case of a modest LUN configuration and large + * user I/Os. Unless stalled, the rate limiter leaves at least 256KB + * available for user I/O. + */ + if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) + blk_queue_split(q, &bio, q->bio_split); + + return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); +} + +static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) +{ + struct pblk *pblk = q->queuedata; + + if (bio_op(bio) == REQ_OP_DISCARD) { + pblk_discard(pblk, bio); + if (!(bio->bi_opf & REQ_PREFLUSH)) { + bio_endio(bio); + return BLK_QC_T_NONE; + } + } + + switch (pblk_rw_io(q, pblk, bio)) { + case NVM_IO_ERR: + bio_io_error(bio); + break; + case NVM_IO_DONE: + bio_endio(bio); + break; + } + + return BLK_QC_T_NONE; +} + +static void pblk_l2p_free(struct pblk *pblk) +{ + vfree(pblk->trans_map); +} + +static int pblk_l2p_init(struct pblk *pblk) +{ + sector_t i; + struct ppa_addr ppa; + int entry_size = 8; + + if (pblk->ppaf_bitsize < 32) + entry_size = 4; + + pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs); + if (!pblk->trans_map) + return -ENOMEM; + + pblk_ppa_set_empty(&ppa); + + for (i = 0; i < pblk->rl.nr_secs; i++) + pblk_trans_map_set(pblk, i, ppa); + + return 0; +} + +static void pblk_rwb_free(struct pblk *pblk) +{ + if (pblk_rb_tear_down_check(&pblk->rwb)) + pr_err("pblk: write buffer error on tear down\n"); + + pblk_rb_data_free(&pblk->rwb); + vfree(pblk_rb_entries_ref(&pblk->rwb)); +} + +static int pblk_rwb_init(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_rb_entry *entries; + unsigned long nr_entries; + unsigned int power_size, power_seg_sz; + + nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer); + + entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry)); + if (!entries) + return -ENOMEM; + + power_size = get_count_order(nr_entries); + power_seg_sz = get_count_order(geo->sec_size); + + return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); +} + +/* Minimum pages needed within a lun */ +#define PAGE_POOL_SIZE 16 +#define ADDR_POOL_SIZE 64 + +static int pblk_set_ppaf(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_addr_format ppaf = geo->ppaf; + int power_len; + + /* Re-calculate channel and lun format to adapt to configuration */ + power_len = get_count_order(geo->nr_chnls); + if (1 << power_len != geo->nr_chnls) { + pr_err("pblk: supports only power-of-two channel config.\n"); + return -EINVAL; + } + ppaf.ch_len = power_len; + + power_len = get_count_order(geo->luns_per_chnl); + if (1 << power_len != geo->luns_per_chnl) { + pr_err("pblk: supports only power-of-two LUN config.\n"); + return -EINVAL; + } + ppaf.lun_len = power_len; + + pblk->ppaf.sec_offset = 0; + pblk->ppaf.pln_offset = ppaf.sect_len; + pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len; + pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len; + pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len; + pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len; + pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1; + pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) << + pblk->ppaf.pln_offset; + pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) << + pblk->ppaf.ch_offset; + pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) << + pblk->ppaf.lun_offset; + pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) << + pblk->ppaf.pg_offset; + pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) << + pblk->ppaf.blk_offset; + + pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len; + + return 0; +} + +static int pblk_init_global_caches(struct pblk *pblk) +{ + char cache_name[PBLK_CACHE_NAME_LEN]; + + down_write(&pblk_lock); + pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws", + sizeof(struct pblk_line_ws), 0, 0, NULL); + if (!pblk_blk_ws_cache) { + up_write(&pblk_lock); + return -ENOMEM; + } + + pblk_rec_cache = kmem_cache_create("pblk_rec", + sizeof(struct pblk_rec_ctx), 0, 0, NULL); + if (!pblk_rec_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + up_write(&pblk_lock); + return -ENOMEM; + } + + pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size, + 0, 0, NULL); + if (!pblk_r_rq_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + up_write(&pblk_lock); + return -ENOMEM; + } + + pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, + 0, 0, NULL); + if (!pblk_w_rq_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_r_rq_cache); + up_write(&pblk_lock); + return -ENOMEM; + } + + snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s", + pblk->disk->disk_name); + pblk_line_meta_cache = kmem_cache_create(cache_name, + pblk->lm.sec_bitmap_len, 0, 0, NULL); + if (!pblk_line_meta_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_r_rq_cache); + kmem_cache_destroy(pblk_w_rq_cache); + up_write(&pblk_lock); + return -ENOMEM; + } + up_write(&pblk_lock); + + return 0; +} + +static int pblk_core_init(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int max_write_ppas; + int mod; + + pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE); + max_write_ppas = pblk->min_write_pgs * geo->nr_luns; + pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ? + max_write_ppas : nvm_max_phys_sects(dev); + pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * + geo->nr_planes * geo->nr_luns; + + if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { + pr_err("pblk: cannot support device max_phys_sect\n"); + return -EINVAL; + } + + div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod); + if (mod) { + pr_err("pblk: bad configuration of sectors/pages\n"); + return -EINVAL; + } + + if (pblk_init_global_caches(pblk)) + return -ENOMEM; + + pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); + if (!pblk->page_pool) + return -ENOMEM; + + pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns, + pblk_blk_ws_cache); + if (!pblk->line_ws_pool) + goto free_page_pool; + + pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); + if (!pblk->rec_pool) + goto free_blk_ws_pool; + + pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache); + if (!pblk->r_rq_pool) + goto free_rec_pool; + + pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache); + if (!pblk->w_rq_pool) + goto free_r_rq_pool; + + pblk->line_meta_pool = + mempool_create_slab_pool(16, pblk_line_meta_cache); + if (!pblk->line_meta_pool) + goto free_w_rq_pool; + + pblk->kw_wq = alloc_workqueue("pblk-aux-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + if (!pblk->kw_wq) + goto free_line_meta_pool; + + if (pblk_set_ppaf(pblk)) + goto free_kw_wq; + + if (pblk_rwb_init(pblk)) + goto free_kw_wq; + + INIT_LIST_HEAD(&pblk->compl_list); + return 0; + +free_kw_wq: + destroy_workqueue(pblk->kw_wq); +free_line_meta_pool: + mempool_destroy(pblk->line_meta_pool); +free_w_rq_pool: + mempool_destroy(pblk->w_rq_pool); +free_r_rq_pool: + mempool_destroy(pblk->r_rq_pool); +free_rec_pool: + mempool_destroy(pblk->rec_pool); +free_blk_ws_pool: + mempool_destroy(pblk->line_ws_pool); +free_page_pool: + mempool_destroy(pblk->page_pool); + return -ENOMEM; +} + +static void pblk_core_free(struct pblk *pblk) +{ + if (pblk->kw_wq) + destroy_workqueue(pblk->kw_wq); + + mempool_destroy(pblk->page_pool); + mempool_destroy(pblk->line_ws_pool); + mempool_destroy(pblk->rec_pool); + mempool_destroy(pblk->r_rq_pool); + mempool_destroy(pblk->w_rq_pool); + mempool_destroy(pblk->line_meta_pool); + + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_r_rq_cache); + kmem_cache_destroy(pblk_w_rq_cache); + kmem_cache_destroy(pblk_line_meta_cache); +} + +static void pblk_luns_free(struct pblk *pblk) +{ + kfree(pblk->luns); +} + +static void pblk_lines_free(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line; + int i; + + spin_lock(&l_mg->free_lock); + for (i = 0; i < l_mg->nr_lines; i++) { + line = &pblk->lines[i]; + + pblk_line_free(pblk, line); + kfree(line->blk_bitmap); + kfree(line->erase_bitmap); + } + spin_unlock(&l_mg->free_lock); +} + +static void pblk_line_meta_free(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + int i; + + kfree(l_mg->bb_template); + kfree(l_mg->bb_aux); + + for (i = 0; i < PBLK_DATA_LINES; i++) { + pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); + pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); + } + + kfree(pblk->lines); +} + +static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) +{ + struct nvm_geo *geo = &dev->geo; + struct ppa_addr ppa; + u8 *blks; + int nr_blks, ret; + + nr_blks = geo->blks_per_lun * geo->plane_mode; + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; + + ppa.ppa = 0; + ppa.g.ch = rlun->bppa.g.ch; + ppa.g.lun = rlun->bppa.g.lun; + + ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); + if (ret) + goto out; + + nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); + if (nr_blks < 0) { + ret = nr_blks; + goto out; + } + + rlun->bb_list = blks; + + return 0; +out: + kfree(blks); + return ret; +} + +static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_lun *rlun; + int bb_cnt = 0; + int i; + + line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); + if (!line->blk_bitmap) + return -ENOMEM; + + line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); + if (!line->erase_bitmap) { + kfree(line->blk_bitmap); + return -ENOMEM; + } + + for (i = 0; i < lm->blk_per_line; i++) { + rlun = &pblk->luns[i]; + if (rlun->bb_list[line->id] == NVM_BLK_T_FREE) + continue; + + set_bit(i, line->blk_bitmap); + bb_cnt++; + } + + return bb_cnt; +} + +static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_lun *rlun; + int i, ret; + + /* TODO: Implement unbalanced LUN support */ + if (geo->luns_per_chnl < 0) { + pr_err("pblk: unbalanced LUN config.\n"); + return -EINVAL; + } + + pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL); + if (!pblk->luns) + return -ENOMEM; + + for (i = 0; i < geo->nr_luns; i++) { + /* Stripe across channels */ + int ch = i % geo->nr_chnls; + int lun_raw = i / geo->nr_chnls; + int lunid = lun_raw + ch * geo->luns_per_chnl; + + rlun = &pblk->luns[i]; + rlun->bppa = luns[lunid]; + + sema_init(&rlun->wr_sem, 1); + + ret = pblk_bb_discovery(dev, rlun); + if (ret) { + while (--i >= 0) + kfree(pblk->luns[i].bb_list); + return ret; + } + } + + return 0; +} + +static int pblk_lines_configure(struct pblk *pblk, int flags) +{ + struct pblk_line *line = NULL; + int ret = 0; + + if (!(flags & NVM_TARGET_FACTORY)) { + line = pblk_recov_l2p(pblk); + if (IS_ERR(line)) { + pr_err("pblk: could not recover l2p table\n"); + ret = -EFAULT; + } + } + + if (!line) { + /* Configure next line for user data */ + line = pblk_line_get_first_data(pblk); + if (!line) { + pr_err("pblk: line list corrupted\n"); + ret = -EFAULT; + } + } + + return ret; +} + +/* See comment over struct line_emeta definition */ +static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm) +{ + return (sizeof(struct line_emeta) + + ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) + + (pblk->l_mg.nr_lines * sizeof(u32)) + + lm->blk_bitmap_len); +} + +static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + sector_t provisioned; + + pblk->over_pct = 20; + + provisioned = nr_free_blks; + provisioned *= (100 - pblk->over_pct); + sector_div(provisioned, 100); + + /* Internally pblk manages all free blocks, but all calculations based + * on user capacity consider only provisioned blocks + */ + pblk->rl.total_blocks = nr_free_blks; + pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk; + pblk->capacity = provisioned * geo->sec_per_blk; + atomic_set(&pblk->rl.free_blocks, nr_free_blks); +} + +static int pblk_lines_init(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line *line; + unsigned int smeta_len, emeta_len; + long nr_bad_blks, nr_meta_blks, nr_free_blks; + int bb_distance; + int i; + int ret; + + lm->sec_per_line = geo->sec_per_blk * geo->nr_luns; + lm->blk_per_line = geo->nr_luns; + lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); + lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); + lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); + lm->high_thrs = lm->sec_per_line / 2; + lm->mid_thrs = lm->sec_per_line / 4; + + /* Calculate necessary pages for smeta. See comment over struct + * line_smeta definition + */ + lm->smeta_len = sizeof(struct line_smeta) + + PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; + + i = 1; +add_smeta_page: + lm->smeta_sec = i * geo->sec_per_pl; + lm->smeta_len = lm->smeta_sec * geo->sec_size; + + smeta_len = sizeof(struct line_smeta) + + PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; + if (smeta_len > lm->smeta_len) { + i++; + goto add_smeta_page; + } + + /* Calculate necessary pages for emeta. See comment over struct + * line_emeta definition + */ + i = 1; +add_emeta_page: + lm->emeta_sec = i * geo->sec_per_pl; + lm->emeta_len = lm->emeta_sec * geo->sec_size; + + emeta_len = calc_emeta_len(pblk, lm); + if (emeta_len > lm->emeta_len) { + i++; + goto add_emeta_page; + } + lm->emeta_bb = geo->nr_luns - i; + + nr_meta_blks = (lm->smeta_sec + lm->emeta_sec + + (geo->sec_per_blk / 2)) / geo->sec_per_blk; + lm->min_blk_line = nr_meta_blks + 1; + + l_mg->nr_lines = geo->blks_per_lun; + l_mg->log_line = l_mg->data_line = NULL; + l_mg->l_seq_nr = l_mg->d_seq_nr = 0; + l_mg->nr_free_lines = 0; + bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); + + /* smeta is always small enough to fit on a kmalloc memory allocation, + * emeta depends on the number of LUNs allocated to the pblk instance + */ + l_mg->smeta_alloc_type = PBLK_KMALLOC_META; + for (i = 0; i < PBLK_DATA_LINES; i++) { + l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL); + if (!l_mg->sline_meta[i].meta) + while (--i >= 0) { + kfree(l_mg->sline_meta[i].meta); + ret = -ENOMEM; + goto fail; + } + } + + if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) { + l_mg->emeta_alloc_type = PBLK_VMALLOC_META; + + for (i = 0; i < PBLK_DATA_LINES; i++) { + l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len); + if (!l_mg->eline_meta[i].meta) + while (--i >= 0) { + vfree(l_mg->eline_meta[i].meta); + ret = -ENOMEM; + goto fail; + } + } + } else { + l_mg->emeta_alloc_type = PBLK_KMALLOC_META; + + for (i = 0; i < PBLK_DATA_LINES; i++) { + l_mg->eline_meta[i].meta = + kmalloc(lm->emeta_len, GFP_KERNEL); + if (!l_mg->eline_meta[i].meta) + while (--i >= 0) { + kfree(l_mg->eline_meta[i].meta); + ret = -ENOMEM; + goto fail; + } + } + } + + l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!l_mg->bb_template) { + ret = -ENOMEM; + goto fail_free_meta; + } + + l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!l_mg->bb_aux) { + ret = -ENOMEM; + goto fail_free_bb_template; + } + + bb_distance = (geo->nr_luns) * geo->sec_per_pl; + for (i = 0; i < lm->sec_per_line; i += bb_distance) + bitmap_set(l_mg->bb_template, i, geo->sec_per_pl); + + INIT_LIST_HEAD(&l_mg->free_list); + INIT_LIST_HEAD(&l_mg->corrupt_list); + INIT_LIST_HEAD(&l_mg->bad_list); + INIT_LIST_HEAD(&l_mg->gc_full_list); + INIT_LIST_HEAD(&l_mg->gc_high_list); + INIT_LIST_HEAD(&l_mg->gc_mid_list); + INIT_LIST_HEAD(&l_mg->gc_low_list); + INIT_LIST_HEAD(&l_mg->gc_empty_list); + + l_mg->gc_lists[0] = &l_mg->gc_high_list; + l_mg->gc_lists[1] = &l_mg->gc_mid_list; + l_mg->gc_lists[2] = &l_mg->gc_low_list; + + spin_lock_init(&l_mg->free_lock); + spin_lock_init(&l_mg->gc_lock); + + pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), + GFP_KERNEL); + if (!pblk->lines) { + ret = -ENOMEM; + goto fail_free_bb_aux; + } + + nr_free_blks = 0; + for (i = 0; i < l_mg->nr_lines; i++) { + int blk_in_line; + + line = &pblk->lines[i]; + + line->pblk = pblk; + line->id = i; + line->type = PBLK_LINETYPE_FREE; + line->state = PBLK_LINESTATE_FREE; + line->gc_group = PBLK_LINEGC_NONE; + spin_lock_init(&line->lock); + + nr_bad_blks = pblk_bb_line(pblk, line); + if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) { + ret = -EINVAL; + goto fail_free_lines; + } + + blk_in_line = lm->blk_per_line - nr_bad_blks; + if (blk_in_line < lm->min_blk_line) { + line->state = PBLK_LINESTATE_BAD; + list_add_tail(&line->list, &l_mg->bad_list); + continue; + } + + nr_free_blks += blk_in_line; + atomic_set(&line->blk_in_line, blk_in_line); + + l_mg->nr_free_lines++; + list_add_tail(&line->list, &l_mg->free_list); + } + + pblk_set_provision(pblk, nr_free_blks); + + sema_init(&pblk->erase_sem, 1); + + /* Cleanup per-LUN bad block lists - managed within lines on run-time */ + for (i = 0; i < geo->nr_luns; i++) + kfree(pblk->luns[i].bb_list); + + return 0; +fail_free_lines: + kfree(pblk->lines); +fail_free_bb_aux: + kfree(l_mg->bb_aux); +fail_free_bb_template: + kfree(l_mg->bb_template); +fail_free_meta: + for (i = 0; i < PBLK_DATA_LINES; i++) { + pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); + pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); + } +fail: + for (i = 0; i < geo->nr_luns; i++) + kfree(pblk->luns[i].bb_list); + + return ret; +} + +static int pblk_writer_init(struct pblk *pblk) +{ + setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk); + mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); + + pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t"); + if (IS_ERR(pblk->writer_ts)) { + pr_err("pblk: could not allocate writer kthread\n"); + return PTR_ERR(pblk->writer_ts); + } + + return 0; +} + +static void pblk_writer_stop(struct pblk *pblk) +{ + if (pblk->writer_ts) + kthread_stop(pblk->writer_ts); + del_timer(&pblk->wtimer); +} + +static void pblk_free(struct pblk *pblk) +{ + pblk_luns_free(pblk); + pblk_lines_free(pblk); + pblk_line_meta_free(pblk); + pblk_core_free(pblk); + pblk_l2p_free(pblk); + + kfree(pblk); +} + +static void pblk_tear_down(struct pblk *pblk) +{ + pblk_flush_writer(pblk); + pblk_writer_stop(pblk); + pblk_rb_sync_l2p(&pblk->rwb); + pblk_recov_pad(pblk); + pblk_rwb_free(pblk); + pblk_rl_free(&pblk->rl); + + pr_debug("pblk: consistent tear down\n"); +} + +static void pblk_exit(void *private) +{ + struct pblk *pblk = private; + + down_write(&pblk_lock); + pblk_gc_exit(pblk); + pblk_tear_down(pblk); + pblk_free(pblk); + up_write(&pblk_lock); +} + +static sector_t pblk_capacity(void *private) +{ + struct pblk *pblk = private; + + return pblk->capacity * NR_PHY_IN_LOG; +} + +static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, + int flags) +{ + struct nvm_geo *geo = &dev->geo; + struct request_queue *bqueue = dev->q; + struct request_queue *tqueue = tdisk->queue; + struct pblk *pblk; + int ret; + + if (dev->identity.dom & NVM_RSP_L2P) { + pr_err("pblk: device-side L2P table not supported. (%x)\n", + dev->identity.dom); + return ERR_PTR(-EINVAL); + } + + pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL); + if (!pblk) + return ERR_PTR(-ENOMEM); + + pblk->dev = dev; + pblk->disk = tdisk; + + spin_lock_init(&pblk->trans_lock); + spin_lock_init(&pblk->lock); + + if (flags & NVM_TARGET_FACTORY) + pblk_setup_uuid(pblk); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_set(&pblk->inflight_writes, 0); + atomic_long_set(&pblk->padded_writes, 0); + atomic_long_set(&pblk->padded_wb, 0); + atomic_long_set(&pblk->nr_flush, 0); + atomic_long_set(&pblk->req_writes, 0); + atomic_long_set(&pblk->sub_writes, 0); + atomic_long_set(&pblk->sync_writes, 0); + atomic_long_set(&pblk->compl_writes, 0); + atomic_long_set(&pblk->inflight_reads, 0); + atomic_long_set(&pblk->sync_reads, 0); + atomic_long_set(&pblk->recov_writes, 0); + atomic_long_set(&pblk->recov_writes, 0); + atomic_long_set(&pblk->recov_gc_writes, 0); +#endif + + atomic_long_set(&pblk->read_failed, 0); + atomic_long_set(&pblk->read_empty, 0); + atomic_long_set(&pblk->read_high_ecc, 0); + atomic_long_set(&pblk->read_failed_gc, 0); + atomic_long_set(&pblk->write_failed, 0); + atomic_long_set(&pblk->erase_failed, 0); + + ret = pblk_luns_init(pblk, dev->luns); + if (ret) { + pr_err("pblk: could not initialize luns\n"); + goto fail; + } + + ret = pblk_lines_init(pblk); + if (ret) { + pr_err("pblk: could not initialize lines\n"); + goto fail_free_luns; + } + + ret = pblk_core_init(pblk); + if (ret) { + pr_err("pblk: could not initialize core\n"); + goto fail_free_line_meta; + } + + ret = pblk_l2p_init(pblk); + if (ret) { + pr_err("pblk: could not initialize maps\n"); + goto fail_free_core; + } + + ret = pblk_lines_configure(pblk, flags); + if (ret) { + pr_err("pblk: could not configure lines\n"); + goto fail_free_l2p; + } + + ret = pblk_writer_init(pblk); + if (ret) { + pr_err("pblk: could not initialize write thread\n"); + goto fail_free_lines; + } + + ret = pblk_gc_init(pblk); + if (ret) { + pr_err("pblk: could not initialize gc\n"); + goto fail_stop_writer; + } + + /* inherit the size from the underlying device */ + blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); + blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); + + blk_queue_write_cache(tqueue, true, false); + + tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size; + tqueue->limits.discard_alignment = 0; + blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); + + pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n", + geo->nr_luns, pblk->l_mg.nr_lines, + (unsigned long long)pblk->rl.nr_secs, + pblk->rwb.nr_entries); + + wake_up_process(pblk->writer_ts); + return pblk; + +fail_stop_writer: + pblk_writer_stop(pblk); +fail_free_lines: + pblk_lines_free(pblk); +fail_free_l2p: + pblk_l2p_free(pblk); +fail_free_core: + pblk_core_free(pblk); +fail_free_line_meta: + pblk_line_meta_free(pblk); +fail_free_luns: + pblk_luns_free(pblk); +fail: + kfree(pblk); + return ERR_PTR(ret); +} + +/* physical block device target */ +static struct nvm_tgt_type tt_pblk = { + .name = "pblk", + .version = {1, 0, 0}, + + .make_rq = pblk_make_rq, + .capacity = pblk_capacity, + + .init = pblk_init, + .exit = pblk_exit, + + .sysfs_init = pblk_sysfs_init, + .sysfs_exit = pblk_sysfs_exit, +}; + +static int __init pblk_module_init(void) +{ + return nvm_register_tgt_type(&tt_pblk); +} + +static void pblk_module_exit(void) +{ + nvm_unregister_tgt_type(&tt_pblk); +} + +module_init(pblk_module_init); +module_exit(pblk_module_exit); +MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>"); +MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs"); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c new file mode 100644 index 000000000000..17c16955284d --- /dev/null +++ b/drivers/lightnvm/pblk-map.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-map.c - pblk's lba-ppa mapping strategy + * + */ + +#include "pblk.h" + +static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, + struct ppa_addr *ppa_list, + unsigned long *lun_bitmap, + struct pblk_sec_meta *meta_list, + unsigned int valid_secs) +{ + struct pblk_line *line = pblk_line_get_data(pblk); + struct line_emeta *emeta = line->emeta; + struct pblk_w_ctx *w_ctx; + __le64 *lba_list = pblk_line_emeta_to_lbas(emeta); + u64 paddr; + int nr_secs = pblk->min_write_pgs; + int i; + + paddr = pblk_alloc_page(pblk, line, nr_secs); + + for (i = 0; i < nr_secs; i++, paddr++) { + /* ppa to be sent to the device */ + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); + + /* Write context for target bio completion on write buffer. Note + * that the write buffer is protected by the sync backpointer, + * and a single writer thread have access to each specific entry + * at a time. Thus, it is safe to modify the context for the + * entry we are setting up for submission without taking any + * lock or memory barrier. + */ + if (i < valid_secs) { + kref_get(&line->ref); + w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i); + w_ctx->ppa = ppa_list[i]; + meta_list[i].lba = cpu_to_le64(w_ctx->lba); + lba_list[paddr] = cpu_to_le64(w_ctx->lba); + le64_add_cpu(&line->emeta->nr_valid_lbas, 1); + } else { + meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); + lba_list[paddr] = cpu_to_le64(ADDR_EMPTY); + pblk_map_pad_invalidate(pblk, line, paddr); + } + } + + if (pblk_line_is_full(line)) { + line = pblk_line_replace_data(pblk); + if (!line) + return; + } + + pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); +} + +void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, + unsigned long *lun_bitmap, unsigned int valid_secs, + unsigned int off) +{ + struct pblk_sec_meta *meta_list = rqd->meta_list; + unsigned int map_secs; + int min = pblk->min_write_pgs; + int i; + + for (i = off; i < rqd->nr_ppas; i += min) { + map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; + pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs); + } +} + +/* only if erase_ppa is set, acquire erase semaphore */ +void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int sentry, unsigned long *lun_bitmap, + unsigned int valid_secs, struct ppa_addr *erase_ppa) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *e_line = pblk_line_get_data_next(pblk); + struct pblk_sec_meta *meta_list = rqd->meta_list; + unsigned int map_secs; + int min = pblk->min_write_pgs; + int i, erase_lun; + + for (i = 0; i < rqd->nr_ppas; i += min) { + map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; + pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs); + + erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls + + rqd->ppa_list[i].g.ch; + + if (!test_bit(erase_lun, e_line->erase_bitmap)) { + if (down_trylock(&pblk->erase_sem)) + continue; + + set_bit(erase_lun, e_line->erase_bitmap); + atomic_dec(&e_line->left_eblks); + *erase_ppa = rqd->ppa_list[i]; + erase_ppa->g.blk = e_line->id; + + /* Avoid evaluating e_line->left_eblks */ + return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, + valid_secs, i + min); + } + } + + /* Erase blocks that are bad in this line but might not be in next */ + if (unlikely(ppa_empty(*erase_ppa))) { + struct pblk_line_meta *lm = &pblk->lm; + + i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line); + if (i == lm->blk_per_line) + return; + + set_bit(i, e_line->erase_bitmap); + atomic_dec(&e_line->left_eblks); + *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */ + erase_ppa->g.blk = e_line->id; + } +} diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c new file mode 100644 index 000000000000..045384ddc1f9 --- /dev/null +++ b/drivers/lightnvm/pblk-rb.c @@ -0,0 +1,852 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * + * Based upon the circular ringbuffer. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-rb.c - pblk's write buffer + */ + +#include <linux/circ_buf.h> + +#include "pblk.h" + +static DECLARE_RWSEM(pblk_rb_lock); + +void pblk_rb_data_free(struct pblk_rb *rb) +{ + struct pblk_rb_pages *p, *t; + + down_write(&pblk_rb_lock); + list_for_each_entry_safe(p, t, &rb->pages, list) { + free_pages((unsigned long)page_address(p->pages), p->order); + list_del(&p->list); + kfree(p); + } + up_write(&pblk_rb_lock); +} + +/* + * Initialize ring buffer. The data and metadata buffers must be previously + * allocated and their size must be a power of two + * (Documentation/circular-buffers.txt) + */ +int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, + unsigned int power_size, unsigned int power_seg_sz) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + unsigned int init_entry = 0; + unsigned int alloc_order = power_size; + unsigned int max_order = MAX_ORDER - 1; + unsigned int order, iter; + + down_write(&pblk_rb_lock); + rb->entries = rb_entry_base; + rb->seg_size = (1 << power_seg_sz); + rb->nr_entries = (1 << power_size); + rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; + rb->sync_point = EMPTY_ENTRY; + + spin_lock_init(&rb->w_lock); + spin_lock_init(&rb->s_lock); + + INIT_LIST_HEAD(&rb->pages); + + if (alloc_order >= max_order) { + order = max_order; + iter = (1 << (alloc_order - max_order)); + } else { + order = alloc_order; + iter = 1; + } + + do { + struct pblk_rb_entry *entry; + struct pblk_rb_pages *page_set; + void *kaddr; + unsigned long set_size; + int i; + + page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); + if (!page_set) { + up_write(&pblk_rb_lock); + return -ENOMEM; + } + + page_set->order = order; + page_set->pages = alloc_pages(GFP_KERNEL, order); + if (!page_set->pages) { + kfree(page_set); + pblk_rb_data_free(rb); + up_write(&pblk_rb_lock); + return -ENOMEM; + } + kaddr = page_address(page_set->pages); + + entry = &rb->entries[init_entry]; + entry->data = kaddr; + entry->cacheline = pblk_cacheline_to_addr(init_entry++); + entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; + + set_size = (1 << order); + for (i = 1; i < set_size; i++) { + entry = &rb->entries[init_entry]; + entry->cacheline = pblk_cacheline_to_addr(init_entry++); + entry->data = kaddr + (i * rb->seg_size); + entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; + bio_list_init(&entry->w_ctx.bios); + } + + list_add_tail(&page_set->list, &rb->pages); + iter--; + } while (iter > 0); + up_write(&pblk_rb_lock); + +#ifdef CONFIG_NVM_DEBUG + atomic_set(&rb->inflight_sync_point, 0); +#endif + + /* + * Initialize rate-limiter, which controls access to the write buffer + * but user and GC I/O + */ + pblk_rl_init(&pblk->rl, rb->nr_entries); + + return 0; +} + +/* + * pblk_rb_calculate_size -- calculate the size of the write buffer + */ +unsigned int pblk_rb_calculate_size(unsigned int nr_entries) +{ + /* Alloc a write buffer that can at least fit 128 entries */ + return (1 << max(get_count_order(nr_entries), 7)); +} + +void *pblk_rb_entries_ref(struct pblk_rb *rb) +{ + return rb->entries; +} + +static void clean_wctx(struct pblk_w_ctx *w_ctx) +{ + int flags; + +try: + flags = READ_ONCE(w_ctx->flags); + if (!(flags & PBLK_SUBMITTED_ENTRY)) + goto try; + + /* Release flags on context. Protect from writes and reads */ + smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); + pblk_ppa_set_empty(&w_ctx->ppa); +} + +#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size) +#define pblk_rb_ring_space(rb, head, tail, size) \ + (CIRC_SPACE(head, tail, size)) + +/* + * Buffer space is calculated with respect to the back pointer signaling + * synchronized entries to the media. + */ +static unsigned int pblk_rb_space(struct pblk_rb *rb) +{ + unsigned int mem = READ_ONCE(rb->mem); + unsigned int sync = READ_ONCE(rb->sync); + + return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); +} + +/* + * Buffer count is calculated with respect to the submission entry signaling the + * entries that are available to send to the media + */ +unsigned int pblk_rb_read_count(struct pblk_rb *rb) +{ + unsigned int mem = READ_ONCE(rb->mem); + unsigned int subm = READ_ONCE(rb->subm); + + return pblk_rb_ring_count(mem, subm, rb->nr_entries); +} + +unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) +{ + unsigned int subm; + + subm = READ_ONCE(rb->subm); + /* Commit read means updating submission pointer */ + smp_store_release(&rb->subm, + (subm + nr_entries) & (rb->nr_entries - 1)); + + return subm; +} + +static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, + unsigned int to_update) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_line *line; + struct pblk_rb_entry *entry; + struct pblk_w_ctx *w_ctx; + unsigned int i; + + for (i = 0; i < to_update; i++) { + entry = &rb->entries[*l2p_upd]; + w_ctx = &entry->w_ctx; + + pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, + entry->cacheline); + + line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)]; + kref_put(&line->ref, pblk_line_put); + clean_wctx(w_ctx); + *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1); + } + + return 0; +} + +/* + * When we move the l2p_update pointer, we update the l2p table - lookups will + * point to the physical address instead of to the cacheline in the write buffer + * from this moment on. + */ +static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int mem, unsigned int sync) +{ + unsigned int space, count; + int ret = 0; + + lockdep_assert_held(&rb->w_lock); + + /* Update l2p only as buffer entries are being overwritten */ + space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries); + if (space > nr_entries) + goto out; + + count = nr_entries - space; + /* l2p_update used exclusively under rb->w_lock */ + ret = __pblk_rb_update_l2p(rb, &rb->l2p_update, count); + +out: + return ret; +} + +/* + * Update the l2p entry for all sectors stored on the write buffer. This means + * that all future lookups to the l2p table will point to a device address, not + * to the cacheline in the write buffer. + */ +void pblk_rb_sync_l2p(struct pblk_rb *rb) +{ + unsigned int sync; + unsigned int to_update; + + spin_lock(&rb->w_lock); + + /* Protect from reads and writes */ + sync = smp_load_acquire(&rb->sync); + + to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries); + __pblk_rb_update_l2p(rb, &rb->l2p_update, to_update); + + spin_unlock(&rb->w_lock); +} + +/* + * Write @nr_entries to ring buffer from @data buffer if there is enough space. + * Typically, 4KB data chunks coming from a bio will be copied to the ring + * buffer, thus the write will fail if not all incoming data can be copied. + * + */ +static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, + struct pblk_rb_entry *entry) +{ + memcpy(entry->data, data, rb->seg_size); + + entry->w_ctx.lba = w_ctx.lba; + entry->w_ctx.ppa = w_ctx.ppa; +} + +void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, unsigned int ring_pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entry; + int flags; + + entry = &rb->entries[ring_pos]; + flags = READ_ONCE(entry->w_ctx.flags); +#ifdef CONFIG_NVM_DEBUG + /* Caller must guarantee that the entry is free */ + BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); +#endif + + __pblk_rb_write_entry(rb, data, w_ctx, entry); + + pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline); + flags = w_ctx.flags | PBLK_WRITTEN_DATA; + + /* Release flags on write context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); +} + +void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, + unsigned int ring_pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entry; + int flags; + + entry = &rb->entries[ring_pos]; + flags = READ_ONCE(entry->w_ctx.flags); +#ifdef CONFIG_NVM_DEBUG + /* Caller must guarantee that the entry is free */ + BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); +#endif + + __pblk_rb_write_entry(rb, data, w_ctx, entry); + + if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, gc_line)) + entry->w_ctx.lba = ADDR_EMPTY; + + flags = w_ctx.flags | PBLK_WRITTEN_DATA; + + /* Release flags on write context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); +} + +static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio, + unsigned int pos) +{ + struct pblk_rb_entry *entry; + unsigned int subm, sync_point; + int flags; + + subm = READ_ONCE(rb->subm); + +#ifdef CONFIG_NVM_DEBUG + atomic_inc(&rb->inflight_sync_point); +#endif + + if (pos == subm) + return 0; + + sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); + entry = &rb->entries[sync_point]; + + flags = READ_ONCE(entry->w_ctx.flags); + flags |= PBLK_FLUSH_ENTRY; + + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + + /* Protect syncs */ + smp_store_release(&rb->sync_point, sync_point); + + spin_lock_irq(&rb->s_lock); + bio_list_add(&entry->w_ctx.bios, bio); + spin_unlock_irq(&rb->s_lock); + + return 1; +} + +static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos) +{ + unsigned int mem; + unsigned int sync; + + sync = READ_ONCE(rb->sync); + mem = READ_ONCE(rb->mem); + + if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries) + return 0; + + if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) + return 0; + + *pos = mem; + + return 1; +} + +static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos) +{ + if (!__pblk_rb_may_write(rb, nr_entries, pos)) + return 0; + + /* Protect from read count */ + smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1)); + return 1; +} + +static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos, struct bio *bio, + int *io_ret) +{ + unsigned int mem; + + if (!__pblk_rb_may_write(rb, nr_entries, pos)) + return 0; + + mem = (*pos + nr_entries) & (rb->nr_entries - 1); + *io_ret = NVM_IO_DONE; + + if (bio->bi_opf & REQ_PREFLUSH) { + struct pblk *pblk = container_of(rb, struct pblk, rwb); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->nr_flush); +#endif + if (pblk_rb_sync_point_set(&pblk->rwb, bio, mem)) + *io_ret = NVM_IO_OK; + } + + /* Protect from read count */ + smp_store_release(&rb->mem, mem); + return 1; +} + +/* + * Atomically check that (i) there is space on the write buffer for the + * incoming I/O, and (ii) the current I/O type has enough budget in the write + * buffer (rate-limiter). + */ +int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, + unsigned int nr_entries, unsigned int *pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + int flush_done; + + spin_lock(&rb->w_lock); + if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) { + spin_unlock(&rb->w_lock); + return NVM_IO_REQUEUE; + } + + if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) { + spin_unlock(&rb->w_lock); + return NVM_IO_REQUEUE; + } + + pblk_rl_user_in(&pblk->rl, nr_entries); + spin_unlock(&rb->w_lock); + + return flush_done; +} + +/* + * Look at pblk_rb_may_write_user comment + */ +int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + + spin_lock(&rb->w_lock); + if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) { + spin_unlock(&rb->w_lock); + return 0; + } + + if (!pblk_rb_may_write(rb, nr_entries, pos)) { + spin_unlock(&rb->w_lock); + return 0; + } + + pblk_rl_gc_in(&pblk->rl, nr_entries); + spin_unlock(&rb->w_lock); + + return 1; +} + +/* + * The caller of this function must ensure that the backpointer will not + * overwrite the entries passed on the list. + */ +unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, + struct list_head *list, + unsigned int max) +{ + struct pblk_rb_entry *entry, *tentry; + struct page *page; + unsigned int read = 0; + int ret; + + list_for_each_entry_safe(entry, tentry, list, index) { + if (read > max) { + pr_err("pblk: too many entries on list\n"); + goto out; + } + + page = virt_to_page(entry->data); + if (!page) { + pr_err("pblk: could not allocate write bio page\n"); + goto out; + } + + ret = bio_add_page(bio, page, rb->seg_size, 0); + if (ret != rb->seg_size) { + pr_err("pblk: could not add page to write bio\n"); + goto out; + } + + list_del(&entry->index); + read++; + } + +out: + return read; +} + +/* + * Read available entries on rb and add them to the given bio. To avoid a memory + * copy, a page reference to the write buffer is used to be added to the bio. + * + * This function is used by the write thread to form the write bio that will + * persist data on the write buffer to the media. + */ +unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, + struct pblk_c_ctx *c_ctx, + unsigned int pos, + unsigned int nr_entries, + unsigned int count) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entry; + struct page *page; + unsigned int pad = 0, read = 0, to_read = nr_entries; + unsigned int user_io = 0, gc_io = 0; + unsigned int i; + int flags; + int ret; + + if (count < nr_entries) { + pad = nr_entries - count; + to_read = count; + } + + c_ctx->sentry = pos; + c_ctx->nr_valid = to_read; + c_ctx->nr_padded = pad; + + for (i = 0; i < to_read; i++) { + entry = &rb->entries[pos]; + + /* A write has been allowed into the buffer, but data is still + * being copied to it. It is ok to busy wait. + */ +try: + flags = READ_ONCE(entry->w_ctx.flags); + if (!(flags & PBLK_WRITTEN_DATA)) + goto try; + + if (flags & PBLK_IOTYPE_USER) + user_io++; + else if (flags & PBLK_IOTYPE_GC) + gc_io++; + else + WARN(1, "pblk: unknown IO type\n"); + + page = virt_to_page(entry->data); + if (!page) { + pr_err("pblk: could not allocate write bio page\n"); + flags &= ~PBLK_WRITTEN_DATA; + flags |= PBLK_SUBMITTED_ENTRY; + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + goto out; + } + + ret = bio_add_page(bio, page, rb->seg_size, 0); + if (ret != rb->seg_size) { + pr_err("pblk: could not add page to write bio\n"); + flags &= ~PBLK_WRITTEN_DATA; + flags |= PBLK_SUBMITTED_ENTRY; + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + goto out; + } + + if (flags & PBLK_FLUSH_ENTRY) { + unsigned int sync_point; + + sync_point = READ_ONCE(rb->sync_point); + if (sync_point == pos) { + /* Protect syncs */ + smp_store_release(&rb->sync_point, EMPTY_ENTRY); + } + + flags &= ~PBLK_FLUSH_ENTRY; +#ifdef CONFIG_NVM_DEBUG + atomic_dec(&rb->inflight_sync_point); +#endif + } + + flags &= ~PBLK_WRITTEN_DATA; + flags |= PBLK_SUBMITTED_ENTRY; + + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + + pos = (pos + 1) & (rb->nr_entries - 1); + } + + read = to_read; + pblk_rl_out(&pblk->rl, user_io, gc_io); +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(pad, &((struct pblk *) + (container_of(rb, struct pblk, rwb)))->padded_writes); +#endif +out: + return read; +} + +/* + * Copy to bio only if the lba matches the one on the given cache entry. + * Otherwise, it means that the entry has been overwritten, and the bio should + * be directed to disk. + */ +int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, + u64 pos, int bio_iter) +{ + struct pblk_rb_entry *entry; + struct pblk_w_ctx *w_ctx; + void *data; + int flags; + int ret = 1; + + spin_lock(&rb->w_lock); + +#ifdef CONFIG_NVM_DEBUG + /* Caller must ensure that the access will not cause an overflow */ + BUG_ON(pos >= rb->nr_entries); +#endif + entry = &rb->entries[pos]; + w_ctx = &entry->w_ctx; + flags = READ_ONCE(w_ctx->flags); + + /* Check if the entry has been overwritten or is scheduled to be */ + if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) { + ret = 0; + goto out; + } + + /* Only advance the bio if it hasn't been advanced already. If advanced, + * this bio is at least a partial bio (i.e., it has partially been + * filled with data from the cache). If part of the data resides on the + * media, we will read later on + */ + if (unlikely(!bio->bi_iter.bi_idx)) + bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE); + + data = bio_data(bio); + memcpy(data, entry->data, rb->seg_size); + +out: + spin_unlock(&rb->w_lock); + return ret; +} + +struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) +{ + unsigned int entry = pos & (rb->nr_entries - 1); + + return &rb->entries[entry].w_ctx; +} + +unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags) + __acquires(&rb->s_lock) +{ + if (flags) + spin_lock_irqsave(&rb->s_lock, *flags); + else + spin_lock_irq(&rb->s_lock); + + return rb->sync; +} + +void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags) + __releases(&rb->s_lock) +{ + lockdep_assert_held(&rb->s_lock); + + if (flags) + spin_unlock_irqrestore(&rb->s_lock, *flags); + else + spin_unlock_irq(&rb->s_lock); +} + +unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) +{ + unsigned int sync; + unsigned int i; + + lockdep_assert_held(&rb->s_lock); + + sync = READ_ONCE(rb->sync); + + for (i = 0; i < nr_entries; i++) + sync = (sync + 1) & (rb->nr_entries - 1); + + /* Protect from counts */ + smp_store_release(&rb->sync, sync); + + return sync; +} + +unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb) +{ + unsigned int subm, sync_point; + unsigned int count; + + /* Protect syncs */ + sync_point = smp_load_acquire(&rb->sync_point); + if (sync_point == EMPTY_ENTRY) + return 0; + + subm = READ_ONCE(rb->subm); + + /* The sync point itself counts as a sector to sync */ + count = pblk_rb_ring_count(sync_point, subm, rb->nr_entries) + 1; + + return count; +} + +/* + * Scan from the current position of the sync pointer to find the entry that + * corresponds to the given ppa. This is necessary since write requests can be + * completed out of order. The assumption is that the ppa is close to the sync + * pointer thus the search will not take long. + * + * The caller of this function must guarantee that the sync pointer will no + * reach the entry while it is using the metadata associated with it. With this + * assumption in mind, there is no need to take the sync lock. + */ +struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, + struct ppa_addr *ppa) +{ + unsigned int sync, subm, count; + unsigned int i; + + sync = READ_ONCE(rb->sync); + subm = READ_ONCE(rb->subm); + count = pblk_rb_ring_count(subm, sync, rb->nr_entries); + + for (i = 0; i < count; i++) + sync = (sync + 1) & (rb->nr_entries - 1); + + return NULL; +} + +int pblk_rb_tear_down_check(struct pblk_rb *rb) +{ + struct pblk_rb_entry *entry; + int i; + int ret = 0; + + spin_lock(&rb->w_lock); + spin_lock_irq(&rb->s_lock); + + if ((rb->mem == rb->subm) && (rb->subm == rb->sync) && + (rb->sync == rb->l2p_update) && + (rb->sync_point == EMPTY_ENTRY)) { + goto out; + } + + if (!rb->entries) { + ret = 1; + goto out; + } + + for (i = 0; i < rb->nr_entries; i++) { + entry = &rb->entries[i]; + + if (!entry->data) { + ret = 1; + goto out; + } + } + +out: + spin_unlock(&rb->w_lock); + spin_unlock_irq(&rb->s_lock); + + return ret; +} + +unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos) +{ + return (pos & (rb->nr_entries - 1)); +} + +int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos) +{ + return (pos >= rb->nr_entries); +} + +ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_c_ctx *c; + ssize_t offset; + int queued_entries = 0; + + spin_lock_irq(&rb->s_lock); + list_for_each_entry(c, &pblk->compl_list, list) + queued_entries++; + spin_unlock_irq(&rb->s_lock); + + if (rb->sync_point != EMPTY_ENTRY) + offset = scnprintf(buf, PAGE_SIZE, + "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n", + rb->nr_entries, + rb->mem, + rb->subm, + rb->sync, + rb->l2p_update, +#ifdef CONFIG_NVM_DEBUG + atomic_read(&rb->inflight_sync_point), +#else + 0, +#endif + rb->sync_point, + pblk_rb_read_count(rb), + pblk_rb_space(rb), + pblk_rb_sync_point_count(rb), + queued_entries); + else + offset = scnprintf(buf, PAGE_SIZE, + "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n", + rb->nr_entries, + rb->mem, + rb->subm, + rb->sync, + rb->l2p_update, +#ifdef CONFIG_NVM_DEBUG + atomic_read(&rb->inflight_sync_point), +#else + 0, +#endif + pblk_rb_read_count(rb), + pblk_rb_space(rb), + pblk_rb_sync_point_count(rb), + queued_entries); + + return offset; +} diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c new file mode 100644 index 000000000000..4a12f14d78c6 --- /dev/null +++ b/drivers/lightnvm/pblk-read.c @@ -0,0 +1,529 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-read.c - pblk's read path + */ + +#include "pblk.h" + +/* + * There is no guarantee that the value read from cache has not been updated and + * resides at another location in the cache. We guarantee though that if the + * value is read from the cache, it belongs to the mapped lba. In order to + * guarantee and order between writes and reads are ordered, a flush must be + * issued. + */ +static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, + sector_t lba, struct ppa_addr ppa, + int bio_iter) +{ +#ifdef CONFIG_NVM_DEBUG + /* Callers must ensure that the ppa points to a cache address */ + BUG_ON(pblk_ppa_empty(ppa)); + BUG_ON(!pblk_addr_in_cache(ppa)); +#endif + + return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, + pblk_addr_to_cacheline(ppa), bio_iter); +} + +static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned long *read_bitmap) +{ + struct bio *bio = rqd->bio; + struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + sector_t blba = pblk_get_lba(bio); + int nr_secs = rqd->nr_ppas; + int advanced_bio = 0; + int i, j = 0; + + /* logic error: lba out-of-bounds. Ignore read request */ + if (blba + nr_secs >= pblk->rl.nr_secs) { + WARN(1, "pblk: read lbas out of bounds\n"); + return; + } + + pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); + + for (i = 0; i < nr_secs; i++) { + struct ppa_addr p = ppas[i]; + sector_t lba = blba + i; + +retry: + if (pblk_ppa_empty(p)) { + WARN_ON(test_and_set_bit(i, read_bitmap)); + continue; + } + + /* Try to read from write buffer. The address is later checked + * on the write buffer to prevent retrieving overwritten data. + */ + if (pblk_addr_in_cache(p)) { + if (!pblk_read_from_cache(pblk, bio, lba, p, i)) { + pblk_lookup_l2p_seq(pblk, &p, lba, 1); + goto retry; + } + WARN_ON(test_and_set_bit(i, read_bitmap)); + advanced_bio = 1; + } else { + /* Read from media non-cached sectors */ + rqd->ppa_list[j++] = p; + } + + if (advanced_bio) + bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(nr_secs, &pblk->inflight_reads); +#endif +} + +static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd) +{ + int err; + + rqd->flags = pblk_set_read_mode(pblk); + + err = pblk_submit_io(pblk, rqd); + if (err) + return NVM_IO_ERR; + + return NVM_IO_OK; +} + +static void pblk_end_io_read(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); + struct bio *bio = rqd->bio; + + if (rqd->error) + pblk_log_read_err(pblk, rqd); +#ifdef CONFIG_NVM_DEBUG + else + WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n"); +#endif + + if (rqd->nr_ppas > 1) + nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list); + + bio_put(bio); + if (r_ctx->orig_bio) { +#ifdef CONFIG_NVM_DEBUG + WARN_ONCE(r_ctx->orig_bio->bi_error, + "pblk: corrupted read bio\n"); +#endif + bio_endio(r_ctx->orig_bio); + bio_put(r_ctx->orig_bio); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); + atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads); +#endif + + pblk_free_rqd(pblk, rqd, READ); +} + +static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int bio_init_idx, + unsigned long *read_bitmap) +{ + struct bio *new_bio, *bio = rqd->bio; + struct bio_vec src_bv, dst_bv; + void *ppa_ptr = NULL; + void *src_p, *dst_p; + dma_addr_t dma_ppa_list = 0; + int nr_secs = rqd->nr_ppas; + int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); + int i, ret, hole; + DECLARE_COMPLETION_ONSTACK(wait); + + new_bio = bio_alloc(GFP_KERNEL, nr_holes); + if (!new_bio) { + pr_err("pblk: could not alloc read bio\n"); + return NVM_IO_ERR; + } + + if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) + goto err; + + if (nr_holes != new_bio->bi_vcnt) { + pr_err("pblk: malformed bio\n"); + goto err; + } + + new_bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(new_bio, REQ_OP_READ, 0); + new_bio->bi_private = &wait; + new_bio->bi_end_io = pblk_end_bio_sync; + + rqd->bio = new_bio; + rqd->nr_ppas = nr_holes; + rqd->end_io = NULL; + + if (unlikely(nr_secs > 1 && nr_holes == 1)) { + ppa_ptr = rqd->ppa_list; + dma_ppa_list = rqd->dma_ppa_list; + rqd->ppa_addr = rqd->ppa_list[0]; + } + + ret = pblk_submit_read_io(pblk, rqd); + if (ret) { + bio_put(rqd->bio); + pr_err("pblk: read IO submission failed\n"); + goto err; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: partial read I/O timed out\n"); + } + + if (rqd->error) { + atomic_long_inc(&pblk->read_failed); +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, rqd, rqd->error); +#endif + } + + if (unlikely(nr_secs > 1 && nr_holes == 1)) { + rqd->ppa_list = ppa_ptr; + rqd->dma_ppa_list = dma_ppa_list; + } + + /* Fill the holes in the original bio */ + i = 0; + hole = find_first_zero_bit(read_bitmap, nr_secs); + do { + src_bv = new_bio->bi_io_vec[i++]; + dst_bv = bio->bi_io_vec[bio_init_idx + hole]; + + src_p = kmap_atomic(src_bv.bv_page); + dst_p = kmap_atomic(dst_bv.bv_page); + + memcpy(dst_p + dst_bv.bv_offset, + src_p + src_bv.bv_offset, + PBLK_EXPOSED_PAGE_SIZE); + + kunmap_atomic(src_p); + kunmap_atomic(dst_p); + + mempool_free(src_bv.bv_page, pblk->page_pool); + + hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); + } while (hole < nr_secs); + + bio_put(new_bio); + + /* Complete the original bio and associated request */ + rqd->bio = bio; + rqd->nr_ppas = nr_secs; + rqd->private = pblk; + + bio_endio(bio); + pblk_end_io_read(rqd); + return NVM_IO_OK; + +err: + /* Free allocated pages in new bio */ + pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); + rqd->private = pblk; + pblk_end_io_read(rqd); + return NVM_IO_ERR; +} + +static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned long *read_bitmap) +{ + struct bio *bio = rqd->bio; + struct ppa_addr ppa; + sector_t lba = pblk_get_lba(bio); + + /* logic error: lba out-of-bounds. Ignore read request */ + if (lba >= pblk->rl.nr_secs) { + WARN(1, "pblk: read lba out of bounds\n"); + return; + } + + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->inflight_reads); +#endif + +retry: + if (pblk_ppa_empty(ppa)) { + WARN_ON(test_and_set_bit(0, read_bitmap)); + return; + } + + /* Try to read from write buffer. The address is later checked on the + * write buffer to prevent retrieving overwritten data. + */ + if (pblk_addr_in_cache(ppa)) { + if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0)) { + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); + goto retry; + } + WARN_ON(test_and_set_bit(0, read_bitmap)); + } else { + rqd->ppa_addr = ppa; + } +} + +int pblk_submit_read(struct pblk *pblk, struct bio *bio) +{ + struct nvm_tgt_dev *dev = pblk->dev; + unsigned int nr_secs = pblk_get_secs(bio); + struct nvm_rq *rqd; + unsigned long read_bitmap; /* Max 64 ppas per request */ + unsigned int bio_init_idx; + int ret = NVM_IO_ERR; + + if (nr_secs > PBLK_MAX_REQ_ADDRS) + return NVM_IO_ERR; + + bitmap_zero(&read_bitmap, nr_secs); + + rqd = pblk_alloc_rqd(pblk, READ); + if (IS_ERR(rqd)) { + pr_err_ratelimited("pblk: not able to alloc rqd"); + return NVM_IO_ERR; + } + + rqd->opcode = NVM_OP_PREAD; + rqd->bio = bio; + rqd->nr_ppas = nr_secs; + rqd->private = pblk; + rqd->end_io = pblk_end_io_read; + + /* Save the index for this bio's start. This is needed in case + * we need to fill a partial read. + */ + bio_init_idx = pblk_get_bi_idx(bio); + + if (nr_secs > 1) { + rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd->dma_ppa_list); + if (!rqd->ppa_list) { + pr_err("pblk: not able to allocate ppa list\n"); + goto fail_rqd_free; + } + + pblk_read_ppalist_rq(pblk, rqd, &read_bitmap); + } else { + pblk_read_rq(pblk, rqd, &read_bitmap); + } + + bio_get(bio); + if (bitmap_full(&read_bitmap, nr_secs)) { + bio_endio(bio); + pblk_end_io_read(rqd); + return NVM_IO_OK; + } + + /* All sectors are to be read from the device */ + if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) { + struct bio *int_bio = NULL; + struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); + + /* Clone read bio to deal with read errors internally */ + int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set); + if (!int_bio) { + pr_err("pblk: could not clone read bio\n"); + return NVM_IO_ERR; + } + + rqd->bio = int_bio; + r_ctx->orig_bio = bio; + + ret = pblk_submit_read_io(pblk, rqd); + if (ret) { + pr_err("pblk: read IO submission failed\n"); + if (int_bio) + bio_put(int_bio); + return ret; + } + + return NVM_IO_OK; + } + + /* The read bio request could be partially filled by the write buffer, + * but there are some holes that need to be read from the drive. + */ + ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap); + if (ret) { + pr_err("pblk: failed to perform partial read\n"); + return ret; + } + + return NVM_IO_OK; + +fail_rqd_free: + pblk_free_rqd(pblk, rqd, READ); + return ret; +} + +static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_line *line, u64 *lba_list, + unsigned int nr_secs) +{ + struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + int valid_secs = 0; + int i; + + pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs); + + for (i = 0; i < nr_secs; i++) { + if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id || + pblk_ppa_empty(ppas[i])) { + lba_list[i] = ADDR_EMPTY; + continue; + } + + rqd->ppa_list[valid_secs++] = ppas[i]; + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(valid_secs, &pblk->inflight_reads); +#endif + return valid_secs; +} + +static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_line *line, sector_t lba) +{ + struct ppa_addr ppa; + int valid_secs = 0; + + if (lba == ADDR_EMPTY) + goto out; + + /* logic error: lba out-of-bounds */ + if (lba >= pblk->rl.nr_secs) { + WARN(1, "pblk: read lba out of bounds\n"); + goto out; + } + + spin_lock(&pblk->trans_lock); + ppa = pblk_trans_map_get(pblk, lba); + spin_unlock(&pblk->trans_lock); + + /* Ignore updated values until the moment */ + if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id || + pblk_ppa_empty(ppa)) + goto out; + + rqd->ppa_addr = ppa; + valid_secs = 1; + +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->inflight_reads); +#endif + +out: + return valid_secs; +} + +int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, + unsigned int nr_secs, unsigned int *secs_to_gc, + struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct request_queue *q = dev->q; + struct bio *bio; + struct nvm_rq rqd; + int ret, data_len; + DECLARE_COMPLETION_ONSTACK(wait); + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + if (nr_secs > 1) { + rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd.dma_ppa_list); + if (!rqd.ppa_list) + return NVM_IO_ERR; + + *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list, + nr_secs); + if (*secs_to_gc == 1) { + struct ppa_addr ppa; + + ppa = rqd.ppa_list[0]; + nvm_dev_dma_free(dev->parent, rqd.ppa_list, + rqd.dma_ppa_list); + rqd.ppa_addr = ppa; + } + } else { + *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]); + } + + if (!(*secs_to_gc)) + goto out; + + data_len = (*secs_to_gc) * geo->sec_size; + bio = bio_map_kern(q, data, data_len, GFP_KERNEL); + if (IS_ERR(bio)) { + pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); + goto err_free_dma; + } + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd.opcode = NVM_OP_PREAD; + rqd.end_io = pblk_end_io_sync; + rqd.private = &wait; + rqd.nr_ppas = *secs_to_gc; + rqd.bio = bio; + + ret = pblk_submit_read_io(pblk, &rqd); + if (ret) { + bio_endio(bio); + pr_err("pblk: GC read request failed\n"); + goto err_free_dma; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: GC read I/O timed out\n"); + } + + if (rqd.error) { + atomic_long_inc(&pblk->read_failed_gc); +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, &rqd, rqd.error); +#endif + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(*secs_to_gc, &pblk->sync_reads); + atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads); + atomic_long_sub(*secs_to_gc, &pblk->inflight_reads); +#endif + +out: + if (rqd.nr_ppas > 1) + nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list); + return NVM_IO_OK; + +err_free_dma: + if (rqd.nr_ppas > 1) + nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list); + return NVM_IO_ERR; +} diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c new file mode 100644 index 000000000000..f8f85087cd3c --- /dev/null +++ b/drivers/lightnvm/pblk-recovery.c @@ -0,0 +1,998 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial: Javier Gonzalez <javier@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-recovery.c - pblk's recovery path + */ + +#include "pblk.h" + +void pblk_submit_rec(struct work_struct *work) +{ + struct pblk_rec_ctx *recovery = + container_of(work, struct pblk_rec_ctx, ws_rec); + struct pblk *pblk = recovery->pblk; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_rq *rqd = recovery->rqd; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + int max_secs = nvm_max_phys_sects(dev); + struct bio *bio; + unsigned int nr_rec_secs; + unsigned int pgs_read; + int ret; + + nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status, + max_secs); + + bio = bio_alloc(GFP_KERNEL, nr_rec_secs); + if (!bio) { + pr_err("pblk: not able to create recovery bio\n"); + return; + } + + bio->bi_iter.bi_sector = 0; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + rqd->bio = bio; + rqd->nr_ppas = nr_rec_secs; + + pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed, + nr_rec_secs); + if (pgs_read != nr_rec_secs) { + pr_err("pblk: could not read recovery entries\n"); + goto err; + } + + if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) { + pr_err("pblk: could not setup recovery request\n"); + goto err; + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(nr_rec_secs, &pblk->recov_writes); +#endif + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + goto err; + } + + mempool_free(recovery, pblk->rec_pool); + return; + +err: + bio_put(bio); + pblk_free_rqd(pblk, rqd, WRITE); +} + +int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, + struct pblk_rec_ctx *recovery, u64 *comp_bits, + unsigned int comp) +{ + struct nvm_tgt_dev *dev = pblk->dev; + int max_secs = nvm_max_phys_sects(dev); + struct nvm_rq *rec_rqd; + struct pblk_c_ctx *rec_ctx; + int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; + + rec_rqd = pblk_alloc_rqd(pblk, WRITE); + if (IS_ERR(rec_rqd)) { + pr_err("pblk: could not create recovery req.\n"); + return -ENOMEM; + } + + rec_ctx = nvm_rq_to_pdu(rec_rqd); + + /* Copy completion bitmap, but exclude the first X completed entries */ + bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status, + (unsigned long int *)comp_bits, + comp, max_secs); + + /* Save the context for the entries that need to be re-written and + * update current context with the completed entries. + */ + rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp); + if (comp >= c_ctx->nr_valid) { + rec_ctx->nr_valid = 0; + rec_ctx->nr_padded = nr_entries - comp; + + c_ctx->nr_padded = comp - c_ctx->nr_valid; + } else { + rec_ctx->nr_valid = c_ctx->nr_valid - comp; + rec_ctx->nr_padded = c_ctx->nr_padded; + + c_ctx->nr_valid = comp; + c_ctx->nr_padded = 0; + } + + recovery->rqd = rec_rqd; + recovery->pblk = pblk; + + return 0; +} + +__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta) +{ + u32 crc; + + crc = pblk_calc_emeta_crc(pblk, emeta); + if (le32_to_cpu(emeta->crc) != crc) + return NULL; + + if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC) + return NULL; + + return pblk_line_emeta_to_lbas(emeta); +} + +static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct line_emeta *emeta = line->emeta; + __le64 *lba_list; + int data_start; + int nr_data_lbas, nr_valid_lbas, nr_lbas = 0; + int i; + + lba_list = pblk_recov_get_lba_list(pblk, emeta); + if (!lba_list) + return 1; + + data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; + nr_data_lbas = lm->sec_per_line - lm->emeta_sec; + nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas); + + for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) { + struct ppa_addr ppa; + int pos; + + ppa = addr_to_pblk_ppa(pblk, i, line->id); + pos = pblk_ppa_to_pos(geo, ppa); + + /* Do not update bad blocks */ + if (test_bit(pos, line->blk_bitmap)) + continue; + + if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) { + spin_lock(&line->lock); + if (test_and_set_bit(i, line->invalid_bitmap)) + WARN_ONCE(1, "pblk: rec. double invalidate:\n"); + else + line->vsc--; + spin_unlock(&line->lock); + + continue; + } + + pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa); + nr_lbas++; + } + + if (nr_valid_lbas != nr_lbas) + pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n", + line->id, line->emeta->nr_valid_lbas, nr_lbas); + + line->left_msecs = 0; + + return 0; +} + +static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); + + return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec - + nr_bb * geo->sec_per_blk; +} + +struct pblk_recov_alloc { + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + void *data; + dma_addr_t dma_ppa_list; + dma_addr_t dma_meta_list; +}; + +static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p, u64 r_ptr) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + u64 r_ptr_int; + int left_ppas; + int rq_ppas, rq_len; + int i, j; + int ret = 0; + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + + left_ppas = line->cur_sec - r_ptr; + if (!left_ppas) + return 0; + + r_ptr_int = r_ptr; + +next_read_rq: + memset(rqd, 0, pblk_r_rq_size); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PREAD; + rqd->flags = pblk_set_read_mode(pblk); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + r_ptr_int += pblk->min_write_pgs; + ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) + rqd->ppa_list[i] = + addr_to_gen_ppa(pblk, r_ptr_int, line->id); + } + + /* If read fails, more padding is needed */ + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery read timed out\n"); + return -EINTR; + } + + reinit_completion(&wait); + + /* At this point, the read should not fail. If it does, it is a problem + * we cannot recover from here. Need FTL log. + */ + if (rqd->error) { + pr_err("pblk: L2P recovery failed (%d)\n", rqd->error); + return -EINTR; + } + + for (i = 0; i < rqd->nr_ppas; i++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) + continue; + + pblk_update_map(pblk, lba, rqd->ppa_list[i]); + } + + left_ppas -= rq_ppas; + if (left_ppas > 0) + goto next_read_rq; + + return 0; +} + +static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p, int left_ppas) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta); + u64 w_ptr = line->cur_sec; + int left_line_ppas = line->left_msecs; + int rq_ppas, rq_len; + int i, j; + int ret = 0; + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + +next_pad_rq: + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + memset(rqd, 0, pblk_r_rq_size); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PWRITE; + rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); + ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id); + pos = pblk_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + w_ptr += pblk->min_write_pgs; + ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id); + pos = pblk_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { + struct ppa_addr dev_ppa; + + dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + + pblk_map_invalidate(pblk, dev_ppa); + meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); + lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY); + rqd->ppa_list[i] = dev_ppa; + } + } + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery write timed out\n"); + } + reinit_completion(&wait); + + left_line_ppas -= rq_ppas; + left_ppas -= rq_ppas; + if (left_ppas > 0 && left_line_ppas) + goto next_pad_rq; + + return 0; +} + +/* When this function is called, it means that not all upper pages have been + * written in a page that contains valid data. In order to recover this data, we + * first find the write pointer on the device, then we pad all necessary + * sectors, and finally attempt to read the valid data + */ +static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + u64 w_ptr = 0, r_ptr; + int rq_ppas, rq_len; + int i, j; + int ret = 0; + int rec_round; + int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + + /* we could recover up until the line write pointer */ + r_ptr = line->cur_sec; + rec_round = 0; + +next_rq: + memset(rqd, 0, pblk_r_rq_size); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PREAD; + rqd->flags = pblk_set_read_mode(pblk); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); + ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + w_ptr += pblk->min_write_pgs; + ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) + rqd->ppa_list[i] = + addr_to_gen_ppa(pblk, w_ptr, line->id); + } + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery read timed out\n"); + } + reinit_completion(&wait); + + /* This should not happen since the read failed during normal recovery, + * but the media works funny sometimes... + */ + if (!rec_round++ && !rqd->error) { + rec_round = 0; + for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) + continue; + + pblk_update_map(pblk, lba, rqd->ppa_list[i]); + } + } + + /* Reached the end of the written line */ + if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { + int pad_secs, nr_error_bits, bit; + int ret; + + bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); + nr_error_bits = rqd->nr_ppas - bit; + + /* Roll back failed sectors */ + line->cur_sec -= nr_error_bits; + line->left_msecs += nr_error_bits; + bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); + + pad_secs = pblk_pad_distance(pblk); + if (pad_secs > line->left_msecs) + pad_secs = line->left_msecs; + + ret = pblk_recov_pad_oob(pblk, line, p, pad_secs); + if (ret) + pr_err("pblk: OOB padding failed (err:%d)\n", ret); + + ret = pblk_recov_read_oob(pblk, line, p, r_ptr); + if (ret) + pr_err("pblk: OOB read failed (err:%d)\n", ret); + + line->left_ssecs = line->left_msecs; + left_ppas = 0; + } + + left_ppas -= rq_ppas; + if (left_ppas > 0) + goto next_rq; + + return ret; +} + +static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p, int *done) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + u64 paddr; + int rq_ppas, rq_len; + int i, j; + int ret = 0; + int left_ppas = pblk_calc_sec_in_line(pblk, line); + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + + *done = 1; + +next_rq: + memset(rqd, 0, pblk_r_rq_size); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PREAD; + rqd->flags = pblk_set_read_mode(pblk); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); + ppa = addr_to_gen_ppa(pblk, paddr, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + paddr += pblk->min_write_pgs; + ppa = addr_to_gen_ppa(pblk, paddr, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++) + rqd->ppa_list[i] = + addr_to_gen_ppa(pblk, paddr, line->id); + } + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + bio_put(bio); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery read timed out\n"); + } + reinit_completion(&wait); + + /* Reached the end of the written line */ + if (rqd->error) { + int nr_error_bits, bit; + + bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); + nr_error_bits = rqd->nr_ppas - bit; + + /* Roll back failed sectors */ + line->cur_sec -= nr_error_bits; + line->left_msecs += nr_error_bits; + line->left_ssecs = line->left_msecs; + bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); + + left_ppas = 0; + rqd->nr_ppas = bit; + + if (rqd->error != NVM_RSP_ERR_EMPTYPAGE) + *done = 0; + } + + for (i = 0; i < rqd->nr_ppas; i++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) + continue; + + pblk_update_map(pblk, lba, rqd->ppa_list[i]); + } + + left_ppas -= rq_ppas; + if (left_ppas > 0) + goto next_rq; + + return ret; +} + +/* Scan line for lbas on out of bound area */ +static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_rq *rqd; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct pblk_recov_alloc p; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + int done, ret = 0; + + rqd = pblk_alloc_rqd(pblk, READ); + if (IS_ERR(rqd)) + return PTR_ERR(rqd); + + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); + if (!meta_list) { + ret = -ENOMEM; + goto free_rqd; + } + + ppa_list = (void *)(meta_list) + pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + + data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto free_meta_list; + } + + p.ppa_list = ppa_list; + p.meta_list = meta_list; + p.rqd = rqd; + p.data = data; + p.dma_ppa_list = dma_ppa_list; + p.dma_meta_list = dma_meta_list; + + ret = pblk_recov_scan_oob(pblk, line, p, &done); + if (ret) { + pr_err("pblk: could not recover L2P from OOB\n"); + goto out; + } + + if (!done) { + ret = pblk_recov_scan_all_oob(pblk, line, p); + if (ret) { + pr_err("pblk: could not recover L2P from OOB\n"); + goto out; + } + } + + if (pblk_line_is_full(line)) + pblk_line_recov_close(pblk, line); + +out: + kfree(data); +free_meta_list: + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); +free_rqd: + pblk_free_rqd(pblk, rqd, READ); + + return ret; +} + +/* Insert lines ordered by sequence number (seq_num) on list */ +static void pblk_recov_line_add_ordered(struct list_head *head, + struct pblk_line *line) +{ + struct pblk_line *t = NULL; + + list_for_each_entry(t, head, list) + if (t->seq_nr > line->seq_nr) + break; + + __list_add(&line->list, t->list.prev, &t->list); +} + +struct pblk_line *pblk_recov_l2p(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line, *tline, *data_line = NULL; + struct line_smeta *smeta; + struct line_emeta *emeta; + int found_lines = 0, recovered_lines = 0, open_lines = 0; + int is_next = 0; + int meta_line; + int i, valid_uuid = 0; + LIST_HEAD(recov_list); + + /* TODO: Implement FTL snapshot */ + + /* Scan recovery - takes place when FTL snapshot fails */ + spin_lock(&l_mg->free_lock); + meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); + set_bit(meta_line, &l_mg->meta_bitmap); + smeta = l_mg->sline_meta[meta_line].meta; + emeta = l_mg->eline_meta[meta_line].meta; + spin_unlock(&l_mg->free_lock); + + /* Order data lines using their sequence number */ + for (i = 0; i < l_mg->nr_lines; i++) { + u32 crc; + + line = &pblk->lines[i]; + + memset(smeta, 0, lm->smeta_len); + line->smeta = smeta; + line->lun_bitmap = ((void *)(smeta)) + + sizeof(struct line_smeta); + + /* Lines that cannot be read are assumed as not written here */ + if (pblk_line_read_smeta(pblk, line)) + continue; + + crc = pblk_calc_smeta_crc(pblk, smeta); + if (le32_to_cpu(smeta->crc) != crc) + continue; + + if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC) + continue; + + if (le16_to_cpu(smeta->header.version) != 1) { + pr_err("pblk: found incompatible line version %u\n", + smeta->header.version); + return ERR_PTR(-EINVAL); + } + + /* The first valid instance uuid is used for initialization */ + if (!valid_uuid) { + memcpy(pblk->instance_uuid, smeta->header.uuid, 16); + valid_uuid = 1; + } + + if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) { + pr_debug("pblk: ignore line %u due to uuid mismatch\n", + i); + continue; + } + + /* Update line metadata */ + spin_lock(&line->lock); + line->id = le32_to_cpu(line->smeta->header.id); + line->type = le16_to_cpu(line->smeta->header.type); + line->seq_nr = le64_to_cpu(line->smeta->seq_nr); + spin_unlock(&line->lock); + + /* Update general metadata */ + spin_lock(&l_mg->free_lock); + if (line->seq_nr >= l_mg->d_seq_nr) + l_mg->d_seq_nr = line->seq_nr + 1; + l_mg->nr_free_lines--; + spin_unlock(&l_mg->free_lock); + + if (pblk_line_recov_alloc(pblk, line)) + goto out; + + pblk_recov_line_add_ordered(&recov_list, line); + found_lines++; + pr_debug("pblk: recovering data line %d, seq:%llu\n", + line->id, smeta->seq_nr); + } + + if (!found_lines) { + pblk_setup_uuid(pblk); + + spin_lock(&l_mg->free_lock); + WARN_ON_ONCE(!test_and_clear_bit(meta_line, + &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); + + goto out; + } + + /* Verify closed blocks and recover this portion of L2P table*/ + list_for_each_entry_safe(line, tline, &recov_list, list) { + int off, nr_bb; + + recovered_lines++; + /* Calculate where emeta starts based on the line bb */ + off = lm->sec_per_line - lm->emeta_sec; + nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); + off -= nr_bb * geo->sec_per_pl; + + memset(emeta, 0, lm->emeta_len); + line->emeta = emeta; + line->emeta_ssec = off; + + if (pblk_line_read_emeta(pblk, line)) { + pblk_recov_l2p_from_oob(pblk, line); + goto next; + } + + if (pblk_recov_l2p_from_emeta(pblk, line)) + pblk_recov_l2p_from_oob(pblk, line); + +next: + if (pblk_line_is_full(line)) { + struct list_head *move_list; + + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_CLOSED; + move_list = pblk_line_gc_list(pblk, line); + spin_unlock(&line->lock); + + spin_lock(&l_mg->gc_lock); + list_move_tail(&line->list, move_list); + spin_unlock(&l_mg->gc_lock); + + mempool_free(line->map_bitmap, pblk->line_meta_pool); + line->map_bitmap = NULL; + line->smeta = NULL; + line->emeta = NULL; + } else { + if (open_lines > 1) + pr_err("pblk: failed to recover L2P\n"); + + open_lines++; + line->meta_line = meta_line; + data_line = line; + } + } + + spin_lock(&l_mg->free_lock); + if (!open_lines) { + WARN_ON_ONCE(!test_and_clear_bit(meta_line, + &l_mg->meta_bitmap)); + pblk_line_replace_data(pblk); + } else { + /* Allocate next line for preparation */ + l_mg->data_next = pblk_line_get(pblk); + if (l_mg->data_next) { + l_mg->data_next->seq_nr = l_mg->d_seq_nr++; + l_mg->data_next->type = PBLK_LINETYPE_DATA; + is_next = 1; + } + } + spin_unlock(&l_mg->free_lock); + + if (is_next) { + pblk_line_erase(pblk, l_mg->data_next); + pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); + } + +out: + if (found_lines != recovered_lines) + pr_err("pblk: failed to recover all found lines %d/%d\n", + found_lines, recovered_lines); + + return data_line; +} + +/* + * Pad until smeta can be read on current data line + */ +void pblk_recov_pad(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *line; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct nvm_rq *rqd; + struct pblk_recov_alloc p; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + + spin_lock(&l_mg->free_lock); + line = l_mg->data_line; + spin_unlock(&l_mg->free_lock); + + rqd = pblk_alloc_rqd(pblk, READ); + if (IS_ERR(rqd)) + return; + + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); + if (!meta_list) + goto free_rqd; + + ppa_list = (void *)(meta_list) + pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + + data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); + if (!data) + goto free_meta_list; + + p.ppa_list = ppa_list; + p.meta_list = meta_list; + p.rqd = rqd; + p.data = data; + p.dma_ppa_list = dma_ppa_list; + p.dma_meta_list = dma_meta_list; + + if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) { + pr_err("pblk: Tear down padding failed\n"); + goto free_data; + } + + pblk_line_close(pblk, line); + +free_data: + kfree(data); +free_meta_list: + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); +free_rqd: + pblk_free_rqd(pblk, rqd, READ); +} diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c new file mode 100644 index 000000000000..ab7cbb144f3f --- /dev/null +++ b/drivers/lightnvm/pblk-rl.c @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-rl.c - pblk's rate limiter for user I/O + * + */ + +#include "pblk.h" + +static void pblk_rl_kick_u_timer(struct pblk_rl *rl) +{ + mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000)); +} + +int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries) +{ + int rb_user_cnt = atomic_read(&rl->rb_user_cnt); + + return (!(rb_user_cnt + nr_entries > rl->rb_user_max)); +} + +int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries) +{ + int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt); + int rb_user_active; + + /* If there is no user I/O let GC take over space on the write buffer */ + rb_user_active = READ_ONCE(rl->rb_user_active); + return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active)); +} + +void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) +{ + atomic_add(nr_entries, &rl->rb_user_cnt); + + /* Release user I/O state. Protect from GC */ + smp_store_release(&rl->rb_user_active, 1); + pblk_rl_kick_u_timer(rl); +} + +void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries) +{ + atomic_add(nr_entries, &rl->rb_gc_cnt); +} + +void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc) +{ + atomic_sub(nr_user, &rl->rb_user_cnt); + atomic_sub(nr_gc, &rl->rb_gc_cnt); +} + +unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) +{ + return atomic_read(&rl->free_blocks); +} + +/* + * We check for (i) the number of free blocks in the current LUN and (ii) the + * total number of free blocks in the pblk instance. This is to even out the + * number of free blocks on each LUN when GC kicks in. + * + * Only the total number of free blocks is used to configure the rate limiter. + */ +static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max) +{ + unsigned long free_blocks = pblk_rl_nr_free_blks(rl); + + if (free_blocks >= rl->high) { + rl->rb_user_max = max - rl->rb_gc_rsv; + rl->rb_gc_max = rl->rb_gc_rsv; + rl->rb_state = PBLK_RL_HIGH; + } else if (free_blocks < rl->high) { + int shift = rl->high_pw - rl->rb_windows_pw; + int user_windows = free_blocks >> shift; + int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW; + int gc_max; + + rl->rb_user_max = user_max; + gc_max = max - rl->rb_user_max; + rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv); + + if (free_blocks > rl->low) + rl->rb_state = PBLK_RL_MID; + else + rl->rb_state = PBLK_RL_LOW; + } + + return rl->rb_state; +} + +void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv) +{ + rl->rb_gc_rsv = rl->rb_gc_max = rsv; +} + +void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) +{ + struct pblk *pblk = container_of(rl, struct pblk, rl); + int blk_in_line = atomic_read(&line->blk_in_line); + int ret; + + atomic_add(blk_in_line, &rl->free_blocks); + /* Rates will not change that often - no need to lock update */ + ret = pblk_rl_update_rates(rl, rl->rb_budget); + + if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) + pblk_gc_should_start(pblk); + else + pblk_gc_should_stop(pblk); +} + +void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) +{ + struct pblk *pblk = container_of(rl, struct pblk, rl); + int blk_in_line = atomic_read(&line->blk_in_line); + int ret; + + atomic_sub(blk_in_line, &rl->free_blocks); + + /* Rates will not change that often - no need to lock update */ + ret = pblk_rl_update_rates(rl, rl->rb_budget); + if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) + pblk_gc_should_start(pblk); + else + pblk_gc_should_stop(pblk); +} + +int pblk_rl_gc_thrs(struct pblk_rl *rl) +{ + return rl->high; +} + +int pblk_rl_sysfs_rate_show(struct pblk_rl *rl) +{ + return rl->rb_user_max; +} + +static void pblk_rl_u_timer(unsigned long data) +{ + struct pblk_rl *rl = (struct pblk_rl *)data; + + /* Release user I/O state. Protect from GC */ + smp_store_release(&rl->rb_user_active, 0); +} + +void pblk_rl_free(struct pblk_rl *rl) +{ + del_timer(&rl->u_timer); +} + +void pblk_rl_init(struct pblk_rl *rl, int budget) +{ + unsigned int rb_windows; + + rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS; + rl->low = rl->total_blocks / PBLK_USER_LOW_THRS; + rl->high_pw = get_count_order(rl->high); + + /* This will always be a power-of-2 */ + rb_windows = budget / PBLK_MAX_REQ_ADDRS; + rl->rb_windows_pw = get_count_order(rb_windows) + 1; + + /* To start with, all buffer is available to user I/O writers */ + rl->rb_budget = budget; + rl->rb_user_max = budget; + atomic_set(&rl->rb_user_cnt, 0); + rl->rb_gc_max = 0; + rl->rb_state = PBLK_RL_HIGH; + atomic_set(&rl->rb_gc_cnt, 0); + + setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl); + rl->rb_user_active = 0; +} diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c new file mode 100644 index 000000000000..f0af1d1ceeff --- /dev/null +++ b/drivers/lightnvm/pblk-sysfs.c @@ -0,0 +1,507 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Implementation of a physical block-device target for Open-channel SSDs. + * + * pblk-sysfs.c - pblk's sysfs + * + */ + +#include "pblk.h" + +static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_lun *rlun; + ssize_t sz = 0; + int i; + + for (i = 0; i < geo->nr_luns; i++) { + int active = 1; + + rlun = &pblk->luns[i]; + if (!down_trylock(&rlun->wr_sem)) { + active = 0; + up(&rlun->wr_sem); + } + sz += snprintf(page + sz, PAGE_SIZE - sz, + "pblk: pos:%d, ch:%d, lun:%d - %d\n", + i, + rlun->bppa.g.ch, + rlun->bppa.g.lun, + active); + } + + return sz; +} + +static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int free_blocks, total_blocks; + int rb_user_max, rb_user_cnt; + int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state; + + free_blocks = atomic_read(&pblk->rl.free_blocks); + rb_user_max = pblk->rl.rb_user_max; + rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); + rb_gc_max = pblk->rl.rb_gc_max; + rb_gc_rsv = pblk->rl.rb_gc_rsv; + rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt); + rb_budget = pblk->rl.rb_budget; + rb_state = pblk->rl.rb_state; + + total_blocks = geo->blks_per_lun * geo->nr_luns; + + return snprintf(page, PAGE_SIZE, + "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n", + rb_user_cnt, + rb_user_max, + rb_gc_cnt, + rb_gc_max, + rb_gc_rsv, + rb_state, + rb_budget, + pblk->rl.low, + pblk->rl.high, + free_blocks, + total_blocks, + READ_ONCE(pblk->rl.rb_user_active)); +} + +static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page) +{ + int gc_enabled, gc_active; + + pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active); + return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n", + gc_enabled, gc_active); +} + +static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page) +{ + ssize_t sz; + + sz = snprintf(page, PAGE_SIZE, + "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n", + atomic_long_read(&pblk->read_failed), + atomic_long_read(&pblk->read_high_ecc), + atomic_long_read(&pblk->read_empty), + atomic_long_read(&pblk->read_failed_gc), + atomic_long_read(&pblk->write_failed), + atomic_long_read(&pblk->erase_failed)); + + return sz; +} + +static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page) +{ + return pblk_rb_sysfs(&pblk->rwb, page); +} + +static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + ssize_t sz = 0; + + sz = snprintf(page, PAGE_SIZE - sz, + "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", + pblk->ppaf_bitsize, + pblk->ppaf.blk_offset, geo->ppaf.blk_len, + pblk->ppaf.pg_offset, geo->ppaf.pg_len, + pblk->ppaf.lun_offset, geo->ppaf.lun_len, + pblk->ppaf.ch_offset, geo->ppaf.ch_len, + pblk->ppaf.pln_offset, geo->ppaf.pln_len, + pblk->ppaf.sec_offset, geo->ppaf.sect_len); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", + geo->ppaf.blk_offset, geo->ppaf.blk_len, + geo->ppaf.pg_offset, geo->ppaf.pg_len, + geo->ppaf.lun_offset, geo->ppaf.lun_len, + geo->ppaf.ch_offset, geo->ppaf.ch_len, + geo->ppaf.pln_offset, geo->ppaf.pln_len, + geo->ppaf.sect_offset, geo->ppaf.sect_len); + + return sz; +} + +static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line; + ssize_t sz = 0; + int nr_free_lines; + int cur_data, cur_log; + int free_line_cnt = 0, closed_line_cnt = 0; + int d_line_cnt = 0, l_line_cnt = 0; + int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; + int free = 0, bad = 0, cor = 0; + int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; + int map_weight = 0, meta_weight = 0; + + spin_lock(&l_mg->free_lock); + cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1; + cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1; + nr_free_lines = l_mg->nr_free_lines; + + list_for_each_entry(line, &l_mg->free_list, list) + free_line_cnt++; + spin_unlock(&l_mg->free_lock); + + spin_lock(&l_mg->gc_lock); + list_for_each_entry(line, &l_mg->gc_full_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_full++; + } + + list_for_each_entry(line, &l_mg->gc_high_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_high++; + } + + list_for_each_entry(line, &l_mg->gc_mid_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_mid++; + } + + list_for_each_entry(line, &l_mg->gc_low_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_low++; + } + + list_for_each_entry(line, &l_mg->gc_empty_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_empty++; + } + + list_for_each_entry(line, &l_mg->free_list, list) + free++; + list_for_each_entry(line, &l_mg->bad_list, list) + bad++; + list_for_each_entry(line, &l_mg->corrupt_list, list) + cor++; + spin_unlock(&l_mg->gc_lock); + + spin_lock(&l_mg->free_lock); + if (l_mg->data_line) { + cur_sec = l_mg->data_line->cur_sec; + msecs = l_mg->data_line->left_msecs; + ssecs = l_mg->data_line->left_ssecs; + vsc = l_mg->data_line->vsc; + sec_in_line = l_mg->data_line->sec_in_line; + meta_weight = bitmap_weight(&l_mg->meta_bitmap, + PBLK_DATA_LINES); + map_weight = bitmap_weight(l_mg->data_line->map_bitmap, + lm->sec_per_line); + } + spin_unlock(&l_mg->free_lock); + + if (nr_free_lines != free_line_cnt) + pr_err("pblk: corrupted free line list\n"); + + sz = snprintf(page, PAGE_SIZE - sz, + "line: nluns:%d, nblks:%d, nsecs:%d\n", + geo->nr_luns, lm->blk_per_line, lm->sec_per_line); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n", + cur_data, cur_log, + free, nr_free_lines, bad, cor, + closed_line_cnt, + d_line_cnt, l_line_cnt, + l_mg->nr_lines); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n", + gc_full, gc_high, gc_mid, gc_low, gc_empty, + atomic_read(&pblk->gc.inflight_gc)); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n", + cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line, + map_weight, lm->sec_per_line, meta_weight); + + return sz; +} + +static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + ssize_t sz = 0; + + sz = snprintf(page, PAGE_SIZE - sz, + "smeta - len:%d, secs:%d\n", + lm->smeta_len, lm->smeta_sec); + sz += snprintf(page + sz, PAGE_SIZE - sz, + "emeta - len:%d, sec:%d, bb_start:%d\n", + lm->emeta_len, lm->emeta_sec, + lm->emeta_bb); + sz += snprintf(page + sz, PAGE_SIZE - sz, + "bitmap lengths: sec:%d, blk:%d, lun:%d\n", + lm->sec_bitmap_len, + lm->blk_bitmap_len, + lm->lun_bitmap_len); + sz += snprintf(page + sz, PAGE_SIZE - sz, + "blk_line:%d, sec_line:%d, sec_blk:%d\n", + lm->blk_per_line, + lm->sec_per_line, + geo->sec_per_blk); + + return sz; +} + +#ifdef CONFIG_NVM_DEBUG +static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) +{ + return snprintf(page, PAGE_SIZE, + "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", + atomic_long_read(&pblk->inflight_writes), + atomic_long_read(&pblk->inflight_reads), + atomic_long_read(&pblk->req_writes), + atomic_long_read(&pblk->nr_flush), + atomic_long_read(&pblk->padded_writes), + atomic_long_read(&pblk->padded_wb), + atomic_long_read(&pblk->sub_writes), + atomic_long_read(&pblk->sync_writes), + atomic_long_read(&pblk->compl_writes), + atomic_long_read(&pblk->recov_writes), + atomic_long_read(&pblk->recov_gc_writes), + atomic_long_read(&pblk->recov_gc_reads), + atomic_long_read(&pblk->sync_reads)); +} +#endif + +static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page, + size_t len) +{ + struct pblk_gc *gc = &pblk->gc; + size_t c_len; + int value; + + c_len = strcspn(page, "\n"); + if (c_len >= len) + return -EINVAL; + + if (kstrtouint(page, 0, &value)) + return -EINVAL; + + spin_lock(&gc->lock); + pblk_rl_set_gc_rsc(&pblk->rl, value); + spin_unlock(&gc->lock); + + return len; +} + +static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page, + size_t len) +{ + size_t c_len; + int force; + + c_len = strcspn(page, "\n"); + if (c_len >= len) + return -EINVAL; + + if (kstrtouint(page, 0, &force)) + return -EINVAL; + + if (force < 0 || force > 1) + return -EINVAL; + + pblk_gc_sysfs_force(pblk, force); + + return len; +} + +static struct attribute sys_write_luns = { + .name = "write_luns", + .mode = 0444, +}; + +static struct attribute sys_rate_limiter_attr = { + .name = "rate_limiter", + .mode = 0444, +}; + +static struct attribute sys_gc_state = { + .name = "gc_state", + .mode = 0444, +}; + +static struct attribute sys_errors_attr = { + .name = "errors", + .mode = 0444, +}; + +static struct attribute sys_rb_attr = { + .name = "write_buffer", + .mode = 0444, +}; + +static struct attribute sys_stats_ppaf_attr = { + .name = "ppa_format", + .mode = 0444, +}; + +static struct attribute sys_lines_attr = { + .name = "lines", + .mode = 0444, +}; + +static struct attribute sys_lines_info_attr = { + .name = "lines_info", + .mode = 0444, +}; + +static struct attribute sys_gc_force = { + .name = "gc_force", + .mode = 0200, +}; + +static struct attribute sys_gc_rl_max = { + .name = "gc_rl_max", + .mode = 0200, +}; + +#ifdef CONFIG_NVM_DEBUG +static struct attribute sys_stats_debug_attr = { + .name = "stats", + .mode = 0444, +}; +#endif + +static struct attribute *pblk_attrs[] = { + &sys_write_luns, + &sys_rate_limiter_attr, + &sys_errors_attr, + &sys_gc_state, + &sys_gc_force, + &sys_gc_rl_max, + &sys_rb_attr, + &sys_stats_ppaf_attr, + &sys_lines_attr, + &sys_lines_info_attr, +#ifdef CONFIG_NVM_DEBUG + &sys_stats_debug_attr, +#endif + NULL, +}; + +static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct pblk *pblk = container_of(kobj, struct pblk, kobj); + + if (strcmp(attr->name, "rate_limiter") == 0) + return pblk_sysfs_rate_limiter(pblk, buf); + else if (strcmp(attr->name, "write_luns") == 0) + return pblk_sysfs_luns_show(pblk, buf); + else if (strcmp(attr->name, "gc_state") == 0) + return pblk_sysfs_gc_state_show(pblk, buf); + else if (strcmp(attr->name, "errors") == 0) + return pblk_sysfs_stats(pblk, buf); + else if (strcmp(attr->name, "write_buffer") == 0) + return pblk_sysfs_write_buffer(pblk, buf); + else if (strcmp(attr->name, "ppa_format") == 0) + return pblk_sysfs_ppaf(pblk, buf); + else if (strcmp(attr->name, "lines") == 0) + return pblk_sysfs_lines(pblk, buf); + else if (strcmp(attr->name, "lines_info") == 0) + return pblk_sysfs_lines_info(pblk, buf); +#ifdef CONFIG_NVM_DEBUG + else if (strcmp(attr->name, "stats") == 0) + return pblk_sysfs_stats_debug(pblk, buf); +#endif + return 0; +} + +static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct pblk *pblk = container_of(kobj, struct pblk, kobj); + + if (strcmp(attr->name, "gc_rl_max") == 0) + return pblk_sysfs_rate_store(pblk, buf, len); + else if (strcmp(attr->name, "gc_force") == 0) + return pblk_sysfs_gc_force(pblk, buf, len); + + return 0; +} + +static const struct sysfs_ops pblk_sysfs_ops = { + .show = pblk_sysfs_show, + .store = pblk_sysfs_store, +}; + +static struct kobj_type pblk_ktype = { + .sysfs_ops = &pblk_sysfs_ops, + .default_attrs = pblk_attrs, +}; + +int pblk_sysfs_init(struct gendisk *tdisk) +{ + struct pblk *pblk = tdisk->private_data; + struct device *parent_dev = disk_to_dev(pblk->disk); + int ret; + + ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype, + kobject_get(&parent_dev->kobj), + "%s", "pblk"); + if (ret) { + pr_err("pblk: could not register %s/pblk\n", + tdisk->disk_name); + return ret; + } + + kobject_uevent(&pblk->kobj, KOBJ_ADD); + return 0; +} + +void pblk_sysfs_exit(struct gendisk *tdisk) +{ + struct pblk *pblk = tdisk->private_data; + + kobject_uevent(&pblk->kobj, KOBJ_REMOVE); + kobject_del(&pblk->kobj); + kobject_put(&pblk->kobj); +} diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c new file mode 100644 index 000000000000..aef6fd7c4a0c --- /dev/null +++ b/drivers/lightnvm/pblk-write.c @@ -0,0 +1,414 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-write.c - pblk's write path from write buffer to media + */ + +#include "pblk.h" + +static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line) +{ +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->sync_writes); +#endif + + /* Counter protected by rb sync lock */ + line->left_ssecs--; + if (!line->left_ssecs) + pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws); +} + +static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct bio *original_bio; + unsigned long ret; + int i; + + for (i = 0; i < c_ctx->nr_valid; i++) { + struct pblk_w_ctx *w_ctx; + struct ppa_addr p; + struct pblk_line *line; + + w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i); + + p = rqd->ppa_list[i]; + line = &pblk->lines[pblk_dev_ppa_to_line(p)]; + pblk_sync_line(pblk, line); + + while ((original_bio = bio_list_pop(&w_ctx->bios))) + bio_endio(original_bio); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes); +#endif + + ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); + + if (rqd->meta_list) + nvm_dev_dma_free(dev->parent, rqd->meta_list, + rqd->dma_meta_list); + + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, WRITE); + + return ret; +} + +static unsigned long pblk_end_queued_w_bio(struct pblk *pblk, + struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + list_del(&c_ctx->list); + return pblk_end_w_bio(pblk, rqd, c_ctx); +} + +static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct pblk_c_ctx *c, *r; + unsigned long flags; + unsigned long pos; + +#ifdef CONFIG_NVM_DEBUG + atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes); +#endif + + pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + + pos = pblk_rb_sync_init(&pblk->rwb, &flags); + if (pos == c_ctx->sentry) { + pos = pblk_end_w_bio(pblk, rqd, c_ctx); + +retry: + list_for_each_entry_safe(c, r, &pblk->compl_list, list) { + rqd = nvm_rq_from_c_ctx(c); + if (c->sentry == pos) { + pos = pblk_end_queued_w_bio(pblk, rqd, c); + goto retry; + } + } + } else { + WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd); + list_add_tail(&c_ctx->list, &pblk->compl_list); + } + pblk_rb_sync_end(&pblk->rwb, &flags); +} + +/* When a write fails, we are not sure whether the block has grown bad or a page + * range is more susceptible to write errors. If a high number of pages fail, we + * assume that the block is bad and we mark it accordingly. In all cases, we + * remap and resubmit the failed entries as fast as possible; if a flush is + * waiting on a completion, the whole stack would stall otherwise. + */ +static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) +{ + void *comp_bits = &rqd->ppa_status; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + struct pblk_rec_ctx *recovery; + struct ppa_addr *ppa_list = rqd->ppa_list; + int nr_ppas = rqd->nr_ppas; + unsigned int c_entries; + int bit, ret; + + if (unlikely(nr_ppas == 1)) + ppa_list = &rqd->ppa_addr; + + recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC); + if (!recovery) { + pr_err("pblk: could not allocate recovery context\n"); + return; + } + INIT_LIST_HEAD(&recovery->failed); + + bit = -1; + while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) { + struct pblk_rb_entry *entry; + struct ppa_addr ppa; + + /* Logic error */ + if (bit > c_ctx->nr_valid) { + WARN_ONCE(1, "pblk: corrupted write request\n"); + mempool_free(recovery, pblk->rec_pool); + goto out; + } + + ppa = ppa_list[bit]; + entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa); + if (!entry) { + pr_err("pblk: could not scan entry on write failure\n"); + mempool_free(recovery, pblk->rec_pool); + goto out; + } + + /* The list is filled first and emptied afterwards. No need for + * protecting it with a lock + */ + list_add_tail(&entry->index, &recovery->failed); + } + + c_entries = find_first_bit(comp_bits, nr_ppas); + ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries); + if (ret) { + pr_err("pblk: could not recover from write failure\n"); + mempool_free(recovery, pblk->rec_pool); + goto out; + } + + INIT_WORK(&recovery->ws_rec, pblk_submit_rec); + queue_work(pblk->kw_wq, &recovery->ws_rec); + +out: + pblk_complete_write(pblk, rqd, c_ctx); +} + +static void pblk_end_io_write(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + + if (rqd->error) { + pblk_log_write_err(pblk, rqd); + return pblk_end_w_fail(pblk, rqd); + } +#ifdef CONFIG_NVM_DEBUG + else + WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n"); +#endif + + pblk_complete_write(pblk, rqd, c_ctx); +} + +static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int nr_secs) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + /* Setup write request */ + rqd->opcode = NVM_OP_PWRITE; + rqd->nr_ppas = nr_secs; + rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->private = pblk; + rqd->end_io = pblk_end_io_write; + + rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd->dma_meta_list); + if (!rqd->meta_list) + return -ENOMEM; + + if (unlikely(nr_secs == 1)) + return 0; + + rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; + rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + + return 0; +} + +static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line *e_line = pblk_line_get_data_next(pblk); + struct ppa_addr erase_ppa; + unsigned int valid = c_ctx->nr_valid; + unsigned int padded = c_ctx->nr_padded; + unsigned int nr_secs = valid + padded; + unsigned long *lun_bitmap; + int ret = 0; + + lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); + if (!lun_bitmap) { + ret = -ENOMEM; + goto out; + } + c_ctx->lun_bitmap = lun_bitmap; + + ret = pblk_alloc_w_rq(pblk, rqd, nr_secs); + if (ret) { + kfree(lun_bitmap); + goto out; + } + + ppa_set_empty(&erase_ppa); + if (likely(!e_line || !atomic_read(&e_line->left_eblks))) + pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0); + else + pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, + valid, &erase_ppa); + +out: + if (unlikely(e_line && !ppa_empty(erase_ppa))) { + if (pblk_blk_erase_async(pblk, erase_ppa)) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int bit; + + atomic_inc(&e_line->left_eblks); + bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch; + WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap)); + up(&pblk->erase_sem); + } + } + + return ret; +} + +int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct pblk_line_meta *lm = &pblk->lm; + unsigned long *lun_bitmap; + int ret; + + lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); + if (!lun_bitmap) + return -ENOMEM; + + c_ctx->lun_bitmap = lun_bitmap; + + ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas); + if (ret) + return ret; + + pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0); + + rqd->ppa_status = (u64)0; + rqd->flags = pblk_set_progr_mode(pblk, WRITE); + + return ret; +} + +static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, + unsigned int secs_to_flush) +{ + int secs_to_sync; + + secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush); + +#ifdef CONFIG_NVM_DEBUG + if ((!secs_to_sync && secs_to_flush) + || (secs_to_sync < 0) + || (secs_to_sync > secs_avail && !secs_to_flush)) { + pr_err("pblk: bad sector calculation (a:%d,s:%d,f:%d)\n", + secs_avail, secs_to_sync, secs_to_flush); + } +#endif + + return secs_to_sync; +} + +static int pblk_submit_write(struct pblk *pblk) +{ + struct bio *bio; + struct nvm_rq *rqd; + struct pblk_c_ctx *c_ctx; + unsigned int pgs_read; + unsigned int secs_avail, secs_to_sync, secs_to_com; + unsigned int secs_to_flush; + unsigned long pos; + int err; + + /* If there are no sectors in the cache, flushes (bios without data) + * will be cleared on the cache threads + */ + secs_avail = pblk_rb_read_count(&pblk->rwb); + if (!secs_avail) + return 1; + + secs_to_flush = pblk_rb_sync_point_count(&pblk->rwb); + if (!secs_to_flush && secs_avail < pblk->min_write_pgs) + return 1; + + rqd = pblk_alloc_rqd(pblk, WRITE); + if (IS_ERR(rqd)) { + pr_err("pblk: cannot allocate write req.\n"); + return 1; + } + c_ctx = nvm_rq_to_pdu(rqd); + + bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs); + if (!bio) { + pr_err("pblk: cannot allocate write bio\n"); + goto fail_free_rqd; + } + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + rqd->bio = bio; + + secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); + if (secs_to_sync > pblk->max_write_pgs) { + pr_err("pblk: bad buffer sync calculation\n"); + goto fail_put_bio; + } + + secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync; + pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); + + pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos, + secs_to_sync, secs_avail); + if (!pgs_read) { + pr_err("pblk: corrupted write bio\n"); + goto fail_put_bio; + } + + if (c_ctx->nr_padded) + if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded)) + goto fail_put_bio; + + /* Assign lbas to ppas and populate request structure */ + err = pblk_setup_w_rq(pblk, rqd, c_ctx); + if (err) { + pr_err("pblk: could not setup write request\n"); + goto fail_free_bio; + } + + err = pblk_submit_io(pblk, rqd); + if (err) { + pr_err("pblk: I/O submission failed: %d\n", err); + goto fail_free_bio; + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(secs_to_sync, &pblk->sub_writes); +#endif + + return 0; + +fail_free_bio: + if (c_ctx->nr_padded) + pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded); +fail_put_bio: + bio_put(bio); +fail_free_rqd: + pblk_free_rqd(pblk, rqd, WRITE); + + return 1; +} + +int pblk_write_ts(void *data) +{ + struct pblk *pblk = data; + + while (!kthread_should_stop()) { + if (!pblk_submit_write(pblk)) + continue; + set_current_state(TASK_INTERRUPTIBLE); + io_schedule(); + } + + return 0; +} diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h new file mode 100644 index 000000000000..99f3186b5288 --- /dev/null +++ b/drivers/lightnvm/pblk.h @@ -0,0 +1,1121 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen (rrpc.h) + * Copyright (C) 2016 CNEX Labs + * Initial release: Matias Bjorling <matias@cnexlabs.com> + * Write buffering: Javier Gonzalez <javier@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Implementation of a Physical Block-device target for Open-channel SSDs. + * + */ + +#ifndef PBLK_H_ +#define PBLK_H_ + +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/bio.h> +#include <linux/module.h> +#include <linux/kthread.h> +#include <linux/vmalloc.h> +#include <linux/crc32.h> +#include <linux/uuid.h> + +#include <linux/lightnvm.h> + +/* Run only GC if less than 1/X blocks are free */ +#define GC_LIMIT_INVERSE 5 +#define GC_TIME_MSECS 1000 + +#define PBLK_SECTOR (512) +#define PBLK_EXPOSED_PAGE_SIZE (4096) +#define PBLK_MAX_REQ_ADDRS (64) +#define PBLK_MAX_REQ_ADDRS_PW (6) + +#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) + +#define PBLK_COMMAND_TIMEOUT_MS 30000 + +/* Max 512 LUNs per device */ +#define PBLK_MAX_LUNS_BITMAP (4) + +#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR) + +#define pblk_for_each_lun(pblk, rlun, i) \ + for ((i) = 0, rlun = &(pblk)->luns[0]; \ + (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)]) + +#define ERASE 2 /* READ = 0, WRITE = 1 */ + +enum { + /* IO Types */ + PBLK_IOTYPE_USER = 1 << 0, + PBLK_IOTYPE_GC = 1 << 1, + + /* Write buffer flags */ + PBLK_FLUSH_ENTRY = 1 << 2, + PBLK_WRITTEN_DATA = 1 << 3, + PBLK_SUBMITTED_ENTRY = 1 << 4, + PBLK_WRITABLE_ENTRY = 1 << 5, +}; + +enum { + PBLK_BLK_ST_OPEN = 0x1, + PBLK_BLK_ST_CLOSED = 0x2, +}; + +/* The number of GC lists and the rate-limiter states go together. This way the + * rate-limiter can dictate how much GC is needed based on resource utilization. + */ +#define PBLK_NR_GC_LISTS 3 +#define PBLK_MAX_GC_JOBS 32 + +enum { + PBLK_RL_HIGH = 1, + PBLK_RL_MID = 2, + PBLK_RL_LOW = 3, +}; + +struct pblk_sec_meta { + u64 reserved; + __le64 lba; +}; + +#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) + +/* write completion context */ +struct pblk_c_ctx { + struct list_head list; /* Head for out-of-order completion */ + + unsigned long *lun_bitmap; /* Luns used on current request */ + unsigned int sentry; + unsigned int nr_valid; + unsigned int nr_padded; +}; + +/* Read context */ +struct pblk_r_ctx { + struct bio *orig_bio; +}; + +/* Recovery context */ +struct pblk_rec_ctx { + struct pblk *pblk; + struct nvm_rq *rqd; + struct list_head failed; + struct work_struct ws_rec; +}; + +/* Write context */ +struct pblk_w_ctx { + struct bio_list bios; /* Original bios - used for completion + * in REQ_FUA, REQ_FLUSH case + */ + u64 lba; /* Logic addr. associated with entry */ + struct ppa_addr ppa; /* Physic addr. associated with entry */ + int flags; /* Write context flags */ +}; + +struct pblk_rb_entry { + struct ppa_addr cacheline; /* Cacheline for this entry */ + void *data; /* Pointer to data on this entry */ + struct pblk_w_ctx w_ctx; /* Context for this entry */ + struct list_head index; /* List head to enable indexes */ +}; + +#define EMPTY_ENTRY (~0U) + +struct pblk_rb_pages { + struct page *pages; + int order; + struct list_head list; +}; + +struct pblk_rb { + struct pblk_rb_entry *entries; /* Ring buffer entries */ + unsigned int mem; /* Write offset - points to next + * writable entry in memory + */ + unsigned int subm; /* Read offset - points to last entry + * that has been submitted to the media + * to be persisted + */ + unsigned int sync; /* Synced - backpointer that signals + * the last submitted entry that has + * been successfully persisted to media + */ + unsigned int sync_point; /* Sync point - last entry that must be + * flushed to the media. Used with + * REQ_FLUSH and REQ_FUA + */ + unsigned int l2p_update; /* l2p update point - next entry for + * which l2p mapping will be updated to + * contain a device ppa address (instead + * of a cacheline + */ + unsigned int nr_entries; /* Number of entries in write buffer - + * must be a power of two + */ + unsigned int seg_size; /* Size of the data segments being + * stored on each entry. Typically this + * will be 4KB + */ + + struct list_head pages; /* List of data pages */ + + spinlock_t w_lock; /* Write lock */ + spinlock_t s_lock; /* Sync lock */ + +#ifdef CONFIG_NVM_DEBUG + atomic_t inflight_sync_point; /* Not served REQ_FLUSH | REQ_FUA */ +#endif +}; + +#define PBLK_RECOVERY_SECTORS 16 + +struct pblk_lun { + struct ppa_addr bppa; + + u8 *bb_list; /* Bad block list for LUN. Only used on + * bring up. Bad blocks are managed + * within lines on run-time. + */ + + struct semaphore wr_sem; +}; + +struct pblk_gc_rq { + struct pblk_line *line; + void *data; + u64 *lba_list; + int nr_secs; + int secs_to_gc; + struct list_head list; +}; + +struct pblk_gc { + int gc_active; + int gc_enabled; + int gc_forced; + int gc_jobs_active; + atomic_t inflight_gc; + + struct task_struct *gc_ts; + struct task_struct *gc_writer_ts; + struct workqueue_struct *gc_reader_wq; + struct timer_list gc_timer; + + int w_entries; + struct list_head w_list; + + spinlock_t lock; + spinlock_t w_lock; +}; + +struct pblk_rl { + unsigned int high; /* Upper threshold for rate limiter (free run - + * user I/O rate limiter + */ + unsigned int low; /* Lower threshold for rate limiter (user I/O + * rate limiter - stall) + */ + unsigned int high_pw; /* High rounded up as a power of 2 */ + +#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent + * available blks + */ +#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */ + + int rb_windows_pw; /* Number of rate windows in the write buffer + * given as a power-of-2. This guarantees that + * when user I/O is being rate limited, there + * will be reserved enough space for the GC to + * place its payload. A window is of + * pblk->max_write_pgs size, which in NVMe is + * 64, i.e., 256kb. + */ + int rb_budget; /* Total number of entries available for I/O */ + int rb_user_max; /* Max buffer entries available for user I/O */ + atomic_t rb_user_cnt; /* User I/O buffer counter */ + int rb_gc_max; /* Max buffer entries available for GC I/O */ + int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ + int rb_state; /* Rate-limiter current state */ + atomic_t rb_gc_cnt; /* GC I/O buffer counter */ + + int rb_user_active; + struct timer_list u_timer; + + unsigned long long nr_secs; + unsigned long total_blocks; + atomic_t free_blocks; +}; + +#define PBLK_LINE_NR_LUN_BITMAP 2 +#define PBLK_LINE_NR_SEC_BITMAP 2 +#define PBLK_LINE_EMPTY (~0U) + +enum { + /* Line Types */ + PBLK_LINETYPE_FREE = 0, + PBLK_LINETYPE_LOG = 1, + PBLK_LINETYPE_DATA = 2, + + /* Line state */ + PBLK_LINESTATE_FREE = 10, + PBLK_LINESTATE_OPEN = 11, + PBLK_LINESTATE_CLOSED = 12, + PBLK_LINESTATE_GC = 13, + PBLK_LINESTATE_BAD = 14, + PBLK_LINESTATE_CORRUPT = 15, + + /* GC group */ + PBLK_LINEGC_NONE = 20, + PBLK_LINEGC_EMPTY = 21, + PBLK_LINEGC_LOW = 22, + PBLK_LINEGC_MID = 23, + PBLK_LINEGC_HIGH = 24, + PBLK_LINEGC_FULL = 25, +}; + +#define PBLK_MAGIC 0x70626c6b /*pblk*/ + +struct line_header { + __le32 crc; + __le32 identifier; /* pblk identifier */ + __u8 uuid[16]; /* instance uuid */ + __le16 type; /* line type */ + __le16 version; /* type version */ + __le32 id; /* line id for current line */ +}; + +struct line_smeta { + struct line_header header; + + __le32 crc; /* Full structure including struct crc */ + /* Previous line metadata */ + __le32 prev_id; /* Line id for previous line */ + + /* Current line metadata */ + __le64 seq_nr; /* Sequence number for current line */ + + /* Active writers */ + __le32 window_wr_lun; /* Number of parallel LUNs to write */ + + __le32 rsvd[2]; +}; + +/* + * Metadata Layout: + * 1. struct pblk_emeta + * 2. nr_lbas u64 forming lba list + * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line) + * 4. nr_luns bits (u64 format) forming line bad block bitmap + * + * 3. and 4. will be part of FTL log + */ +struct line_emeta { + struct line_header header; + + __le32 crc; /* Full structure including struct crc */ + + /* Previous line metadata */ + __le32 prev_id; /* Line id for prev line */ + + /* Current line metadata */ + __le64 seq_nr; /* Sequence number for current line */ + + /* Active writers */ + __le32 window_wr_lun; /* Number of parallel LUNs to write */ + + /* Bookkeeping for recovery */ + __le32 next_id; /* Line id for next line */ + __le64 nr_lbas; /* Number of lbas mapped in line */ + __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ +}; + +struct pblk_line { + struct pblk *pblk; + unsigned int id; /* Line number corresponds to the + * block line + */ + unsigned int seq_nr; /* Unique line sequence number */ + + int state; /* PBLK_LINESTATE_X */ + int type; /* PBLK_LINETYPE_X */ + int gc_group; /* PBLK_LINEGC_X */ + struct list_head list; /* Free, GC lists */ + + unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ + + struct line_smeta *smeta; /* Start metadata */ + struct line_emeta *emeta; /* End metadata */ + int meta_line; /* Metadata line id */ + u64 smeta_ssec; /* Sector where smeta starts */ + u64 emeta_ssec; /* Sector where emeta starts */ + + unsigned int sec_in_line; /* Number of usable secs in line */ + + atomic_t blk_in_line; /* Number of good blocks in line */ + unsigned long *blk_bitmap; /* Bitmap for valid/invalid blocks */ + unsigned long *erase_bitmap; /* Bitmap for erased blocks */ + + unsigned long *map_bitmap; /* Bitmap for mapped sectors in line */ + unsigned long *invalid_bitmap; /* Bitmap for invalid sectors in line */ + + atomic_t left_eblks; /* Blocks left for erasing */ + atomic_t left_seblks; /* Blocks left for sync erasing */ + + int left_msecs; /* Sectors left for mapping */ + int left_ssecs; /* Sectors left to sync */ + unsigned int cur_sec; /* Sector map pointer */ + unsigned int vsc; /* Valid sector count in line */ + + struct kref ref; /* Write buffer L2P references */ + + spinlock_t lock; /* Necessary for invalid_bitmap only */ +}; + +#define PBLK_DATA_LINES 4 + +enum{ + PBLK_KMALLOC_META = 1, + PBLK_VMALLOC_META = 2, +}; + +struct pblk_line_metadata { + void *meta; +}; + +struct pblk_line_mgmt { + int nr_lines; /* Total number of full lines */ + int nr_free_lines; /* Number of full lines in free list */ + + /* Free lists - use free_lock */ + struct list_head free_list; /* Full lines ready to use */ + struct list_head corrupt_list; /* Full lines corrupted */ + struct list_head bad_list; /* Full lines bad */ + + /* GC lists - use gc_lock */ + struct list_head *gc_lists[PBLK_NR_GC_LISTS]; + struct list_head gc_high_list; /* Full lines ready to GC, high isc */ + struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ + struct list_head gc_low_list; /* Full lines ready to GC, low isc */ + + struct list_head gc_full_list; /* Full lines ready to GC, no valid */ + struct list_head gc_empty_list; /* Full lines close, all valid */ + + struct pblk_line *log_line; /* Current FTL log line */ + struct pblk_line *data_line; /* Current data line */ + struct pblk_line *log_next; /* Next FTL log line */ + struct pblk_line *data_next; /* Next data line */ + + /* Metadata allocation type: VMALLOC | KMALLOC */ + int smeta_alloc_type; + int emeta_alloc_type; + + /* Pre-allocated metadata for data lines */ + struct pblk_line_metadata sline_meta[PBLK_DATA_LINES]; + struct pblk_line_metadata eline_meta[PBLK_DATA_LINES]; + unsigned long meta_bitmap; + + /* Helpers for fast bitmap calculations */ + unsigned long *bb_template; + unsigned long *bb_aux; + + unsigned long d_seq_nr; /* Data line unique sequence number */ + unsigned long l_seq_nr; /* Log line unique sequence number */ + + spinlock_t free_lock; + spinlock_t gc_lock; +}; + +struct pblk_line_meta { + unsigned int smeta_len; /* Total length for smeta */ + unsigned int smeta_sec; /* Sectors needed for smeta*/ + unsigned int emeta_len; /* Total length for emeta */ + unsigned int emeta_sec; /* Sectors needed for emeta*/ + unsigned int emeta_bb; /* Boundary for bb that affects emeta */ + unsigned int sec_bitmap_len; /* Length for sector bitmap in line */ + unsigned int blk_bitmap_len; /* Length for block bitmap in line */ + unsigned int lun_bitmap_len; /* Length for lun bitmap in line */ + + unsigned int blk_per_line; /* Number of blocks in a full line */ + unsigned int sec_per_line; /* Number of sectors in a line */ + unsigned int min_blk_line; /* Min. number of good blocks in line */ + + unsigned int mid_thrs; /* Threshold for GC mid list */ + unsigned int high_thrs; /* Threshold for GC high list */ +}; + +struct pblk_addr_format { + u64 ch_mask; + u64 lun_mask; + u64 pln_mask; + u64 blk_mask; + u64 pg_mask; + u64 sec_mask; + u8 ch_offset; + u8 lun_offset; + u8 pln_offset; + u8 blk_offset; + u8 pg_offset; + u8 sec_offset; +}; + +struct pblk { + struct nvm_tgt_dev *dev; + struct gendisk *disk; + + struct kobject kobj; + + struct pblk_lun *luns; + + struct pblk_line *lines; /* Line array */ + struct pblk_line_mgmt l_mg; /* Line management */ + struct pblk_line_meta lm; /* Line metadata */ + + int ppaf_bitsize; + struct pblk_addr_format ppaf; + + struct pblk_rb rwb; + + int min_write_pgs; /* Minimum amount of pages required by controller */ + int max_write_pgs; /* Maximum amount of pages supported by controller */ + int pgs_in_buffer; /* Number of pages that need to be held in buffer to + * guarantee successful reads. + */ + + sector_t capacity; /* Device capacity when bad blocks are subtracted */ + int over_pct; /* Percentage of device used for over-provisioning */ + + /* pblk provisioning values. Used by rate limiter */ + struct pblk_rl rl; + + struct semaphore erase_sem; + + unsigned char instance_uuid[16]; +#ifdef CONFIG_NVM_DEBUG + /* All debug counters apply to 4kb sector I/Os */ + atomic_long_t inflight_writes; /* Inflight writes (user and gc) */ + atomic_long_t padded_writes; /* Sectors padded due to flush/fua */ + atomic_long_t padded_wb; /* Sectors padded in write buffer */ + atomic_long_t nr_flush; /* Number of flush/fua I/O */ + atomic_long_t req_writes; /* Sectors stored on write buffer */ + atomic_long_t sub_writes; /* Sectors submitted from buffer */ + atomic_long_t sync_writes; /* Sectors synced to media */ + atomic_long_t compl_writes; /* Sectors completed in write bio */ + atomic_long_t inflight_reads; /* Inflight sector read requests */ + atomic_long_t sync_reads; /* Completed sector read requests */ + atomic_long_t recov_writes; /* Sectors submitted from recovery */ + atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */ + atomic_long_t recov_gc_reads; /* Sectors submitted from read GC */ +#endif + + spinlock_t lock; + + atomic_long_t read_failed; + atomic_long_t read_empty; + atomic_long_t read_high_ecc; + atomic_long_t read_failed_gc; + atomic_long_t write_failed; + atomic_long_t erase_failed; + + struct task_struct *writer_ts; + + /* Simple translation map of logical addresses to physical addresses. + * The logical addresses is known by the host system, while the physical + * addresses are used when writing to the disk block device. + */ + unsigned char *trans_map; + spinlock_t trans_lock; + + struct list_head compl_list; + + mempool_t *page_pool; + mempool_t *line_ws_pool; + mempool_t *rec_pool; + mempool_t *r_rq_pool; + mempool_t *w_rq_pool; + mempool_t *line_meta_pool; + + struct workqueue_struct *kw_wq; + struct timer_list wtimer; + + struct pblk_gc gc; +}; + +struct pblk_line_ws { + struct pblk *pblk; + struct pblk_line *line; + void *priv; + struct work_struct ws; +}; + +#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx)) +#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx)) + +/* + * pblk ring buffer operations + */ +int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, + unsigned int power_size, unsigned int power_seg_sz); +unsigned int pblk_rb_calculate_size(unsigned int nr_entries); +void *pblk_rb_entries_ref(struct pblk_rb *rb); +int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, + unsigned int nr_entries, unsigned int *pos); +int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos); +void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, unsigned int pos); +void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, + unsigned int pos); +struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); + +void pblk_rb_sync_l2p(struct pblk_rb *rb); +unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, + struct pblk_c_ctx *c_ctx, + unsigned int pos, + unsigned int nr_entries, + unsigned int count); +unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, + struct list_head *list, + unsigned int max); +int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, + u64 pos, int bio_iter); +unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); + +unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); +unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); +struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, + struct ppa_addr *ppa); +void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); +unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); + +unsigned int pblk_rb_read_count(struct pblk_rb *rb); +unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); + +int pblk_rb_tear_down_check(struct pblk_rb *rb); +int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos); +void pblk_rb_data_free(struct pblk_rb *rb); +ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); + +/* + * pblk core + */ +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw); +int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx); +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw); +void pblk_flush_writer(struct pblk *pblk); +struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); +void pblk_discard(struct pblk *pblk, struct bio *bio); +void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); +void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); +int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); +struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, + unsigned int nr_secs, unsigned int len, + gfp_t gfp_mask); +struct pblk_line *pblk_line_get(struct pblk *pblk); +struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); +struct pblk_line *pblk_line_replace_data(struct pblk *pblk); +int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); +void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); +struct pblk_line *pblk_line_get_data(struct pblk *pblk); +struct pblk_line *pblk_line_get_data_next(struct pblk *pblk); +int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); +int pblk_line_is_full(struct pblk_line *line); +void pblk_line_free(struct pblk *pblk, struct pblk_line *line); +void pblk_line_close_ws(struct work_struct *work); +void pblk_line_close(struct pblk *pblk, struct pblk_line *line); +void pblk_line_mark_bb(struct work_struct *work); +void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, + void (*work)(struct work_struct *)); +u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); +int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); +int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line); +int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); +void pblk_line_put(struct kref *ref); +struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); +u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); +int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, + unsigned long secs_to_flush); +void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, + unsigned long *lun_bitmap); +void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, + unsigned long *lun_bitmap); +void pblk_end_bio_sync(struct bio *bio); +void pblk_end_io_sync(struct nvm_rq *rqd); +int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, + int nr_pages); +void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line, + u64 paddr); +void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, + int nr_pages); +void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa); +void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa); +void pblk_update_map_cache(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa); +void pblk_update_map_dev(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa, struct ppa_addr entry_line); +int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, + struct pblk_line *gc_line); +void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, + u64 *lba_list, int nr_secs); +void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, + sector_t blba, int nr_secs); + +/* + * pblk user I/O write path + */ +int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, + unsigned long flags); +int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, + unsigned int nr_entries, unsigned int nr_rec_entries, + struct pblk_line *gc_line, unsigned long flags); + +/* + * pblk map + */ +void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int sentry, unsigned long *lun_bitmap, + unsigned int valid_secs, struct ppa_addr *erase_ppa); +void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, + unsigned long *lun_bitmap, unsigned int valid_secs, + unsigned int off); + +/* + * pblk write thread + */ +int pblk_write_ts(void *data); +void pblk_write_timer_fn(unsigned long data); +void pblk_write_should_kick(struct pblk *pblk); + +/* + * pblk read path + */ +int pblk_submit_read(struct pblk *pblk, struct bio *bio); +int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, + unsigned int nr_secs, unsigned int *secs_to_gc, + struct pblk_line *line); +/* + * pblk recovery + */ +void pblk_submit_rec(struct work_struct *work); +struct pblk_line *pblk_recov_l2p(struct pblk *pblk); +void pblk_recov_pad(struct pblk *pblk); +__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta); +int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, + struct pblk_rec_ctx *recovery, u64 *comp_bits, + unsigned int comp); + +/* + * pblk gc + */ +#define PBLK_GC_TRIES 3 + +int pblk_gc_init(struct pblk *pblk); +void pblk_gc_exit(struct pblk *pblk); +void pblk_gc_should_start(struct pblk *pblk); +void pblk_gc_should_stop(struct pblk *pblk); +int pblk_gc_status(struct pblk *pblk); +void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, + int *gc_active); +void pblk_gc_sysfs_force(struct pblk *pblk, int force); + +/* + * pblk rate limiter + */ +void pblk_rl_init(struct pblk_rl *rl, int budget); +void pblk_rl_free(struct pblk_rl *rl); +int pblk_rl_gc_thrs(struct pblk_rl *rl); +unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); +int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); +void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); +int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); +void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); +void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); +void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv); +int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); +void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); +void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); + +/* + * pblk sysfs + */ +int pblk_sysfs_init(struct gendisk *tdisk); +void pblk_sysfs_exit(struct gendisk *tdisk); + +static inline void *pblk_malloc(size_t size, int type, gfp_t flags) +{ + if (type == PBLK_KMALLOC_META) + return kmalloc(size, flags); + return vmalloc(size); +} + +static inline void pblk_mfree(void *ptr, int type) +{ + if (type == PBLK_KMALLOC_META) + kfree(ptr); + else + vfree(ptr); +} + +static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx) +{ + return c_ctx - sizeof(struct nvm_rq); +} + +static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta) +{ + return (emeta) + 1; +} + +#define NVM_MEM_PAGE_WRITE (8) + +static inline int pblk_pad_distance(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + + return NVM_MEM_PAGE_WRITE * geo->nr_luns * geo->sec_per_pl; +} + +static inline int pblk_dev_ppa_to_line(struct ppa_addr p) +{ + return p.g.blk; +} + +static inline int pblk_tgt_ppa_to_line(struct ppa_addr p) +{ + return p.g.blk; +} + +static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) +{ + return p.g.lun * geo->nr_chnls + p.g.ch; +} + +/* A block within a line corresponds to the lun */ +static inline int pblk_dev_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) +{ + return p.g.lun * geo->nr_chnls + p.g.ch; +} + +static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) +{ + struct ppa_addr ppa64; + + ppa64.ppa = 0; + + if (ppa32 == -1) { + ppa64.ppa = ADDR_EMPTY; + } else if (ppa32 & (1U << 31)) { + ppa64.c.line = ppa32 & ((~0U) >> 1); + ppa64.c.is_cached = 1; + } else { + ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >> + pblk->ppaf.blk_offset; + ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >> + pblk->ppaf.pg_offset; + ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >> + pblk->ppaf.lun_offset; + ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >> + pblk->ppaf.ch_offset; + ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >> + pblk->ppaf.pln_offset; + ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >> + pblk->ppaf.sec_offset; + } + + return ppa64; +} + +static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, + sector_t lba) +{ + struct ppa_addr ppa; + + if (pblk->ppaf_bitsize < 32) { + u32 *map = (u32 *)pblk->trans_map; + + ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); + } else { + struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; + + ppa = map[lba]; + } + + return ppa; +} + +static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) +{ + u32 ppa32 = 0; + + if (ppa64.ppa == ADDR_EMPTY) { + ppa32 = ~0U; + } else if (ppa64.c.is_cached) { + ppa32 |= ppa64.c.line; + ppa32 |= 1U << 31; + } else { + ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset; + ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset; + ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset; + ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset; + ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset; + ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset; + } + + return ppa32; +} + +static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa) +{ + if (pblk->ppaf_bitsize < 32) { + u32 *map = (u32 *)pblk->trans_map; + + map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); + } else { + u64 *map = (u64 *)pblk->trans_map; + + map[lba] = ppa.ppa; + } +} + +static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, + struct ppa_addr p) +{ + u64 paddr; + + paddr = 0; + paddr |= (u64)p.g.pg << pblk->ppaf.pg_offset; + paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; + paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; + paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; + paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; + + return paddr; +} + +static inline int pblk_ppa_empty(struct ppa_addr ppa_addr) +{ + return (ppa_addr.ppa == ADDR_EMPTY); +} + +static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr) +{ + ppa_addr->ppa = ADDR_EMPTY; +} + +static inline int pblk_addr_in_cache(struct ppa_addr ppa) +{ + return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached); +} + +static inline int pblk_addr_to_cacheline(struct ppa_addr ppa) +{ + return ppa.c.line; +} + +static inline struct ppa_addr pblk_cacheline_to_addr(int addr) +{ + struct ppa_addr p; + + p.c.line = addr; + p.c.is_cached = 1; + + return p; +} + +static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, + u64 line_id) +{ + struct ppa_addr ppa; + + ppa.ppa = 0; + ppa.g.blk = line_id; + ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; + ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; + ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; + ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; + ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; + + return ppa; +} + +static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr, + u64 line_id) +{ + struct ppa_addr ppa; + + ppa = addr_to_gen_ppa(pblk, paddr, line_id); + + return ppa; +} + +static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk, + struct line_smeta *smeta) +{ + u32 crc = ~(u32)0; + + crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc), + sizeof(struct line_header) - sizeof(crc)); + + return crc; +} + +static inline u32 pblk_calc_smeta_crc(struct pblk *pblk, + struct line_smeta *smeta) +{ + struct pblk_line_meta *lm = &pblk->lm; + u32 crc = ~(u32)0; + + crc = crc32_le(crc, (unsigned char *)smeta + + sizeof(struct line_header) + sizeof(crc), + lm->smeta_len - + sizeof(struct line_header) - sizeof(crc)); + + return crc; +} + +static inline u32 pblk_calc_emeta_crc(struct pblk *pblk, + struct line_emeta *emeta) +{ + struct pblk_line_meta *lm = &pblk->lm; + u32 crc = ~(u32)0; + + crc = crc32_le(crc, (unsigned char *)emeta + + sizeof(struct line_header) + sizeof(crc), + lm->emeta_len - + sizeof(struct line_header) - sizeof(crc)); + + return crc; +} + +static inline int pblk_set_progr_mode(struct pblk *pblk, int type) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int flags; + + flags = geo->plane_mode >> 1; + + if (type == WRITE) + flags |= NVM_IO_SCRAMBLE_ENABLE; + + return flags; +} + +static inline int pblk_set_read_mode(struct pblk *pblk) +{ + return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; +} + +#ifdef CONFIG_NVM_DEBUG +static inline void print_ppa(struct ppa_addr *p, char *msg, int error) +{ + if (p->c.is_cached) { + pr_err("ppa: (%s: %x) cache line: %llu\n", + msg, error, (u64)p->c.line); + } else { + pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n", + msg, error, + p->g.ch, p->g.lun, p->g.blk, + p->g.pg, p->g.pl, p->g.sec); + } +} + +static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd, + int error) +{ + int bit = -1; + + if (rqd->nr_ppas == 1) { + print_ppa(&rqd->ppa_addr, "rqd", error); + return; + } + + while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas, + bit + 1)) < rqd->nr_ppas) { + print_ppa(&rqd->ppa_list[bit], "rqd", error); + } + + pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status); +} +#endif + +static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr *ppas, int nr_ppas) +{ + struct nvm_geo *geo = &tgt_dev->geo; + struct ppa_addr *ppa; + int i; + + for (i = 0; i < nr_ppas; i++) { + ppa = &ppas[i]; + + if (!ppa->c.is_cached && + ppa->g.ch < geo->nr_chnls && + ppa->g.lun < geo->luns_per_chnl && + ppa->g.pl < geo->nr_planes && + ppa->g.blk < geo->blks_per_lun && + ppa->g.pg < geo->pgs_per_blk && + ppa->g.sec < geo->sec_per_pg) + continue; + +#ifdef CONFIG_NVM_DEBUG + print_ppa(ppa, "boundary", i); +#endif + return 1; + } + return 0; +} + +static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr) +{ + struct pblk_line_meta *lm = &pblk->lm; + + if (paddr > lm->sec_per_line) + return 1; + + return 0; +} + +static inline unsigned int pblk_get_bi_idx(struct bio *bio) +{ + return bio->bi_iter.bi_idx; +} + +static inline sector_t pblk_get_lba(struct bio *bio) +{ + return bio->bi_iter.bi_sector / NR_PHY_IN_LOG; +} + +static inline unsigned int pblk_get_secs(struct bio *bio) +{ + return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE; +} + +static inline sector_t pblk_get_sector(sector_t lba) +{ + return lba * NR_PHY_IN_LOG; +} + +static inline void pblk_setup_uuid(struct pblk *pblk) +{ + uuid_le uuid; + + uuid_le_gen(&uuid); + memcpy(pblk->instance_uuid, uuid.b, 16); +} +#endif /* PBLK_H_ */ diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index e00b1d7b976f..cf0e28a0ff61 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -318,10 +318,6 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) } page = mempool_alloc(rrpc->page_pool, GFP_NOIO); - if (!page) { - bio_put(bio); - return -ENOMEM; - } while ((slot = find_first_zero_bit(rblk->invalid_pages, nr_sec_per_blk)) < nr_sec_per_blk) { @@ -414,7 +410,6 @@ static void rrpc_block_gc(struct work_struct *work) struct rrpc *rrpc = gcb->rrpc; struct rrpc_block *rblk = gcb->rblk; struct rrpc_lun *rlun = rblk->rlun; - struct nvm_tgt_dev *dev = rrpc->dev; struct ppa_addr ppa; mempool_free(gcb, rrpc->gcb_pool); @@ -430,7 +425,7 @@ static void rrpc_block_gc(struct work_struct *work) ppa.g.lun = rlun->bppa.g.lun; ppa.g.blk = rblk->id; - if (nvm_erase_blk(dev, &ppa, 0)) + if (nvm_erase_sync(rrpc->dev, &ppa, 1)) goto put_back; rrpc_put_blk(rrpc, rblk); @@ -822,7 +817,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, for (i = 0; i < npages; i++) { /* We assume that mapping occurs at 4KB granularity */ - BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); + BUG_ON(!(laddr + i < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr + i]; if (gp->rblk) { @@ -851,7 +846,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) return NVM_IO_REQUEUE; - BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); + BUG_ON(!(laddr < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr]; if (gp->rblk) { @@ -1007,11 +1002,6 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) } rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); - if (!rqd) { - pr_err_ratelimited("rrpc: not able to queue bio."); - bio_io_error(bio); - return BLK_QC_T_NONE; - } memset(rqd, 0, sizeof(struct nvm_rq)); err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE); @@ -1275,8 +1265,10 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun) } nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) - return nr_blks; + if (nr_blks < 0) { + ret = nr_blks; + goto out; + } for (i = 0; i < nr_blks; i++) { if (blks[i] == NVM_BLK_T_FREE) @@ -1514,7 +1506,8 @@ err: static struct nvm_tgt_type tt_rrpc; -static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk) +static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, + int flags) { struct request_queue *bqueue = dev->q; struct request_queue *tqueue = tdisk->queue; diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index ceff415f201c..ee1a3d9147ef 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -144,12 +144,22 @@ config XGENE_SLIMPRO_MBOX want to use the APM X-Gene SLIMpro IPCM support. config BCM_PDC_MBOX - tristate "Broadcom PDC Mailbox" - depends on ARM64 || COMPILE_TEST + tristate "Broadcom FlexSparx DMA Mailbox" + depends on ARCH_BCM_IPROC || COMPILE_TEST depends on HAS_DMA + help + Mailbox implementation for the Broadcom FlexSparx DMA ring manager, + which provides access to various offload engines on Broadcom + SoCs, including FA2/FA+ on Northstar Plus and PDC on Northstar 2. + +config BCM_FLEXRM_MBOX + tristate "Broadcom FlexRM Mailbox" + depends on ARM64 + depends on HAS_DMA + select GENERIC_MSI_IRQ_DOMAIN default ARCH_BCM_IPROC help - Mailbox implementation for the Broadcom PDC ring manager, + Mailbox implementation of the Broadcom FlexRM ring manager, which provides access to various offload engines on Broadcom - SoCs. Say Y here if you want to use the Broadcom PDC. + SoCs. Say Y here if you want to use the Broadcom FlexRM. endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 7dde4f609ae8..e2bcb03cd35b 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -30,4 +30,6 @@ obj-$(CONFIG_HI6220_MBOX) += hi6220-mailbox.o obj-$(CONFIG_BCM_PDC_MBOX) += bcm-pdc-mailbox.o +obj-$(CONFIG_BCM_FLEXRM_MBOX) += bcm-flexrm-mailbox.o + obj-$(CONFIG_TEGRA_HSP_MBOX) += tegra-hsp.o diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c new file mode 100644 index 000000000000..da67882caa7b --- /dev/null +++ b/drivers/mailbox/bcm-flexrm-mailbox.c @@ -0,0 +1,1595 @@ +/* Broadcom FlexRM Mailbox Driver + * + * Copyright (C) 2017 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Each Broadcom FlexSparx4 offload engine is implemented as an + * extension to Broadcom FlexRM ring manager. The FlexRM ring + * manager provides a set of rings which can be used to submit + * work to a FlexSparx4 offload engine. + * + * This driver creates a mailbox controller using a set of FlexRM + * rings where each mailbox channel represents a separate FlexRM ring. + */ + +#include <asm/barrier.h> +#include <asm/byteorder.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/mailbox_controller.h> +#include <linux/mailbox_client.h> +#include <linux/mailbox/brcm-message.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> + +/* ====== FlexRM register defines ===== */ + +/* FlexRM configuration */ +#define RING_REGS_SIZE 0x10000 +#define RING_DESC_SIZE 8 +#define RING_DESC_INDEX(offset) \ + ((offset) / RING_DESC_SIZE) +#define RING_DESC_OFFSET(index) \ + ((index) * RING_DESC_SIZE) +#define RING_MAX_REQ_COUNT 1024 +#define RING_BD_ALIGN_ORDER 12 +#define RING_BD_ALIGN_CHECK(addr) \ + (!((addr) & ((0x1 << RING_BD_ALIGN_ORDER) - 1))) +#define RING_BD_TOGGLE_INVALID(offset) \ + (((offset) >> RING_BD_ALIGN_ORDER) & 0x1) +#define RING_BD_TOGGLE_VALID(offset) \ + (!RING_BD_TOGGLE_INVALID(offset)) +#define RING_BD_DESC_PER_REQ 32 +#define RING_BD_DESC_COUNT \ + (RING_MAX_REQ_COUNT * RING_BD_DESC_PER_REQ) +#define RING_BD_SIZE \ + (RING_BD_DESC_COUNT * RING_DESC_SIZE) +#define RING_CMPL_ALIGN_ORDER 13 +#define RING_CMPL_DESC_COUNT RING_MAX_REQ_COUNT +#define RING_CMPL_SIZE \ + (RING_CMPL_DESC_COUNT * RING_DESC_SIZE) +#define RING_VER_MAGIC 0x76303031 + +/* Per-Ring register offsets */ +#define RING_VER 0x000 +#define RING_BD_START_ADDR 0x004 +#define RING_BD_READ_PTR 0x008 +#define RING_BD_WRITE_PTR 0x00c +#define RING_BD_READ_PTR_DDR_LS 0x010 +#define RING_BD_READ_PTR_DDR_MS 0x014 +#define RING_CMPL_START_ADDR 0x018 +#define RING_CMPL_WRITE_PTR 0x01c +#define RING_NUM_REQ_RECV_LS 0x020 +#define RING_NUM_REQ_RECV_MS 0x024 +#define RING_NUM_REQ_TRANS_LS 0x028 +#define RING_NUM_REQ_TRANS_MS 0x02c +#define RING_NUM_REQ_OUTSTAND 0x030 +#define RING_CONTROL 0x034 +#define RING_FLUSH_DONE 0x038 +#define RING_MSI_ADDR_LS 0x03c +#define RING_MSI_ADDR_MS 0x040 +#define RING_MSI_CONTROL 0x048 +#define RING_BD_READ_PTR_DDR_CONTROL 0x04c +#define RING_MSI_DATA_VALUE 0x064 + +/* Register RING_BD_START_ADDR fields */ +#define BD_LAST_UPDATE_HW_SHIFT 28 +#define BD_LAST_UPDATE_HW_MASK 0x1 +#define BD_START_ADDR_VALUE(pa) \ + ((u32)((((dma_addr_t)(pa)) >> RING_BD_ALIGN_ORDER) & 0x0fffffff)) +#define BD_START_ADDR_DECODE(val) \ + ((dma_addr_t)((val) & 0x0fffffff) << RING_BD_ALIGN_ORDER) + +/* Register RING_CMPL_START_ADDR fields */ +#define CMPL_START_ADDR_VALUE(pa) \ + ((u32)((((u64)(pa)) >> RING_CMPL_ALIGN_ORDER) & 0x03ffffff)) + +/* Register RING_CONTROL fields */ +#define CONTROL_MASK_DISABLE_CONTROL 12 +#define CONTROL_FLUSH_SHIFT 5 +#define CONTROL_ACTIVE_SHIFT 4 +#define CONTROL_RATE_ADAPT_MASK 0xf +#define CONTROL_RATE_DYNAMIC 0x0 +#define CONTROL_RATE_FAST 0x8 +#define CONTROL_RATE_MEDIUM 0x9 +#define CONTROL_RATE_SLOW 0xa +#define CONTROL_RATE_IDLE 0xb + +/* Register RING_FLUSH_DONE fields */ +#define FLUSH_DONE_MASK 0x1 + +/* Register RING_MSI_CONTROL fields */ +#define MSI_TIMER_VAL_SHIFT 16 +#define MSI_TIMER_VAL_MASK 0xffff +#define MSI_ENABLE_SHIFT 15 +#define MSI_ENABLE_MASK 0x1 +#define MSI_COUNT_SHIFT 0 +#define MSI_COUNT_MASK 0x3ff + +/* Register RING_BD_READ_PTR_DDR_CONTROL fields */ +#define BD_READ_PTR_DDR_TIMER_VAL_SHIFT 16 +#define BD_READ_PTR_DDR_TIMER_VAL_MASK 0xffff +#define BD_READ_PTR_DDR_ENABLE_SHIFT 15 +#define BD_READ_PTR_DDR_ENABLE_MASK 0x1 + +/* ====== FlexRM ring descriptor defines ===== */ + +/* Completion descriptor format */ +#define CMPL_OPAQUE_SHIFT 0 +#define CMPL_OPAQUE_MASK 0xffff +#define CMPL_ENGINE_STATUS_SHIFT 16 +#define CMPL_ENGINE_STATUS_MASK 0xffff +#define CMPL_DME_STATUS_SHIFT 32 +#define CMPL_DME_STATUS_MASK 0xffff +#define CMPL_RM_STATUS_SHIFT 48 +#define CMPL_RM_STATUS_MASK 0xffff + +/* Completion DME status code */ +#define DME_STATUS_MEM_COR_ERR BIT(0) +#define DME_STATUS_MEM_UCOR_ERR BIT(1) +#define DME_STATUS_FIFO_UNDERFLOW BIT(2) +#define DME_STATUS_FIFO_OVERFLOW BIT(3) +#define DME_STATUS_RRESP_ERR BIT(4) +#define DME_STATUS_BRESP_ERR BIT(5) +#define DME_STATUS_ERROR_MASK (DME_STATUS_MEM_COR_ERR | \ + DME_STATUS_MEM_UCOR_ERR | \ + DME_STATUS_FIFO_UNDERFLOW | \ + DME_STATUS_FIFO_OVERFLOW | \ + DME_STATUS_RRESP_ERR | \ + DME_STATUS_BRESP_ERR) + +/* Completion RM status code */ +#define RM_STATUS_CODE_SHIFT 0 +#define RM_STATUS_CODE_MASK 0x3ff +#define RM_STATUS_CODE_GOOD 0x0 +#define RM_STATUS_CODE_AE_TIMEOUT 0x3ff + +/* General descriptor format */ +#define DESC_TYPE_SHIFT 60 +#define DESC_TYPE_MASK 0xf +#define DESC_PAYLOAD_SHIFT 0 +#define DESC_PAYLOAD_MASK 0x0fffffffffffffff + +/* Null descriptor format */ +#define NULL_TYPE 0 +#define NULL_TOGGLE_SHIFT 58 +#define NULL_TOGGLE_MASK 0x1 + +/* Header descriptor format */ +#define HEADER_TYPE 1 +#define HEADER_TOGGLE_SHIFT 58 +#define HEADER_TOGGLE_MASK 0x1 +#define HEADER_ENDPKT_SHIFT 57 +#define HEADER_ENDPKT_MASK 0x1 +#define HEADER_STARTPKT_SHIFT 56 +#define HEADER_STARTPKT_MASK 0x1 +#define HEADER_BDCOUNT_SHIFT 36 +#define HEADER_BDCOUNT_MASK 0x1f +#define HEADER_BDCOUNT_MAX HEADER_BDCOUNT_MASK +#define HEADER_FLAGS_SHIFT 16 +#define HEADER_FLAGS_MASK 0xffff +#define HEADER_OPAQUE_SHIFT 0 +#define HEADER_OPAQUE_MASK 0xffff + +/* Source (SRC) descriptor format */ +#define SRC_TYPE 2 +#define SRC_LENGTH_SHIFT 44 +#define SRC_LENGTH_MASK 0xffff +#define SRC_ADDR_SHIFT 0 +#define SRC_ADDR_MASK 0x00000fffffffffff + +/* Destination (DST) descriptor format */ +#define DST_TYPE 3 +#define DST_LENGTH_SHIFT 44 +#define DST_LENGTH_MASK 0xffff +#define DST_ADDR_SHIFT 0 +#define DST_ADDR_MASK 0x00000fffffffffff + +/* Immediate (IMM) descriptor format */ +#define IMM_TYPE 4 +#define IMM_DATA_SHIFT 0 +#define IMM_DATA_MASK 0x0fffffffffffffff + +/* Next pointer (NPTR) descriptor format */ +#define NPTR_TYPE 5 +#define NPTR_TOGGLE_SHIFT 58 +#define NPTR_TOGGLE_MASK 0x1 +#define NPTR_ADDR_SHIFT 0 +#define NPTR_ADDR_MASK 0x00000fffffffffff + +/* Mega source (MSRC) descriptor format */ +#define MSRC_TYPE 6 +#define MSRC_LENGTH_SHIFT 44 +#define MSRC_LENGTH_MASK 0xffff +#define MSRC_ADDR_SHIFT 0 +#define MSRC_ADDR_MASK 0x00000fffffffffff + +/* Mega destination (MDST) descriptor format */ +#define MDST_TYPE 7 +#define MDST_LENGTH_SHIFT 44 +#define MDST_LENGTH_MASK 0xffff +#define MDST_ADDR_SHIFT 0 +#define MDST_ADDR_MASK 0x00000fffffffffff + +/* Source with tlast (SRCT) descriptor format */ +#define SRCT_TYPE 8 +#define SRCT_LENGTH_SHIFT 44 +#define SRCT_LENGTH_MASK 0xffff +#define SRCT_ADDR_SHIFT 0 +#define SRCT_ADDR_MASK 0x00000fffffffffff + +/* Destination with tlast (DSTT) descriptor format */ +#define DSTT_TYPE 9 +#define DSTT_LENGTH_SHIFT 44 +#define DSTT_LENGTH_MASK 0xffff +#define DSTT_ADDR_SHIFT 0 +#define DSTT_ADDR_MASK 0x00000fffffffffff + +/* Immediate with tlast (IMMT) descriptor format */ +#define IMMT_TYPE 10 +#define IMMT_DATA_SHIFT 0 +#define IMMT_DATA_MASK 0x0fffffffffffffff + +/* Descriptor helper macros */ +#define DESC_DEC(_d, _s, _m) (((_d) >> (_s)) & (_m)) +#define DESC_ENC(_d, _v, _s, _m) \ + do { \ + (_d) &= ~((u64)(_m) << (_s)); \ + (_d) |= (((u64)(_v) & (_m)) << (_s)); \ + } while (0) + +/* ====== FlexRM data structures ===== */ + +struct flexrm_ring { + /* Unprotected members */ + int num; + struct flexrm_mbox *mbox; + void __iomem *regs; + bool irq_requested; + unsigned int irq; + unsigned int msi_timer_val; + unsigned int msi_count_threshold; + struct ida requests_ida; + struct brcm_message *requests[RING_MAX_REQ_COUNT]; + void *bd_base; + dma_addr_t bd_dma_base; + u32 bd_write_offset; + void *cmpl_base; + dma_addr_t cmpl_dma_base; + /* Protected members */ + spinlock_t lock; + struct brcm_message *last_pending_msg; + u32 cmpl_read_offset; +}; + +struct flexrm_mbox { + struct device *dev; + void __iomem *regs; + u32 num_rings; + struct flexrm_ring *rings; + struct dma_pool *bd_pool; + struct dma_pool *cmpl_pool; + struct mbox_controller controller; +}; + +/* ====== FlexRM ring descriptor helper routines ===== */ + +static u64 flexrm_read_desc(void *desc_ptr) +{ + return le64_to_cpu(*((u64 *)desc_ptr)); +} + +static void flexrm_write_desc(void *desc_ptr, u64 desc) +{ + *((u64 *)desc_ptr) = cpu_to_le64(desc); +} + +static u32 flexrm_cmpl_desc_to_reqid(u64 cmpl_desc) +{ + return (u32)(cmpl_desc & CMPL_OPAQUE_MASK); +} + +static int flexrm_cmpl_desc_to_error(u64 cmpl_desc) +{ + u32 status; + + status = DESC_DEC(cmpl_desc, CMPL_DME_STATUS_SHIFT, + CMPL_DME_STATUS_MASK); + if (status & DME_STATUS_ERROR_MASK) + return -EIO; + + status = DESC_DEC(cmpl_desc, CMPL_RM_STATUS_SHIFT, + CMPL_RM_STATUS_MASK); + status &= RM_STATUS_CODE_MASK; + if (status == RM_STATUS_CODE_AE_TIMEOUT) + return -ETIMEDOUT; + + return 0; +} + +static bool flexrm_is_next_table_desc(void *desc_ptr) +{ + u64 desc = flexrm_read_desc(desc_ptr); + u32 type = DESC_DEC(desc, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + + return (type == NPTR_TYPE) ? true : false; +} + +static u64 flexrm_next_table_desc(u32 toggle, dma_addr_t next_addr) +{ + u64 desc = 0; + + DESC_ENC(desc, NPTR_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, toggle, NPTR_TOGGLE_SHIFT, NPTR_TOGGLE_MASK); + DESC_ENC(desc, next_addr, NPTR_ADDR_SHIFT, NPTR_ADDR_MASK); + + return desc; +} + +static u64 flexrm_null_desc(u32 toggle) +{ + u64 desc = 0; + + DESC_ENC(desc, NULL_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, toggle, NULL_TOGGLE_SHIFT, NULL_TOGGLE_MASK); + + return desc; +} + +static u32 flexrm_estimate_header_desc_count(u32 nhcnt) +{ + u32 hcnt = nhcnt / HEADER_BDCOUNT_MAX; + + if (!(nhcnt % HEADER_BDCOUNT_MAX)) + hcnt += 1; + + return hcnt; +} + +static void flexrm_flip_header_toogle(void *desc_ptr) +{ + u64 desc = flexrm_read_desc(desc_ptr); + + if (desc & ((u64)0x1 << HEADER_TOGGLE_SHIFT)) + desc &= ~((u64)0x1 << HEADER_TOGGLE_SHIFT); + else + desc |= ((u64)0x1 << HEADER_TOGGLE_SHIFT); + + flexrm_write_desc(desc_ptr, desc); +} + +static u64 flexrm_header_desc(u32 toggle, u32 startpkt, u32 endpkt, + u32 bdcount, u32 flags, u32 opaque) +{ + u64 desc = 0; + + DESC_ENC(desc, HEADER_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, toggle, HEADER_TOGGLE_SHIFT, HEADER_TOGGLE_MASK); + DESC_ENC(desc, startpkt, HEADER_STARTPKT_SHIFT, HEADER_STARTPKT_MASK); + DESC_ENC(desc, endpkt, HEADER_ENDPKT_SHIFT, HEADER_ENDPKT_MASK); + DESC_ENC(desc, bdcount, HEADER_BDCOUNT_SHIFT, HEADER_BDCOUNT_MASK); + DESC_ENC(desc, flags, HEADER_FLAGS_SHIFT, HEADER_FLAGS_MASK); + DESC_ENC(desc, opaque, HEADER_OPAQUE_SHIFT, HEADER_OPAQUE_MASK); + + return desc; +} + +static void flexrm_enqueue_desc(u32 nhpos, u32 nhcnt, u32 reqid, + u64 desc, void **desc_ptr, u32 *toggle, + void *start_desc, void *end_desc) +{ + u64 d; + u32 nhavail, _toggle, _startpkt, _endpkt, _bdcount; + + /* Sanity check */ + if (nhcnt <= nhpos) + return; + + /* + * Each request or packet start with a HEADER descriptor followed + * by one or more non-HEADER descriptors (SRC, SRCT, MSRC, DST, + * DSTT, MDST, IMM, and IMMT). The number of non-HEADER descriptors + * following a HEADER descriptor is represented by BDCOUNT field + * of HEADER descriptor. The max value of BDCOUNT field is 31 which + * means we can only have 31 non-HEADER descriptors following one + * HEADER descriptor. + * + * In general use, number of non-HEADER descriptors can easily go + * beyond 31. To tackle this situation, we have packet (or request) + * extenstion bits (STARTPKT and ENDPKT) in the HEADER descriptor. + * + * To use packet extension, the first HEADER descriptor of request + * (or packet) will have STARTPKT=1 and ENDPKT=0. The intermediate + * HEADER descriptors will have STARTPKT=0 and ENDPKT=0. The last + * HEADER descriptor will have STARTPKT=0 and ENDPKT=1. Also, the + * TOGGLE bit of the first HEADER will be set to invalid state to + * ensure that FlexRM does not start fetching descriptors till all + * descriptors are enqueued. The user of this function will flip + * the TOGGLE bit of first HEADER after all descriptors are + * enqueued. + */ + + if ((nhpos % HEADER_BDCOUNT_MAX == 0) && (nhcnt - nhpos)) { + /* Prepare the header descriptor */ + nhavail = (nhcnt - nhpos); + _toggle = (nhpos == 0) ? !(*toggle) : (*toggle); + _startpkt = (nhpos == 0) ? 0x1 : 0x0; + _endpkt = (nhavail <= HEADER_BDCOUNT_MAX) ? 0x1 : 0x0; + _bdcount = (nhavail <= HEADER_BDCOUNT_MAX) ? + nhavail : HEADER_BDCOUNT_MAX; + if (nhavail <= HEADER_BDCOUNT_MAX) + _bdcount = nhavail; + else + _bdcount = HEADER_BDCOUNT_MAX; + d = flexrm_header_desc(_toggle, _startpkt, _endpkt, + _bdcount, 0x0, reqid); + + /* Write header descriptor */ + flexrm_write_desc(*desc_ptr, d); + + /* Point to next descriptor */ + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + + /* Skip next pointer descriptors */ + while (flexrm_is_next_table_desc(*desc_ptr)) { + *toggle = (*toggle) ? 0 : 1; + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + } + } + + /* Write desired descriptor */ + flexrm_write_desc(*desc_ptr, desc); + + /* Point to next descriptor */ + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + + /* Skip next pointer descriptors */ + while (flexrm_is_next_table_desc(*desc_ptr)) { + *toggle = (*toggle) ? 0 : 1; + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + } +} + +static u64 flexrm_src_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, SRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, SRC_LENGTH_SHIFT, SRC_LENGTH_MASK); + DESC_ENC(desc, addr, SRC_ADDR_SHIFT, SRC_ADDR_MASK); + + return desc; +} + +static u64 flexrm_msrc_desc(dma_addr_t addr, unsigned int length_div_16) +{ + u64 desc = 0; + + DESC_ENC(desc, MSRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length_div_16, MSRC_LENGTH_SHIFT, MSRC_LENGTH_MASK); + DESC_ENC(desc, addr, MSRC_ADDR_SHIFT, MSRC_ADDR_MASK); + + return desc; +} + +static u64 flexrm_dst_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, DST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, DST_LENGTH_SHIFT, DST_LENGTH_MASK); + DESC_ENC(desc, addr, DST_ADDR_SHIFT, DST_ADDR_MASK); + + return desc; +} + +static u64 flexrm_mdst_desc(dma_addr_t addr, unsigned int length_div_16) +{ + u64 desc = 0; + + DESC_ENC(desc, MDST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length_div_16, MDST_LENGTH_SHIFT, MDST_LENGTH_MASK); + DESC_ENC(desc, addr, MDST_ADDR_SHIFT, MDST_ADDR_MASK); + + return desc; +} + +static u64 flexrm_imm_desc(u64 data) +{ + u64 desc = 0; + + DESC_ENC(desc, IMM_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, data, IMM_DATA_SHIFT, IMM_DATA_MASK); + + return desc; +} + +static u64 flexrm_srct_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, SRCT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, SRCT_LENGTH_SHIFT, SRCT_LENGTH_MASK); + DESC_ENC(desc, addr, SRCT_ADDR_SHIFT, SRCT_ADDR_MASK); + + return desc; +} + +static u64 flexrm_dstt_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, DSTT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, DSTT_LENGTH_SHIFT, DSTT_LENGTH_MASK); + DESC_ENC(desc, addr, DSTT_ADDR_SHIFT, DSTT_ADDR_MASK); + + return desc; +} + +static u64 flexrm_immt_desc(u64 data) +{ + u64 desc = 0; + + DESC_ENC(desc, IMMT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, data, IMMT_DATA_SHIFT, IMMT_DATA_MASK); + + return desc; +} + +static bool flexrm_spu_sanity_check(struct brcm_message *msg) +{ + struct scatterlist *sg; + + if (!msg->spu.src || !msg->spu.dst) + return false; + for (sg = msg->spu.src; sg; sg = sg_next(sg)) { + if (sg->length & 0xf) { + if (sg->length > SRC_LENGTH_MASK) + return false; + } else { + if (sg->length > (MSRC_LENGTH_MASK * 16)) + return false; + } + } + for (sg = msg->spu.dst; sg; sg = sg_next(sg)) { + if (sg->length & 0xf) { + if (sg->length > DST_LENGTH_MASK) + return false; + } else { + if (sg->length > (MDST_LENGTH_MASK * 16)) + return false; + } + } + + return true; +} + +static u32 flexrm_spu_estimate_nonheader_desc_count(struct brcm_message *msg) +{ + u32 cnt = 0; + unsigned int dst_target = 0; + struct scatterlist *src_sg = msg->spu.src, *dst_sg = msg->spu.dst; + + while (src_sg || dst_sg) { + if (src_sg) { + cnt++; + dst_target = src_sg->length; + src_sg = sg_next(src_sg); + } else + dst_target = UINT_MAX; + + while (dst_target && dst_sg) { + cnt++; + if (dst_sg->length < dst_target) + dst_target -= dst_sg->length; + else + dst_target = 0; + dst_sg = sg_next(dst_sg); + } + } + + return cnt; +} + +static int flexrm_spu_dma_map(struct device *dev, struct brcm_message *msg) +{ + int rc; + + rc = dma_map_sg(dev, msg->spu.src, sg_nents(msg->spu.src), + DMA_TO_DEVICE); + if (rc < 0) + return rc; + + rc = dma_map_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst), + DMA_FROM_DEVICE); + if (rc < 0) { + dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src), + DMA_TO_DEVICE); + return rc; + } + + return 0; +} + +static void flexrm_spu_dma_unmap(struct device *dev, struct brcm_message *msg) +{ + dma_unmap_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst), + DMA_FROM_DEVICE); + dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src), + DMA_TO_DEVICE); +} + +static void *flexrm_spu_write_descs(struct brcm_message *msg, u32 nhcnt, + u32 reqid, void *desc_ptr, u32 toggle, + void *start_desc, void *end_desc) +{ + u64 d; + u32 nhpos = 0; + void *orig_desc_ptr = desc_ptr; + unsigned int dst_target = 0; + struct scatterlist *src_sg = msg->spu.src, *dst_sg = msg->spu.dst; + + while (src_sg || dst_sg) { + if (src_sg) { + if (sg_dma_len(src_sg) & 0xf) + d = flexrm_src_desc(sg_dma_address(src_sg), + sg_dma_len(src_sg)); + else + d = flexrm_msrc_desc(sg_dma_address(src_sg), + sg_dma_len(src_sg)/16); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + dst_target = sg_dma_len(src_sg); + src_sg = sg_next(src_sg); + } else + dst_target = UINT_MAX; + + while (dst_target && dst_sg) { + if (sg_dma_len(dst_sg) & 0xf) + d = flexrm_dst_desc(sg_dma_address(dst_sg), + sg_dma_len(dst_sg)); + else + d = flexrm_mdst_desc(sg_dma_address(dst_sg), + sg_dma_len(dst_sg)/16); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + if (sg_dma_len(dst_sg) < dst_target) + dst_target -= sg_dma_len(dst_sg); + else + dst_target = 0; + dst_sg = sg_next(dst_sg); + } + } + + /* Null descriptor with invalid toggle bit */ + flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle)); + + /* Ensure that descriptors have been written to memory */ + wmb(); + + /* Flip toggle bit in header */ + flexrm_flip_header_toogle(orig_desc_ptr); + + return desc_ptr; +} + +static bool flexrm_sba_sanity_check(struct brcm_message *msg) +{ + u32 i; + + if (!msg->sba.cmds || !msg->sba.cmds_count) + return false; + + for (i = 0; i < msg->sba.cmds_count; i++) { + if (((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) || + (msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C)) && + (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) && + (msg->sba.cmds[i].data_len > SRCT_LENGTH_MASK)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C) && + (msg->sba.cmds[i].data_len > SRCT_LENGTH_MASK)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_RESP) && + (msg->sba.cmds[i].resp_len > DSTT_LENGTH_MASK)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT) && + (msg->sba.cmds[i].data_len > DSTT_LENGTH_MASK)) + return false; + } + + return true; +} + +static u32 flexrm_sba_estimate_nonheader_desc_count(struct brcm_message *msg) +{ + u32 i, cnt; + + cnt = 0; + for (i = 0; i < msg->sba.cmds_count; i++) { + cnt++; + + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) || + (msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C)) + cnt++; + + if (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_RESP) + cnt++; + + if (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT) + cnt++; + } + + return cnt; +} + +static void *flexrm_sba_write_descs(struct brcm_message *msg, u32 nhcnt, + u32 reqid, void *desc_ptr, u32 toggle, + void *start_desc, void *end_desc) +{ + u64 d; + u32 i, nhpos = 0; + struct brcm_sba_command *c; + void *orig_desc_ptr = desc_ptr; + + /* Convert SBA commands into descriptors */ + for (i = 0; i < msg->sba.cmds_count; i++) { + c = &msg->sba.cmds[i]; + + if ((c->flags & BRCM_SBA_CMD_HAS_RESP) && + (c->flags & BRCM_SBA_CMD_HAS_OUTPUT)) { + /* Destination response descriptor */ + d = flexrm_dst_desc(c->resp, c->resp_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } else if (c->flags & BRCM_SBA_CMD_HAS_RESP) { + /* Destination response with tlast descriptor */ + d = flexrm_dstt_desc(c->resp, c->resp_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + + if (c->flags & BRCM_SBA_CMD_HAS_OUTPUT) { + /* Destination with tlast descriptor */ + d = flexrm_dstt_desc(c->data, c->data_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + + if (c->flags & BRCM_SBA_CMD_TYPE_B) { + /* Command as immediate descriptor */ + d = flexrm_imm_desc(c->cmd); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } else { + /* Command as immediate descriptor with tlast */ + d = flexrm_immt_desc(c->cmd); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + + if ((c->flags & BRCM_SBA_CMD_TYPE_B) || + (c->flags & BRCM_SBA_CMD_TYPE_C)) { + /* Source with tlast descriptor */ + d = flexrm_srct_desc(c->data, c->data_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + } + + /* Null descriptor with invalid toggle bit */ + flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle)); + + /* Ensure that descriptors have been written to memory */ + wmb(); + + /* Flip toggle bit in header */ + flexrm_flip_header_toogle(orig_desc_ptr); + + return desc_ptr; +} + +static bool flexrm_sanity_check(struct brcm_message *msg) +{ + if (!msg) + return false; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_sanity_check(msg); + case BRCM_MESSAGE_SBA: + return flexrm_sba_sanity_check(msg); + default: + return false; + }; +} + +static u32 flexrm_estimate_nonheader_desc_count(struct brcm_message *msg) +{ + if (!msg) + return 0; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_estimate_nonheader_desc_count(msg); + case BRCM_MESSAGE_SBA: + return flexrm_sba_estimate_nonheader_desc_count(msg); + default: + return 0; + }; +} + +static int flexrm_dma_map(struct device *dev, struct brcm_message *msg) +{ + if (!dev || !msg) + return -EINVAL; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_dma_map(dev, msg); + default: + break; + } + + return 0; +} + +static void flexrm_dma_unmap(struct device *dev, struct brcm_message *msg) +{ + if (!dev || !msg) + return; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + flexrm_spu_dma_unmap(dev, msg); + break; + default: + break; + } +} + +static void *flexrm_write_descs(struct brcm_message *msg, u32 nhcnt, + u32 reqid, void *desc_ptr, u32 toggle, + void *start_desc, void *end_desc) +{ + if (!msg || !desc_ptr || !start_desc || !end_desc) + return ERR_PTR(-ENOTSUPP); + + if ((desc_ptr < start_desc) || (end_desc <= desc_ptr)) + return ERR_PTR(-ERANGE); + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_write_descs(msg, nhcnt, reqid, + desc_ptr, toggle, + start_desc, end_desc); + case BRCM_MESSAGE_SBA: + return flexrm_sba_write_descs(msg, nhcnt, reqid, + desc_ptr, toggle, + start_desc, end_desc); + default: + return ERR_PTR(-ENOTSUPP); + }; +} + +/* ====== FlexRM driver helper routines ===== */ + +static int flexrm_new_request(struct flexrm_ring *ring, + struct brcm_message *batch_msg, + struct brcm_message *msg) +{ + void *next; + unsigned long flags; + u32 val, count, nhcnt; + u32 read_offset, write_offset; + bool exit_cleanup = false; + int ret = 0, reqid; + + /* Do sanity check on message */ + if (!flexrm_sanity_check(msg)) + return -EIO; + msg->error = 0; + + /* If no requests possible then save data pointer and goto done. */ + reqid = ida_simple_get(&ring->requests_ida, 0, + RING_MAX_REQ_COUNT, GFP_KERNEL); + if (reqid < 0) { + spin_lock_irqsave(&ring->lock, flags); + if (batch_msg) + ring->last_pending_msg = batch_msg; + else + ring->last_pending_msg = msg; + spin_unlock_irqrestore(&ring->lock, flags); + return 0; + } + ring->requests[reqid] = msg; + + /* Do DMA mappings for the message */ + ret = flexrm_dma_map(ring->mbox->dev, msg); + if (ret < 0) { + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + return ret; + } + + /* If last_pending_msg is already set then goto done with error */ + spin_lock_irqsave(&ring->lock, flags); + if (ring->last_pending_msg) + ret = -ENOSPC; + spin_unlock_irqrestore(&ring->lock, flags); + if (ret < 0) { + dev_warn(ring->mbox->dev, "no space in ring %d\n", ring->num); + exit_cleanup = true; + goto exit; + } + + /* Determine current HW BD read offset */ + read_offset = readl_relaxed(ring->regs + RING_BD_READ_PTR); + val = readl_relaxed(ring->regs + RING_BD_START_ADDR); + read_offset *= RING_DESC_SIZE; + read_offset += (u32)(BD_START_ADDR_DECODE(val) - ring->bd_dma_base); + + /* + * Number required descriptors = number of non-header descriptors + + * number of header descriptors + + * 1x null descriptor + */ + nhcnt = flexrm_estimate_nonheader_desc_count(msg); + count = flexrm_estimate_header_desc_count(nhcnt) + nhcnt + 1; + + /* Check for available descriptor space. */ + write_offset = ring->bd_write_offset; + while (count) { + if (!flexrm_is_next_table_desc(ring->bd_base + write_offset)) + count--; + write_offset += RING_DESC_SIZE; + if (write_offset == RING_BD_SIZE) + write_offset = 0x0; + if (write_offset == read_offset) + break; + } + if (count) { + spin_lock_irqsave(&ring->lock, flags); + if (batch_msg) + ring->last_pending_msg = batch_msg; + else + ring->last_pending_msg = msg; + spin_unlock_irqrestore(&ring->lock, flags); + ret = 0; + exit_cleanup = true; + goto exit; + } + + /* Write descriptors to ring */ + next = flexrm_write_descs(msg, nhcnt, reqid, + ring->bd_base + ring->bd_write_offset, + RING_BD_TOGGLE_VALID(ring->bd_write_offset), + ring->bd_base, ring->bd_base + RING_BD_SIZE); + if (IS_ERR(next)) { + ret = PTR_ERR(next); + exit_cleanup = true; + goto exit; + } + + /* Save ring BD write offset */ + ring->bd_write_offset = (unsigned long)(next - ring->bd_base); + +exit: + /* Update error status in message */ + msg->error = ret; + + /* Cleanup if we failed */ + if (exit_cleanup) { + flexrm_dma_unmap(ring->mbox->dev, msg); + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + } + + return ret; +} + +static int flexrm_process_completions(struct flexrm_ring *ring) +{ + u64 desc; + int err, count = 0; + unsigned long flags; + struct brcm_message *msg = NULL; + u32 reqid, cmpl_read_offset, cmpl_write_offset; + struct mbox_chan *chan = &ring->mbox->controller.chans[ring->num]; + + spin_lock_irqsave(&ring->lock, flags); + + /* Check last_pending_msg */ + if (ring->last_pending_msg) { + msg = ring->last_pending_msg; + ring->last_pending_msg = NULL; + } + + /* + * Get current completion read and write offset + * + * Note: We should read completion write pointer atleast once + * after we get a MSI interrupt because HW maintains internal + * MSI status which will allow next MSI interrupt only after + * completion write pointer is read. + */ + cmpl_write_offset = readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR); + cmpl_write_offset *= RING_DESC_SIZE; + cmpl_read_offset = ring->cmpl_read_offset; + ring->cmpl_read_offset = cmpl_write_offset; + + spin_unlock_irqrestore(&ring->lock, flags); + + /* If last_pending_msg was set then queue it back */ + if (msg) + mbox_send_message(chan, msg); + + /* For each completed request notify mailbox clients */ + reqid = 0; + while (cmpl_read_offset != cmpl_write_offset) { + /* Dequeue next completion descriptor */ + desc = *((u64 *)(ring->cmpl_base + cmpl_read_offset)); + + /* Next read offset */ + cmpl_read_offset += RING_DESC_SIZE; + if (cmpl_read_offset == RING_CMPL_SIZE) + cmpl_read_offset = 0; + + /* Decode error from completion descriptor */ + err = flexrm_cmpl_desc_to_error(desc); + if (err < 0) { + dev_warn(ring->mbox->dev, + "got completion desc=0x%lx with error %d", + (unsigned long)desc, err); + } + + /* Determine request id from completion descriptor */ + reqid = flexrm_cmpl_desc_to_reqid(desc); + + /* Determine message pointer based on reqid */ + msg = ring->requests[reqid]; + if (!msg) { + dev_warn(ring->mbox->dev, + "null msg pointer for completion desc=0x%lx", + (unsigned long)desc); + continue; + } + + /* Release reqid for recycling */ + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + + /* Unmap DMA mappings */ + flexrm_dma_unmap(ring->mbox->dev, msg); + + /* Give-back message to mailbox client */ + msg->error = err; + mbox_chan_received_data(chan, msg); + + /* Increment number of completions processed */ + count++; + } + + return count; +} + +/* ====== FlexRM interrupt handler ===== */ + +static irqreturn_t flexrm_irq_event(int irq, void *dev_id) +{ + /* We only have MSI for completions so just wakeup IRQ thread */ + /* Ring related errors will be informed via completion descriptors */ + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t flexrm_irq_thread(int irq, void *dev_id) +{ + flexrm_process_completions(dev_id); + + return IRQ_HANDLED; +} + +/* ====== FlexRM mailbox callbacks ===== */ + +static int flexrm_send_data(struct mbox_chan *chan, void *data) +{ + int i, rc; + struct flexrm_ring *ring = chan->con_priv; + struct brcm_message *msg = data; + + if (msg->type == BRCM_MESSAGE_BATCH) { + for (i = msg->batch.msgs_queued; + i < msg->batch.msgs_count; i++) { + rc = flexrm_new_request(ring, msg, + &msg->batch.msgs[i]); + if (rc) { + msg->error = rc; + return rc; + } + msg->batch.msgs_queued++; + } + return 0; + } + + return flexrm_new_request(ring, NULL, data); +} + +static bool flexrm_peek_data(struct mbox_chan *chan) +{ + int cnt = flexrm_process_completions(chan->con_priv); + + return (cnt > 0) ? true : false; +} + +static int flexrm_startup(struct mbox_chan *chan) +{ + u64 d; + u32 val, off; + int ret = 0; + dma_addr_t next_addr; + struct flexrm_ring *ring = chan->con_priv; + + /* Allocate BD memory */ + ring->bd_base = dma_pool_alloc(ring->mbox->bd_pool, + GFP_KERNEL, &ring->bd_dma_base); + if (!ring->bd_base) { + dev_err(ring->mbox->dev, "can't allocate BD memory\n"); + ret = -ENOMEM; + goto fail; + } + + /* Configure next table pointer entries in BD memory */ + for (off = 0; off < RING_BD_SIZE; off += RING_DESC_SIZE) { + next_addr = off + RING_DESC_SIZE; + if (next_addr == RING_BD_SIZE) + next_addr = 0; + next_addr += ring->bd_dma_base; + if (RING_BD_ALIGN_CHECK(next_addr)) + d = flexrm_next_table_desc(RING_BD_TOGGLE_VALID(off), + next_addr); + else + d = flexrm_null_desc(RING_BD_TOGGLE_INVALID(off)); + flexrm_write_desc(ring->bd_base + off, d); + } + + /* Allocate completion memory */ + ring->cmpl_base = dma_pool_alloc(ring->mbox->cmpl_pool, + GFP_KERNEL, &ring->cmpl_dma_base); + if (!ring->cmpl_base) { + dev_err(ring->mbox->dev, "can't allocate completion memory\n"); + ret = -ENOMEM; + goto fail_free_bd_memory; + } + memset(ring->cmpl_base, 0, RING_CMPL_SIZE); + + /* Request IRQ */ + if (ring->irq == UINT_MAX) { + dev_err(ring->mbox->dev, "ring IRQ not available\n"); + ret = -ENODEV; + goto fail_free_cmpl_memory; + } + ret = request_threaded_irq(ring->irq, + flexrm_irq_event, + flexrm_irq_thread, + 0, dev_name(ring->mbox->dev), ring); + if (ret) { + dev_err(ring->mbox->dev, "failed to request ring IRQ\n"); + goto fail_free_cmpl_memory; + } + ring->irq_requested = true; + + /* Disable/inactivate ring */ + writel_relaxed(0x0, ring->regs + RING_CONTROL); + + /* Program BD start address */ + val = BD_START_ADDR_VALUE(ring->bd_dma_base); + writel_relaxed(val, ring->regs + RING_BD_START_ADDR); + + /* BD write pointer will be same as HW write pointer */ + ring->bd_write_offset = + readl_relaxed(ring->regs + RING_BD_WRITE_PTR); + ring->bd_write_offset *= RING_DESC_SIZE; + + /* Program completion start address */ + val = CMPL_START_ADDR_VALUE(ring->cmpl_dma_base); + writel_relaxed(val, ring->regs + RING_CMPL_START_ADDR); + + /* Ensure last pending message is cleared */ + ring->last_pending_msg = NULL; + + /* Completion read pointer will be same as HW write pointer */ + ring->cmpl_read_offset = + readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR); + ring->cmpl_read_offset *= RING_DESC_SIZE; + + /* Read ring Tx, Rx, and Outstanding counts to clear */ + readl_relaxed(ring->regs + RING_NUM_REQ_RECV_LS); + readl_relaxed(ring->regs + RING_NUM_REQ_RECV_MS); + readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_LS); + readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_MS); + readl_relaxed(ring->regs + RING_NUM_REQ_OUTSTAND); + + /* Configure RING_MSI_CONTROL */ + val = 0; + val |= (ring->msi_timer_val << MSI_TIMER_VAL_SHIFT); + val |= BIT(MSI_ENABLE_SHIFT); + val |= (ring->msi_count_threshold & MSI_COUNT_MASK) << MSI_COUNT_SHIFT; + writel_relaxed(val, ring->regs + RING_MSI_CONTROL); + + /* Enable/activate ring */ + val = BIT(CONTROL_ACTIVE_SHIFT); + writel_relaxed(val, ring->regs + RING_CONTROL); + + return 0; + +fail_free_cmpl_memory: + dma_pool_free(ring->mbox->cmpl_pool, + ring->cmpl_base, ring->cmpl_dma_base); + ring->cmpl_base = NULL; +fail_free_bd_memory: + dma_pool_free(ring->mbox->bd_pool, + ring->bd_base, ring->bd_dma_base); + ring->bd_base = NULL; +fail: + return ret; +} + +static void flexrm_shutdown(struct mbox_chan *chan) +{ + u32 reqid; + unsigned int timeout; + struct brcm_message *msg; + struct flexrm_ring *ring = chan->con_priv; + + /* Disable/inactivate ring */ + writel_relaxed(0x0, ring->regs + RING_CONTROL); + + /* Flush ring with timeout of 1s */ + timeout = 1000; + writel_relaxed(BIT(CONTROL_FLUSH_SHIFT), + ring->regs + RING_CONTROL); + do { + if (readl_relaxed(ring->regs + RING_FLUSH_DONE) & + FLUSH_DONE_MASK) + break; + mdelay(1); + } while (timeout--); + + /* Abort all in-flight requests */ + for (reqid = 0; reqid < RING_MAX_REQ_COUNT; reqid++) { + msg = ring->requests[reqid]; + if (!msg) + continue; + + /* Release reqid for recycling */ + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + + /* Unmap DMA mappings */ + flexrm_dma_unmap(ring->mbox->dev, msg); + + /* Give-back message to mailbox client */ + msg->error = -EIO; + mbox_chan_received_data(chan, msg); + } + + /* Release IRQ */ + if (ring->irq_requested) { + free_irq(ring->irq, ring); + ring->irq_requested = false; + } + + /* Free-up completion descriptor ring */ + if (ring->cmpl_base) { + dma_pool_free(ring->mbox->cmpl_pool, + ring->cmpl_base, ring->cmpl_dma_base); + ring->cmpl_base = NULL; + } + + /* Free-up BD descriptor ring */ + if (ring->bd_base) { + dma_pool_free(ring->mbox->bd_pool, + ring->bd_base, ring->bd_dma_base); + ring->bd_base = NULL; + } +} + +static bool flexrm_last_tx_done(struct mbox_chan *chan) +{ + bool ret; + unsigned long flags; + struct flexrm_ring *ring = chan->con_priv; + + spin_lock_irqsave(&ring->lock, flags); + ret = (ring->last_pending_msg) ? false : true; + spin_unlock_irqrestore(&ring->lock, flags); + + return ret; +} + +static const struct mbox_chan_ops flexrm_mbox_chan_ops = { + .send_data = flexrm_send_data, + .startup = flexrm_startup, + .shutdown = flexrm_shutdown, + .last_tx_done = flexrm_last_tx_done, + .peek_data = flexrm_peek_data, +}; + +static struct mbox_chan *flexrm_mbox_of_xlate(struct mbox_controller *cntlr, + const struct of_phandle_args *pa) +{ + struct mbox_chan *chan; + struct flexrm_ring *ring; + + if (pa->args_count < 3) + return ERR_PTR(-EINVAL); + + if (pa->args[0] >= cntlr->num_chans) + return ERR_PTR(-ENOENT); + + if (pa->args[1] > MSI_COUNT_MASK) + return ERR_PTR(-EINVAL); + + if (pa->args[2] > MSI_TIMER_VAL_MASK) + return ERR_PTR(-EINVAL); + + chan = &cntlr->chans[pa->args[0]]; + ring = chan->con_priv; + ring->msi_count_threshold = pa->args[1]; + ring->msi_timer_val = pa->args[2]; + + return chan; +} + +/* ====== FlexRM platform driver ===== */ + +static void flexrm_mbox_msi_write(struct msi_desc *desc, struct msi_msg *msg) +{ + struct device *dev = msi_desc_to_dev(desc); + struct flexrm_mbox *mbox = dev_get_drvdata(dev); + struct flexrm_ring *ring = &mbox->rings[desc->platform.msi_index]; + + /* Configure per-Ring MSI registers */ + writel_relaxed(msg->address_lo, ring->regs + RING_MSI_ADDR_LS); + writel_relaxed(msg->address_hi, ring->regs + RING_MSI_ADDR_MS); + writel_relaxed(msg->data, ring->regs + RING_MSI_DATA_VALUE); +} + +static int flexrm_mbox_probe(struct platform_device *pdev) +{ + int index, ret = 0; + void __iomem *regs; + void __iomem *regs_end; + struct msi_desc *desc; + struct resource *iomem; + struct flexrm_ring *ring; + struct flexrm_mbox *mbox; + struct device *dev = &pdev->dev; + + /* Allocate driver mailbox struct */ + mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL); + if (!mbox) { + ret = -ENOMEM; + goto fail; + } + mbox->dev = dev; + platform_set_drvdata(pdev, mbox); + + /* Get resource for registers */ + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iomem || (resource_size(iomem) < RING_REGS_SIZE)) { + ret = -ENODEV; + goto fail; + } + + /* Map registers of all rings */ + mbox->regs = devm_ioremap_resource(&pdev->dev, iomem); + if (IS_ERR(mbox->regs)) { + ret = PTR_ERR(mbox->regs); + dev_err(&pdev->dev, "Failed to remap mailbox regs: %d\n", ret); + goto fail; + } + regs_end = mbox->regs + resource_size(iomem); + + /* Scan and count available rings */ + mbox->num_rings = 0; + for (regs = mbox->regs; regs < regs_end; regs += RING_REGS_SIZE) { + if (readl_relaxed(regs + RING_VER) == RING_VER_MAGIC) + mbox->num_rings++; + } + if (!mbox->num_rings) { + ret = -ENODEV; + goto fail; + } + + /* Allocate driver ring structs */ + ring = devm_kcalloc(dev, mbox->num_rings, sizeof(*ring), GFP_KERNEL); + if (!ring) { + ret = -ENOMEM; + goto fail; + } + mbox->rings = ring; + + /* Initialize members of driver ring structs */ + regs = mbox->regs; + for (index = 0; index < mbox->num_rings; index++) { + ring = &mbox->rings[index]; + ring->num = index; + ring->mbox = mbox; + while ((regs < regs_end) && + (readl_relaxed(regs + RING_VER) != RING_VER_MAGIC)) + regs += RING_REGS_SIZE; + if (regs_end <= regs) { + ret = -ENODEV; + goto fail; + } + ring->regs = regs; + regs += RING_REGS_SIZE; + ring->irq = UINT_MAX; + ring->irq_requested = false; + ring->msi_timer_val = MSI_TIMER_VAL_MASK; + ring->msi_count_threshold = 0x1; + ida_init(&ring->requests_ida); + memset(ring->requests, 0, sizeof(ring->requests)); + ring->bd_base = NULL; + ring->bd_dma_base = 0; + ring->cmpl_base = NULL; + ring->cmpl_dma_base = 0; + spin_lock_init(&ring->lock); + ring->last_pending_msg = NULL; + ring->cmpl_read_offset = 0; + } + + /* FlexRM is capable of 40-bit physical addresses only */ + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (ret) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) + goto fail; + } + + /* Create DMA pool for ring BD memory */ + mbox->bd_pool = dma_pool_create("bd", dev, RING_BD_SIZE, + 1 << RING_BD_ALIGN_ORDER, 0); + if (!mbox->bd_pool) { + ret = -ENOMEM; + goto fail; + } + + /* Create DMA pool for ring completion memory */ + mbox->cmpl_pool = dma_pool_create("cmpl", dev, RING_CMPL_SIZE, + 1 << RING_CMPL_ALIGN_ORDER, 0); + if (!mbox->cmpl_pool) { + ret = -ENOMEM; + goto fail_destroy_bd_pool; + } + + /* Allocate platform MSIs for each ring */ + ret = platform_msi_domain_alloc_irqs(dev, mbox->num_rings, + flexrm_mbox_msi_write); + if (ret) + goto fail_destroy_cmpl_pool; + + /* Save alloced IRQ numbers for each ring */ + for_each_msi_entry(desc, dev) { + ring = &mbox->rings[desc->platform.msi_index]; + ring->irq = desc->irq; + } + + /* Initialize mailbox controller */ + mbox->controller.txdone_irq = false; + mbox->controller.txdone_poll = true; + mbox->controller.txpoll_period = 1; + mbox->controller.ops = &flexrm_mbox_chan_ops; + mbox->controller.dev = dev; + mbox->controller.num_chans = mbox->num_rings; + mbox->controller.of_xlate = flexrm_mbox_of_xlate; + mbox->controller.chans = devm_kcalloc(dev, mbox->num_rings, + sizeof(*mbox->controller.chans), GFP_KERNEL); + if (!mbox->controller.chans) { + ret = -ENOMEM; + goto fail_free_msis; + } + for (index = 0; index < mbox->num_rings; index++) + mbox->controller.chans[index].con_priv = &mbox->rings[index]; + + /* Register mailbox controller */ + ret = mbox_controller_register(&mbox->controller); + if (ret) + goto fail_free_msis; + + dev_info(dev, "registered flexrm mailbox with %d channels\n", + mbox->controller.num_chans); + + return 0; + +fail_free_msis: + platform_msi_domain_free_irqs(dev); +fail_destroy_cmpl_pool: + dma_pool_destroy(mbox->cmpl_pool); +fail_destroy_bd_pool: + dma_pool_destroy(mbox->bd_pool); +fail: + return ret; +} + +static int flexrm_mbox_remove(struct platform_device *pdev) +{ + int index; + struct device *dev = &pdev->dev; + struct flexrm_ring *ring; + struct flexrm_mbox *mbox = platform_get_drvdata(pdev); + + mbox_controller_unregister(&mbox->controller); + + platform_msi_domain_free_irqs(dev); + + dma_pool_destroy(mbox->cmpl_pool); + dma_pool_destroy(mbox->bd_pool); + + for (index = 0; index < mbox->num_rings; index++) { + ring = &mbox->rings[index]; + ida_destroy(&ring->requests_ida); + } + + return 0; +} + +static const struct of_device_id flexrm_mbox_of_match[] = { + { .compatible = "brcm,iproc-flexrm-mbox", }, + {}, +}; +MODULE_DEVICE_TABLE(of, flexrm_mbox_of_match); + +static struct platform_driver flexrm_mbox_driver = { + .driver = { + .name = "brcm-flexrm-mbox", + .of_match_table = flexrm_mbox_of_match, + }, + .probe = flexrm_mbox_probe, + .remove = flexrm_mbox_remove, +}; +module_platform_driver(flexrm_mbox_driver); + +MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>"); +MODULE_DESCRIPTION("Broadcom FlexRM mailbox driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c index 2aeb034d5fb9..4fe7be0bdd11 100644 --- a/drivers/mailbox/bcm-pdc-mailbox.c +++ b/drivers/mailbox/bcm-pdc-mailbox.c @@ -18,7 +18,8 @@ * Broadcom PDC Mailbox Driver * The PDC provides a ring based programming interface to one or more hardware * offload engines. For example, the PDC driver works with both SPU-M and SPU2 - * cryptographic offload hardware. In some chips the PDC is referred to as MDE. + * cryptographic offload hardware. In some chips the PDC is referred to as MDE, + * and in others the FA2/FA+ hardware is used with this PDC driver. * * The PDC driver registers with the Linux mailbox framework as a mailbox * controller, once for each PDC instance. Ring 0 for each PDC is registered as @@ -108,6 +109,7 @@ #define PDC_INTMASK_OFFSET 0x24 #define PDC_INTSTATUS_OFFSET 0x20 #define PDC_RCVLAZY0_OFFSET (0x30 + 4 * PDC_RINGSET) +#define FA_RCVLAZY0_OFFSET 0x100 /* * For SPU2, configure MDE_CKSUM_CONTROL to write 17 bytes of metadata @@ -162,6 +164,11 @@ /* Maximum size buffer the DMA engine can handle */ #define PDC_DMA_BUF_MAX 16384 +enum pdc_hw { + FA_HW, /* FA2/FA+ hardware (i.e. Northstar Plus) */ + PDC_HW /* PDC/MDE hardware (i.e. Northstar 2, Pegasus) */ +}; + struct pdc_dma_map { void *ctx; /* opaque context associated with frame */ }; @@ -211,13 +218,13 @@ struct pdc_regs { u32 gptimer; /* 0x028 */ u32 PAD; - u32 intrcvlazy_0; /* 0x030 */ - u32 intrcvlazy_1; /* 0x034 */ - u32 intrcvlazy_2; /* 0x038 */ - u32 intrcvlazy_3; /* 0x03c */ + u32 intrcvlazy_0; /* 0x030 (Only in PDC, not FA2) */ + u32 intrcvlazy_1; /* 0x034 (Only in PDC, not FA2) */ + u32 intrcvlazy_2; /* 0x038 (Only in PDC, not FA2) */ + u32 intrcvlazy_3; /* 0x03c (Only in PDC, not FA2) */ u32 PAD[48]; - u32 removed_intrecvlazy; /* 0x100 */ + u32 fa_intrecvlazy; /* 0x100 (Only in FA2, not PDC) */ u32 flowctlthresh; /* 0x104 */ u32 wrrthresh; /* 0x108 */ u32 gmac_idle_cnt_thresh; /* 0x10c */ @@ -243,7 +250,7 @@ struct pdc_regs { u32 serdes_status1; /* 0x1b0 */ u32 PAD[11]; /* 0x1b4-1dc */ u32 clk_ctl_st; /* 0x1e0 */ - u32 hw_war; /* 0x1e4 */ + u32 hw_war; /* 0x1e4 (Only in PDC, not FA2) */ u32 pwrctl; /* 0x1e8 */ u32 PAD[5]; @@ -410,6 +417,9 @@ struct pdc_state { u32 txnobuf; /* unable to create tx descriptor */ u32 rxnobuf; /* unable to create rx descriptor */ u32 rx_oflow; /* count of rx overflows */ + + /* hardware type - FA2 or PDC/MDE */ + enum pdc_hw hw_type; }; /* Global variables */ @@ -1396,7 +1406,13 @@ static int pdc_interrupts_init(struct pdc_state *pdcs) /* interrupt configuration */ iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET); - iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + PDC_RCVLAZY0_OFFSET); + + if (pdcs->hw_type == FA_HW) + iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + + FA_RCVLAZY0_OFFSET); + else + iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + + PDC_RCVLAZY0_OFFSET); /* read irq from device tree */ pdcs->pdc_irq = irq_of_parse_and_map(dn, 0); @@ -1465,6 +1481,17 @@ static int pdc_mb_init(struct pdc_state *pdcs) return 0; } +/* Device tree API */ +static const int pdc_hw = PDC_HW; +static const int fa_hw = FA_HW; + +static const struct of_device_id pdc_mbox_of_match[] = { + {.compatible = "brcm,iproc-pdc-mbox", .data = &pdc_hw}, + {.compatible = "brcm,iproc-fa2-mbox", .data = &fa_hw}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, pdc_mbox_of_match); + /** * pdc_dt_read() - Read application-specific data from device tree. * @pdev: Platform device @@ -1481,6 +1508,8 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs) { struct device *dev = &pdev->dev; struct device_node *dn = pdev->dev.of_node; + const struct of_device_id *match; + const int *hw_type; int err; err = of_property_read_u32(dn, "brcm,rx-status-len", @@ -1492,6 +1521,14 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs) pdcs->use_bcm_hdr = of_property_read_bool(dn, "brcm,use-bcm-hdr"); + pdcs->hw_type = PDC_HW; + + match = of_match_device(of_match_ptr(pdc_mbox_of_match), dev); + if (match != NULL) { + hw_type = match->data; + pdcs->hw_type = *hw_type; + } + return 0; } @@ -1525,7 +1562,7 @@ static int pdc_probe(struct platform_device *pdev) pdcs->pdc_idx = pdcg.num_spu; pdcg.num_spu++; - err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39)); if (err) { dev_warn(dev, "PDC device cannot perform DMA. Error %d.", err); goto cleanup; @@ -1611,12 +1648,6 @@ static int pdc_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id pdc_mbox_of_match[] = { - {.compatible = "brcm,iproc-pdc-mbox"}, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, pdc_mbox_of_match); - static struct platform_driver pdc_mbox_driver = { .probe = pdc_probe, .remove = pdc_remove, diff --git a/drivers/mailbox/hi6220-mailbox.c b/drivers/mailbox/hi6220-mailbox.c index 613722db5daf..519376d3534c 100644 --- a/drivers/mailbox/hi6220-mailbox.c +++ b/drivers/mailbox/hi6220-mailbox.c @@ -221,7 +221,7 @@ static void hi6220_mbox_shutdown(struct mbox_chan *chan) mbox->irq_map_chan[mchan->ack_irq] = NULL; } -static struct mbox_chan_ops hi6220_mbox_ops = { +static const struct mbox_chan_ops hi6220_mbox_ops = { .send_data = hi6220_mbox_send_data, .startup = hi6220_mbox_startup, .shutdown = hi6220_mbox_shutdown, diff --git a/drivers/mailbox/mailbox-xgene-slimpro.c b/drivers/mailbox/mailbox-xgene-slimpro.c index dd2afbca51c9..a7040163dd43 100644 --- a/drivers/mailbox/mailbox-xgene-slimpro.c +++ b/drivers/mailbox/mailbox-xgene-slimpro.c @@ -174,7 +174,7 @@ static void slimpro_mbox_shutdown(struct mbox_chan *chan) devm_free_irq(mb_chan->dev, mb_chan->irq, mb_chan); } -static struct mbox_chan_ops slimpro_mbox_ops = { +static const struct mbox_chan_ops slimpro_mbox_ops = { .send_data = slimpro_mbox_send_data, .startup = slimpro_mbox_startup, .shutdown = slimpro_mbox_shutdown, diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 4671f8a12872..9dfbf7ea10a2 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -103,11 +103,14 @@ static void tx_tick(struct mbox_chan *chan, int r) /* Submit next message */ msg_submit(chan); + if (!mssg) + return; + /* Notify the client */ - if (mssg && chan->cl->tx_done) + if (chan->cl->tx_done) chan->cl->tx_done(chan->cl, mssg, r); - if (chan->cl->tx_block) + if (r != -ETIME && chan->cl->tx_block) complete(&chan->tx_complete); } @@ -260,7 +263,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) msg_submit(chan); - if (chan->cl->tx_block && chan->active_req) { + if (chan->cl->tx_block) { unsigned long wait; int ret; @@ -271,8 +274,8 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) ret = wait_for_completion_timeout(&chan->tx_complete, wait); if (ret == 0) { - t = -EIO; - tx_tick(chan, -EIO); + t = -ETIME; + tx_tick(chan, t); } } @@ -453,6 +456,12 @@ int mbox_controller_register(struct mbox_controller *mbox) txdone = TXDONE_BY_ACK; if (txdone == TXDONE_BY_POLL) { + + if (!mbox->ops->last_tx_done) { + dev_err(mbox->dev, "last_tx_done method is absent\n"); + return -EINVAL; + } + hrtimer_init(&mbox->poll_hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); mbox->poll_hrt.function = txdone_hrtimer; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9c689b34e6e7..975922c8f231 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2773,7 +2773,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->num_discard_bios = 1; ti->discards_supported = true; - ti->discard_zeroes_data_unsupported = true; ti->split_discard_bios = false; cache->features = ca->features; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 136fda3ff9e5..fea5bd52ada8 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -132,6 +132,7 @@ void dm_init_md_queue(struct mapped_device *md); void dm_init_normal_md_queue(struct mapped_device *md); int md_in_flight(struct mapped_device *md); void disable_write_same(struct mapped_device *md); +void disable_write_zeroes(struct mapped_device *md); static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) { diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 389a3637ffcc..ef1d836bd81b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2030,7 +2030,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) wake_up_process(cc->write_thread); ti->num_flush_bios = 1; - ti->discard_zeroes_data_unsupported = true; return 0; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 03940bf36f6c..3702e502466d 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -312,9 +312,12 @@ static void do_region(int op, int op_flags, unsigned region, */ if (op == REQ_OP_DISCARD) special_cmd_max_sectors = q->limits.max_discard_sectors; + else if (op == REQ_OP_WRITE_ZEROES) + special_cmd_max_sectors = q->limits.max_write_zeroes_sectors; else if (op == REQ_OP_WRITE_SAME) special_cmd_max_sectors = q->limits.max_write_same_sectors; - if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_SAME) && + if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES || + op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) { dec_count(io, region, -EOPNOTSUPP); return; @@ -328,11 +331,18 @@ static void do_region(int op, int op_flags, unsigned region, /* * Allocate a suitably sized-bio. */ - if ((op == REQ_OP_DISCARD) || (op == REQ_OP_WRITE_SAME)) + switch (op) { + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + num_bvecs = 0; + break; + case REQ_OP_WRITE_SAME: num_bvecs = 1; - else + break; + default: num_bvecs = min_t(int, BIO_MAX_PAGES, dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); + } bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); @@ -341,7 +351,7 @@ static void do_region(int op, int op_flags, unsigned region, bio_set_op_attrs(bio, op, op_flags); store_io_and_region_in_bio(bio, io, region); - if (op == REQ_OP_DISCARD) { + if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) { num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; remaining -= num_sectors; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 9e9d04cb7d51..f85846741d50 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -733,11 +733,11 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->pages = &zero_page_list; /* - * Use WRITE SAME to optimize zeroing if all dests support it. + * Use WRITE ZEROES to optimize zeroing if all dests support it. */ - job->rw = REQ_OP_WRITE_SAME; + job->rw = REQ_OP_WRITE_ZEROES; for (i = 0; i < job->num_dests; i++) - if (!bdev_write_same(job->dests[i].bdev)) { + if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) { job->rw = WRITE; break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 4788b0b989a9..e17fd44ceef5 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; + ti->num_write_zeroes_bios = 1; ti->private = lc; return 0; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7f223dbed49f..2950b145443d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1103,6 +1103,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; + ti->num_write_zeroes_bios = 1; if (m->queue_mode == DM_TYPE_BIO_BASED) ti->per_io_data_size = multipath_per_bio_data_size(); else @@ -1491,7 +1492,7 @@ static int do_end_io(struct multipath *m, struct request *clone, */ int r = DM_ENDIO_REQUEUE; - if (!error && !clone->errors) + if (!error) return 0; /* I/O complete */ if (noretry_error(error)) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1e217ba84d09..2dae3e5b851c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2813,7 +2813,9 @@ static void configure_discard_support(struct raid_set *rs) /* Assume discards not supported until after checks below. */ ti->discards_supported = false; - /* RAID level 4,5,6 require discard_zeroes_data for data integrity! */ + /* + * XXX: RAID level 4,5,6 require zeroing for safety. + */ raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6); for (i = 0; i < rs->raid_disks; i++) { @@ -2827,8 +2829,6 @@ static void configure_discard_support(struct raid_set *rs) return; if (raid456) { - if (!q->limits.discard_zeroes_data) - return; if (!devices_handle_discard_safely) { DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty."); DMERR("Set dm-raid.devices_handle_discard_safely=Y to override."); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 2ddc2d20e62d..a95cbb80fb34 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1124,7 +1124,6 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->per_io_data_size = sizeof(struct dm_raid1_bio_record); - ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0); if (!ms->kmirrord_wq) { diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 0b081d170087..bff7e3bdb4ed 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -298,9 +298,14 @@ static void dm_done(struct request *clone, int error, bool mapped) r = rq_end_io(tio->ti, clone, error, &tio->info); } - if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) && - !clone->q->limits.max_write_same_sectors)) - disable_write_same(tio->md); + if (unlikely(r == -EREMOTEIO)) { + if (req_op(clone) == REQ_OP_WRITE_SAME && + !clone->q->limits.max_write_same_sectors) + disable_write_same(tio->md); + if (req_op(clone) == REQ_OP_WRITE_ZEROES && + !clone->q->limits.max_write_zeroes_sectors) + disable_write_zeroes(tio->md); + } if (r <= 0) /* The target wants to complete the I/O */ @@ -358,7 +363,7 @@ static void dm_complete_request(struct request *rq, int error) if (!rq->q->mq_ops) blk_complete_request(rq); else - blk_mq_complete_request(rq, error); + blk_mq_complete_request(rq); } /* @@ -762,7 +767,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; } -static struct blk_mq_ops dm_mq_ops = { +static const struct blk_mq_ops dm_mq_ops = { .queue_rq = dm_mq_queue_rq, .complete = dm_softirq_done, .init_request = dm_mq_init_request, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 28193a57bf47..5ef49c121d99 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = stripes; ti->num_discard_bios = stripes; ti->num_write_same_bios = stripes; + ti->num_write_zeroes_bios = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -293,6 +294,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } if (unlikely(bio_op(bio) == REQ_OP_DISCARD) || + unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) || unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) { target_bio_nr = dm_bio_get_target_bio_nr(bio); BUG_ON(target_bio_nr >= sc->stripes); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3ad16d9c9d5a..958275aca008 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1449,22 +1449,6 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) return false; } -static bool dm_table_discard_zeroes_data(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i = 0; - - /* Ensure that all targets supports discard_zeroes_data. */ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (ti->discard_zeroes_data_unsupported) - return false; - } - - return true; -} - static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1533,6 +1517,34 @@ static bool dm_table_supports_write_same(struct dm_table *t) return true; } +static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !q->limits.max_write_zeroes_sectors; +} + +static bool dm_table_supports_write_zeroes(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_write_zeroes_bios) + return false; + + if (!ti->type->iterate_devices || + ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL)) + return false; + } + + return true; +} + + static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1592,9 +1604,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); - if (!dm_table_discard_zeroes_data(t)) - q->limits.discard_zeroes_data = 0; - /* Ensure that all underlying devices are non-rotational. */ if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); @@ -1603,6 +1612,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_supports_write_same(t)) q->limits.max_write_same_sectors = 0; + if (!dm_table_supports_write_zeroes(t)) + q->limits.max_write_zeroes_sectors = 0; if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2b266a2b5035..a5f1916f621a 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3263,7 +3263,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * them down to the data device. The thin device's discard * processing will cause mappings to be removed from the btree. */ - ti->discard_zeroes_data_unsupported = true; if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_bios = 1; @@ -4119,7 +4118,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ - ti->discard_zeroes_data_unsupported = true; if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; ti->num_discard_bios = 1; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dfb75979e455..8bf397729bbd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -810,7 +810,6 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); bio->bi_error = io_error; bio_endio(bio); } @@ -825,6 +824,14 @@ void disable_write_same(struct mapped_device *md) limits->max_write_same_sectors = 0; } +void disable_write_zeroes(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support WRITE ZEROES, disable it */ + limits->max_write_zeroes_sectors = 0; +} + static void clone_endio(struct bio *bio) { int error = bio->bi_error; @@ -851,9 +858,14 @@ static void clone_endio(struct bio *bio) } } - if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)) - disable_write_same(md); + if (unlikely(r == -EREMOTEIO)) { + if (bio_op(bio) == REQ_OP_WRITE_SAME && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) + disable_write_same(md); + if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + disable_write_zeroes(md); + } free_tio(tio); dec_pending(io, error); @@ -1202,6 +1214,11 @@ static unsigned get_num_write_same_bios(struct dm_target *ti) return ti->num_write_same_bios; } +static unsigned get_num_write_zeroes_bios(struct dm_target *ti) +{ + return ti->num_write_zeroes_bios; +} + typedef bool (*is_split_required_fn)(struct dm_target *ti); static bool is_split_required_for_discard(struct dm_target *ti) @@ -1256,6 +1273,11 @@ static int __send_write_same(struct clone_info *ci) return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); } +static int __send_write_zeroes(struct clone_info *ci) +{ + return __send_changing_extent_only(ci, get_num_write_zeroes_bios, NULL); +} + /* * Select the correct strategy for processing a non-flush bio. */ @@ -1270,6 +1292,8 @@ static int __split_and_process_non_flush(struct clone_info *ci) return __send_discard(ci); else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) return __send_write_same(ci); + else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES)) + return __send_write_zeroes(ci); ti = dm_table_find_target(ci->map, ci->sector); if (!dm_target_is_valid(ti)) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 3e38e0207a3e..377a8a3672e3 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -293,6 +293,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) split, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, split); + mddev_check_write_zeroes(mddev, split); generic_make_request(split); } } while (split != bio); diff --git a/drivers/md/md.h b/drivers/md/md.h index dde8ecb760c8..1e76d64ce180 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -709,4 +709,11 @@ static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio) !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) mddev->queue->limits.max_write_same_sectors = 0; } + +static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio) +{ + if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + mddev->queue->limits.max_write_zeroes_sectors = 0; +} #endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 79a12b59250b..e95d521d93e9 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -139,6 +139,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; mddev_check_writesame(mddev, &mp_bh->bio); + mddev_check_write_zeroes(mddev, &mp_bh->bio); generic_make_request(&mp_bh->bio); return; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 93347ca7c7a6..ce7a6a56cf73 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -383,6 +383,7 @@ static int raid0_run(struct mddev *mddev) blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); @@ -504,6 +505,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) split, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, split); + mddev_check_write_zeroes(mddev, split); generic_make_request(split); } } while (split != bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a34f58772022..b59cc100320a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3177,8 +3177,10 @@ static int raid1_run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - if (mddev->queue) + if (mddev->queue) { blk_queue_max_write_same_sectors(mddev->queue, 0); + blk_queue_max_write_zeroes_sectors(mddev->queue, 0); + } rdev_for_each(rdev, mddev) { if (!mddev->gendisk) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e89a8d78a9ed..28ec3a93acee 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3749,6 +3749,7 @@ static int raid10_run(struct mddev *mddev) blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, 0); + blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_io_min(mddev->queue, chunk_size); if (conf->geo.raid_disks % conf->geo.near_copies) blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ed5cd705b985..2efdb0d67460 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5031,8 +5031,6 @@ static void raid5_align_endio(struct bio *bi) rdev_dec_pending(rdev, conf->mddev); if (!error) { - trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), - raid_bi, 0); bio_endio(raid_bi); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_quiescent); @@ -7229,7 +7227,6 @@ static int raid5_run(struct mddev *mddev) if (mddev->queue) { int chunk_size; - bool discard_supported = true; /* read-ahead size must cover two whole stripes, which * is 2 * (datadisks) * chunksize where 'n' is the * number of raid devices @@ -7265,48 +7262,32 @@ static int raid5_run(struct mddev *mddev) blk_queue_max_discard_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); - /* - * unaligned part of discard request will be ignored, so can't - * guarantee discard_zeroes_data - */ - mddev->queue->limits.discard_zeroes_data = 0; - blk_queue_max_write_same_sectors(mddev->queue, 0); + blk_queue_max_write_zeroes_sectors(mddev->queue, 0); rdev_for_each(rdev, mddev) { disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->new_data_offset << 9); - /* - * discard_zeroes_data is required, otherwise data - * could be lost. Consider a scenario: discard a stripe - * (the stripe could be inconsistent if - * discard_zeroes_data is 0); write one disk of the - * stripe (the stripe could be inconsistent again - * depending on which disks are used to calculate - * parity); the disk is broken; The stripe data of this - * disk is lost. - */ - if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) || - !bdev_get_queue(rdev->bdev)-> - limits.discard_zeroes_data) - discard_supported = false; - /* Unfortunately, discard_zeroes_data is not currently - * a guarantee - just a hint. So we only allow DISCARD - * if the sysadmin has confirmed that only safe devices - * are in use by setting a module parameter. - */ - if (!devices_handle_discard_safely) { - if (discard_supported) { - pr_info("md/raid456: discard support disabled due to uncertainty.\n"); - pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n"); - } - discard_supported = false; - } } - if (discard_supported && + /* + * zeroing is required, otherwise data + * could be lost. Consider a scenario: discard a stripe + * (the stripe could be inconsistent if + * discard_zeroes_data is 0); write one disk of the + * stripe (the stripe could be inconsistent again + * depending on which disks are used to calculate + * parity); the disk is broken; The stripe data of this + * disk is lost. + * + * We only allow DISCARD if the sysadmin has confirmed that + * only safe devices are in use by setting a module parameter. + * A better idea might be to turn DISCARD into WRITE_ZEROES + * requests, as that is required to be safe. + */ + if (devices_handle_discard_safely && mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && mddev->queue->limits.discard_granularity >= stripe) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 493eb10ce580..4c54ad34e17a 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -167,8 +167,6 @@ static void mmc_queue_setup_discard(struct request_queue *q, queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); blk_queue_max_discard_sectors(q, max_discard); - if (card->erased_byte == 0 && !mmc_can_discard(card)) - q->limits.discard_zeroes_data = 1; q->limits.discard_granularity = card->pref_erase << 9; /* granularity must not be greater than max. discard */ if (card->pref_erase > max_discard) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 66a9dedd1062..1517da3ddd7d 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -46,7 +46,7 @@ #include "mtdcore.h" -static struct backing_dev_info *mtd_bdi; +struct backing_dev_info *mtd_bdi; #ifdef CONFIG_PM_SLEEP @@ -496,11 +496,9 @@ int add_mtd_device(struct mtd_info *mtd) * mtd_device_parse_register() multiple times on the same master MTD, * especially with CONFIG_MTD_PARTITIONED_MASTER=y. */ - if (WARN_ONCE(mtd->backing_dev_info, "MTD already registered\n")) + if (WARN_ONCE(mtd->dev.type, "MTD already registered\n")) return -EEXIST; - mtd->backing_dev_info = mtd_bdi; - BUG_ON(mtd->writesize == 0); mutex_lock(&mtd_table_mutex); @@ -1775,13 +1773,18 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name) struct backing_dev_info *bdi; int ret; - bdi = kzalloc(sizeof(*bdi), GFP_KERNEL); + bdi = bdi_alloc(GFP_KERNEL); if (!bdi) return ERR_PTR(-ENOMEM); - ret = bdi_setup_and_register(bdi, name); + bdi->name = name; + /* + * We put '-0' suffix to the name to get the same name format as we + * used to get. Since this is called only once, we get a unique name. + */ + ret = bdi_register(bdi, "%.28s-0", name); if (ret) - kfree(bdi); + bdi_put(bdi); return ret ? ERR_PTR(ret) : bdi; } @@ -1813,8 +1816,7 @@ static int __init init_mtd(void) out_procfs: if (proc_mtd) remove_proc_entry("mtd", NULL); - bdi_destroy(mtd_bdi); - kfree(mtd_bdi); + bdi_put(mtd_bdi); err_bdi: class_unregister(&mtd_class); err_reg: @@ -1828,8 +1830,7 @@ static void __exit cleanup_mtd(void) if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); - bdi_destroy(mtd_bdi); - kfree(mtd_bdi); + bdi_put(mtd_bdi); idr_destroy(&mtd_idr); } diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index 20c02a3b7417..e43fea896d1e 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -18,6 +18,7 @@ #include <linux/ctype.h> #include <linux/slab.h> #include <linux/major.h> +#include <linux/backing-dev.h> /* * compare superblocks to see if they're equivalent @@ -38,6 +39,8 @@ static int get_sb_mtd_compare(struct super_block *sb, void *_mtd) return 0; } +extern struct backing_dev_info *mtd_bdi; + /* * mark the superblock by the MTD device it is using * - set the device number to be the correct MTD block device for pesuperstence @@ -49,7 +52,8 @@ static int get_sb_mtd_set(struct super_block *sb, void *_mtd) sb->s_mtd = mtd; sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, mtd->index); - sb->s_bdi = mtd->backing_dev_info; + sb->s_bdi = bdi_get(mtd_bdi); + return 0; } diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index c80869e60909..51f2be8889b5 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -347,7 +347,7 @@ static int ubiblock_init_request(void *data, struct request *req, return 0; } -static struct blk_mq_ops ubiblock_mq_ops = { +static const struct blk_mq_ops ubiblock_mq_ops = { .queue_rq = ubiblock_queue_rq, .init_request = ubiblock_init_request, }; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8a4ba8b88e52..34481c9be1d1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1104,11 +1104,11 @@ static void bond_compute_features(struct bonding *bond) gso_max_size = min(gso_max_size, slave->dev->gso_max_size); gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs); } + bond_dev->hard_header_len = max_hard_header_len; done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; - bond_dev->hard_header_len = max_hard_header_len; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index 8483a40e7e9e..5f9e0e6301d0 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -72,6 +72,8 @@ config CAN_PEAK_USB PCAN-USB Pro dual CAN 2.0b channels USB adapter PCAN-USB FD single CAN-FD channel USB adapter PCAN-USB Pro FD dual CAN-FD channels USB adapter + PCAN-Chip USB CAN-FD to USB stamp module + PCAN-USB X6 6 CAN-FD channels USB adapter (see also http://www.peak-system.com). diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 300349fe8dc0..eecee7f8dfb7 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = { static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) { struct gs_can *dev = netdev_priv(netdev); - struct gs_identify_mode imode; + struct gs_identify_mode *imode; int rc; + imode = kmalloc(sizeof(*imode), GFP_KERNEL); + + if (!imode) + return -ENOMEM; + if (do_identify) - imode.mode = GS_CAN_IDENTIFY_ON; + imode->mode = GS_CAN_IDENTIFY_ON; else - imode.mode = GS_CAN_IDENTIFY_OFF; + imode->mode = GS_CAN_IDENTIFY_OFF; rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), @@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) USB_RECIP_INTERFACE, dev->channel, 0, - &imode, - sizeof(imode), + imode, + sizeof(*imode), 100); + kfree(imode); + return (rc > 0) ? 0 : rc; } diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 0b0302af3bd2..57913dbbae0a 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = { {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)}, + {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)}, {} /* Terminating entry */ }; @@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = { &pcan_usb_pro, &pcan_usb_fd, &pcan_usb_pro_fd, + &pcan_usb_chip, &pcan_usb_x6, }; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index 3cbfb069893d..c01316cac354 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -27,6 +27,7 @@ #define PCAN_USBPRO_PRODUCT_ID 0x000d #define PCAN_USBPROFD_PRODUCT_ID 0x0011 #define PCAN_USBFD_PRODUCT_ID 0x0012 +#define PCAN_USBCHIP_PRODUCT_ID 0x0013 #define PCAN_USBX6_PRODUCT_ID 0x0014 #define PCAN_USB_DRIVER_NAME "peak_usb" @@ -90,6 +91,7 @@ struct peak_usb_adapter { extern const struct peak_usb_adapter pcan_usb; extern const struct peak_usb_adapter pcan_usb_pro; extern const struct peak_usb_adapter pcan_usb_fd; +extern const struct peak_usb_adapter pcan_usb_chip; extern const struct peak_usb_adapter pcan_usb_pro_fd; extern const struct peak_usb_adapter pcan_usb_x6; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 304732550f0a..528d3bb4917f 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = { .do_get_berr_counter = pcan_usb_fd_get_berr_counter, }; +/* describes the PCAN-CHIP USB */ +static const struct can_bittiming_const pcan_usb_chip_const = { + .name = "pcan_chip_usb", + .tseg1_min = 1, + .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS), + .tseg2_min = 1, + .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS), + .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS), + .brp_min = 1, + .brp_max = (1 << PUCAN_TSLOW_BRP_BITS), + .brp_inc = 1, +}; + +static const struct can_bittiming_const pcan_usb_chip_data_const = { + .name = "pcan_chip_usb", + .tseg1_min = 1, + .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS), + .tseg2_min = 1, + .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS), + .sjw_max = (1 << PUCAN_TFAST_SJW_BITS), + .brp_min = 1, + .brp_max = (1 << PUCAN_TFAST_BRP_BITS), + .brp_inc = 1, +}; + +const struct peak_usb_adapter pcan_usb_chip = { + .name = "PCAN-Chip USB", + .device_id = PCAN_USBCHIP_PRODUCT_ID, + .ctrl_count = PCAN_USBFD_CHANNEL_COUNT, + .ctrlmode_supported = CAN_CTRLMODE_FD | + CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY, + .clock = { + .freq = PCAN_UFD_CRYSTAL_HZ, + }, + .bittiming_const = &pcan_usb_chip_const, + .data_bittiming_const = &pcan_usb_chip_data_const, + + /* size of device private data */ + .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), + + /* timestamps usage */ + .ts_used_bits = 32, + .ts_period = 1000000, /* calibration period in ts. */ + .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */ + .us_per_ts_shift = 0, + + /* give here messages in/out endpoints */ + .ep_msg_in = PCAN_USBPRO_EP_MSGIN, + .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0}, + + /* size of rx/tx usb buffers */ + .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE, + .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE, + + /* device callbacks */ + .intf_probe = pcan_usb_pro_probe, /* same as PCAN-USB Pro */ + .dev_init = pcan_usb_fd_init, + + .dev_exit = pcan_usb_fd_exit, + .dev_free = pcan_usb_fd_free, + .dev_set_bus = pcan_usb_fd_set_bus, + .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow, + .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast, + .dev_decode_buf = pcan_usb_fd_decode_buf, + .dev_start = pcan_usb_fd_start, + .dev_stop = pcan_usb_fd_stop, + .dev_restart_async = pcan_usb_fd_restart_async, + .dev_encode_msg = pcan_usb_fd_encode_msg, + + .do_get_berr_counter = pcan_usb_fd_get_berr_counter, +}; + /* describes the PCAN-USB Pro FD adapter */ static const struct can_bittiming_const pcan_usb_pro_fd_const = { .name = "pcan_usb_pro_fd", diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 8cf4801994e8..fa0eece21eef 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -326,6 +326,7 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, static void b53_set_forwarding(struct b53_device *dev, int enable) { + struct dsa_switch *ds = dev->ds; u8 mgmt; b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); @@ -336,6 +337,15 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) mgmt &= ~SM_SW_FWD_EN; b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt); + + /* Include IMP port in dumb forwarding mode when no tagging protocol is + * set + */ + if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) { + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt); + mgmt |= B53_MII_DUMB_FWDG_EN; + b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt); + } } static void b53_enable_vlan(struct b53_device *dev, bool enable) @@ -598,7 +608,8 @@ static void b53_switch_reset_gpio(struct b53_device *dev) static int b53_switch_reset(struct b53_device *dev) { - u8 mgmt; + unsigned int timeout = 1000; + u8 mgmt, reg; b53_switch_reset_gpio(dev); @@ -607,6 +618,28 @@ static int b53_switch_reset(struct b53_device *dev) b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, 0x00); } + /* This is specific to 58xx devices here, do not use is58xx() which + * covers the larger Starfigther 2 family, including 7445/7278 which + * still use this driver as a library and need to perform the reset + * earlier. + */ + if (dev->chip_id == BCM58XX_DEVICE_ID) { + b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, ®); + reg |= SW_RST | EN_SW_RST | EN_CH_RST; + b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg); + + do { + b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, ®); + if (!(reg & SW_RST)) + break; + + usleep_range(1000, 2000); + } while (timeout-- > 0); + + if (timeout == 0) + return -ETIMEDOUT; + } + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); if (!(mgmt & SM_SW_FWD_EN)) { @@ -1731,7 +1764,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .vlans = 4096, .enabled_ports = 0x1ff, .arl_entries = 4, - .cpu_port = B53_CPU_PORT_25, + .cpu_port = B53_CPU_PORT, .vta_regs = B53_VTA_REGS, .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 9fd24c418fa4..e5c86d44667a 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -104,6 +104,10 @@ #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) +/* Switch control (8 bit) */ +#define B53_SWITCH_CTRL 0x22 +#define B53_MII_DUMB_FWDG_EN BIT(6) + /* (16 bit) */ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 @@ -139,6 +143,7 @@ /* Software reset register (8 bit) */ #define B53_SOFTRESET 0x79 #define SW_RST BIT(7) +#define EN_CH_RST BIT(6) #define EN_SW_RST BIT(4) /* Fast Aging Control register (8 bit) */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index dc52053128bc..3d9490cd2db1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -90,7 +90,7 @@ #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX) #define MLX5_UMR_ALIGN (2048) -#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (256) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d55fff0ba388..26fc77e80f7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *i int idx = 0; int err = 0; + info->data = MAX_NUM_OF_ETHTOOL_RULES; while ((!err || err == -ENOENT) && idx < info->rule_cnt) { err = mlx5e_ethtool_get_flow(priv, info, location); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 66c133757a5e..15cc7b469d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -174,7 +174,7 @@ unlock: static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_sw_stats temp, *s = &temp; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; u64 tx_offload_none = 0; @@ -229,6 +229,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); + memcpy(&priv->stats.sw, s, sizeof(*s)); } static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) @@ -243,7 +244,6 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); - memset(out, 0, outlen); mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fade7233dac5..5436866798f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -639,7 +639,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && rep->vport != FDB_UPLINK_VPORT) { - if (min_inline > esw->offloads.inline_mode) { + if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < min_inline) { netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", min_inline, esw->offloads.inline_mode); @@ -785,16 +786,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int gen_vxlan_header_ipv4(struct net_device *out_dev, - char buf[], - unsigned char h_dest[ETH_ALEN], - int ttl, - __be32 daddr, - __be32 saddr, - __be16 udp_dst_port, - __be32 vx_vni) +static void gen_vxlan_header_ipv4(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + int ttl, + __be32 daddr, + __be32 saddr, + __be16 udp_dst_port, + __be32 vx_vni) { - int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN; struct ethhdr *eth = (struct ethhdr *)buf; struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); @@ -817,20 +817,17 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev, udp->dest = udp_dst_port; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(vx_vni); - - return encap_size; } -static int gen_vxlan_header_ipv6(struct net_device *out_dev, - char buf[], - unsigned char h_dest[ETH_ALEN], - int ttl, - struct in6_addr *daddr, - struct in6_addr *saddr, - __be16 udp_dst_port, - __be32 vx_vni) +static void gen_vxlan_header_ipv6(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + int ttl, + struct in6_addr *daddr, + struct in6_addr *saddr, + __be16 udp_dst_port, + __be32 vx_vni) { - int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN; struct ethhdr *eth = (struct ethhdr *)buf; struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); @@ -852,8 +849,6 @@ static int gen_vxlan_header_ipv6(struct net_device *out_dev, udp->dest = udp_dst_port; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(vx_vni); - - return encap_size; } static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, @@ -862,13 +857,20 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device **out_dev) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; - int encap_size, ttl, err; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; + int ttl, err; + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } - encap_header = kzalloc(max_encap_size, GFP_KERNEL); + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); if (!encap_header) return -ENOMEM; @@ -903,11 +905,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, - e->h_dest, ttl, - fl4.daddr, - fl4.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); + gen_vxlan_header_ipv4(*out_dev, encap_header, + ipv4_encap_size, e->h_dest, ttl, + fl4.daddr, + fl4.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -915,7 +917,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - encap_size, encap_header, &e->encap_id); + ipv4_encap_size, encap_header, &e->encap_id); out: if (err && n) neigh_release(n); @@ -930,13 +932,20 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; - int encap_size, err, ttl = 0; struct neighbour *n = NULL; struct flowi6 fl6 = {}; char *encap_header; + int err, ttl = 0; + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } - encap_header = kzalloc(max_encap_size, GFP_KERNEL); + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); if (!encap_header) return -ENOMEM; @@ -972,11 +981,11 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header, - e->h_dest, ttl, - &fl6.daddr, - &fl6.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); + gen_vxlan_header_ipv6(*out_dev, encap_header, + ipv6_encap_size, e->h_dest, ttl, + &fl6.daddr, + &fl6.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -984,7 +993,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - encap_size, encap_header, &e->encap_id); + ipv6_encap_size, encap_header, &e->encap_id); out: if (err && n) neigh_release(n); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 307ec6c5fd3b..d111cebca9f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -911,8 +911,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; int num_vports = esw->enabled_vports; - int err; - int vport; + int err, vport; u8 mlx5_mode; if (!MLX5_CAP_GEN(dev, vport_group_manager)) @@ -921,9 +920,17 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) + return 0; + /* fall through */ + case MLX5_CAP_INLINE_MODE_L2: + esw_warn(dev, "Inline mode can't be set\n"); return -EOPNOTSUPP; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + break; + } if (esw->offloads.num_flows > 0) { esw_warn(dev, "Can't set inline mode when flows are configured\n"); @@ -966,18 +973,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - return -EOPNOTSUPP; - return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) { + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; int vport; - u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; @@ -985,10 +988,18 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - return -EOPNOTSUPP; + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } +query_vports: for (vport = 1; vport <= nvfs; vport++) { mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); if (vport > 1 && prev_mlx5_mode != mlx5_mode) @@ -996,6 +1007,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) prev_mlx5_mode = mlx5_mode; } +out: *mode = mlx5_mode; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 60154a175bd3..0ad66324247f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1029,7 +1029,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (err) { dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); - goto out_err; + goto err_cmd_cleanup; } err = mlx5_core_enable_hca(dev, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 2e6b0f290ddc..222b25908d01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -87,6 +87,7 @@ static void up_rel_func(struct kref *kref) struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); list_del(&up->list); + iounmap(up->map); if (mlx5_cmd_free_uar(up->mdev, up->index)) mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); kfree(up->reg_bitmap); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index a6e2bbe629bd..cfdadb658ade 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -64,11 +64,11 @@ ((u32)(prio_tc_tbl >> ((7 - prio) * 4)) & 0x7) static const struct qed_dcbx_app_metadata qed_dcbx_app_update[] = { - {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH} + {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_ISCSI}, + {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_FCOE}, + {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_ETH_ROCE}, + {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_ETH_ROCE}, + {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH}, }; static bool qed_dcbx_app_ethtype(u32 app_info_bitmap) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8cfc4a54f2dc..3cd7989c007d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1516,11 +1516,12 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) spin_unlock_irqrestore(&priv->lock, flags); return NETDEV_TX_BUSY; } - entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC); - priv->tx_skb[q][entry / NUM_TX_DESC] = skb; if (skb_put_padto(skb, ETH_ZLEN)) - goto drop; + goto exit; + + entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC); + priv->tx_skb[q][entry / NUM_TX_DESC] = skb; buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) + entry / NUM_TX_DESC * DPTR_ALIGN; diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index ee14662415c5..a0c52e328102 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); #define EFX_RXQ_MIN_ENT 128U #define EFX_TXQ_MIN_ENT(efx) (2 * efx_tx_max_skb_descs(efx)) -#define EFX_TXQ_MAX_ENT(efx) (EFX_WORKAROUND_35388(efx) ? \ +/* All EF10 architecture NICs steal one bit of the DMAQ size for various + * other purposes when counting TxQ entries, so we halve the queue size. + */ +#define EFX_TXQ_MAX_ENT(efx) (EFX_WORKAROUND_EF10(efx) ? \ EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE) static inline bool efx_rss_enabled(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h index 103f827a1623..c67fa18b8121 100644 --- a/drivers/net/ethernet/sfc/workarounds.h +++ b/drivers/net/ethernet/sfc/workarounds.h @@ -16,6 +16,7 @@ */ #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0) +#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) #define EFX_WORKAROUND_10G(efx) 1 /* Bit-bashed I2C reads cause performance drop */ diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 9e631952b86f..48a541eb0af2 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -76,7 +76,7 @@ config TI_CPSW config TI_CPTS bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP - depends on PTP_1588_CLOCK + depends on POSIX_TIMERS ---help--- This driver supports the Common Platform Time Sync unit of the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem. @@ -87,6 +87,8 @@ config TI_CPTS_MOD tristate depends on TI_CPTS default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y + select NET_PTP_CLASSIFY + imply PTP_1588_CLOCK default m config TI_KEYSTONE_NETCP diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index a45f98fa4aa7..3dadee1080b9 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1017,8 +1017,8 @@ tc35815_free_queues(struct net_device *dev) BUG_ON(lp->tx_skbs[i].skb != skb); #endif if (skb) { - dev_kfree_skb(skb); pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); lp->tx_skbs[i].skb = NULL; lp->tx_skbs[i].skb_dma = 0; } diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index f9f3dba7a588..db23cb36ae5c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -751,7 +751,6 @@ struct netvsc_device { u32 send_section_cnt; u32 send_section_size; unsigned long *send_section_map; - int map_words; /* Used for NetVSP initialization protocol */ struct completion channel_init_wait; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 8dd0b8770328..15ef713d96c0 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -236,6 +236,7 @@ static int netvsc_init_buf(struct hv_device *device) struct netvsc_device *net_device; struct nvsp_message *init_packet; struct net_device *ndev; + size_t map_words; int node; net_device = get_outbound_net_device(device); @@ -401,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device) net_device->send_section_size, net_device->send_section_cnt); /* Setup state for managing the send buffer. */ - net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, - BITS_PER_LONG); + map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG); - net_device->send_section_map = kcalloc(net_device->map_words, - sizeof(ulong), GFP_KERNEL); + net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL); if (net_device->send_section_map == NULL) { ret = -ENOMEM; goto cleanup; @@ -683,7 +682,7 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) unsigned long *map_addr = net_device->send_section_map; unsigned int i; - for_each_clear_bit(i, map_addr, net_device->map_words) { + for_each_clear_bit(i, map_addr, net_device->send_section_cnt) { if (sync_test_and_set_bit(i, map_addr) == 0) return i; } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ff0a5ed3ca80..49ce4e9f4a0f 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -617,7 +617,8 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err) static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, unsigned char **iv, - struct scatterlist **sg) + struct scatterlist **sg, + int num_frags) { size_t size, iv_offset, sg_offset; struct aead_request *req; @@ -629,7 +630,7 @@ static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, size = ALIGN(size, __alignof__(struct scatterlist)); sg_offset = size; - size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1); + size += sizeof(struct scatterlist) * num_frags; tmp = kmalloc(size, GFP_ATOMIC); if (!tmp) @@ -649,6 +650,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, { int ret; struct scatterlist *sg; + struct sk_buff *trailer; unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; @@ -723,7 +725,14 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, return ERR_PTR(-EINVAL); } - req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg); + ret = skb_cow_data(skb, 0, &trailer); + if (unlikely(ret < 0)) { + macsec_txsa_put(tx_sa); + kfree_skb(skb); + return ERR_PTR(ret); + } + + req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); @@ -732,7 +741,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, macsec_fill_iv(iv, secy->sci, pn); - sg_init_table(sg, MAX_SKB_FRAGS + 1); + sg_init_table(sg, ret); skb_to_sgvec(skb, sg, 0, skb->len); if (tx_sc->encrypt) { @@ -917,6 +926,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, { int ret; struct scatterlist *sg; + struct sk_buff *trailer; unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; @@ -927,7 +937,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, if (!skb) return ERR_PTR(-ENOMEM); - req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg); + ret = skb_cow_data(skb, 0, &trailer); + if (unlikely(ret < 0)) { + kfree_skb(skb); + return ERR_PTR(ret); + } + req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); @@ -936,7 +951,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, hdr = (struct macsec_eth_header *)skb->data; macsec_fill_iv(iv, sci, ntohl(hdr->packet_number)); - sg_init_table(sg, MAX_SKB_FRAGS + 1); + sg_init_table(sg, ret); skb_to_sgvec(skb, sg, 0, skb->len); if (hdr->tci_an & MACSEC_TCI_E) { diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9261722960a7..b34eaaae03fd 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1139,6 +1139,7 @@ static int macvlan_port_create(struct net_device *dev) static void macvlan_port_destroy(struct net_device *dev) { struct macvlan_port *port = macvlan_port_get_rtnl(dev); + struct sk_buff *skb; dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); @@ -1147,7 +1148,15 @@ static void macvlan_port_destroy(struct net_device *dev) * but we need to cancel it and purge left skbs if any. */ cancel_work_sync(&port->bc_work); - __skb_queue_purge(&port->bc_queue); + + while ((skb = __skb_dequeue(&port->bc_queue))) { + const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; + + if (src) + dev_put(src->dev); + + kfree_skb(skb); + } kfree(port); } diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1326d99771c1..da5b39268370 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -297,17 +297,6 @@ static int kszphy_config_init(struct phy_device *phydev) if (priv->led_mode >= 0) kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode); - if (phy_interrupt_is_valid(phydev)) { - int ctl = phy_read(phydev, MII_BMCR); - - if (ctl < 0) - return ctl; - - ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE); - if (ret < 0) - return ret; - } - return 0; } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a2bfc82e95d7..97ff1278167b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -591,16 +591,18 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) EXPORT_SYMBOL(phy_mii_ioctl); /** - * phy_start_aneg - start auto-negotiation for this PHY device + * phy_start_aneg_priv - start auto-negotiation for this PHY device * @phydev: the phy_device struct + * @sync: indicate whether we should wait for the workqueue cancelation * * Description: Sanitizes the settings (if we're not autonegotiating * them), and then calls the driver's config_aneg function. * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int phy_start_aneg_priv(struct phy_device *phydev, bool sync) { + bool trigger = 0; int err; if (!phydev->drv) @@ -628,10 +630,40 @@ int phy_start_aneg(struct phy_device *phydev) } } + /* Re-schedule a PHY state machine to check PHY status because + * negotiation may already be done and aneg interrupt may not be + * generated. + */ + if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) { + err = phy_aneg_done(phydev); + if (err > 0) { + trigger = true; + err = 0; + } + } + out_unlock: mutex_unlock(&phydev->lock); + + if (trigger) + phy_trigger_machine(phydev, sync); + return err; } + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + return phy_start_aneg_priv(phydev, true); +} EXPORT_SYMBOL(phy_start_aneg); /** @@ -659,7 +691,7 @@ void phy_start_machine(struct phy_device *phydev) * state machine runs. */ -static void phy_trigger_machine(struct phy_device *phydev, bool sync) +void phy_trigger_machine(struct phy_device *phydev, bool sync) { if (sync) cancel_delayed_work_sync(&phydev->state_queue); @@ -1154,7 +1186,7 @@ void phy_state_machine(struct work_struct *work) mutex_unlock(&phydev->lock); if (needs_aneg) - err = phy_start_aneg(phydev); + err = phy_start_aneg_priv(phydev, false); else if (do_suspend) phy_suspend(phydev); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f8c81f12d988..85c01247f2e3 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2361,8 +2361,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2634,8 +2636,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_PORT_LIST_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 3dd490f53e48..f28bd74ac275 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -369,7 +369,7 @@ config USB_NET_NET1080 optionally with LEDs that indicate traffic config USB_NET_PLUSB - tristate "Prolific PL-2301/2302/25A1 based cables" + tristate "Prolific PL-2301/2302/25A1/27A1 based cables" # if the handshake/init/reset problems, from original 'plusb', # are ever resolved ... then remove "experimental" depends on USB_USBNET diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 4f2e8141dbe2..00067a0c51ca 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2534,13 +2534,6 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, SET_NETDEV_DEV(net, &interface->dev); SET_NETDEV_DEVTYPE(net, &hso_type); - /* registering our net device */ - result = register_netdev(net); - if (result) { - dev_err(&interface->dev, "Failed to register device\n"); - goto exit; - } - /* start allocating */ for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); @@ -2560,6 +2553,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, add_net_device(hso_dev); + /* registering our net device */ + result = register_netdev(net); + if (result) { + dev_err(&interface->dev, "Failed to register device\n"); + goto exit; + } + hso_log_port(hso_dev); hso_create_rfkill(hso_dev, interface); @@ -3279,9 +3279,9 @@ static void __exit hso_exit(void) pr_info("unloaded\n"); tty_unregister_driver(tty_drv); - put_tty_driver(tty_drv); /* deregister the usb driver */ usb_deregister(&hso_driver); + put_tty_driver(tty_drv); } /* Module definitions */ diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 22e1a9a99a7d..6fe59373cba9 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -102,7 +102,7 @@ static int pl_reset(struct usbnet *dev) } static const struct driver_info prolific_info = { - .description = "Prolific PL-2301/PL-2302/PL-25A1", + .description = "Prolific PL-2301/PL-2302/PL-25A1/PL-27A1", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT, /* some PL-2302 versions seem to fail usb_set_interface() */ .reset = pl_reset, @@ -139,6 +139,17 @@ static const struct usb_device_id products [] = { * Host-to-Host Cable */ .driver_info = (unsigned long) &prolific_info, + +}, + +/* super speed cables */ +{ + USB_DEVICE(0x067b, 0x27a1), /* PL-27A1, no eeprom + * also: goobay Active USB 3.0 + * Data Link, + * Unitek Y-3501 + */ + .driver_info = (unsigned long) &prolific_info, }, { }, // END @@ -158,5 +169,5 @@ static struct usb_driver plusb_driver = { module_usb_driver(plusb_driver); MODULE_AUTHOR("David Brownell"); -MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1 USB Host to Host Link Driver"); +MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1/27A1 USB Host to Host Link Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eeb409c287b8..d5e0906262ea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5; module_param(shutdown_timeout, byte, 0644); MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); -unsigned int nvme_max_retries = 5; -module_param_named(max_retries, nvme_max_retries, uint, 0644); +static u8 nvme_max_retries = 5; +module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -EXPORT_SYMBOL_GPL(nvme_max_retries); static int nvme_char_major; module_param(nvme_char_major, int, 0); @@ -62,11 +61,66 @@ module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); +static bool force_apst; +module_param(force_apst, bool, 0644); +MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); + static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +static int nvme_error_status(struct request *req) +{ + switch (nvme_req(req)->status & 0x7ff) { + case NVME_SC_SUCCESS: + return 0; + case NVME_SC_CAP_EXCEEDED: + return -ENOSPC; + default: + return -EIO; + + /* + * XXX: these errors are a nasty side-band protocol to + * drivers/md/dm-mpath.c:noretry_error() that aren't documented + * anywhere.. + */ + case NVME_SC_CMD_SEQ_ERROR: + return -EILSEQ; + case NVME_SC_ONCS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case NVME_SC_WRITE_FAULT: + case NVME_SC_READ_ERROR: + case NVME_SC_UNWRITTEN_BLOCK: + return -ENODATA; + } +} + +static inline bool nvme_req_needs_retry(struct request *req) +{ + if (blk_noretry_request(req)) + return false; + if (nvme_req(req)->status & NVME_SC_DNR) + return false; + if (jiffies - req->start_time >= req->timeout) + return false; + if (nvme_req(req)->retries >= nvme_max_retries) + return false; + return true; +} + +void nvme_complete_rq(struct request *req) +{ + if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { + nvme_req(req)->retries++; + blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); + return; + } + + blk_mq_end_request(req, nvme_error_status(req)); +} +EXPORT_SYMBOL_GPL(nvme_complete_rq); + void nvme_cancel_request(struct request *req, void *data, bool reserved) { int status; @@ -80,7 +134,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved) status = NVME_SC_ABORT_REQ; if (blk_queue_dying(req->q)) status |= NVME_SC_DNR; - blk_mq_complete_request(req, status); + nvme_req(req)->status = status; + blk_mq_complete_request(req); + } EXPORT_SYMBOL_GPL(nvme_cancel_request); @@ -205,12 +261,6 @@ fail: return NULL; } -void nvme_requeue_req(struct request *req) -{ - blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); -} -EXPORT_SYMBOL_GPL(nvme_requeue_req); - struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { @@ -327,6 +377,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { int ret = BLK_MQ_RQ_QUEUE_OK; + if (!(req->rq_flags & RQF_DONTPREP)) { + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; + } + switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: @@ -335,6 +391,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); break; + case REQ_OP_WRITE_ZEROES: + /* currently only aliased to deallocate for a few ctrls: */ case REQ_OP_DISCARD: ret = nvme_setup_discard(ns, req, cmd); break; @@ -378,7 +436,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, blk_execute_rq(req->q, NULL, req, at_head); if (result) *result = nvme_req(req)->result; - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; out: blk_mq_free_request(req); return ret; @@ -463,7 +524,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, } submit: blk_execute_rq(req->q, disk, req, 0); - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; if (result) *result = le32_to_cpu(nvme_req(req)->result.u32); if (meta && !ret && !write) { @@ -900,16 +964,14 @@ static void nvme_config_discard(struct nvme_ns *ns) BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) - ns->queue->limits.discard_zeroes_data = 1; - else - ns->queue->limits.discard_zeroes_data = 0; - ns->queue->limits.discard_alignment = logical_block_size; ns->queue->limits.discard_granularity = logical_block_size; blk_queue_max_discard_sectors(ns->queue, UINT_MAX); blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); + + if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); } static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) @@ -1267,7 +1329,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) * heuristic: we are willing to spend at most 2% of the time * transitioning between power states. Therefore, when running * in any given state, we will enter the next lower-power - * non-operational state after waiting 100 * (enlat + exlat) + * non-operational state after waiting 50 * (enlat + exlat) * microseconds, as long as that state's total latency is under * the requested maximum latency. * @@ -1278,6 +1340,8 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) unsigned apste; struct nvme_feat_auto_pst *table; + u64 max_lat_us = 0; + int max_ps = -1; int ret; /* @@ -1299,6 +1363,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) if (ctrl->ps_max_latency_us == 0) { /* Turn off APST. */ apste = 0; + dev_dbg(ctrl->device, "APST disabled\n"); } else { __le64 target = cpu_to_le64(0); int state; @@ -1348,9 +1413,22 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) target = cpu_to_le64((state << 3) | (transition_ms << 8)); + + if (max_ps == -1) + max_ps = state; + + if (total_latency_us > max_lat_us) + max_lat_us = total_latency_us; } apste = 1; + + if (max_ps == -1) { + dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n"); + } else { + dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n", + max_ps, max_lat_us, (int)sizeof(*table), table); + } } ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, @@ -1488,6 +1566,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } } + if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { + dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; + } + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); @@ -1510,7 +1593,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->npss = id->npss; prev_apsta = ctrl->apsta; - ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta; + if (ctrl->quirks & NVME_QUIRK_NO_APST) { + if (force_apst && id->apsta) { + dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->apsta = 1; + } else { + ctrl->apsta = 0; + } + } else { + ctrl->apsta = id->apsta; + } memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); if (ctrl->ops->is_fabrics) { @@ -2393,7 +2485,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_freeze_queue_start(ns->queue); + blk_freeze_queue_start(ns->queue); mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_start_freeze); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5b7386f69f4d..990e6fb32a63 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -471,6 +471,16 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) } EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl) +{ + if (ctrl->opts->max_reconnects != -1 && + ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(nvmf_should_reconnect); + /** * nvmf_register_transport() - NVMe Fabrics Library registration function. * @ops: Transport ops instance to be registered to the @@ -533,6 +543,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_QUEUE_SIZE, "queue_size=%d" }, { NVMF_OPT_NR_IO_QUEUES, "nr_io_queues=%d" }, { NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" }, + { NVMF_OPT_CTRL_LOSS_TMO, "ctrl_loss_tmo=%d" }, { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, @@ -546,6 +557,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, char *options, *o, *p; int token, ret = 0; size_t nqnlen = 0; + int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; /* Set defaults */ opts->queue_size = NVMF_DEF_QUEUE_SIZE; @@ -655,6 +667,16 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->kato = token; break; + case NVMF_OPT_CTRL_LOSS_TMO: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + + if (token < 0) + pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n"); + ctrl_loss_tmo = token; + break; case NVMF_OPT_HOSTNQN: if (opts->host) { pr_err("hostnqn already user-assigned: %s\n", @@ -710,6 +732,12 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (ctrl_loss_tmo < 0) + opts->max_reconnects = -1; + else + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + if (!opts->host) { kref_get(&nvmf_default_host->ref); opts->host = nvmf_default_host; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 156018182ce4..f5a9c1fb186f 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -21,6 +21,8 @@ #define NVMF_MAX_QUEUE_SIZE 1024 #define NVMF_DEF_QUEUE_SIZE 128 #define NVMF_DEF_RECONNECT_DELAY 10 +/* default to 600 seconds of reconnect attempts before giving up */ +#define NVMF_DEF_CTRL_LOSS_TMO 600 /* * Define a host as seen by the target. We allocate one at boot, but also @@ -53,6 +55,7 @@ enum { NVMF_OPT_HOSTNQN = 1 << 8, NVMF_OPT_RECONNECT_DELAY = 1 << 9, NVMF_OPT_HOST_TRADDR = 1 << 10, + NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, }; /** @@ -77,6 +80,10 @@ enum { * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN. * @kato: Keep-alive timeout. * @host: Virtual NVMe host, contains the NQN and Host ID. + * @nr_reconnects: number of reconnect attempted since the last ctrl failure + * @max_reconnects: maximum number of allowed reconnect attempts before removing + * the controller, (-1) means reconnect forever, zero means remove + * immediately; */ struct nvmf_ctrl_options { unsigned mask; @@ -91,6 +98,8 @@ struct nvmf_ctrl_options { bool discovery_nqn; unsigned int kato; struct nvmf_host *host; + int nr_reconnects; + int max_reconnects; }; /* @@ -133,5 +142,6 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d996ca73d3be..4976db56e351 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -19,6 +19,7 @@ #include <linux/parser.h> #include <uapi/scsi/fc/fc_fs.h> #include <uapi/scsi/fc/fc_els.h> +#include <linux/delay.h> #include "nvme.h" #include "fabrics.h" @@ -44,6 +45,8 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ +#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 + struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -61,16 +64,24 @@ struct nvme_fc_queue { unsigned long flags; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ +enum nvme_fcop_flags { + FCOP_FLAGS_TERMIO = (1 << 0), + FCOP_FLAGS_RELEASED = (1 << 1), + FCOP_FLAGS_COMPLETE = (1 << 2), + FCOP_FLAGS_AEN = (1 << 3), +}; + struct nvmefc_ls_req_op { struct nvmefc_ls_req ls_req; - struct nvme_fc_ctrl *ctrl; + struct nvme_fc_rport *rport; struct nvme_fc_queue *queue; struct request *rq; + u32 flags; int ls_error; struct completion ls_done; - struct list_head lsreq_list; /* ctrl->ls_req_list */ + struct list_head lsreq_list; /* rport->ls_req_list */ bool req_queued; }; @@ -79,6 +90,7 @@ enum nvme_fcpop_state { FCPOP_STATE_IDLE = 1, FCPOP_STATE_ACTIVE = 2, FCPOP_STATE_ABORTED = 3, + FCPOP_STATE_COMPLETE = 4, }; struct nvme_fc_fcp_op { @@ -97,6 +109,7 @@ struct nvme_fc_fcp_op { struct request *rq; atomic_t state; + u32 flags; u32 rqno; u32 nents; @@ -120,23 +133,24 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; + struct list_head ls_req_list; + struct device *dev; /* physical device for dma */ + struct nvme_fc_lport *lport; spinlock_t lock; struct kref ref; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_state { - FCCTRL_INIT = 0, - FCCTRL_ACTIVE = 1, +enum nvme_fcctrl_flags { + FCCTRL_TERMIO = (1 << 0), }; struct nvme_fc_ctrl { spinlock_t lock; struct nvme_fc_queue *queues; - u32 queue_count; - struct device *dev; struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + u32 queue_count; u32 cnum; u64 association_id; @@ -144,14 +158,19 @@ struct nvme_fc_ctrl { u64 cap; struct list_head ctrl_list; /* rport->ctrl_list */ - struct list_head ls_req_list; struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; struct work_struct delete_work; + struct work_struct reset_work; + struct delayed_work connect_work; + int reconnect_delay; + int connect_attempts; + struct kref ref; - int state; + u32 flags; + u32 iocnt; struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; @@ -419,9 +438,12 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); + INIT_LIST_HEAD(&newrec->ls_req_list); kref_init(&newrec->ref); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + newrec->dev = lport->dev; + newrec->lport = lport; newrec->remoteport.private = &newrec[1]; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; @@ -444,7 +466,6 @@ out_kfree_rport: out_reghost_failed: *portptr = NULL; return ret; - } EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); @@ -487,6 +508,30 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport) return kref_get_unless_zero(&rport->ref); } +static int +nvme_fc_abort_lsops(struct nvme_fc_rport *rport) +{ + struct nvmefc_ls_req_op *lsop; + unsigned long flags; + +restart: + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { + if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { + lsop->flags |= FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&rport->lock, flags); + rport->lport->ops->ls_abort(&rport->lport->localport, + &rport->remoteport, + &lsop->ls_req); + goto restart; + } + } + spin_unlock_irqrestore(&rport->lock, flags); + + return 0; +} + /** * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously @@ -522,6 +567,8 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_abort_lsops(rport); + nvme_fc_rport_put(rport); return 0; } @@ -624,16 +671,16 @@ static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); static void -__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, - struct nvmefc_ls_req_op *lsop) +__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) { + struct nvme_fc_rport *rport = lsop->rport; struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); if (!lsop->req_queued) { - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); return; } @@ -641,56 +688,71 @@ __nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, lsop->req_queued = false; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma, + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvme_fc_ctrl_put(ctrl); + nvme_fc_rport_put(rport); } static int -__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, +__nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - int ret; + int ret = 0; - if (!nvme_fc_ctrl_get(ctrl)) + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return -ECONNREFUSED; + + if (!nvme_fc_rport_get(rport)) return -ESHUTDOWN; lsreq->done = done; - lsop->ctrl = ctrl; + lsop->rport = rport; lsop->req_queued = false; INIT_LIST_HEAD(&lsop->lsreq_list); init_completion(&lsop->ls_done); - lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr, + lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, lsreq->rqstlen + lsreq->rsplen, DMA_BIDIRECTIONAL); - if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) { - nvme_fc_ctrl_put(ctrl); - dev_err(ctrl->dev, - "els request command failed EFAULT.\n"); - return -EFAULT; + if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_putrport; } lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); - list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list); + list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); lsop->req_queued = true; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - ret = ctrl->lport->ops->ls_req(&ctrl->lport->localport, - &ctrl->rport->remoteport, lsreq); + ret = rport->lport->ops->ls_req(&rport->lport->localport, + &rport->remoteport, lsreq); if (ret) - lsop->ls_error = ret; + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&rport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&rport->lock, flags); + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_putrport: + nvme_fc_rport_put(rport); return ret; } @@ -705,15 +767,15 @@ nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) } static int -nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) +nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; int ret; - ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done); + ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); - if (!ret) + if (!ret) { /* * No timeout/not interruptible as we need the struct * to exist until the lldd calls us back. Thus mandate @@ -722,14 +784,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) */ wait_for_completion(&lsop->ls_done); - __nvme_fc_finish_ls_req(ctrl, lsop); + __nvme_fc_finish_ls_req(lsop); - if (ret) { - dev_err(ctrl->dev, - "ls request command failed (%d).\n", ret); - return ret; + ret = lsop->ls_error; } + if (ret) + return ret; + /* ACC or RJT payload ? */ if (rjt->w0.ls_cmd == FCNVME_LS_RJT) return -ENXIO; @@ -737,19 +799,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) return 0; } -static void -nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl, +static int +nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { - int ret; - - ret = __nvme_fc_send_ls_req(ctrl, lsop, done); - /* don't wait for completion */ - if (ret) - done(&lsop->ls_req, ret); + return __nvme_fc_send_ls_req(rport, lsop, done); } /* Validation Error indexes into the string table below */ @@ -839,7 +896,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, lsreq->rsplen = sizeof(*assoc_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -848,11 +905,12 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, /* validate the ACC response */ if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) fcret = VERR_LSACC; - if (assoc_acc->hdr.desc_list_len != + else if (assoc_acc->hdr.desc_list_len != fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_cr_assoc_acc))) fcret = VERR_CR_ASSOC_ACC_LEN; - if (assoc_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (assoc_acc->hdr.rqst.desc_tag != + cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (assoc_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -946,7 +1004,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, lsreq->rsplen = sizeof(*conn_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -955,10 +1013,10 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, /* validate the ACC response */ if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) fcret = VERR_LSACC; - if (conn_acc->hdr.desc_list_len != + else if (conn_acc->hdr.desc_list_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) fcret = VERR_CR_CONN_ACC_LEN; - if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (conn_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -997,14 +1055,8 @@ static void nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) { struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); - struct nvme_fc_ctrl *ctrl = lsop->ctrl; - __nvme_fc_finish_ls_req(ctrl, lsop); - - if (status) - dev_err(ctrl->dev, - "disconnect assoc ls request command failed (%d).\n", - status); + __nvme_fc_finish_ls_req(lsop); /* fc-nvme iniator doesn't care about success or failure of cmd */ @@ -1035,6 +1087,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) struct fcnvme_ls_disconnect_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; + int ret; lsop = kzalloc((sizeof(*lsop) + ctrl->lport->ops->lsrqst_priv_sz + @@ -1077,7 +1130,10 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) lsreq->rsplen = sizeof(*discon_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done); + ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, + nvme_fc_disconnect_assoc_done); + if (ret) + kfree(lsop); /* only meaningful part to terminating the association */ ctrl->association_id = 0; @@ -1086,6 +1142,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) /* *********************** NVME Ctrl Routines **************************** */ +static void __nvme_fc_final_op_cleanup(struct request *rq); static int nvme_fc_reinit_request(void *data, struct request *rq) @@ -1123,21 +1180,84 @@ nvme_fc_exit_request(void *data, struct request *rq, return __nvme_fc_exit_request(data, op); } +static int +__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +{ + int state; + + state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); + if (state != FCPOP_STATE_ACTIVE) { + atomic_set(&op->state, state); + return -ECANCELED; + } + + ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, + &ctrl->rport->remoteport, + op->queue->lldd_handle, + &op->fcp_req); + + return 0; +} + static void -nvme_fc_exit_aen_ops(struct nvme_fc_ctrl *ctrl) +nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) { struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; - int i; + unsigned long flags; + int i, ret; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { - if (atomic_read(&aen_op->state) == FCPOP_STATE_UNINIT) + if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) continue; - __nvme_fc_exit_request(ctrl, aen_op); - nvme_fc_ctrl_put(ctrl); + + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + aen_op->flags |= FCOP_FLAGS_TERMIO; + } + spin_unlock_irqrestore(&ctrl->lock, flags); + + ret = __nvme_fc_abort_op(ctrl, aen_op); + if (ret) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. + */ + + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + aen_op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } } } -void +static inline int +__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_fcp_op *op) +{ + unsigned long flags; + bool complete_rq = false; + + spin_lock_irqsave(&ctrl->lock, flags); + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + } + if (op->flags & FCOP_FLAGS_RELEASED) + complete_rq = true; + else + op->flags |= FCOP_FLAGS_COMPLETE; + spin_unlock_irqrestore(&ctrl->lock, flags); + + return complete_rq; +} + +static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); @@ -1146,7 +1266,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_fc_ctrl *ctrl = op->ctrl; struct nvme_fc_queue *queue = op->queue; struct nvme_completion *cqe = &op->rsp_iu.cqe; - u16 status; + struct nvme_command *sqe = &op->cmd_iu.sqe; + __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); + union nvme_result result; + bool complete_rq; /* * WARNING: @@ -1181,9 +1304,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) sizeof(op->rsp_iu), DMA_FROM_DEVICE); if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) - status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - else - status = freq->status; + status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); + else if (freq->status) + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); /* * For the linux implementation, if we have an unsuccesful @@ -1211,10 +1334,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) */ if (freq->transferred_length != be32_to_cpu(op->cmd_iu.data_len)) { - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result.u64 = 0; + result.u64 = 0; break; case sizeof(struct nvme_fc_ersp_iu): @@ -1226,28 +1349,40 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) (freq->rcv_rsplen / 4) || be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || - op->rqno != le16_to_cpu(cqe->command_id))) { - status = -EIO; + op->rsp_iu.status_code || + sqe->common.command_id != cqe->command_id)) { + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result = cqe->result; - status = le16_to_cpu(cqe->status) >> 1; + result = cqe->result; + status = cqe->status; break; default: - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } done: - if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) { - nvme_complete_async_event(&queue->ctrl->ctrl, status, - &op->nreq.result); + if (op->flags & FCOP_FLAGS_AEN) { + nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags = FCOP_FLAGS_AEN; /* clear other flags */ nvme_fc_ctrl_put(ctrl); return; } - blk_mq_complete_request(rq, status); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + if (!complete_rq) { + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + status = cpu_to_le16(NVME_SC_ABORT_REQ); + if (blk_queue_dying(rq->q)) + status |= cpu_to_le16(NVME_SC_DNR); + } + nvme_end_request(rq, status, result); + } else + __nvme_fc_final_op_cleanup(rq); } static int @@ -1328,25 +1463,55 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; + void *private; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + GFP_KERNEL); + if (!private) + return -ENOMEM; + cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], aen_op, (struct request *)NULL, (AEN_CMDID_BASE + i)); - if (ret) + if (ret) { + kfree(private); return ret; + } + + aen_op->flags = FCOP_FLAGS_AEN; + aen_op->fcp_req.first_sgl = NULL; /* no sg list */ + aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; + /* Note: core layer may overwrite the sqe.command_id value */ sqe->common.command_id = AEN_CMDID_BASE + i; } return 0; } +static void +nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_fcp_op *aen_op; + int i; + + aen_op = ctrl->aen_ops; + for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + if (!aen_op->fcp_req.private) + continue; + + __nvme_fc_exit_request(ctrl, aen_op); + + kfree(aen_op->fcp_req.private); + aen_op->fcp_req.private = NULL; + } +} static inline void __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, @@ -1446,15 +1611,6 @@ __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, } static void -nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl) -{ - __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_fc_free_queue(&ctrl->queues[0]); -} - -static void nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) { int i; @@ -1541,19 +1697,27 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->state != FCCTRL_INIT) { - /* remove from rport list */ - spin_lock_irqsave(&ctrl->rport->lock, flags); - list_del(&ctrl->ctrl_list); - spin_unlock_irqrestore(&ctrl->rport->lock, flags); + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); } + /* remove from rport list */ + spin_lock_irqsave(&ctrl->rport->lock, flags); + list_del(&ctrl->ctrl_list); + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + + blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + + kfree(ctrl->queues); + put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - kfree(ctrl->queues); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); - nvmf_free_options(ctrl->ctrl.opts); + if (ctrl->ctrl.opts) + nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); } @@ -1574,57 +1738,38 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl) +nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); WARN_ON(nctrl != &ctrl->ctrl); - /* - * Tear down the association, which will generate link - * traffic to terminate connections - */ - - if (ctrl->state != FCCTRL_INIT) { - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_delete_hw_io_queues(ctrl); - nvme_fc_free_io_queues(ctrl); - } - - nvme_fc_exit_aen_ops(ctrl); - - nvme_fc_destroy_admin_queue(ctrl); - } - nvme_fc_ctrl_put(ctrl); } - -static int -__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +static void +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { - int state; + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport association error detected: %s\n", + ctrl->cnum, errmsg); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: resetting controller\n", ctrl->cnum); - state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); - if (state != FCPOP_STATE_ACTIVE) { - atomic_set(&op->state, state); - return -ECANCELED; /* fail */ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to RECONNECTING\n", ctrl->cnum); + return; } - ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, - &ctrl->rport->remoteport, - op->queue->lldd_handle, - &op->fcp_req); - - return 0; + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Failed to schedule " + "reset work\n", ctrl->cnum); } -enum blk_eh_timer_return +static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); @@ -1640,11 +1785,13 @@ nvme_fc_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; /* - * TODO: force a controller reset - * when that happens, queues will be torn down and outstanding - * ios will be terminated, and the above abort, on a single io - * will no longer be needed. + * we can't individually ABTS an io without affecting the queue, + * thus killing the queue, adn thus the association. + * So resolve by performing a controller reset, which will stop + * the host/io stack, terminate the association on the link, + * and recreate an association on the link. */ + nvme_fc_error_recovery(ctrl, "io timeout error"); return BLK_EH_HANDLED; } @@ -1738,6 +1885,13 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u32 csn; int ret; + /* + * before attempting to send the io, check to see if we believe + * the target device is present + */ + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return BLK_MQ_RQ_QUEUE_ERROR; + if (!nvme_fc_ctrl_get(ctrl)) return BLK_MQ_RQ_QUEUE_ERROR; @@ -1761,7 +1915,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, op->fcp_req.io_dir = io_dir; op->fcp_req.transferred_length = 0; op->fcp_req.rcv_rsplen = 0; - op->fcp_req.status = 0; + op->fcp_req.status = NVME_SC_SUCCESS; op->fcp_req.sqid = cpu_to_le16(queue->qnum); /* @@ -1782,14 +1936,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); sqe->rw.dptr.sgl.addr = 0; - /* odd that we set the command_id - should come from nvme-fabrics */ - WARN_ON_ONCE(sqe->common.command_id != cpu_to_le16(op->rqno)); - - if (op->rq) { /* skipped on aens */ + if (!(op->flags & FCOP_FLAGS_AEN)) { ret = nvme_fc_map_data(ctrl, op->rq, op); if (ret < 0) { - dev_err(queue->ctrl->ctrl.device, - "Failed to map data (%d)\n", ret); nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); return (ret == -ENOMEM || ret == -EAGAIN) ? @@ -1802,7 +1951,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, atomic_set(&op->state, FCPOP_STATE_ACTIVE); - if (op->rq) + if (!(op->flags & FCOP_FLAGS_AEN)) blk_mq_start_request(op->rq); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, @@ -1810,9 +1959,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, queue->lldd_handle, &op->fcp_req); if (ret) { - dev_err(ctrl->dev, - "Send nvme command failed - lldd returned %d.\n", ret); - if (op->rq) { /* normal request */ nvme_fc_unmap_data(ctrl, op->rq, op); nvme_cleanup_cmd(op->rq); @@ -1882,12 +2028,8 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) struct nvme_fc_fcp_op *op; req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) { - dev_err(queue->ctrl->ctrl.device, - "tag 0x%x on QNum %#x not found\n", - tag, queue->qnum); + if (!req) return 0; - } op = blk_mq_rq_to_pdu(req); @@ -1904,11 +2046,21 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; + unsigned long flags; + bool terminating = false; int ret; if (aer_idx > NVME_FC_NR_AEN_COMMANDS) return; + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + terminating = true; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (terminating) + return; + aen_op = &ctrl->aen_ops[aer_idx]; ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, @@ -1919,36 +2071,101 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) } static void -nvme_fc_complete_rq(struct request *rq) +__nvme_fc_final_op_cleanup(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int error = 0, state; - state = atomic_xchg(&op->state, FCPOP_STATE_IDLE); + atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | + FCOP_FLAGS_COMPLETE); nvme_cleanup_cmd(rq); - nvme_fc_unmap_data(ctrl, rq, op); + nvme_complete_rq(rq); + nvme_fc_ctrl_put(ctrl); - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } +} - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); +static void +nvme_fc_complete_rq(struct request *rq) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_ctrl *ctrl = op->ctrl; + unsigned long flags; + bool completed = false; + + /* + * the core layer, on controller resets after calling + * nvme_shutdown_ctrl(), calls complete_rq without our + * calling blk_mq_complete_request(), thus there may still + * be live i/o outstanding with the LLDD. Means transport has + * to track complete calls vs fcpio_done calls to know what + * path to take on completes and dones. + */ + spin_lock_irqsave(&ctrl->lock, flags); + if (op->flags & FCOP_FLAGS_COMPLETE) + completed = true; + else + op->flags |= FCOP_FLAGS_RELEASED; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (completed) + __nvme_fc_final_op_cleanup(rq); +} + +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static void +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + unsigned long flags; + int status; + + if (!blk_mq_request_started(req)) + return; + + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + op->flags |= FCOP_FLAGS_TERMIO; } + spin_unlock_irqrestore(&ctrl->lock, flags); - nvme_fc_ctrl_put(ctrl); + status = __nvme_fc_abort_op(ctrl, op); + if (status) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. + */ - blk_mq_end_request(rq, error); + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } } -static struct blk_mq_ops nvme_fc_mq_ops = { + +static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, .init_request = nvme_fc_init_request, @@ -1959,145 +2176,275 @@ static struct blk_mq_ops nvme_fc_mq_ops = { .timeout = nvme_fc_timeout, }; -static struct blk_mq_ops nvme_fc_admin_mq_ops = { - .queue_rq = nvme_fc_queue_rq, - .complete = nvme_fc_complete_rq, - .init_request = nvme_fc_init_admin_request, - .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, - .init_hctx = nvme_fc_init_admin_hctx, - .timeout = nvme_fc_timeout, -}; - static int -nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl) +nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) { - u32 segs; - int error; + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; - nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; + } - error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_FC_AQ_BLKMQ_DEPTH, - (NVME_FC_AQ_BLKMQ_DEPTH / 4)); - if (error) - return error; + ctrl->queue_count = opts->nr_io_queues + 1; + if (!opts->nr_io_queues) + return 0; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); + ctrl->tag_set.ops = &nvme_fc_mq_ops; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; + ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + (SG_CHUNK_SIZE * sizeof(struct scatterlist)) + ctrl->lport->ops->fcprqst_priv_sz; - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_free_queue; + ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + if (ret) + return ret; - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; + ctrl->ctrl.tagset = &ctrl->tag_set; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_cleanup_blk_queue; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_cleanup_blk_queue: + nvme_stop_keep_alive(&ctrl->ctrl); + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_free_io_queues(ctrl); + + /* force put free routine to ignore io queues */ + ctrl->ctrl.tagset = NULL; + + return ret; +} + +static int +nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; + + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; } - error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, + /* check for io queues existing */ + if (ctrl->queue_count == 1) + return 0; + + dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + ret = blk_mq_reinit_tagset(&ctrl->tag_set); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_free_io_queues: + nvme_fc_free_io_queues(ctrl); + return ret; +} + +/* + * This routine restarts the controller on the host side, and + * on the link side, recreates the controller association. + */ +static int +nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + u32 segs; + int ret; + bool changed; + + ctrl->connect_attempts++; + + /* + * Create the admin queue + */ + + nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + + ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, NVME_FC_AQ_BLKMQ_DEPTH); - if (error) - goto out_cleanup_queue; + if (ret) + goto out_free_queue; - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) + ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], + NVME_FC_AQ_BLKMQ_DEPTH, + (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + if (ret) goto out_delete_hw_queue; - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); - if (error) { + if (ctrl->ctrl.state != NVME_CTRL_NEW) + blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* + * Check controller capabilities + * + * todo:- add code to check if ctrl attributes changed from + * prior connection values + */ + + ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + if (ret) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_delete_hw_queue; + goto out_disconnect_admin_queue; } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); - if (error) - goto out_delete_hw_queue; + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + if (ret) + goto out_disconnect_admin_queue; segs = min_t(u32, NVME_FC_MAX_SEGMENTS, ctrl->lport->ops->max_sgl_segments); ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_delete_hw_queue; + ret = nvme_init_identify(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* sanity checks */ + + /* FC-NVME does not have other data in the capsule */ + if (ctrl->ctrl.icdoff) { + dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", + ctrl->ctrl.icdoff); + goto out_disconnect_admin_queue; + } nvme_start_keep_alive(&ctrl->ctrl); - return 0; + /* FC-NVME supports normal SGL Data Block Descriptors */ + if (opts->queue_size > ctrl->ctrl.maxcmd) { + /* warn if maxcmd is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl maxcmd %u, reducing " + "to queue_size\n", + opts->queue_size, ctrl->ctrl.maxcmd); + opts->queue_size = ctrl->ctrl.maxcmd; + } + + ret = nvme_fc_init_aen_ops(ctrl); + if (ret) + goto out_term_aen_ops; + + /* + * Create the io queues + */ + + if (ctrl->queue_count > 1) { + if (ctrl->ctrl.state == NVME_CTRL_NEW) + ret = nvme_fc_create_io_queues(ctrl); + else + ret = nvme_fc_reinit_io_queues(ctrl); + if (ret) + goto out_term_aen_ops; + } + + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + WARN_ON_ONCE(!changed); + + ctrl->connect_attempts = 0; + + kref_get(&ctrl->ctrl.kref); + + if (ctrl->queue_count > 1) { + nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); + } + + return 0; /* Success */ + +out_term_aen_ops: + nvme_fc_term_aen_ops(ctrl); + nvme_stop_keep_alive(&ctrl->ctrl); +out_disconnect_admin_queue: + /* send a Disconnect(association) LS to fc-nvme target */ + nvme_fc_xmt_disconnect_assoc(ctrl); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); - return error; + + return ret; } /* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. - * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. + * This routine stops operation of the controller on the host side. + * On the host os stack side: Admin and IO queues are stopped, + * outstanding ios on them terminated via FC ABTS. + * On the link side: the association is terminated. */ static void -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); -int status; - - if (!blk_mq_request_started(req)) - return; + unsigned long flags; - /* this performs an ABTS-LS on the FC exchange for the io */ - status = __nvme_fc_abort_op(ctrl, op); - /* - * if __nvme_fc_abort_op failed: io wasn't active to abort - * consider it done. Assume completion path already completing - * in parallel - */ - if (status) - /* io wasn't active to abort consider it done */ - /* assume completion path already completing in parallel */ - return; -} + nvme_stop_keep_alive(&ctrl->ctrl); + spin_lock_irqsave(&ctrl->lock, flags); + ctrl->flags |= FCCTRL_TERMIO; + ctrl->iocnt = 0; + spin_unlock_irqrestore(&ctrl->lock, flags); -/* - * This routine stops operation of the controller. Admin and IO queues - * are stopped, outstanding ios on them terminated, and the nvme ctrl - * is shutdown. - */ -static void -nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) -{ /* * If io queues are present, stop them and terminate all outstanding * ios on them. As FC allocates FC exchange for each io, the @@ -2116,35 +2463,79 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) nvme_fc_terminate_exchange, &ctrl->ctrl); } - if (ctrl->ctrl.state == NVME_CTRL_LIVE) - nvme_shutdown_ctrl(&ctrl->ctrl); + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controlelr is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ /* - * now clean up the admin queue. Same thing as above. + * clean up the admin queue. Same thing as above. * use blk_mq_tagset_busy_itr() and the transport routine to * terminate the exchanges. */ blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); + + /* kill the aens as they are a separate path */ + nvme_fc_abort_aen_ops(ctrl); + + /* wait for all io that had to be aborted */ + spin_lock_irqsave(&ctrl->lock, flags); + while (ctrl->iocnt) { + spin_unlock_irqrestore(&ctrl->lock, flags); + msleep(1000); + spin_lock_irqsave(&ctrl->lock, flags); + } + ctrl->flags &= ~FCCTRL_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + nvme_fc_term_aen_ops(ctrl); + + /* + * send a Disconnect(association) LS to fc-nvme target + * Note: could have been sent at top of process, but + * cleaner on link traffic if after the aborts complete. + * Note: if association doesn't exist, association_id will be 0 + */ + if (ctrl->association_id) + nvme_fc_xmt_disconnect_assoc(ctrl); + + if (ctrl->ctrl.tagset) { + nvme_fc_delete_hw_io_queues(ctrl); + nvme_fc_free_io_queues(ctrl); + } + + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); + nvme_fc_free_queue(&ctrl->queues[0]); } -/* - * Called to teardown an association. - * May be called with association fully in place or partially in place. - */ static void -__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) +nvme_fc_delete_ctrl_work(struct work_struct *work) { - nvme_stop_keep_alive(&ctrl->ctrl); + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, delete_work); - /* stop and terminate ios on admin and io queues */ - nvme_fc_shutdown_ctrl(ctrl); + cancel_work_sync(&ctrl->reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + + /* + * kill the association on the link side. this will block + * waiting for io to terminate + */ + nvme_fc_delete_association(ctrl); /* * tear down the controller * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_free_nvme_ctrl() callback. + * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. * From there, the transport will tear down it's logical queues and * association. */ @@ -2153,15 +2544,6 @@ __nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) nvme_put_ctrl(&ctrl->ctrl); } -static void -nvme_fc_del_ctrl_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); - - __nvme_fc_remove_ctrl(ctrl); -} - static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) { @@ -2181,25 +2563,85 @@ static int nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_rport *rport = ctrl->rport; - unsigned long flags; int ret; - spin_lock_irqsave(&rport->lock, flags); + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; + ret = __nvme_fc_del_ctrl(ctrl); - spin_unlock_irqrestore(&rport->lock, flags); - if (ret) - return ret; - flush_work(&ctrl->delete_work); + if (!ret) + flush_workqueue(nvme_fc_wq); - return 0; + nvme_put_ctrl(&ctrl->ctrl); + + return ret; +} + +static void +nvme_fc_reset_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, reset_work); + int ret; + + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } + + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); } +/* + * called by the nvme core layer, for sysfs interface that requests + * a reset of the nvme controller + */ static int nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) { - return -EIO; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) + return -EBUSY; + + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + return -EBUSY; + + flush_work(&ctrl->reset_work); + + return 0; } static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { @@ -2210,95 +2652,75 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, .reset_ctrl = nvme_fc_reset_nvme_ctrl, - .free_ctrl = nvme_fc_free_nvme_ctrl, + .free_ctrl = nvme_fc_nvme_ctrl_freed, .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_del_nvme_ctrl, .get_subsysnqn = nvmf_get_subsysnqn, .get_address = nvmf_get_address, }; -static int -nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) +static void +nvme_fc_connect_ctrl_work(struct work_struct *work) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) { - dev_info(ctrl->ctrl.device, - "set_queue_count failed: %d\n", ret); - return ret; - } - - ctrl->queue_count = opts->nr_io_queues + 1; - if (!opts->nr_io_queues) - return 0; - - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - nvme_fc_init_io_queues(ctrl); - - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); - if (ret) - return ret; - - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_cleanup_blk_queue; + struct nvme_fc_ctrl *ctrl = + container_of(to_delayed_work(work), + struct nvme_fc_ctrl, connect_work); - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_delete_hw_queues; + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } - return 0; + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } -out_delete_hw_queues: - nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - nvme_stop_keep_alive(&ctrl->ctrl); - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_free_io_queues(ctrl); + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reconnect complete\n", + ctrl->cnum); +} - /* force put free routine to ignore io queues */ - ctrl->ctrl.tagset = NULL; - return ret; -} +static const struct blk_mq_ops nvme_fc_admin_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_admin_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_admin_hctx, + .timeout = nvme_fc_timeout, +}; static struct nvme_ctrl * -__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, +nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) { struct nvme_fc_ctrl *ctrl; unsigned long flags; int ret, idx; - bool changed; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { @@ -2314,21 +2736,18 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->ctrl_list); - INIT_LIST_HEAD(&ctrl->ls_req_list); ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; - ctrl->state = FCCTRL_INIT; ctrl->cnum = idx; - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); - if (ret) - goto out_free_ida; - get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work); + INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); + INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); + INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ @@ -2345,87 +2764,87 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), GFP_KERNEL); if (!ctrl->queues) - goto out_uninit_ctrl; - - ret = nvme_fc_configure_admin_queue(ctrl); - if (ret) - goto out_uninit_ctrl; - - /* sanity checks */ - - /* FC-NVME does not have other data in the capsule */ - if (ctrl->ctrl.icdoff) { - dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", - ctrl->ctrl.icdoff); - goto out_remove_admin_queue; - } - - /* FC-NVME supports normal SGL Data Block Descriptors */ + goto out_free_ida; - if (opts->queue_size > ctrl->ctrl.maxcmd) { - /* warn if maxcmd is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", - opts->queue_size, ctrl->ctrl.maxcmd); - opts->queue_size = ctrl->ctrl.maxcmd; - } + memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); + ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; + ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ + ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.nr_hw_queues = 1; + ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - ret = nvme_fc_init_aen_ops(ctrl); + ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) - goto out_exit_aen_ops; + goto out_free_queues; - if (ctrl->queue_count > 1) { - ret = nvme_fc_create_io_queues(ctrl); - if (ret) - goto out_exit_aen_ops; + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + ret = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_admin_tag_set; } - spin_lock_irqsave(&ctrl->lock, flags); - ctrl->state = FCCTRL_ACTIVE; - spin_unlock_irqrestore(&ctrl->lock, flags); - - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); + /* + * Would have been nice to init io queues tag set as well. + * However, we require interaction from the controller + * for max io queue count before we can do so. + * Defer this to the connect path. + */ - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); + if (ret) + goto out_cleanup_admin_q; - kref_get(&ctrl->ctrl.kref); + /* at this point, teardown path changes to ref counting on nvme ctrl */ spin_lock_irqsave(&rport->lock, flags); list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); + ret = nvme_fc_create_association(ctrl); + if (ret) { + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); + + /* as we're past the point where we transition to the ref + * counting teardown path, if we return a bad pointer here, + * the calling routine, thinking it's prior to the + * transition, will do an rport put. Since the teardown + * path also does a rport put, we do an extra get here to + * so proper order/teardown happens. + */ + nvme_fc_rport_get(rport); + + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); } - return &ctrl->ctrl; + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); -out_exit_aen_ops: - nvme_fc_exit_aen_ops(ctrl); -out_remove_admin_queue: - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - nvme_stop_keep_alive(&ctrl->ctrl); - nvme_fc_destroy_admin_queue(ctrl); -out_uninit_ctrl: - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); - if (ret > 0) - ret = -EIO; - /* exit via here will follow ctlr ref point callbacks to free */ - return ERR_PTR(ret); + return &ctrl->ctrl; +out_cleanup_admin_q: + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_admin_tag_set: + blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_free_queues: + kfree(ctrl->queues); out_free_ida: + put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); out_free_ctrl: kfree(ctrl); out_fail: - nvme_fc_rport_put(rport); /* exit via here doesn't follow ctlr ref points */ return ERR_PTR(ret); } @@ -2497,6 +2916,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + struct nvme_ctrl *ctrl; struct nvmet_fc_traddr laddr = { 0L, 0L }; struct nvmet_fc_traddr raddr = { 0L, 0L }; unsigned long flags; @@ -2528,7 +2948,10 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_unlock_irqrestore(&nvme_fc_lock, flags); - return __nvme_fc_create_ctrl(dev, opts, lport, rport); + ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); + if (IS_ERR(ctrl)) + nvme_fc_rport_put(rport); + return ctrl; } } spin_unlock_irqrestore(&nvme_fc_lock, flags); @@ -2546,11 +2969,20 @@ static struct nvmf_transport_ops nvme_fc_transport = { static int __init nvme_fc_init_module(void) { + int ret; + nvme_fc_wq = create_workqueue("nvme_fc_wq"); if (!nvme_fc_wq) return -ENOMEM; - return nvmf_register_transport(&nvme_fc_transport); + ret = nvmf_register_transport(&nvme_fc_transport); + if (ret) + goto err; + + return 0; +err: + destroy_workqueue(nvme_fc_wq); + return ret; } static void __exit nvme_fc_exit_module(void) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 21cac8523bd8..e4e4e60b1224 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -241,9 +241,9 @@ static inline void _nvme_nvm_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128); + BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512); + BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); } static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) @@ -324,7 +324,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, - sizeof(struct nvme_nvm_addr_format)); + sizeof(struct nvm_addr_format)); ret = init_grps(nvm_id, nvme_nvm_id); out: @@ -483,8 +483,8 @@ static void nvme_nvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - rqd->ppa_status = nvme_req(rq)->result.u64; - rqd->error = error; + rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); + rqd->error = nvme_req(rq)->status; nvm_end_io(rqd); kfree(nvme_req(rq)->cmd); @@ -510,12 +510,12 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) } rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - rq->ioprio = bio_prio(bio); - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; + if (bio) { + blk_init_request_from_bio(rq, bio); + } else { + rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + rq->__data_len = 0; + } nvme_nvm_rqtocmd(rq, rqd, ns, cmd); @@ -526,21 +526,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) return 0; } -static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - struct request_queue *q = dev->q; - struct nvme_ns *ns = q->queuedata; - struct nvme_nvm_command c = {}; - - c.erase.opcode = NVM_OP_ERASE; - c.erase.nsid = cpu_to_le32(ns->ns_id); - c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c.erase.length = cpu_to_le16(rqd->nr_ppas - 1); - c.erase.control = cpu_to_le16(rqd->flags); - - return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); -} - static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) { struct nvme_ns *ns = nvmdev->q->queuedata; @@ -576,7 +561,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .set_bb_tbl = nvme_nvm_set_bb_tbl, .submit_io = nvme_nvm_submit_io, - .erase_block = nvme_nvm_erase_block, .create_dma_pool = nvme_nvm_create_dma_pool, .destroy_dma_pool = nvme_nvm_destroy_dma_pool, @@ -611,7 +595,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q, __le64 *metadata = NULL; dma_addr_t metadata_dma; DECLARE_COMPLETION_ONSTACK(wait); - int ret; + int ret = 0; rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, NVME_QID_ANY); @@ -681,9 +665,12 @@ submit: wait_for_completion_io(&wait); - ret = nvme_error_status(rq->errors); + if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else if (nvme_req(rq)->status & 0x7ff) + ret = -EIO; if (result) - *result = rq->errors & 0x7ff; + *result = nvme_req(rq)->status & 0x7ff; if (status) *status = le64_to_cpu(nvme_req(rq)->result.u64); @@ -766,7 +753,7 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); /* cdw11-12 */ c.ph_rw.length = cpu_to_le16(vcmd.nppas); - c.ph_rw.control = cpu_to_le32(vcmd.control); + c.ph_rw.control = cpu_to_le16(vcmd.control); c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); @@ -809,6 +796,8 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) struct request_queue *q = ns->queue; struct nvm_dev *dev; + _nvme_nvm_check_size(); + dev = nvm_alloc_dev(node); if (!dev) return -ENOMEM; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ab2d6ec7eb5c..29c708ca9621 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,16 +21,6 @@ #include <linux/lightnvm.h> #include <linux/sed-opal.h> -enum { - /* - * Driver internal status code for commands that were cancelled due - * to timeouts or controller shutdown. The value is negative so - * that it a) doesn't overlap with the unsigned hardware error codes, - * and b) can easily be tested for. - */ - NVME_SC_CANCELLED = -EINTR, -}; - extern unsigned char nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -43,8 +33,6 @@ extern unsigned char shutdown_timeout; #define NVME_DEFAULT_KATO 5 #define NVME_KATO_GRACE 10 -extern unsigned int nvme_max_retries; - enum { NVME_NS_LBA = 0, NVME_NS_LIGHTNVM = 1, @@ -68,10 +56,10 @@ enum nvme_quirks { NVME_QUIRK_IDENTIFY_CNS = (1 << 1), /* - * The controller deterministically returns O's on reads to discarded - * logical blocks. + * The controller deterministically returns O's on reads to + * logical blocks that deallocate was called on. */ - NVME_QUIRK_DISCARD_ZEROES = (1 << 2), + NVME_QUIRK_DEALLOCATE_ZEROES = (1 << 2), /* * The controller needs a delay before starts checking the device @@ -97,6 +85,13 @@ enum nvme_quirks { struct nvme_request { struct nvme_command *cmd; union nvme_result result; + u8 retries; + u8 flags; + u16 status; +}; + +enum { + NVME_REQ_CANCELLED = (1 << 0), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -254,25 +249,17 @@ static inline void nvme_cleanup_cmd(struct request *req) } } -static inline int nvme_error_status(u16 status) +static inline void nvme_end_request(struct request *req, __le16 status, + union nvme_result result) { - switch (status & 0x7ff) { - case NVME_SC_SUCCESS: - return 0; - case NVME_SC_CAP_EXCEEDED: - return -ENOSPC; - default: - return -EIO; - } -} + struct nvme_request *rq = nvme_req(req); -static inline bool nvme_req_needs_retry(struct request *req, u16 status) -{ - return !(status & NVME_SC_DNR || blk_noretry_request(req)) && - (jiffies - req->start_time) < req->timeout && - req->retries < nvme_max_retries; + rq->status = le16_to_cpu(status) >> 1; + rq->result = result; + blk_mq_complete_request(req); } +void nvme_complete_rq(struct request *req); void nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); @@ -307,7 +294,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid); -void nvme_requeue_req(struct request *req); int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5d309535abbd..c8541c3dcd19 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -104,8 +104,22 @@ struct nvme_dev { u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; + u32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; + u32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; }; +static inline unsigned int sq_idx(unsigned int qid, u32 stride) +{ + return qid * 2 * stride; +} + +static inline unsigned int cq_idx(unsigned int qid, u32 stride) +{ + return (qid * 2 + 1) * stride; +} + static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) { return container_of(ctrl, struct nvme_dev, ctrl); @@ -134,6 +148,10 @@ struct nvme_queue { u16 qid; u8 cq_phase; u8 cqe_seen; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; }; /* @@ -172,6 +190,112 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); + BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); +} + +static inline unsigned int nvme_dbbuf_size(u32 stride) +{ + return ((num_possible_cpus() + 1) * 8 * stride); +} + +static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) + return 0; + + dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_dbs_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_dbs) + return -ENOMEM; + dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_eis_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + return -ENOMEM; + } + + return 0; +} + +static void nvme_dbbuf_dma_free(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + } + if (dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_eis, dev->dbbuf_eis_dma_addr); + dev->dbbuf_eis = NULL; + } +} + +static void nvme_dbbuf_init(struct nvme_dev *dev, + struct nvme_queue *nvmeq, int qid) +{ + if (!dev->dbbuf_dbs || !qid) + return; + + nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; +} + +static void nvme_dbbuf_set(struct nvme_dev *dev) +{ + struct nvme_command c; + + if (!dev->dbbuf_dbs) + return; + + memset(&c, 0, sizeof(c)); + c.dbbuf.opcode = nvme_admin_dbbuf; + c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr); + c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); + + if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { + dev_warn(dev->dev, "unable to set dbbuf\n"); + /* Free memory and continue on */ + nvme_dbbuf_dma_free(dev); + } +} + +static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old) +{ + return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old); +} + +/* Update dbbuf and return true if an MMIO is required */ +static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + volatile u32 *dbbuf_ei) +{ + if (dbbuf_db) { + u16 old_value; + + /* + * Ensure that the queue is written before updating + * the doorbell in memory + */ + wmb(); + + old_value = *dbbuf_db; + *dbbuf_db = value; + + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) + return false; + } + + return true; } /* @@ -298,7 +422,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, if (++tail == nvmeq->q_depth) tail = 0; - writel(tail, nvmeq->q_db); + if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, + nvmeq->dbbuf_sq_ei)) + writel(tail, nvmeq->q_db); nvmeq->sq_tail = tail; } @@ -327,10 +453,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->nents = 0; iod->length = size; - if (!(rq->rq_flags & RQF_DONTPREP)) { - rq->retries = 0; - rq->rq_flags |= RQF_DONTPREP; - } return BLK_MQ_RQ_QUEUE_OK; } @@ -629,34 +751,12 @@ out_free_cmd: return ret; } -static void nvme_complete_rq(struct request *req) +static void nvme_pci_complete_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_dev *dev = iod->nvmeq->dev; - int error = 0; - - nvme_unmap_data(dev, req); - - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - req->retries++; - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } - - if (unlikely(iod->aborted)) { - dev_warn(dev->ctrl.device, - "completing aborted command with status: %04x\n", - req->errors); - } - blk_mq_end_request(req, error); + nvme_unmap_data(iod->nvmeq->dev, req); + nvme_complete_rq(req); } /* We read the CQE phase first to check if the rest of the entry is valid */ @@ -706,15 +806,16 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - nvme_req(req)->result = cqe.result; - blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); + nvme_end_request(req, cqe.status, cqe.result); } if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; if (likely(nvmeq->cq_vector >= 0)) - writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db, + nvmeq->dbbuf_cq_ei)) + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -746,10 +847,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_NONE; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_queue *nvmeq = hctx->driver_data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { spin_lock_irq(&nvmeq->q_lock); __nvme_process_cq(nvmeq, &tag); @@ -762,6 +861,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } +static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + return __nvme_poll(nvmeq, tag); +} + static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) { struct nvme_dev *dev = to_nvme_dev(ctrl); @@ -813,7 +919,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + int flags = NVME_QUEUE_PHYS_CONTIG; /* * Note: we (ab)use the fact the the prp fields survive if no data @@ -844,9 +950,9 @@ static void abort_endio(struct request *req, int error) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; - u16 status = req->errors; - dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status); + dev_warn(nvmeq->dev->ctrl.device, + "Abort status: 0x%x", nvme_req(req)->status); atomic_inc(&nvmeq->dev->ctrl.abort_limit); blk_mq_free_request(req); } @@ -860,6 +966,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_command cmd; /* + * Did we miss an interrupt? + */ + if (__nvme_poll(nvmeq, req->tag)) { + dev_warn(dev->ctrl.device, + "I/O %d QID %d timeout, completion polled\n", + req->tag, nvmeq->qid); + return BLK_EH_HANDLED; + } + + /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced * cancellation error. All outstanding requests are completed on @@ -870,7 +986,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -890,7 +1006,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * Mark the request as handled, since the inline shutdown * forces all outstanding requests to complete. */ - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -1098,6 +1214,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); + nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; spin_unlock_irq(&nvmeq->q_lock); } @@ -1130,18 +1247,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; } -static struct blk_mq_ops nvme_mq_admin_ops = { +static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; -static struct blk_mq_ops nvme_mq_ops = { +static const struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, .map_queues = nvme_pci_map_queues, @@ -1570,6 +1687,8 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; dev->ctrl.tagset = &dev->tagset; + + nvme_dbbuf_set(dev); } else { blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); @@ -1756,6 +1875,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); + nvme_dbbuf_dma_free(dev); put_device(dev->dev); if (dev->tagset.tags) blk_mq_free_tag_set(&dev->tagset); @@ -1823,6 +1943,13 @@ static void nvme_reset_work(struct work_struct *work) dev->ctrl.opal_dev = NULL; } + if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { + result = nvme_dbbuf_dma_alloc(dev); + if (result) + dev_warn(dev->dev, + "unable to allocate dma for dbbuf\n"); + } + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -2159,13 +2286,13 @@ static const struct pci_error_handlers nvme_err_handler = { static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0953), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a53), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 16f84eb0b95e..29cf88ac3f61 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -34,7 +34,7 @@ #include "fabrics.h" -#define NVME_RDMA_CONNECT_TIMEOUT_MS 1000 /* 1 second */ +#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */ #define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */ @@ -118,7 +118,6 @@ struct nvme_rdma_ctrl { struct nvme_rdma_qe async_event_sqe; - int reconnect_delay; struct delayed_work reconnect_work; struct list_head list; @@ -129,14 +128,8 @@ struct nvme_rdma_ctrl { u64 cap; u32 max_fr_pages; - union { - struct sockaddr addr; - struct sockaddr_in addr_in; - }; - union { - struct sockaddr src_addr; - struct sockaddr_in src_addr_in; - }; + struct sockaddr_storage addr; + struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; }; @@ -569,11 +562,12 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return PTR_ERR(queue->cm_id); } - queue->cm_error = -ETIMEDOUT; if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - src_addr = &ctrl->src_addr; + src_addr = (struct sockaddr *)&ctrl->src_addr; - ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr, + queue->cm_error = -ETIMEDOUT; + ret = rdma_resolve_addr(queue->cm_id, src_addr, + (struct sockaddr *)&ctrl->addr, NVME_RDMA_CONNECT_TIMEOUT_MS); if (ret) { dev_info(ctrl->ctrl.device, @@ -712,6 +706,26 @@ free_ctrl: kfree(ctrl); } +static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { + WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_LIVE); + return; + } + + if (nvmf_should_reconnect(&ctrl->ctrl)) { + dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", + ctrl->ctrl.opts->reconnect_delay); + queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, + ctrl->ctrl.opts->reconnect_delay * HZ); + } else { + dev_info(ctrl->ctrl.device, "Removing controller...\n"); + queue_work(nvme_rdma_wq, &ctrl->delete_work); + } +} + static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), @@ -719,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) bool changed; int ret; + ++ctrl->ctrl.opts->nr_reconnects; + if (ctrl->queue_count > 1) { nvme_rdma_free_io_queues(ctrl); @@ -763,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); + ctrl->ctrl.opts->nr_reconnects = 0; if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); @@ -777,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) stop_admin_q: blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); requeue: - /* Make sure we are not resetting/deleting */ - if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) { - dev_info(ctrl->ctrl.device, - "Failed reconnect attempt, requeueing...\n"); - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); - } + dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", + ctrl->ctrl.opts->nr_reconnects); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery_work(struct work_struct *work) @@ -810,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); - dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n", - ctrl->reconnect_delay); - - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) @@ -1169,8 +1178,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - req->req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); return ret; } @@ -1407,7 +1415,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) nvme_rdma_error_recovery(req->queue->ctrl); /* fail with DNR on cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -1509,27 +1517,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_queue *queue = req->queue; - int error = 0; - - nvme_rdma_unmap_data(queue, rq); - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } - - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); - } - - blk_mq_end_request(rq, error); + nvme_rdma_unmap_data(req->queue, rq); + nvme_complete_rq(rq); } -static struct blk_mq_ops nvme_rdma_mq_ops = { +static const struct blk_mq_ops nvme_rdma_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_request, @@ -1540,7 +1533,7 @@ static struct blk_mq_ops nvme_rdma_mq_ops = { .timeout = nvme_rdma_timeout, }; -static struct blk_mq_ops nvme_rdma_admin_mq_ops = { +static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_admin_request, @@ -1857,27 +1850,13 @@ out_free_io_queues: return ret; } -static int nvme_rdma_parse_ipaddr(struct sockaddr_in *in_addr, char *p) -{ - u8 *addr = (u8 *)&in_addr->sin_addr.s_addr; - size_t buflen = strlen(p); - - /* XXX: handle IPv6 addresses */ - - if (buflen > INET_ADDRSTRLEN) - return -EINVAL; - if (in4_pton(p, buflen, addr, '\0', NULL) == 0) - return -EINVAL; - in_addr->sin_family = AF_INET; - return 0; -} - static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_rdma_ctrl *ctrl; int ret; bool changed; + char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -1885,40 +1864,33 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - ret = nvme_rdma_parse_ipaddr(&ctrl->addr_in, opts->traddr); + if (opts->mask & NVMF_OPT_TRSVCID) + port = opts->trsvcid; + else + port = __stringify(NVME_RDMA_IP_PORT); + + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->traddr, port, &ctrl->addr); if (ret) { - pr_err("malformed IP address passed: %s\n", opts->traddr); + pr_err("malformed address passed: %s:%s\n", opts->traddr, port); goto out_free_ctrl; } if (opts->mask & NVMF_OPT_HOST_TRADDR) { - ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in, - opts->host_traddr); + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->host_traddr, NULL, &ctrl->src_addr); if (ret) { - pr_err("malformed src IP address passed: %s\n", + pr_err("malformed src address passed: %s\n", opts->host_traddr); goto out_free_ctrl; } } - if (opts->mask & NVMF_OPT_TRSVCID) { - u16 port; - - ret = kstrtou16(opts->trsvcid, 0, &port); - if (ret) - goto out_free_ctrl; - - ctrl->addr_in.sin_port = cpu_to_be16(port); - } else { - ctrl->addr_in.sin_port = cpu_to_be16(NVME_RDMA_IP_PORT); - } - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 0 /* no quirks, we're perfect! */); if (ret) goto out_free_ctrl; - ctrl->reconnect_delay = opts->reconnect_delay; INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); @@ -1977,7 +1949,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); kref_get(&ctrl->ctrl.kref); @@ -2013,7 +1985,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | - NVMF_OPT_HOST_TRADDR, + NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_rdma_create_ctrl, }; @@ -2055,12 +2027,20 @@ static int __init nvme_rdma_init_module(void) return -ENOMEM; ret = ib_register_client(&nvme_rdma_ib_client); - if (ret) { - destroy_workqueue(nvme_rdma_wq); - return ret; - } + if (ret) + goto err_destroy_wq; + + ret = nvmf_register_transport(&nvme_rdma_transport); + if (ret) + goto err_unreg_client; + + return 0; - return nvmf_register_transport(&nvme_rdma_transport); +err_unreg_client: + ib_unregister_client(&nvme_rdma_ib_client); +err_destroy_wq: + destroy_workqueue(nvme_rdma_wq); + return ret; } static void __exit nvme_rdma_cleanup_module(void) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index f49ae2758bb7..1f7671e631dd 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -1609,7 +1609,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read); u16 control; - u32 max_blocks = queue_max_hw_sectors(ns->queue); + u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9); num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); @@ -2138,15 +2138,6 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, return res; } -static int nvme_trans_security_protocol(struct nvme_ns *ns, - struct sg_io_hdr *hdr, - u8 *cmd) -{ - return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); -} - static int nvme_trans_synchronize_cache(struct nvme_ns *ns, struct sg_io_hdr *hdr) { @@ -2414,10 +2405,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) case REQUEST_SENSE: retcode = nvme_trans_request_sense(ns, hdr, cmd); break; - case SECURITY_PROTOCOL_IN: - case SECURITY_PROTOCOL_OUT: - retcode = nvme_trans_security_protocol(ns, hdr, cmd); - break; case SYNCHRONIZE_CACHE: retcode = nvme_trans_synchronize_cache(ns, hdr); break; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 76450b0c55f1..ff1f97006322 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -121,7 +121,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) } switch (req->cmd->get_log_page.lid) { - case 0x01: + case NVME_LOG_ERROR: /* * We currently never set the More bit in the status field, * so all error log entries are invalid and can be zeroed out. @@ -129,7 +129,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) * mandatory log page. */ break; - case 0x02: + case NVME_LOG_SMART: /* * XXX: fill out actual smart log * @@ -149,7 +149,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) goto err; } break; - case 0x03: + case NVME_LOG_FW_SLOT: /* * We only support a single firmware slot which always is * active, so we can zero out the whole firmware slot log and @@ -480,31 +480,25 @@ static void nvmet_execute_keep_alive(struct nvmet_req *req) nvmet_req_complete(req, 0); } -int nvmet_parse_admin_cmd(struct nvmet_req *req) +u16 nvmet_parse_admin_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; req->ns = NULL; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got admin cmd %d while CC.EN == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got admin cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) + return ret; switch (cmd->common.opcode) { case nvme_admin_get_log_page: req->data_len = nvmet_get_log_page_len(cmd); switch (cmd->get_log_page.lid) { - case 0x01: - case 0x02: - case 0x03: + case NVME_LOG_ERROR: + case NVME_LOG_SMART: + case NVME_LOG_FW_SLOT: req->execute = nvmet_execute_get_log_page; return 0; } @@ -545,6 +539,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) return 0; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 798653b329b2..cf90713043da 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -273,8 +273,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, NULL); if (IS_ERR(ns->bdev)) { - pr_err("nvmet: failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); + pr_err("failed to open block device %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); ret = PTR_ERR(ns->bdev); ns->bdev = NULL; goto out_unlock; @@ -661,6 +661,23 @@ out: return status; } +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) +{ + if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { + pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + + if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { + pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + req->ns = NULL; + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + return 0; +} + static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) { diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index af8aabf05335..1aaf597e81fc 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -159,15 +159,15 @@ out: nvmet_req_complete(req, status); } -int nvmet_parse_discovery_cmd(struct nvmet_req *req) +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; req->ns = NULL; if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got cmd %d while not ready\n", - cmd->common.opcode); + pr_err("got cmd %d while not ready\n", + cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } @@ -180,8 +180,8 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) req->execute = nvmet_execute_get_disc_log_page; return 0; default: - pr_err("nvmet: unsupported get_log_page lid %d\n", - cmd->get_log_page.lid); + pr_err("unsupported get_log_page lid %d\n", + cmd->get_log_page.lid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } case nvme_admin_identify: @@ -192,17 +192,16 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) nvmet_execute_identify_disc_ctrl; return 0; default: - pr_err("nvmet: unsupported identify cns %d\n", - cmd->identify.cns); + pr_err("unsupported identify cns %d\n", + cmd->identify.cns); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("nvmet: unsupported cmd %d\n", - cmd->common.opcode); + pr_err("unsupported cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 8bd022af3df6..3cc17269504b 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -73,7 +73,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) nvmet_req_complete(req, status); } -int nvmet_parse_fabrics_cmd(struct nvmet_req *req) +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -122,7 +122,15 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -143,7 +151,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) } status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, - le32_to_cpu(c->kato), &ctrl); + le32_to_cpu(c->kato), &ctrl); if (status) goto out; @@ -158,7 +166,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); } @@ -170,7 +179,15 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -183,8 +200,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) } status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, - le16_to_cpu(d->cntlid), - req, &ctrl); + le16_to_cpu(d->cntlid), + req, &ctrl); if (status) goto out; @@ -205,7 +222,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); return; @@ -214,7 +232,7 @@ out_ctrl_put: goto out; } -int nvmet_parse_connect_cmd(struct nvmet_req *req) +u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 8f483ee7868c..62eba29c85fb 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -82,10 +82,13 @@ struct nvmet_fc_fcp_iod { enum nvmet_fcp_datadir io_dir; bool active; bool abort; + bool aborted; + bool writedataactive; spinlock_t flock; struct nvmet_req req; struct work_struct work; + struct work_struct done_work; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_queue *queue; @@ -116,7 +119,7 @@ struct nvmet_fc_tgt_queue { u16 qid; u16 sqsize; u16 ersp_ratio; - u16 sqhd; + __le16 sqhd; int cpu; atomic_t connected; atomic_t sqtail; @@ -213,6 +216,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); +static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); @@ -414,9 +418,13 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, for (i = 0; i < queue->sqsize; fod++, i++) { INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); + INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); fod->tgtport = tgtport; fod->queue = queue; fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->fcpreq = NULL; list_add_tail(&fod->fcp_list, &queue->fod_list); spin_lock_init(&fod->flock); @@ -463,7 +471,6 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) if (fod) { list_del(&fod->fcp_list); fod->active = true; - fod->abort = false; /* * no queue reference is taken, as it was taken by the * queue lookup just prior to the allocation. The iod @@ -479,17 +486,30 @@ static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; + fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + + fcpreq->nvmet_fc_private = NULL; + spin_lock_irqsave(&queue->qlock, flags); list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->writedataactive = false; + fod->fcpreq = NULL; spin_unlock_irqrestore(&queue->qlock, flags); /* * release the reference taken at queue lookup and fod allocation */ nvmet_fc_tgt_q_put(queue); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -616,32 +636,12 @@ nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue) static void -nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, - struct nvmefc_tgt_fcp_req *fcpreq) -{ - int ret; - - fcpreq->op = NVMET_FCOP_ABORT; - fcpreq->offset = 0; - fcpreq->timeout = 0; - fcpreq->transfer_length = 0; - fcpreq->transferred_length = 0; - fcpreq->fcp_error = 0; - fcpreq->sg_cnt = 0; - - ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq); - if (ret) - /* should never reach here !! */ - WARN_ON(1); -} - - -static void nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { + struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; unsigned long flags; - int i; + int i, writedataactive; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); @@ -652,7 +652,20 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) if (fod->active) { spin_lock(&fod->flock); fod->abort = true; + writedataactive = fod->writedataactive; spin_unlock(&fod->flock); + /* + * only call lldd abort routine if waiting for + * writedata. other outstanding ops should finish + * on their own. + */ + if (writedataactive) { + spin_lock(&fod->flock); + fod->aborted = true; + spin_unlock(&fod->flock); + tgtport->ops->fcp_abort( + &tgtport->fc_target_port, fod->fcpreq); + } } } spin_unlock_irqrestore(&queue->qlock, flags); @@ -846,7 +859,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, int ret, idx; if (!template->xmt_ls_rsp || !template->fcp_op || - !template->targetport_delete || + !template->fcp_abort || + !template->fcp_req_release || !template->targetport_delete || !template->max_hw_queues || !template->max_sgl_segments || !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; @@ -1044,7 +1058,7 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); static void -nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, u32 desc_len, u8 rqst_ls_cmd) +nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) { struct fcnvme_ls_acc_hdr *acc = buf; @@ -1189,8 +1203,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1281,8 +1295,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? - ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1369,8 +1384,12 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - (ret == 8) ? ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + (ret == VERR_NO_CONN) ? + FCNVME_RJT_RC_INV_CONN : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1479,7 +1498,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, default: iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, - ELS_RJT_INVAL, ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } nvmet_fc_xmt_ls_rsp(tgtport, iod); @@ -1619,6 +1638,8 @@ nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod) for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count) __free_page(sg_page(sg)); kfree(fod->data_sg); + fod->data_sg = NULL; + fod->data_sg_cnt = 0; } @@ -1679,7 +1700,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, xfr_length != fod->total_length || (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || - queue_90percent_full(fod->queue, cqe->sq_head)) + queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head))) send_ersp = true; /* re-set the fields */ @@ -1704,6 +1725,26 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq); static void +nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + /* + * if an ABTS was received or we issued the fcp_abort early + * don't call abort routine again. + */ + /* no need to take lock - lock was taken earlier to get here */ + if (!fod->aborted) + tgtport->ops->fcp_abort(&tgtport->fc_target_port, fcpreq); + + nvmet_fc_free_fcp_iod(fod->queue, fod); +} + +static void nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { @@ -1716,7 +1757,7 @@ nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); if (ret) - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } static void @@ -1725,6 +1766,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct scatterlist *sg, *datasg; + unsigned long flags; u32 tlen, sg_off; int ret; @@ -1789,10 +1831,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, */ fod->abort = true; - if (op == NVMET_FCOP_WRITEDATA) + if (op == NVMET_FCOP_WRITEDATA) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = false; + spin_unlock_irqrestore(&fod->flock, flags); nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); - else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { + } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { fcpreq->fcp_error = ret; fcpreq->transferred_length = 0; nvmet_fc_xmt_fcp_op_done(fod->fcpreq); @@ -1800,32 +1845,54 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, } } +static inline bool +__nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + + /* if in the middle of an io and we need to tear down */ + if (abort) { + if (fcpreq->op == NVMET_FCOP_WRITEDATA) { + nvmet_req_complete(&fod->req, + NVME_SC_FC_TRANSPORT_ERROR); + return true; + } + + nvmet_fc_abort_op(tgtport, fod); + return true; + } + + return false; +} + +/* + * actual done handler for FCP operations when completed by the lldd + */ static void -nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) { - struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; bool abort; spin_lock_irqsave(&fod->flock, flags); abort = fod->abort; + fod->writedataactive = false; spin_unlock_irqrestore(&fod->flock, flags); - /* if in the middle of an io and we need to tear down */ - if (abort && fcpreq->op != NVMET_FCOP_ABORT) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_req_complete(&fod->req, fcpreq->fcp_error); - return; - } - switch (fcpreq->op) { case NVMET_FCOP_WRITEDATA: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { + spin_lock(&fod->flock); + fod->abort = true; + spin_unlock(&fod->flock); + nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); return; @@ -1833,6 +1900,10 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) fod->offset += fcpreq->transferred_length; if (fod->offset != fod->total_length) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = true; + spin_unlock_irqrestore(&fod->flock, flags); + /* transfer the next chunk */ nvmet_fc_transfer_fcp_data(tgtport, fod, NVMET_FCOP_WRITEDATA); @@ -1847,12 +1918,11 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1861,8 +1931,6 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) if (fcpreq->op == NVMET_FCOP_READDATA_RSP) { /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); nvmet_fc_free_fcp_iod(fod->queue, fod); return; } @@ -1885,19 +1953,38 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) break; case NVMET_FCOP_RSP: - case NVMET_FCOP_ABORT: - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; nvmet_fc_free_fcp_iod(fod->queue, fod); break; default: - nvmet_fc_free_tgt_pgs(fod); - nvmet_fc_abort_op(tgtport, fod->fcpreq); break; } } +static void +nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) +{ + struct nvmet_fc_fcp_iod *fod = + container_of(work, struct nvmet_fc_fcp_iod, done_work); + + nvmet_fc_fod_op_done(fod); +} + +static void +nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue = fod->queue; + + if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) + /* context switch so completion is not in ISR context */ + queue_work_on(queue->cpu, queue->work_q, &fod->done_work); + else + nvmet_fc_fod_op_done(fod); +} + /* * actual completion handler after execution by the nvmet layer */ @@ -1919,10 +2006,7 @@ __nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport, fod->queue->sqhd = cqe->sq_head; if (abort) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1971,7 +2055,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* * Actual processing routine for received FC-NVME LS Requests from the LLD */ -void +static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { @@ -2018,8 +2102,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, &fod->queue->nvme_cq, &fod->queue->nvme_sq, &nvmet_fc_tgt_fcp_ops); - if (!ret) { /* bad SQE content */ - nvmet_fc_abort_op(tgtport, fod->fcpreq); + if (!ret) { /* bad SQE content or invalid ctrl state */ + nvmet_fc_abort_op(tgtport, fod); return; } @@ -2059,7 +2143,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, return; transport_error: - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } /* @@ -2089,7 +2173,7 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * If this routine returns error, the lldd should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU - * was receive on. + * was received on. * @fcpreq: pointer to a fcpreq request structure to be used to reference * the exchange corresponding to the FCP Exchange. * @cmdiubuf: pointer to the buffer containing the FCP CMD IU @@ -2112,7 +2196,6 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4))) return -EIO; - queue = nvmet_fc_find_target_queue(tgtport, be64_to_cpu(cmdiu->connection_id)); if (!queue) @@ -2142,12 +2225,68 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); - queue_work_on(queue->cpu, queue->work_q, &fod->work); + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); return 0; } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); +/** + * nvmet_fc_rcv_fcp_abort - transport entry point called by an LLDD + * upon the reception of an ABTS for a FCP command + * + * Notify the transport that an ABTS has been received for a FCP command + * that had been given to the transport via nvmet_fc_rcv_fcp_req(). The + * LLDD believes the command is still being worked on + * (template_ops->fcp_req_release() has not been called). + * + * The transport will wait for any outstanding work (an op to the LLDD, + * which the lldd should complete with error due to the ABTS; or the + * completion from the nvmet layer of the nvme command), then will + * stop processing and call the nvmet_fc_rcv_fcp_req() callback to + * return the i/o context to the LLDD. The LLDD may send the BA_ACC + * to the ABTS either after return from this function (assuming any + * outstanding op work has been terminated) or upon the callback being + * called. + * + * @target_port: pointer to the (registered) target port the FCP CMD IU + * was received on. + * @fcpreq: pointer to the fcpreq request structure that corresponds + * to the exchange that received the ABTS. + */ +void +nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + + if (!fod || fod->fcpreq != fcpreq) + /* job appears to have already completed, ignore abort */ + return; + + queue = fod->queue; + + spin_lock_irqsave(&queue->qlock, flags); + if (fod->active) { + /* + * mark as abort. The abort handler, invoked upon completion + * of any work, will detect the aborted status and do the + * callback. + */ + spin_lock(&fod->flock); + fod->abort = true; + fod->aborted = true; + spin_unlock(&fod->flock); + } + spin_unlock_irqrestore(&queue->qlock, flags); +} +EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort); + enum { FCT_TRADDR_ERR = 0, FCT_TRADDR_WWNN = 1 << 0, @@ -2177,7 +2316,7 @@ nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) if (!options) return -ENOMEM; - while ((p = strsep(&o, ",\n")) != NULL) { + while ((p = strsep(&o, ":\n")) != NULL) { if (!*p) continue; @@ -2238,6 +2377,7 @@ nvmet_fc_add_port(struct nvmet_port *port) if (!tgtport->port) { tgtport->port = port; port->priv = tgtport; + nvmet_fc_tgtport_get(tgtport); ret = 0; } else ret = -EALREADY; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 4e8e6a22bce1..15551ef79c8c 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -246,11 +246,19 @@ struct fcloop_lsreq { struct fcloop_fcpreq { struct fcloop_tport *tport; struct nvmefc_fcp_req *fcpreq; + spinlock_t reqlock; u16 status; + bool active; + bool aborted; struct work_struct work; struct nvmefc_tgt_fcp_req tgt_fcp_req; }; +struct fcloop_ini_fcpreq { + struct nvmefc_fcp_req *fcpreq; + struct fcloop_fcpreq *tfcp_req; + struct work_struct iniwork; +}; static inline struct fcloop_lsreq * tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) @@ -341,7 +349,21 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, } /* - * FCP IO operation done. call back up initiator "done" flows. + * FCP IO operation done by initiator abort. + * call back up initiator "done" flows. + */ +static void +fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work) +{ + struct fcloop_ini_fcpreq *inireq = + container_of(work, struct fcloop_ini_fcpreq, iniwork); + + inireq->fcpreq->done(inireq->fcpreq); +} + +/* + * FCP IO operation done by target completion. + * call back up initiator "done" flows. */ static void fcloop_tgt_fcprqst_done_work(struct work_struct *work) @@ -349,12 +371,18 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, work); struct fcloop_tport *tport = tfcp_req->tport; - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; - if (tport->remoteport) { + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + spin_unlock(&tfcp_req->reqlock); + + if (tport->remoteport && fcpreq) { fcpreq->status = tfcp_req->status; fcpreq->done(fcpreq); } + + kfree(tfcp_req); } @@ -364,20 +392,25 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { - struct fcloop_fcpreq *tfcp_req = fcpreq->private; struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req; int ret = 0; - INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + if (!rport->targetport) + return -ECONNREFUSED; - if (!rport->targetport) { - tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR; - schedule_work(&tfcp_req->work); - return ret; - } + tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL); + if (!tfcp_req) + return -ENOMEM; + inireq->fcpreq = fcpreq; + inireq->tfcp_req = tfcp_req; + INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work); tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + spin_lock_init(&tfcp_req->reqlock); + INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, fcpreq->cmdaddr, fcpreq->cmdlen); @@ -444,63 +477,129 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *tgt_fcpreq) { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; u32 rsplen = 0, xfrlen = 0; - int fcp_err = 0; + int fcp_err = 0, active, aborted; u8 op = tgt_fcpreq->op; + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + active = tfcp_req->active; + aborted = tfcp_req->aborted; + tfcp_req->active = true; + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(active)) + /* illegal - call while i/o active */ + return -EALREADY; + + if (unlikely(aborted)) { + /* target transport has aborted i/o prior */ + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = 0; + tgt_fcpreq->fcp_error = -ECANCELED; + tgt_fcpreq->done(tgt_fcpreq); + return 0; + } + + /* + * if fcpreq is NULL, the I/O has been aborted (from + * initiator side). For the target side, act as if all is well + * but don't actually move data. + */ + switch (op) { case NVMET_FCOP_WRITEDATA: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } break; case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } if (op == NVMET_FCOP_READDATA) break; /* Fall-Thru to RSP handling */ case NVMET_FCOP_RSP: - rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? - fcpreq->rsplen : tgt_fcpreq->rsplen); - memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); - if (rsplen < tgt_fcpreq->rsplen) - fcp_err = -E2BIG; - fcpreq->rcv_rsplen = rsplen; - fcpreq->status = 0; + if (fcpreq) { + rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? + fcpreq->rsplen : tgt_fcpreq->rsplen); + memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); + if (rsplen < tgt_fcpreq->rsplen) + fcp_err = -E2BIG; + fcpreq->rcv_rsplen = rsplen; + fcpreq->status = 0; + } tfcp_req->status = 0; break; - case NVMET_FCOP_ABORT: - tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; - break; - default: fcp_err = -EINVAL; break; } + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = xfrlen; tgt_fcpreq->fcp_error = fcp_err; tgt_fcpreq->done(tgt_fcpreq); - if ((!fcp_err) && (op == NVMET_FCOP_RSP || - op == NVMET_FCOP_READDATA_RSP || - op == NVMET_FCOP_ABORT)) - schedule_work(&tfcp_req->work); - return 0; } static void +fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + int active; + + /* + * mark aborted only in case there were 2 threads in transport + * (one doing io, other doing abort) and only kills ops posted + * after the abort request + */ + spin_lock(&tfcp_req->reqlock); + active = tfcp_req->active; + tfcp_req->aborted = true; + spin_unlock(&tfcp_req->reqlock); + + tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; + + /* + * nothing more to do. If io wasn't active, the transport should + * immediately call the req_release. If it was active, the op + * will complete, and the lldd should call req_release. + */ +} + +static void +fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + + schedule_work(&tfcp_req->work); +} + +static void fcloop_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) @@ -513,6 +612,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { + struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; + + if (!tfcp_req) + /* abort has already been called */ + return; + + if (rport->targetport) + nvmet_fc_rcv_fcp_abort(rport->targetport, + &tfcp_req->tgt_fcp_req); + + /* break initiator/target relationship for io */ + spin_lock(&tfcp_req->reqlock); + inireq->tfcp_req = NULL; + tfcp_req->fcpreq = NULL; + spin_unlock(&tfcp_req->reqlock); + + /* post the aborted io completion */ + fcpreq->status = -ECANCELED; + schedule_work(&inireq->iniwork); } static void @@ -546,7 +666,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_SGL_SEGS 256 #define FCLOOP_DMABOUND_4G 0xFFFFFFFF -struct nvme_fc_port_template fctemplate = { +static struct nvme_fc_port_template fctemplate = { .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, @@ -563,20 +683,23 @@ struct nvme_fc_port_template fctemplate = { .local_priv_sz = sizeof(struct fcloop_lport), .remote_priv_sz = sizeof(struct fcloop_rport), .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), - .fcprqst_priv_sz = sizeof(struct fcloop_fcpreq), + .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), }; -struct nvmet_fc_target_template tgttemplate = { +static struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, .xmt_ls_rsp = fcloop_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, + .fcp_abort = fcloop_tgt_fcp_abort, + .fcp_req_release = fcloop_fcp_req_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ - .target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED, + .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), }; diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 6b0baa9caab9..c77940d80fc8 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -184,7 +184,7 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) (req->ns->blksize_shift - 9)) + 1; if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, - GFP_KERNEL, &bio, true)) + GFP_KERNEL, &bio, 0)) status = NVME_SC_INTERNAL | NVME_SC_DNR; if (bio) { @@ -196,26 +196,19 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) } } -int nvmet_parse_io_cmd(struct nvmet_req *req) +u16 nvmet_parse_io_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got io cmd %d while CC.EN == 0\n", - cmd->common.opcode); + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) { req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got io cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + return ret; } req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (!req->ns) + if (unlikely(!req->ns)) return NVME_SC_INVALID_NS | NVME_SC_DNR; switch (cmd->common.opcode) { @@ -237,7 +230,8 @@ int nvmet_parse_io_cmd(struct nvmet_req *req) req->execute = nvmet_execute_write_zeroes; return 0; default: - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index c7b0b6a52708..304f1c87c160 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -13,12 +13,10 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/scatterlist.h> -#include <linux/delay.h> #include <linux/blk-mq.h> #include <linux/nvme.h> #include <linux/module.h> #include <linux/parser.h> -#include <linux/t10-pi.h> #include "nvmet.h" #include "../host/nvme.h" #include "../host/fabrics.h" @@ -93,31 +91,26 @@ static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue) static void nvme_loop_complete_rq(struct request *req) { struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); - int error = 0; nvme_cleanup_cmd(req); sg_free_table_chained(&iod->sg_table, true); + nvme_complete_rq(req); +} - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } +static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue) +{ + u32 queue_idx = nvme_loop_queue_idx(queue); - blk_mq_end_request(req, error); + if (queue_idx == 0) + return queue->ctrl->admin_tag_set.tags[queue_idx]; + return queue->ctrl->tag_set.tags[queue_idx - 1]; } static void nvme_loop_queue_response(struct nvmet_req *req) { - struct nvme_loop_iod *iod = - container_of(req, struct nvme_loop_iod, req); - struct nvme_completion *cqe = &iod->rsp; + struct nvme_loop_queue *queue = + container_of(req->sq, struct nvme_loop_queue, nvme_sq); + struct nvme_completion *cqe = req->rsp; /* * AEN requests are special as they don't time out and can @@ -125,15 +118,22 @@ static void nvme_loop_queue_response(struct nvmet_req *req) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 && + if (unlikely(nvme_loop_queue_idx(queue) == 0 && cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) { - nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status, + nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); } else { - struct request *rq = blk_mq_rq_from_pdu(iod); + struct request *rq; + + rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "tag 0x%x on queue %d not found\n", + cqe->command_id, nvme_loop_queue_idx(queue)); + return; + } - iod->nvme_req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); } } @@ -154,7 +154,7 @@ nvme_loop_timeout(struct request *rq, bool reserved) schedule_work(&iod->queue->ctrl->reset_work); /* fail with DNR on admin cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -268,7 +268,7 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static struct blk_mq_ops nvme_loop_mq_ops = { +static const struct blk_mq_ops nvme_loop_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_request, @@ -276,7 +276,7 @@ static struct blk_mq_ops nvme_loop_mq_ops = { .timeout = nvme_loop_timeout, }; -static struct blk_mq_ops nvme_loop_admin_mq_ops = { +static const struct blk_mq_ops nvme_loop_admin_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_admin_request, @@ -349,6 +349,19 @@ out_destroy_queues: return ret; } +static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i, ret; + + for (i = 1; i < ctrl->queue_count; i++) { + ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + if (ret) + return ret; + } + + return 0; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -490,7 +503,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, reset_work); bool changed; - int i, ret; + int ret; nvme_loop_shutdown_ctrl(ctrl); @@ -502,11 +515,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_destroy_admin; - for (i = 1; i < ctrl->queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_destroy_io; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_destroy_io; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); @@ -559,7 +570,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - int ret, i; + int ret; ret = nvme_loop_init_io_queues(ctrl); if (ret) @@ -588,11 +599,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i < ctrl->queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_cleanup_connect_q; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; return 0; @@ -736,7 +745,12 @@ static int __init nvme_loop_init_module(void) ret = nvmet_register_transport(&nvme_loop_ops); if (ret) return ret; - return nvmf_register_transport(&nvme_loop_transport); + + ret = nvmf_register_transport(&nvme_loop_transport); + if (ret) + nvmet_unregister_transport(&nvme_loop_ops); + + return ret; } static void __exit nvme_loop_cleanup_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f7ff15f17ca9..7cb77ba5993b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -253,11 +253,11 @@ struct nvmet_async_event { u8 log_page; }; -int nvmet_parse_connect_cmd(struct nvmet_req *req); -int nvmet_parse_io_cmd(struct nvmet_req *req); -int nvmet_parse_admin_cmd(struct nvmet_req *req); -int nvmet_parse_discovery_cmd(struct nvmet_req *req); -int nvmet_parse_fabrics_cmd(struct nvmet_req *req); +u16 nvmet_parse_connect_cmd(struct nvmet_req *req); +u16 nvmet_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_parse_admin_cmd(struct nvmet_req *req); +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); @@ -278,6 +278,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, struct nvmet_req *req, struct nvmet_ctrl **ret); void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd); struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ecc4fe862561..99c69018a35f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1199,6 +1199,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, } queue->port = cm_id->context; + if (queue->host_qid == 0) { + /* Let inflight controller teardown complete */ + flush_scheduled_work(); + } + ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) goto release_queue; @@ -1427,12 +1432,16 @@ restart: static int nvmet_rdma_add_port(struct nvmet_port *port) { struct rdma_cm_id *cm_id; - struct sockaddr_in addr_in; - u16 port_in; + struct sockaddr_storage addr = { }; + __kernel_sa_family_t af; int ret; switch (port->disc_addr.adrfam) { case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; break; default: pr_err("address family %d not supported\n", @@ -1440,13 +1449,13 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return -EINVAL; } - ret = kstrtou16(port->disc_addr.trsvcid, 0, &port_in); - if (ret) + ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, + port->disc_addr.trsvcid, &addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + port->disc_addr.traddr, port->disc_addr.trsvcid); return ret; - - addr_in.sin_family = AF_INET; - addr_in.sin_addr.s_addr = in_aton(port->disc_addr.traddr); - addr_in.sin_port = htons(port_in); + } cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, RDMA_PS_TCP, IB_QPT_RC); @@ -1455,20 +1464,32 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return PTR_ERR(cm_id); } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr_in); + /* + * Allow both IPv4 and IPv6 sockets to bind a single port + * at the same time. + */ + ret = rdma_set_afonly(cm_id, 1); + if (ret) { + pr_err("rdma_set_afonly failed (%d)\n", ret); + goto out_destroy_id; + } + + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); if (ret) { - pr_err("binding CM ID to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("binding CM ID to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, 128); if (ret) { - pr_err("listening to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("listening to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } - pr_info("enabling port %d (%pISpc)\n", - le16_to_cpu(port->disc_addr.portid), &addr_in); + pr_info("enabling port %d (%pISpcs)\n", + le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); port->priv = cm_id; return 0; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 4bc88eb52712..e1bffc9bb194 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -141,6 +141,14 @@ config DELL_WMI_AIO To compile this driver as a module, choose M here: the module will be called dell-wmi-aio. +config DELL_WMI_LED + tristate "External LED on Dell Business Netbooks" + depends on LEDS_CLASS + depends on ACPI_WMI + help + This adds support for the Latitude 2100 and similar + notebooks that have an external LED. + config DELL_SMO8800 tristate "Dell Latitude freefall driver (ACPI SMO88XX)" depends on ACPI diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 299d0f9e40f7..776b3a7a4984 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_DELL_SMBIOS) += dell-smbios.o obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o obj-$(CONFIG_DELL_WMI) += dell-wmi.o obj-$(CONFIG_DELL_WMI_AIO) += dell-wmi-aio.o +obj-$(CONFIG_DELL_WMI_LED) += dell-wmi-led.o obj-$(CONFIG_DELL_SMO8800) += dell-smo8800.o obj-$(CONFIG_DELL_RBTN) += dell-rbtn.o obj-$(CONFIG_ACER_WMI) += acer-wmi.o diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index f57dd282a002..2e237bad4995 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/i8042.h> #include <linux/debugfs.h> +#include <linux/dell-led.h> #include <linux/seq_file.h> #include <acpi/video.h> #include "dell-rbtn.h" @@ -42,6 +43,8 @@ #define KBD_LED_AUTO_50_TOKEN 0x02EB #define KBD_LED_AUTO_75_TOKEN 0x02EC #define KBD_LED_AUTO_100_TOKEN 0x02F6 +#define GLOBAL_MIC_MUTE_ENABLE 0x0364 +#define GLOBAL_MIC_MUTE_DISABLE 0x0365 struct quirk_entry { u8 touchpad_led; @@ -1978,6 +1981,31 @@ static void kbd_led_exit(void) led_classdev_unregister(&kbd_led); } +int dell_micmute_led_set(int state) +{ + struct calling_interface_buffer *buffer; + struct calling_interface_token *token; + + if (state == 0) + token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE); + else if (state == 1) + token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); + else + return -EINVAL; + + if (!token) + return -ENODEV; + + buffer = dell_smbios_get_buffer(); + buffer->input[0] = token->location; + buffer->input[1] = token->value; + dell_smbios_send_request(1, 0); + dell_smbios_release_buffer(); + + return state; +} +EXPORT_SYMBOL_GPL(dell_micmute_led_set); + static int __init dell_init(void) { struct calling_interface_buffer *buffer; diff --git a/drivers/leds/dell-led.c b/drivers/platform/x86/dell-wmi-led.c index b3d6e9c15cf9..a0c7e99530ef 100644 --- a/drivers/leds/dell-led.c +++ b/drivers/platform/x86/dell-wmi-led.c @@ -1,6 +1,4 @@ /* - * dell_led.c - Dell LED Driver - * * Copyright (C) 2010 Dell Inc. * Louis Davis <louis_davis@dell.com> * Jim Dailey <jim_dailey@dell.com> @@ -15,16 +13,12 @@ #include <linux/leds.h> #include <linux/slab.h> #include <linux/module.h> -#include <linux/dmi.h> -#include <linux/dell-led.h> -#include "../platform/x86/dell-smbios.h" MODULE_AUTHOR("Louis Davis/Jim Dailey"); MODULE_DESCRIPTION("Dell LED Control Driver"); MODULE_LICENSE("GPL"); #define DELL_LED_BIOS_GUID "F6E4FE6E-909D-47cb-8BAB-C9F6F2F8D396" -#define DELL_APP_GUID "A80593CE-A997-11DA-B012-B622A1EF5492" MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID); /* Error Result Codes: */ @@ -43,53 +37,6 @@ MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID); #define CMD_LED_OFF 17 #define CMD_LED_BLINK 18 -#define GLOBAL_MIC_MUTE_ENABLE 0x364 -#define GLOBAL_MIC_MUTE_DISABLE 0x365 - -static int dell_micmute_led_set(int state) -{ - struct calling_interface_buffer *buffer; - struct calling_interface_token *token; - - if (!wmi_has_guid(DELL_APP_GUID)) - return -ENODEV; - - if (state == 0) - token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE); - else if (state == 1) - token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); - else - return -EINVAL; - - if (!token) - return -ENODEV; - - buffer = dell_smbios_get_buffer(); - buffer->input[0] = token->location; - buffer->input[1] = token->value; - dell_smbios_send_request(1, 0); - dell_smbios_release_buffer(); - - return state; -} - -int dell_app_wmi_led_set(int whichled, int on) -{ - int state = 0; - - switch (whichled) { - case DELL_LED_MICMUTE: - state = dell_micmute_led_set(on); - break; - default: - pr_warn("led type %x is not supported\n", whichled); - break; - } - - return state; -} -EXPORT_SYMBOL_GPL(dell_app_wmi_led_set); - struct bios_args { unsigned char length; unsigned char result_code; @@ -99,37 +46,29 @@ struct bios_args { unsigned char off_time; }; -static int dell_led_perform_fn(u8 length, - u8 result_code, - u8 device_id, - u8 command, - u8 on_time, - u8 off_time) +static int dell_led_perform_fn(u8 length, u8 result_code, u8 device_id, + u8 command, u8 on_time, u8 off_time) { - struct bios_args *bios_return; - u8 return_code; - union acpi_object *obj; struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + struct bios_args *bios_return; struct acpi_buffer input; + union acpi_object *obj; acpi_status status; + u8 return_code; - struct bios_args args; - args.length = length; - args.result_code = result_code; - args.device_id = device_id; - args.command = command; - args.on_time = on_time; - args.off_time = off_time; + struct bios_args args = { + .length = length, + .result_code = result_code, + .device_id = device_id, + .command = command, + .on_time = on_time, + .off_time = off_time + }; input.length = sizeof(struct bios_args); input.pointer = &args; - status = wmi_evaluate_method(DELL_LED_BIOS_GUID, - 1, - 1, - &input, - &output); - + status = wmi_evaluate_method(DELL_LED_BIOS_GUID, 1, 1, &input, &output); if (ACPI_FAILURE(status)) return status; @@ -137,7 +76,7 @@ static int dell_led_perform_fn(u8 length, if (!obj) return -EINVAL; - else if (obj->type != ACPI_TYPE_BUFFER) { + if (obj->type != ACPI_TYPE_BUFFER) { kfree(obj); return -EINVAL; } @@ -170,8 +109,7 @@ static int led_off(void) 0); /* not used */ } -static int led_blink(unsigned char on_eighths, - unsigned char off_eighths) +static int led_blink(unsigned char on_eighths, unsigned char off_eighths) { return dell_led_perform_fn(5, /* Length of command */ INTERFACE_ERROR, /* Init to INTERFACE_ERROR */ @@ -182,7 +120,7 @@ static int led_blink(unsigned char on_eighths, } static void dell_led_set(struct led_classdev *led_cdev, - enum led_brightness value) + enum led_brightness value) { if (value == LED_OFF) led_off(); @@ -191,27 +129,22 @@ static void dell_led_set(struct led_classdev *led_cdev, } static int dell_led_blink(struct led_classdev *led_cdev, - unsigned long *delay_on, - unsigned long *delay_off) + unsigned long *delay_on, unsigned long *delay_off) { unsigned long on_eighths; unsigned long off_eighths; - /* The Dell LED delay is based on 125ms intervals. - Need to round up to next interval. */ + /* + * The Dell LED delay is based on 125ms intervals. + * Need to round up to next interval. + */ - on_eighths = (*delay_on + 124) / 125; - if (0 == on_eighths) - on_eighths = 1; - if (on_eighths > 255) - on_eighths = 255; + on_eighths = DIV_ROUND_UP(*delay_on, 125); + on_eighths = clamp_t(unsigned long, on_eighths, 1, 255); *delay_on = on_eighths * 125; - off_eighths = (*delay_off + 124) / 125; - if (0 == off_eighths) - off_eighths = 1; - if (off_eighths > 255) - off_eighths = 255; + off_eighths = DIV_ROUND_UP(*delay_off, 125); + off_eighths = clamp_t(unsigned long, off_eighths, 1, 255); *delay_off = off_eighths * 125; led_blink(on_eighths, off_eighths); @@ -232,29 +165,21 @@ static int __init dell_led_init(void) { int error = 0; - if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID)) + if (!wmi_has_guid(DELL_LED_BIOS_GUID)) return -ENODEV; - if (wmi_has_guid(DELL_LED_BIOS_GUID)) { - error = led_off(); - if (error != 0) - return -ENODEV; - - error = led_classdev_register(NULL, &dell_led); - } + error = led_off(); + if (error != 0) + return -ENODEV; - return error; + return led_classdev_register(NULL, &dell_led); } static void __exit dell_led_exit(void) { - int error = 0; + led_classdev_unregister(&dell_led); - if (wmi_has_guid(DELL_LED_BIOS_GUID)) { - error = led_off(); - if (error == 0) - led_classdev_unregister(&dell_led); - } + led_off(); } module_init(dell_led_init); diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index b8cacccf18c8..13f1714cf6f7 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -67,6 +67,15 @@ config POWER_RESET_BRCMSTB Say Y here if you have a Broadcom STB board and you wish to have restart support. +config POWER_RESET_GEMINI_POWEROFF + bool "Cortina Gemini power-off driver" + depends on ARCH_GEMINI || COMPILE_TEST + depends on OF && HAS_IOMEM + default ARCH_GEMINI + help + This driver supports turning off the Cortina Gemini SoC. + Select this if you're building a kernel with Gemini SoC support. + config POWER_RESET_GPIO bool "GPIO power-off driver" depends on OF_GPIO diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile index 11dae3b56ff9..58cf5b30559f 100644 --- a/drivers/power/reset/Makefile +++ b/drivers/power/reset/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_POWER_RESET_AT91_SAMA5D2_SHDWC) += at91-sama5d2_shdwc.o obj-$(CONFIG_POWER_RESET_AXXIA) += axxia-reset.o obj-$(CONFIG_POWER_RESET_BRCMKONA) += brcm-kona-reset.o obj-$(CONFIG_POWER_RESET_BRCMSTB) += brcmstb-reboot.o +obj-$(CONFIG_POWER_RESET_GEMINI_POWEROFF) += gemini-poweroff.o obj-$(CONFIG_POWER_RESET_GPIO) += gpio-poweroff.o obj-$(CONFIG_POWER_RESET_GPIO_RESTART) += gpio-restart.o obj-$(CONFIG_POWER_RESET_HISI) += hisi-reboot.o diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c new file mode 100644 index 000000000000..de878fd26f27 --- /dev/null +++ b/drivers/power/reset/gemini-poweroff.c @@ -0,0 +1,160 @@ +/* + * Gemini power management controller + * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org> + * + * Inspired by code from the SL3516 board support by Jason Lee + * Inspired by code from Janos Laube <janos.dev@gmail.com> + */ +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/reboot.h> + +#define GEMINI_PWC_ID 0x00010500 +#define GEMINI_PWC_IDREG 0x00 +#define GEMINI_PWC_CTRLREG 0x04 +#define GEMINI_PWC_STATREG 0x08 + +#define GEMINI_CTRL_SHUTDOWN BIT(0) +#define GEMINI_CTRL_ENABLE BIT(1) +#define GEMINI_CTRL_IRQ_CLR BIT(2) + +#define GEMINI_STAT_CIR BIT(4) +#define GEMINI_STAT_RTC BIT(5) +#define GEMINI_STAT_POWERBUTTON BIT(6) + +struct gemini_powercon { + struct device *dev; + void __iomem *base; +}; + +static irqreturn_t gemini_powerbutton_interrupt(int irq, void *data) +{ + struct gemini_powercon *gpw = data; + u32 val; + + /* ACK the IRQ */ + val = readl(gpw->base + GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_IRQ_CLR; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + val = readl(gpw->base + GEMINI_PWC_STATREG); + val &= 0x70U; + switch (val) { + case GEMINI_STAT_CIR: + dev_info(gpw->dev, "infrared poweroff\n"); + orderly_poweroff(true); + break; + case GEMINI_STAT_RTC: + dev_info(gpw->dev, "RTC poweroff\n"); + orderly_poweroff(true); + break; + case GEMINI_STAT_POWERBUTTON: + dev_info(gpw->dev, "poweroff button pressed\n"); + orderly_poweroff(true); + break; + default: + dev_info(gpw->dev, "other power management IRQ\n"); + break; + } + + return IRQ_HANDLED; +} + +/* This callback needs this static local as it has void as argument */ +static struct gemini_powercon *gpw_poweroff; + +static void gemini_poweroff(void) +{ + struct gemini_powercon *gpw = gpw_poweroff; + u32 val; + + dev_crit(gpw->dev, "Gemini power off\n"); + val = readl(gpw->base + GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_ENABLE | GEMINI_CTRL_IRQ_CLR; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + val &= ~GEMINI_CTRL_ENABLE; + val |= GEMINI_CTRL_SHUTDOWN; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); +} + +static int gemini_poweroff_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct gemini_powercon *gpw; + u32 val; + int irq; + int ret; + + gpw = devm_kzalloc(dev, sizeof(*gpw), GFP_KERNEL); + if (!gpw) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + gpw->base = devm_ioremap_resource(dev, res); + if (IS_ERR(gpw->base)) + return PTR_ERR(gpw->base); + + irq = platform_get_irq(pdev, 0); + if (!irq) + return -EINVAL; + + gpw->dev = dev; + + val = readl(gpw->base + GEMINI_PWC_IDREG); + val &= 0xFFFFFF00U; + if (val != GEMINI_PWC_ID) { + dev_err(dev, "wrong power controller ID: %08x\n", + val); + return -ENODEV; + } + + /* Clear the power management IRQ */ + val = readl(gpw->base + GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_IRQ_CLR; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + ret = devm_request_irq(dev, irq, gemini_powerbutton_interrupt, 0, + "poweroff", gpw); + if (ret) + return ret; + + pm_power_off = gemini_poweroff; + gpw_poweroff = gpw; + + /* + * Enable the power controller. This is crucial on Gemini + * systems: if this is not done, pressing the power button + * will result in unconditional poweroff without any warning. + * This makes the kernel handle the poweroff. + */ + val = readl(gpw->base + GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_ENABLE; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + dev_info(dev, "Gemini poweroff driver registered\n"); + + return 0; +} + +static const struct of_device_id gemini_poweroff_of_match[] = { + { + .compatible = "cortina,gemini-power-controller", + }, + {} +}; + +static struct platform_driver gemini_poweroff_driver = { + .probe = gemini_poweroff_probe, + .driver = { + .name = "gemini-poweroff", + .of_match_table = gemini_poweroff_of_match, + }, +}; +builtin_platform_driver(gemini_poweroff_driver); diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c index b68338399e5e..f9f1cb54fbf9 100644 --- a/drivers/power/reset/syscon-poweroff.c +++ b/drivers/power/reset/syscon-poweroff.c @@ -28,12 +28,13 @@ static struct regmap *map; static u32 offset; +static u32 value; static u32 mask; static void syscon_poweroff(void) { /* Issue the poweroff */ - regmap_write(map, offset, mask); + regmap_update_bits(map, offset, mask, value); mdelay(1000); @@ -43,6 +44,7 @@ static void syscon_poweroff(void) static int syscon_poweroff_probe(struct platform_device *pdev) { char symname[KSYM_NAME_LEN]; + int mask_err, value_err; map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "regmap"); if (IS_ERR(map)) { @@ -55,11 +57,22 @@ static int syscon_poweroff_probe(struct platform_device *pdev) return -EINVAL; } - if (of_property_read_u32(pdev->dev.of_node, "mask", &mask)) { - dev_err(&pdev->dev, "unable to read 'mask'"); + value_err = of_property_read_u32(pdev->dev.of_node, "value", &value); + mask_err = of_property_read_u32(pdev->dev.of_node, "mask", &mask); + if (value_err && mask_err) { + dev_err(&pdev->dev, "unable to read 'value' and 'mask'"); return -EINVAL; } + if (value_err) { + /* support old binding */ + value = mask; + mask = 0xFFFFFFFF; + } else if (mask_err) { + /* support value without mask*/ + mask = 0xFFFFFFFF; + } + if (pm_power_off) { lookup_symbol_name((ulong)pm_power_off, symname); dev_err(&pdev->dev, diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index da54ac88f068..da922756149f 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -117,6 +117,12 @@ config BATTERY_DS2782 Say Y here to enable support for the DS2782/DS2786 standalone battery gas-gauge. +config BATTERY_LEGO_EV3 + tristate "LEGO MINDSTORMS EV3 battery" + depends on OF && IIO && GPIOLIB + help + Say Y here to enable support for the LEGO MINDSTORMS EV3 battery. + config BATTERY_PMU tristate "Apple PMU battery" depends on PPC32 && ADB_PMU @@ -317,6 +323,14 @@ config BATTERY_RX51 Say Y here to enable support for battery information on Nokia RX-51, also known as N900 tablet. +config CHARGER_CPCAP + tristate "CPCAP PMIC Charger Driver" + depends on MFD_CPCAP && IIO + default MFD_CPCAP + help + Say Y to enable support for CPCAP PMIC charger driver for Motorola + mobile devices such as Droid 4. + config CHARGER_ISP1704 tristate "ISP1704 USB Charger Detection" depends on USB_PHY @@ -438,6 +452,7 @@ config CHARGER_BQ2415X config CHARGER_BQ24190 tristate "TI BQ24190 battery charger driver" depends on I2C + depends on EXTCON depends on GPIOLIB || COMPILE_TEST help Say Y to enable support for the TI BQ24190 battery charger. diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile index 3789a2c06fdf..39fc733e6cc4 100644 --- a/drivers/power/supply/Makefile +++ b/drivers/power/supply/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_BATTERY_DS2781) += ds2781_battery.o obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o obj-$(CONFIG_BATTERY_GAUGE_LTC2941) += ltc2941-battery-gauge.o obj-$(CONFIG_BATTERY_GOLDFISH) += goldfish_battery.o +obj-$(CONFIG_BATTERY_LEGO_EV3) += lego_ev3_battery.o obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o @@ -51,6 +52,7 @@ obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o obj-$(CONFIG_BATTERY_JZ4740) += jz4740-battery.o obj-$(CONFIG_BATTERY_RX51) += rx51_battery.o obj-$(CONFIG_AB8500_BM) += ab8500_bmdata.o ab8500_charger.o ab8500_fg.o ab8500_btemp.o abx500_chargalg.o pm2301_charger.o +obj-$(CONFIG_CHARGER_CPCAP) += cpcap-charger.o obj-$(CONFIG_CHARGER_ISP1704) += isp1704_charger.o obj-$(CONFIG_CHARGER_MAX8903) += max8903_charger.o obj-$(CONFIG_CHARGER_TWL4030) += twl4030_charger.o diff --git a/drivers/power/supply/ab8500_bmdata.c b/drivers/power/supply/ab8500_bmdata.c index d29864533093..8c49586015d0 100644 --- a/drivers/power/supply/ab8500_bmdata.c +++ b/drivers/power/supply/ab8500_bmdata.c @@ -430,10 +430,10 @@ static const struct abx500_maxim_parameters ab8500_maxi_params = { }; static const struct abx500_maxim_parameters abx540_maxi_params = { - .ena_maxi = true, - .chg_curr = 3000, - .wait_cycles = 10, - .charger_curr_step = 200, + .ena_maxi = true, + .chg_curr = 3000, + .wait_cycles = 10, + .charger_curr_step = 200, }; static const struct abx500_bm_charger_parameters chg = { diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index a4f08492abeb..bd9e5c3d8cc2 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -11,16 +11,15 @@ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/delay.h> +#include <linux/extcon.h> #include <linux/of_irq.h> #include <linux/of_device.h> #include <linux/pm_runtime.h> #include <linux/power_supply.h> +#include <linux/workqueue.h> #include <linux/gpio.h> #include <linux/i2c.h> -#include <linux/power/bq24190_charger.h> - - #define BQ24190_MANUFACTURER "Texas Instruments" #define BQ24190_REG_ISC 0x00 /* Input Source Control */ @@ -39,6 +38,9 @@ #define BQ24190_REG_POC_WDT_RESET_SHIFT 6 #define BQ24190_REG_POC_CHG_CONFIG_MASK (BIT(5) | BIT(4)) #define BQ24190_REG_POC_CHG_CONFIG_SHIFT 4 +#define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0 +#define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1 +#define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2 #define BQ24190_REG_POC_SYS_MIN_MASK (BIT(3) | BIT(2) | BIT(1)) #define BQ24190_REG_POC_SYS_MIN_SHIFT 1 #define BQ24190_REG_POC_BOOST_LIM_MASK BIT(0) @@ -151,10 +153,12 @@ struct bq24190_dev_info { struct device *dev; struct power_supply *charger; struct power_supply *battery; + struct extcon_dev *extcon; + struct notifier_block extcon_nb; + struct delayed_work extcon_work; char model_name[I2C_NAME_SIZE]; - kernel_ulong_t model; - unsigned int gpio_int; - unsigned int irq; + bool initialized; + bool irq_event; struct mutex f_reg_lock; u8 f_reg; u8 ss_reg; @@ -168,6 +172,12 @@ struct bq24190_dev_info { * number at that index in the array is the real-world value that it * represents. */ + +/* REG00[2:0] (IINLIM) in uAh */ +static const int bq24190_isc_iinlim_values[] = { + 100000, 150000, 500000, 900000, 1200000, 1500000, 2000000, 3000000 +}; + /* REG02[7:2] (ICHG) in uAh */ static const int bq24190_ccc_ichg_values[] = { 512000, 576000, 640000, 704000, 768000, 832000, 896000, 960000, @@ -418,6 +428,7 @@ static ssize_t bq24190_sysfs_show(struct device *dev, struct power_supply *psy = dev_get_drvdata(dev); struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy); struct bq24190_sysfs_field_info *info; + ssize_t count; int ret; u8 v; @@ -425,11 +436,20 @@ static ssize_t bq24190_sysfs_show(struct device *dev, if (!info) return -EINVAL; + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; + ret = bq24190_read_mask(bdi, info->reg, info->mask, info->shift, &v); if (ret) - return ret; + count = ret; + else + count = scnprintf(buf, PAGE_SIZE, "%hhx\n", v); + + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); - return scnprintf(buf, PAGE_SIZE, "%hhx\n", v); + return count; } static ssize_t bq24190_sysfs_store(struct device *dev, @@ -449,9 +469,16 @@ static ssize_t bq24190_sysfs_store(struct device *dev, if (ret < 0) return ret; + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; + ret = bq24190_write_mask(bdi, info->reg, info->mask, info->shift, v); if (ret) - return ret; + count = ret; + + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); return count; } @@ -523,16 +550,13 @@ static int bq24190_register_reset(struct bq24190_dev_info *bdi) if (ret < 0) return ret; - if (!v) - break; + if (v == 0) + return 0; - udelay(10); + usleep_range(100, 200); } while (--limit); - if (!limit) - return -EIO; - - return 0; + return -EIO; } /* Charger power supply property routines */ @@ -793,7 +817,9 @@ static int bq24190_charger_get_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: @@ -833,7 +859,9 @@ static int bq24190_charger_get_property(struct power_supply *psy, ret = -ENODATA; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -846,7 +874,9 @@ static int bq24190_charger_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: @@ -862,7 +892,9 @@ static int bq24190_charger_set_property(struct power_supply *psy, ret = -EINVAL; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -1063,7 +1095,9 @@ static int bq24190_battery_get_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_STATUS: @@ -1091,7 +1125,9 @@ static int bq24190_battery_get_property(struct power_supply *psy, ret = -ENODATA; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -1104,7 +1140,9 @@ static int bq24190_battery_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_ONLINE: @@ -1117,7 +1155,9 @@ static int bq24190_battery_set_property(struct power_supply *psy, ret = -EINVAL; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -1157,9 +1197,8 @@ static const struct power_supply_desc bq24190_battery_desc = { .property_is_writeable = bq24190_battery_property_is_writeable, }; -static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) +static void bq24190_check_status(struct bq24190_dev_info *bdi) { - struct bq24190_dev_info *bdi = data; const u8 battery_mask_ss = BQ24190_REG_SS_CHRG_STAT_MASK; const u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK | BQ24190_REG_F_NTC_FAULT_MASK; @@ -1167,12 +1206,10 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) u8 ss_reg = 0, f_reg = 0; int i, ret; - pm_runtime_get_sync(bdi->dev); - ret = bq24190_read(bdi, BQ24190_REG_SS, &ss_reg); if (ret < 0) { dev_err(bdi->dev, "Can't read SS reg: %d\n", ret); - goto out; + return; } i = 0; @@ -1180,12 +1217,17 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) ret = bq24190_read(bdi, BQ24190_REG_F, &f_reg); if (ret < 0) { dev_err(bdi->dev, "Can't read F reg: %d\n", ret); - goto out; + return; } } while (f_reg && ++i < 2); + /* ignore over/under voltage fault after disconnect */ + if (f_reg == (1 << BQ24190_REG_F_CHRG_FAULT_SHIFT) && + !(ss_reg & BQ24190_REG_SS_PG_STAT_MASK)) + f_reg = 0; + if (f_reg != bdi->f_reg) { - dev_info(bdi->dev, + dev_warn(bdi->dev, "Fault: boost %d, charge %d, battery %d, ntc %d\n", !!(f_reg & BQ24190_REG_F_BOOST_FAULT_MASK), !!(f_reg & BQ24190_REG_F_CHRG_FAULT_MASK), @@ -1229,90 +1271,126 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) if (alert_battery) power_supply_changed(bdi->battery); -out: - pm_runtime_put_sync(bdi->dev); - dev_dbg(bdi->dev, "ss_reg: 0x%02x, f_reg: 0x%02x\n", ss_reg, f_reg); +} + +static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) +{ + struct bq24190_dev_info *bdi = data; + int error; + + bdi->irq_event = true; + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + return IRQ_NONE; + } + bq24190_check_status(bdi); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + bdi->irq_event = false; return IRQ_HANDLED; } -static int bq24190_hw_init(struct bq24190_dev_info *bdi) +static void bq24190_extcon_work(struct work_struct *work) { + struct bq24190_dev_info *bdi = + container_of(work, struct bq24190_dev_info, extcon_work.work); + int error, iinlim = 0; u8 v; - int ret; - - pm_runtime_get_sync(bdi->dev); - /* First check that the device really is what its supposed to be */ - ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS, - BQ24190_REG_VPRS_PN_MASK, - BQ24190_REG_VPRS_PN_SHIFT, - &v); - if (ret < 0) - goto out; + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + return; + } - if (v != bdi->model) { - ret = -ENODEV; - goto out; + if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_SDP) == 1) + iinlim = 500000; + else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_CDP) == 1 || + extcon_get_state(bdi->extcon, EXTCON_CHG_USB_ACA) == 1) + iinlim = 1500000; + else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_DCP) == 1) + iinlim = 2000000; + + if (iinlim) { + error = bq24190_set_field_val(bdi, BQ24190_REG_ISC, + BQ24190_REG_ISC_IINLIM_MASK, + BQ24190_REG_ISC_IINLIM_SHIFT, + bq24190_isc_iinlim_values, + ARRAY_SIZE(bq24190_isc_iinlim_values), + iinlim); + if (error < 0) + dev_err(bdi->dev, "Can't set IINLIM: %d\n", error); } - ret = bq24190_register_reset(bdi); - if (ret < 0) - goto out; + /* if no charger found and in USB host mode, set OTG 5V boost, else normal */ + if (!iinlim && extcon_get_state(bdi->extcon, EXTCON_USB_HOST) == 1) + v = BQ24190_REG_POC_CHG_CONFIG_OTG; + else + v = BQ24190_REG_POC_CHG_CONFIG_CHARGE; - ret = bq24190_set_mode_host(bdi); - if (ret < 0) - goto out; + error = bq24190_write_mask(bdi, BQ24190_REG_POC, + BQ24190_REG_POC_CHG_CONFIG_MASK, + BQ24190_REG_POC_CHG_CONFIG_SHIFT, + v); + if (error < 0) + dev_err(bdi->dev, "Can't set CHG_CONFIG: %d\n", error); - ret = bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg); -out: - pm_runtime_put_sync(bdi->dev); - return ret; + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); } -#ifdef CONFIG_OF -static int bq24190_setup_dt(struct bq24190_dev_info *bdi) +static int bq24190_extcon_event(struct notifier_block *nb, unsigned long event, + void *param) { - bdi->irq = irq_of_parse_and_map(bdi->dev->of_node, 0); - if (bdi->irq <= 0) - return -1; + struct bq24190_dev_info *bdi = + container_of(nb, struct bq24190_dev_info, extcon_nb); - return 0; -} -#else -static int bq24190_setup_dt(struct bq24190_dev_info *bdi) -{ - return -1; + /* + * The Power-Good detection may take up to 220ms, sometimes + * the external charger detection is quicker, and the bq24190 will + * reset to iinlim based on its own charger detection (which is not + * hooked up when using external charger detection) resulting in + * a too low default 500mA iinlim. Delay applying the extcon value + * for 300ms to avoid this. + */ + queue_delayed_work(system_wq, &bdi->extcon_work, msecs_to_jiffies(300)); + + return NOTIFY_OK; } -#endif -static int bq24190_setup_pdata(struct bq24190_dev_info *bdi, - struct bq24190_platform_data *pdata) +static int bq24190_hw_init(struct bq24190_dev_info *bdi) { + u8 v; int ret; - if (!gpio_is_valid(pdata->gpio_int)) - return -1; - - ret = gpio_request(pdata->gpio_int, dev_name(bdi->dev)); + /* First check that the device really is what its supposed to be */ + ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS, + BQ24190_REG_VPRS_PN_MASK, + BQ24190_REG_VPRS_PN_SHIFT, + &v); if (ret < 0) - return -1; + return ret; - ret = gpio_direction_input(pdata->gpio_int); - if (ret < 0) - goto out; + if (v != BQ24190_REG_VPRS_PN_24190 && + v != BQ24190_REG_VPRS_PN_24192I) { + dev_err(bdi->dev, "Error unknown model: 0x%02x\n", v); + return -ENODEV; + } - bdi->irq = gpio_to_irq(pdata->gpio_int); - if (!bdi->irq) - goto out; + ret = bq24190_register_reset(bdi); + if (ret < 0) + return ret; - bdi->gpio_int = pdata->gpio_int; - return 0; + ret = bq24190_set_mode_host(bdi); + if (ret < 0) + return ret; -out: - gpio_free(pdata->gpio_int); - return -1; + return bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg); } static int bq24190_probe(struct i2c_client *client, @@ -1320,9 +1398,9 @@ static int bq24190_probe(struct i2c_client *client, { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); struct device *dev = &client->dev; - struct bq24190_platform_data *pdata = client->dev.platform_data; struct power_supply_config charger_cfg = {}, battery_cfg = {}; struct bq24190_dev_info *bdi; + const char *name; int ret; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { @@ -1338,7 +1416,6 @@ static int bq24190_probe(struct i2c_client *client, bdi->client = client; bdi->dev = dev; - bdi->model = id->driver_data; strncpy(bdi->model_name, id->name, I2C_NAME_SIZE); mutex_init(&bdi->f_reg_lock); bdi->f_reg = 0; @@ -1346,23 +1423,43 @@ static int bq24190_probe(struct i2c_client *client, i2c_set_clientdata(client, bdi); - if (dev->of_node) - ret = bq24190_setup_dt(bdi); - else - ret = bq24190_setup_pdata(bdi, pdata); - - if (ret) { + if (!client->irq) { dev_err(dev, "Can't get irq info\n"); return -EINVAL; } + /* + * Devicetree platforms should get extcon via phandle (not yet supported). + * On ACPI platforms, extcon clients may invoke us with: + * struct property_entry pe[] = + * { PROPERTY_ENTRY_STRING("extcon-name", client_name), ... }; + * struct i2c_board_info bi = + * { .type = "bq24190", .addr = 0x6b, .properties = pe, .irq = irq }; + * struct i2c_adapter ad = { ... }; + * i2c_add_adapter(&ad); + * i2c_new_device(&ad, &bi); + */ + if (device_property_read_string(dev, "extcon-name", &name) == 0) { + bdi->extcon = extcon_get_extcon_dev(name); + if (!bdi->extcon) + return -EPROBE_DEFER; + + dev_info(bdi->dev, "using extcon device %s\n", name); + } + pm_runtime_enable(dev); - pm_runtime_resume(dev); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 600); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get failed: %i\n", ret); + goto out_pmrt; + } ret = bq24190_hw_init(bdi); if (ret < 0) { dev_err(dev, "Hardware init failed\n"); - goto out1; + goto out_pmrt; } charger_cfg.drv_data = bdi; @@ -1373,7 +1470,7 @@ static int bq24190_probe(struct i2c_client *client, if (IS_ERR(bdi->charger)) { dev_err(dev, "Can't register charger\n"); ret = PTR_ERR(bdi->charger); - goto out1; + goto out_pmrt; } battery_cfg.drv_data = bdi; @@ -1382,87 +1479,160 @@ static int bq24190_probe(struct i2c_client *client, if (IS_ERR(bdi->battery)) { dev_err(dev, "Can't register battery\n"); ret = PTR_ERR(bdi->battery); - goto out2; + goto out_charger; } ret = bq24190_sysfs_create_group(bdi); if (ret) { dev_err(dev, "Can't create sysfs entries\n"); - goto out3; + goto out_battery; } - ret = devm_request_threaded_irq(dev, bdi->irq, NULL, + bdi->initialized = true; + + ret = devm_request_threaded_irq(dev, client->irq, NULL, bq24190_irq_handler_thread, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "bq24190-charger", bdi); if (ret < 0) { dev_err(dev, "Can't set up irq handler\n"); - goto out4; + goto out_sysfs; + } + + if (bdi->extcon) { + INIT_DELAYED_WORK(&bdi->extcon_work, bq24190_extcon_work); + bdi->extcon_nb.notifier_call = bq24190_extcon_event; + ret = devm_extcon_register_notifier_all(dev, bdi->extcon, + &bdi->extcon_nb); + if (ret) { + dev_err(dev, "Can't register extcon\n"); + goto out_sysfs; + } + + /* Sync initial cable state */ + queue_delayed_work(system_wq, &bdi->extcon_work, 0); } + enable_irq_wake(client->irq); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + return 0; -out4: +out_sysfs: bq24190_sysfs_remove_group(bdi); -out3: +out_battery: power_supply_unregister(bdi->battery); -out2: +out_charger: power_supply_unregister(bdi->charger); -out1: +out_pmrt: + pm_runtime_put_sync(dev); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - if (bdi->gpio_int) - gpio_free(bdi->gpio_int); return ret; } static int bq24190_remove(struct i2c_client *client) { struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + int error; - pm_runtime_get_sync(bdi->dev); - bq24190_register_reset(bdi); - pm_runtime_put_sync(bdi->dev); + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + } + bq24190_register_reset(bdi); bq24190_sysfs_remove_group(bdi); power_supply_unregister(bdi->battery); power_supply_unregister(bdi->charger); + if (error >= 0) + pm_runtime_put_sync(bdi->dev); + pm_runtime_dont_use_autosuspend(bdi->dev); pm_runtime_disable(bdi->dev); - if (bdi->gpio_int) - gpio_free(bdi->gpio_int); + return 0; +} + +static __maybe_unused int bq24190_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + + if (!bdi->initialized) + return 0; + + dev_dbg(bdi->dev, "%s\n", __func__); + + return 0; +} + +static __maybe_unused int bq24190_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + + if (!bdi->initialized) + return 0; + + if (!bdi->irq_event) { + dev_dbg(bdi->dev, "checking events on possible wakeirq\n"); + bq24190_check_status(bdi); + } return 0; } -#ifdef CONFIG_PM_SLEEP -static int bq24190_pm_suspend(struct device *dev) +static __maybe_unused int bq24190_pm_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + int error; + + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + } - pm_runtime_get_sync(bdi->dev); bq24190_register_reset(bdi); - pm_runtime_put_sync(bdi->dev); + + if (error >= 0) { + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + } return 0; } -static int bq24190_pm_resume(struct device *dev) +static __maybe_unused int bq24190_pm_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + int error; bdi->f_reg = 0; bdi->ss_reg = BQ24190_REG_SS_VBUS_STAT_MASK; /* impossible state */ - pm_runtime_get_sync(bdi->dev); + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + } + bq24190_register_reset(bdi); bq24190_set_mode_host(bdi); bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg); - pm_runtime_put_sync(bdi->dev); + + if (error >= 0) { + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + } /* Things may have changed while suspended so alert upper layer */ power_supply_changed(bdi->charger); @@ -1470,17 +1640,16 @@ static int bq24190_pm_resume(struct device *dev) return 0; } -#endif -static SIMPLE_DEV_PM_OPS(bq24190_pm_ops, bq24190_pm_suspend, bq24190_pm_resume); +static const struct dev_pm_ops bq24190_pm_ops = { + SET_RUNTIME_PM_OPS(bq24190_runtime_suspend, bq24190_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(bq24190_pm_suspend, bq24190_pm_resume) +}; -/* - * Only support the bq24190 right now. The bq24192, bq24192i, and bq24193 - * are similar but not identical so the driver needs to be extended to - * support them. - */ static const struct i2c_device_id bq24190_i2c_ids[] = { - { "bq24190", BQ24190_REG_VPRS_PN_24190 }, + { "bq24190" }, + { "bq24192i" }, { }, }; MODULE_DEVICE_TABLE(i2c, bq24190_i2c_ids); diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index f993a55cde20..8e2c41ded171 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -723,7 +723,7 @@ static int bq25890_irq_probe(struct bq25890_device *bq) { struct gpio_desc *irq; - irq = devm_gpiod_get_index(bq->dev, BQ25890_IRQ_PIN, 0, GPIOD_IN); + irq = devm_gpiod_get(bq->dev, BQ25890_IRQ_PIN, GPIOD_IN); if (IS_ERR(irq)) { dev_err(bq->dev, "Could not probe irq pin.\n"); return PTR_ERR(irq); diff --git a/drivers/power/supply/charger-manager.c b/drivers/power/supply/charger-manager.c index e664ca7c0afd..adc3761831e1 100644 --- a/drivers/power/supply/charger-manager.c +++ b/drivers/power/supply/charger-manager.c @@ -1198,7 +1198,7 @@ static int charger_extcon_notifier(struct notifier_block *self, static int charger_extcon_init(struct charger_manager *cm, struct charger_cable *cable) { - int ret = 0; + int ret; /* * Charger manager use Extcon framework to identify @@ -1232,7 +1232,7 @@ static int charger_manager_register_extcon(struct charger_manager *cm) { struct charger_desc *desc = cm->desc; struct charger_regulator *charger; - int ret = 0; + int ret; int i; int j; @@ -1255,15 +1255,14 @@ static int charger_manager_register_extcon(struct charger_manager *cm) if (ret < 0) { dev_err(cm->dev, "Cannot initialize charger(%s)\n", charger->regulator_name); - goto err; + return ret; } cable->charger = charger; cable->cm = cm; } } -err: - return ret; + return 0; } /* help function of sysfs node to control charger(regulator) */ @@ -1372,7 +1371,7 @@ static int charger_manager_register_sysfs(struct charger_manager *cm) int chargers_externally_control = 1; char buf[11]; char *str; - int ret = 0; + int ret; int i; /* Create sysfs entry to control charger(regulator) */ @@ -1382,10 +1381,9 @@ static int charger_manager_register_sysfs(struct charger_manager *cm) snprintf(buf, 10, "charger.%d", i); str = devm_kzalloc(cm->dev, sizeof(char) * (strlen(buf) + 1), GFP_KERNEL); - if (!str) { - ret = -ENOMEM; - goto err; - } + if (!str) + return -ENOMEM; + strcpy(str, buf); charger->attrs[0] = &charger->attr_name.attr; @@ -1426,19 +1424,16 @@ static int charger_manager_register_sysfs(struct charger_manager *cm) if (ret < 0) { dev_err(cm->dev, "Cannot create sysfs entry of %s regulator\n", charger->regulator_name); - ret = -EINVAL; - goto err; + return ret; } } if (chargers_externally_control) { dev_err(cm->dev, "Cannot register regulator because charger-manager must need at least one charger for charging battery\n"); - ret = -EINVAL; - goto err; + return -EINVAL; } -err: - return ret; + return 0; } static int cm_init_thermal_data(struct charger_manager *cm, @@ -1626,7 +1621,7 @@ static int charger_manager_probe(struct platform_device *pdev) { struct charger_desc *desc = cm_get_drv_data(pdev); struct charger_manager *cm; - int ret = 0, i = 0; + int ret, i = 0; int j = 0; union power_supply_propval val; struct power_supply *fuel_gauge; @@ -1887,14 +1882,12 @@ MODULE_DEVICE_TABLE(platform, charger_manager_id); static int cm_suspend_noirq(struct device *dev) { - int ret = 0; - if (device_may_wakeup(dev)) { device_set_wakeup_capable(dev, false); - ret = -EAGAIN; + return -EAGAIN; } - return ret; + return 0; } static bool cm_need_to_awake(void) diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c new file mode 100644 index 000000000000..543a1bd21ab9 --- /dev/null +++ b/drivers/power/supply/cpcap-charger.c @@ -0,0 +1,681 @@ +/* + * Motorola CPCAP PMIC battery charger driver + * + * Copyright (C) 2017 Tony Lindgren <tony@atomide.com> + * + * Rewritten for Linux power framework with some parts based on + * on earlier driver found in the Motorola Linux kernel: + * + * Copyright (C) 2009-2010 Motorola, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/power_supply.h> +#include <linux/regmap.h> + +#include <linux/gpio/consumer.h> +#include <linux/usb/phy_companion.h> +#include <linux/phy/omap_usb.h> +#include <linux/usb/otg.h> +#include <linux/iio/consumer.h> +#include <linux/mfd/motorola-cpcap.h> + +/* CPCAP_REG_CRM register bits */ +#define CPCAP_REG_CRM_UNUSED_641_15 BIT(15) /* 641 = register number */ +#define CPCAP_REG_CRM_UNUSED_641_14 BIT(14) /* 641 = register number */ +#define CPCAP_REG_CRM_CHRG_LED_EN BIT(13) +#define CPCAP_REG_CRM_RVRSMODE BIT(12) +#define CPCAP_REG_CRM_ICHRG_TR1 BIT(11) +#define CPCAP_REG_CRM_ICHRG_TR0 BIT(10) +#define CPCAP_REG_CRM_FET_OVRD BIT(9) +#define CPCAP_REG_CRM_FET_CTRL BIT(8) +#define CPCAP_REG_CRM_VCHRG3 BIT(7) +#define CPCAP_REG_CRM_VCHRG2 BIT(6) +#define CPCAP_REG_CRM_VCHRG1 BIT(5) +#define CPCAP_REG_CRM_VCHRG0 BIT(4) +#define CPCAP_REG_CRM_ICHRG3 BIT(3) +#define CPCAP_REG_CRM_ICHRG2 BIT(2) +#define CPCAP_REG_CRM_ICHRG1 BIT(1) +#define CPCAP_REG_CRM_ICHRG0 BIT(0) + +/* CPCAP_REG_CRM trickle charge voltages */ +#define CPCAP_REG_CRM_TR(val) (((val) & 0x3) << 10) +#define CPCAP_REG_CRM_TR_0A00 CPCAP_REG_CRM_TR(0x0) +#define CPCAP_REG_CRM_TR_0A24 CPCAP_REG_CRM_TR(0x1) +#define CPCAP_REG_CRM_TR_0A48 CPCAP_REG_CRM_TR(0x2) +#define CPCAP_REG_CRM_TR_0A72 CPCAP_REG_CRM_TR(0x4) + +/* CPCAP_REG_CRM charge voltages */ +#define CPCAP_REG_CRM_VCHRG(val) (((val) & 0xf) << 4) +#define CPCAP_REG_CRM_VCHRG_3V80 CPCAP_REG_CRM_VCHRG(0x0) +#define CPCAP_REG_CRM_VCHRG_4V10 CPCAP_REG_CRM_VCHRG(0x1) +#define CPCAP_REG_CRM_VCHRG_4V15 CPCAP_REG_CRM_VCHRG(0x2) +#define CPCAP_REG_CRM_VCHRG_4V20 CPCAP_REG_CRM_VCHRG(0x3) +#define CPCAP_REG_CRM_VCHRG_4V22 CPCAP_REG_CRM_VCHRG(0x4) +#define CPCAP_REG_CRM_VCHRG_4V24 CPCAP_REG_CRM_VCHRG(0x5) +#define CPCAP_REG_CRM_VCHRG_4V26 CPCAP_REG_CRM_VCHRG(0x6) +#define CPCAP_REG_CRM_VCHRG_4V28 CPCAP_REG_CRM_VCHRG(0x7) +#define CPCAP_REG_CRM_VCHRG_4V30 CPCAP_REG_CRM_VCHRG(0x8) +#define CPCAP_REG_CRM_VCHRG_4V32 CPCAP_REG_CRM_VCHRG(0x9) +#define CPCAP_REG_CRM_VCHRG_4V34 CPCAP_REG_CRM_VCHRG(0xa) +#define CPCAP_REG_CRM_VCHRG_4V36 CPCAP_REG_CRM_VCHRG(0xb) +#define CPCAP_REG_CRM_VCHRG_4V38 CPCAP_REG_CRM_VCHRG(0xc) +#define CPCAP_REG_CRM_VCHRG_4V40 CPCAP_REG_CRM_VCHRG(0xd) +#define CPCAP_REG_CRM_VCHRG_4V42 CPCAP_REG_CRM_VCHRG(0xe) +#define CPCAP_REG_CRM_VCHRG_4V44 CPCAP_REG_CRM_VCHRG(0xf) + +/* CPCAP_REG_CRM charge currents */ +#define CPCAP_REG_CRM_ICHRG(val) (((val) & 0xf) << 0) +#define CPCAP_REG_CRM_ICHRG_0A000 CPCAP_REG_CRM_ICHRG(0x0) +#define CPCAP_REG_CRM_ICHRG_0A070 CPCAP_REG_CRM_ICHRG(0x1) +#define CPCAP_REG_CRM_ICHRG_0A176 CPCAP_REG_CRM_ICHRG(0x2) +#define CPCAP_REG_CRM_ICHRG_0A264 CPCAP_REG_CRM_ICHRG(0x3) +#define CPCAP_REG_CRM_ICHRG_0A352 CPCAP_REG_CRM_ICHRG(0x4) +#define CPCAP_REG_CRM_ICHRG_0A440 CPCAP_REG_CRM_ICHRG(0x5) +#define CPCAP_REG_CRM_ICHRG_0A528 CPCAP_REG_CRM_ICHRG(0x6) +#define CPCAP_REG_CRM_ICHRG_0A616 CPCAP_REG_CRM_ICHRG(0x7) +#define CPCAP_REG_CRM_ICHRG_0A704 CPCAP_REG_CRM_ICHRG(0x8) +#define CPCAP_REG_CRM_ICHRG_0A792 CPCAP_REG_CRM_ICHRG(0x9) +#define CPCAP_REG_CRM_ICHRG_0A880 CPCAP_REG_CRM_ICHRG(0xa) +#define CPCAP_REG_CRM_ICHRG_0A968 CPCAP_REG_CRM_ICHRG(0xb) +#define CPCAP_REG_CRM_ICHRG_1A056 CPCAP_REG_CRM_ICHRG(0xc) +#define CPCAP_REG_CRM_ICHRG_1A144 CPCAP_REG_CRM_ICHRG(0xd) +#define CPCAP_REG_CRM_ICHRG_1A584 CPCAP_REG_CRM_ICHRG(0xe) +#define CPCAP_REG_CRM_ICHRG_NO_LIMIT CPCAP_REG_CRM_ICHRG(0xf) + +enum { + CPCAP_CHARGER_IIO_BATTDET, + CPCAP_CHARGER_IIO_VOLTAGE, + CPCAP_CHARGER_IIO_VBUS, + CPCAP_CHARGER_IIO_CHRG_CURRENT, + CPCAP_CHARGER_IIO_BATT_CURRENT, + CPCAP_CHARGER_IIO_NR, +}; + +struct cpcap_charger_ddata { + struct device *dev; + struct regmap *reg; + struct list_head irq_list; + struct delayed_work detect_work; + struct delayed_work vbus_work; + struct gpio_desc *gpio[2]; /* gpio_reven0 & 1 */ + + struct iio_channel *channels[CPCAP_CHARGER_IIO_NR]; + + struct power_supply *usb; + + struct phy_companion comparator; /* For USB VBUS */ + bool vbus_enabled; + atomic_t active; + + int status; +}; + +struct cpcap_interrupt_desc { + int irq; + struct list_head node; + const char *name; +}; + +struct cpcap_charger_ints_state { + bool chrg_det; + bool rvrs_chrg; + bool vbusov; + + bool chrg_se1b; + bool rvrs_mode; + bool chrgcurr1; + bool vbusvld; + + bool battdetb; +}; + +static enum power_supply_property cpcap_charger_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_ONLINE, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, +}; + +static bool cpcap_charger_battery_found(struct cpcap_charger_ddata *ddata) +{ + struct iio_channel *channel; + int error, value; + + channel = ddata->channels[CPCAP_CHARGER_IIO_BATTDET]; + error = iio_read_channel_raw(channel, &value); + if (error < 0) { + dev_warn(ddata->dev, "%s failed: %i\n", __func__, error); + + return false; + } + + return value == 1; +} + +static int cpcap_charger_get_charge_voltage(struct cpcap_charger_ddata *ddata) +{ + struct iio_channel *channel; + int error, value = 0; + + channel = ddata->channels[CPCAP_CHARGER_IIO_VOLTAGE]; + error = iio_read_channel_processed(channel, &value); + if (error < 0) { + dev_warn(ddata->dev, "%s failed: %i\n", __func__, error); + + return 0; + } + + return value; +} + +static int cpcap_charger_get_charge_current(struct cpcap_charger_ddata *ddata) +{ + struct iio_channel *channel; + int error, value = 0; + + channel = ddata->channels[CPCAP_CHARGER_IIO_CHRG_CURRENT]; + error = iio_read_channel_processed(channel, &value); + if (error < 0) { + dev_warn(ddata->dev, "%s failed: %i\n", __func__, error); + + return 0; + } + + return value; +} + +static int cpcap_charger_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct cpcap_charger_ddata *ddata = dev_get_drvdata(psy->dev.parent); + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + val->intval = ddata->status; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + if (ddata->status == POWER_SUPPLY_STATUS_CHARGING) + val->intval = cpcap_charger_get_charge_voltage(ddata) * + 1000; + else + val->intval = 0; + break; + case POWER_SUPPLY_PROP_CURRENT_NOW: + if (ddata->status == POWER_SUPPLY_STATUS_CHARGING) + val->intval = cpcap_charger_get_charge_current(ddata) * + 1000; + else + val->intval = 0; + break; + case POWER_SUPPLY_PROP_ONLINE: + val->intval = ddata->status == POWER_SUPPLY_STATUS_CHARGING; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void cpcap_charger_set_cable_path(struct cpcap_charger_ddata *ddata, + bool enabled) +{ + if (!ddata->gpio[0]) + return; + + gpiod_set_value(ddata->gpio[0], enabled); +} + +static void cpcap_charger_set_inductive_path(struct cpcap_charger_ddata *ddata, + bool enabled) +{ + if (!ddata->gpio[1]) + return; + + gpiod_set_value(ddata->gpio[1], enabled); +} + +static int cpcap_charger_set_state(struct cpcap_charger_ddata *ddata, + int max_voltage, int charge_current, + int trickle_current) +{ + bool enable; + int error; + + enable = max_voltage && (charge_current || trickle_current); + dev_dbg(ddata->dev, "%s enable: %i\n", __func__, enable); + + if (!enable) { + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, + 0x3fff, + CPCAP_REG_CRM_FET_OVRD | + CPCAP_REG_CRM_FET_CTRL); + if (error) { + ddata->status = POWER_SUPPLY_STATUS_UNKNOWN; + goto out_err; + } + + ddata->status = POWER_SUPPLY_STATUS_DISCHARGING; + + return 0; + } + + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, 0x3fff, + CPCAP_REG_CRM_CHRG_LED_EN | + trickle_current | + CPCAP_REG_CRM_FET_OVRD | + CPCAP_REG_CRM_FET_CTRL | + max_voltage | + charge_current); + if (error) { + ddata->status = POWER_SUPPLY_STATUS_UNKNOWN; + goto out_err; + } + + ddata->status = POWER_SUPPLY_STATUS_CHARGING; + + return 0; + +out_err: + dev_err(ddata->dev, "%s failed with %i\n", __func__, error); + + return error; +} + +static bool cpcap_charger_vbus_valid(struct cpcap_charger_ddata *ddata) +{ + int error, value = 0; + struct iio_channel *channel = + ddata->channels[CPCAP_CHARGER_IIO_VBUS]; + + error = iio_read_channel_processed(channel, &value); + if (error >= 0) + return value > 3900 ? true : false; + + dev_err(ddata->dev, "error reading VBUS: %i\n", error); + + return false; +} + +/* VBUS control functions for the USB PHY companion */ + +static void cpcap_charger_vbus_work(struct work_struct *work) +{ + struct cpcap_charger_ddata *ddata; + bool vbus = false; + int error; + + ddata = container_of(work, struct cpcap_charger_ddata, + vbus_work.work); + + if (ddata->vbus_enabled) { + vbus = cpcap_charger_vbus_valid(ddata); + if (vbus) { + dev_info(ddata->dev, "VBUS already provided\n"); + + return; + } + + cpcap_charger_set_cable_path(ddata, false); + cpcap_charger_set_inductive_path(ddata, false); + + error = cpcap_charger_set_state(ddata, 0, 0, 0); + if (error) + goto out_err; + + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, + CPCAP_REG_CRM_RVRSMODE, + CPCAP_REG_CRM_RVRSMODE); + if (error) + goto out_err; + } else { + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, + CPCAP_REG_CRM_RVRSMODE, 0); + if (error) + goto out_err; + + cpcap_charger_set_cable_path(ddata, true); + cpcap_charger_set_inductive_path(ddata, true); + } + + return; + +out_err: + dev_err(ddata->dev, "%s could not %s vbus: %i\n", __func__, + ddata->vbus_enabled ? "enable" : "disable", error); +} + +static int cpcap_charger_set_vbus(struct phy_companion *comparator, + bool enabled) +{ + struct cpcap_charger_ddata *ddata = + container_of(comparator, struct cpcap_charger_ddata, + comparator); + + ddata->vbus_enabled = enabled; + schedule_delayed_work(&ddata->vbus_work, 0); + + return 0; +} + +/* Charger interrupt handling functions */ + +static int cpcap_charger_get_ints_state(struct cpcap_charger_ddata *ddata, + struct cpcap_charger_ints_state *s) +{ + int val, error; + + error = regmap_read(ddata->reg, CPCAP_REG_INTS1, &val); + if (error) + return error; + + s->chrg_det = val & BIT(13); + s->rvrs_chrg = val & BIT(12); + s->vbusov = val & BIT(11); + + error = regmap_read(ddata->reg, CPCAP_REG_INTS2, &val); + if (error) + return error; + + s->chrg_se1b = val & BIT(13); + s->rvrs_mode = val & BIT(6); + s->chrgcurr1 = val & BIT(4); + s->vbusvld = val & BIT(3); + + error = regmap_read(ddata->reg, CPCAP_REG_INTS4, &val); + if (error) + return error; + + s->battdetb = val & BIT(6); + + return 0; +} + +static void cpcap_usb_detect(struct work_struct *work) +{ + struct cpcap_charger_ddata *ddata; + struct cpcap_charger_ints_state s; + int error; + + ddata = container_of(work, struct cpcap_charger_ddata, + detect_work.work); + + error = cpcap_charger_get_ints_state(ddata, &s); + if (error) + return; + + if (cpcap_charger_vbus_valid(ddata) && s.chrgcurr1) { + int max_current; + + if (cpcap_charger_battery_found(ddata)) + max_current = CPCAP_REG_CRM_ICHRG_1A584; + else + max_current = CPCAP_REG_CRM_ICHRG_0A528; + + error = cpcap_charger_set_state(ddata, + CPCAP_REG_CRM_VCHRG_4V20, + max_current, + CPCAP_REG_CRM_TR_0A72); + if (error) + goto out_err; + } else { + error = cpcap_charger_set_state(ddata, 0, 0, 0); + if (error) + goto out_err; + } + + return; + +out_err: + dev_err(ddata->dev, "%s failed with %i\n", __func__, error); +} + +static irqreturn_t cpcap_charger_irq_thread(int irq, void *data) +{ + struct cpcap_charger_ddata *ddata = data; + + if (!atomic_read(&ddata->active)) + return IRQ_NONE; + + schedule_delayed_work(&ddata->detect_work, 0); + + return IRQ_HANDLED; +} + +static int cpcap_usb_init_irq(struct platform_device *pdev, + struct cpcap_charger_ddata *ddata, + const char *name) +{ + struct cpcap_interrupt_desc *d; + int irq, error; + + irq = platform_get_irq_byname(pdev, name); + if (!irq) + return -ENODEV; + + error = devm_request_threaded_irq(ddata->dev, irq, NULL, + cpcap_charger_irq_thread, + IRQF_SHARED, + name, ddata); + if (error) { + dev_err(ddata->dev, "could not get irq %s: %i\n", + name, error); + + return error; + } + + d = devm_kzalloc(ddata->dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->name = name; + d->irq = irq; + list_add(&d->node, &ddata->irq_list); + + return 0; +} + +static const char * const cpcap_charger_irqs[] = { + /* REG_INT_0 */ + "chrg_det", "rvrs_chrg", + + /* REG_INT1 */ + "chrg_se1b", "se0conn", "rvrs_mode", "chrgcurr1", "vbusvld", + + /* REG_INT_3 */ + "battdetb", +}; + +static int cpcap_usb_init_interrupts(struct platform_device *pdev, + struct cpcap_charger_ddata *ddata) +{ + int i, error; + + for (i = 0; i < ARRAY_SIZE(cpcap_charger_irqs); i++) { + error = cpcap_usb_init_irq(pdev, ddata, cpcap_charger_irqs[i]); + if (error) + return error; + } + + return 0; +} + +static void cpcap_charger_init_optional_gpios(struct cpcap_charger_ddata *ddata) +{ + int i; + + for (i = 0; i < 2; i++) { + ddata->gpio[i] = devm_gpiod_get_index(ddata->dev, "mode", + i, GPIOD_OUT_HIGH); + if (IS_ERR(ddata->gpio[i])) { + dev_info(ddata->dev, "no mode change GPIO%i: %li\n", + i, PTR_ERR(ddata->gpio[i])); + ddata->gpio[i] = NULL; + } + } +} + +static int cpcap_charger_init_iio(struct cpcap_charger_ddata *ddata) +{ + const char * const names[CPCAP_CHARGER_IIO_NR] = { + "battdetb", "battp", "vbus", "chg_isense", "batti", + }; + int error, i; + + for (i = 0; i < CPCAP_CHARGER_IIO_NR; i++) { + ddata->channels[i] = devm_iio_channel_get(ddata->dev, + names[i]); + if (IS_ERR(ddata->channels[i])) { + error = PTR_ERR(ddata->channels[i]); + goto out_err; + } + + if (!ddata->channels[i]->indio_dev) { + error = -ENXIO; + goto out_err; + } + } + + return 0; + +out_err: + dev_err(ddata->dev, "could not initialize VBUS or ID IIO: %i\n", + error); + + return error; +} + +static const struct power_supply_desc cpcap_charger_usb_desc = { + .name = "cpcap_usb", + .type = POWER_SUPPLY_TYPE_USB, + .properties = cpcap_charger_props, + .num_properties = ARRAY_SIZE(cpcap_charger_props), + .get_property = cpcap_charger_get_property, +}; + +#ifdef CONFIG_OF +static const struct of_device_id cpcap_charger_id_table[] = { + { + .compatible = "motorola,mapphone-cpcap-charger", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, cpcap_charger_id_table); +#endif + +static int cpcap_charger_probe(struct platform_device *pdev) +{ + struct cpcap_charger_ddata *ddata; + const struct of_device_id *of_id; + int error; + + of_id = of_match_device(of_match_ptr(cpcap_charger_id_table), + &pdev->dev); + if (!of_id) + return -EINVAL; + + ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + ddata->dev = &pdev->dev; + + ddata->reg = dev_get_regmap(ddata->dev->parent, NULL); + if (!ddata->reg) + return -ENODEV; + + INIT_LIST_HEAD(&ddata->irq_list); + INIT_DELAYED_WORK(&ddata->detect_work, cpcap_usb_detect); + INIT_DELAYED_WORK(&ddata->vbus_work, cpcap_charger_vbus_work); + platform_set_drvdata(pdev, ddata); + + error = cpcap_charger_init_iio(ddata); + if (error) + return error; + + atomic_set(&ddata->active, 1); + + ddata->usb = devm_power_supply_register(ddata->dev, + &cpcap_charger_usb_desc, + NULL); + if (IS_ERR(ddata->usb)) { + error = PTR_ERR(ddata->usb); + dev_err(ddata->dev, "failed to register USB charger: %i\n", + error); + + return error; + } + + error = cpcap_usb_init_interrupts(pdev, ddata); + if (error) + return error; + + ddata->comparator.set_vbus = cpcap_charger_set_vbus; + error = omap_usb2_set_comparator(&ddata->comparator); + if (error == -ENODEV) { + dev_info(ddata->dev, "charger needs phy, deferring probe\n"); + return -EPROBE_DEFER; + } + + cpcap_charger_init_optional_gpios(ddata); + + schedule_delayed_work(&ddata->detect_work, 0); + + return 0; +} + +static int cpcap_charger_remove(struct platform_device *pdev) +{ + struct cpcap_charger_ddata *ddata = platform_get_drvdata(pdev); + int error; + + atomic_set(&ddata->active, 0); + error = omap_usb2_set_comparator(NULL); + if (error) + dev_warn(ddata->dev, "could not clear USB comparator: %i\n", + error); + + error = cpcap_charger_set_state(ddata, 0, 0, 0); + if (error) + dev_warn(ddata->dev, "could not clear charger: %i\n", + error); + cancel_delayed_work_sync(&ddata->vbus_work); + cancel_delayed_work_sync(&ddata->detect_work); + + return 0; +} + +static struct platform_driver cpcap_charger_driver = { + .probe = cpcap_charger_probe, + .driver = { + .name = "cpcap-charger", + .of_match_table = of_match_ptr(cpcap_charger_id_table), + }, + .remove = cpcap_charger_remove, +}; +module_platform_driver(cpcap_charger_driver); + +MODULE_AUTHOR("Tony Lindgren <tony@atomide.com>"); +MODULE_DESCRIPTION("CPCAP Battery Charger Interface driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:cpcap-charger"); diff --git a/drivers/power/supply/lego_ev3_battery.c b/drivers/power/supply/lego_ev3_battery.c new file mode 100644 index 000000000000..7b993d669f7f --- /dev/null +++ b/drivers/power/supply/lego_ev3_battery.c @@ -0,0 +1,228 @@ +/* + * Battery driver for LEGO MINDSTORMS EV3 + * + * Copyright (C) 2017 David Lechner <david@lechnology.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/gpio/consumer.h> +#include <linux/iio/consumer.h> +#include <linux/iio/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/power_supply.h> + +struct lego_ev3_battery { + struct iio_channel *iio_v; + struct iio_channel *iio_i; + struct gpio_desc *rechargeable_gpio; + struct power_supply *psy; + int technology; + int v_max; + int v_min; +}; + +static int lego_ev3_battery_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct lego_ev3_battery *batt = power_supply_get_drvdata(psy); + int val2; + + switch (psp) { + case POWER_SUPPLY_PROP_TECHNOLOGY: + val->intval = batt->technology; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + /* battery voltage is iio channel * 2 + Vce of transistor */ + iio_read_channel_processed(batt->iio_v, &val->intval); + val->intval *= 2000; + val->intval += 200000; + /* plus adjust for shunt resistor drop */ + iio_read_channel_processed(batt->iio_i, &val2); + val2 *= 1000; + val2 /= 15; + val->intval += val2; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: + val->intval = batt->v_max; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: + val->intval = batt->v_min; + break; + case POWER_SUPPLY_PROP_CURRENT_NOW: + /* battery current is iio channel / 15 / 0.05 ohms */ + iio_read_channel_processed(batt->iio_i, &val->intval); + val->intval *= 20000; + val->intval /= 15; + break; + case POWER_SUPPLY_PROP_SCOPE: + val->intval = POWER_SUPPLY_SCOPE_SYSTEM; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int lego_ev3_battery_set_property(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val) +{ + struct lego_ev3_battery *batt = power_supply_get_drvdata(psy); + + switch (psp) { + case POWER_SUPPLY_PROP_TECHNOLOGY: + /* + * Only allow changing technology from Unknown to NiMH. Li-ion + * batteries are automatically detected and should not be + * overridden. Rechargeable AA batteries, on the other hand, + * cannot be automatically detected, and so must be manually + * specified. This should only be set once during system init, + * so there is no mechanism to go back to Unknown. + */ + if (batt->technology != POWER_SUPPLY_TECHNOLOGY_UNKNOWN) + return -EINVAL; + switch (val->intval) { + case POWER_SUPPLY_TECHNOLOGY_NiMH: + batt->technology = POWER_SUPPLY_TECHNOLOGY_NiMH; + batt->v_max = 7800000; + batt->v_min = 5400000; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static int lego_ev3_battery_property_is_writeable(struct power_supply *psy, + enum power_supply_property psp) +{ + struct lego_ev3_battery *batt = power_supply_get_drvdata(psy); + + return psp == POWER_SUPPLY_PROP_TECHNOLOGY && + batt->technology == POWER_SUPPLY_TECHNOLOGY_UNKNOWN; +} + +static enum power_supply_property lego_ev3_battery_props[] = { + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_SCOPE, +}; + +static const struct power_supply_desc lego_ev3_battery_desc = { + .name = "lego-ev3-battery", + .type = POWER_SUPPLY_TYPE_BATTERY, + .properties = lego_ev3_battery_props, + .num_properties = ARRAY_SIZE(lego_ev3_battery_props), + .get_property = lego_ev3_battery_get_property, + .set_property = lego_ev3_battery_set_property, + .property_is_writeable = lego_ev3_battery_property_is_writeable, +}; + +static int lego_ev3_battery_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct lego_ev3_battery *batt; + struct power_supply_config psy_cfg = {}; + int err; + + batt = devm_kzalloc(dev, sizeof(*batt), GFP_KERNEL); + if (!batt) + return -ENOMEM; + + platform_set_drvdata(pdev, batt); + + batt->iio_v = devm_iio_channel_get(dev, "voltage"); + err = PTR_ERR_OR_ZERO(batt->iio_v); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get voltage iio channel\n"); + return err; + } + + batt->iio_i = devm_iio_channel_get(dev, "current"); + err = PTR_ERR_OR_ZERO(batt->iio_i); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get current iio channel\n"); + return err; + } + + batt->rechargeable_gpio = devm_gpiod_get(dev, "rechargeable", GPIOD_IN); + err = PTR_ERR_OR_ZERO(batt->rechargeable_gpio); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get rechargeable gpio\n"); + return err; + } + + /* + * The rechargeable battery indication switch cannot be changed without + * removing the battery, so we only need to read it once. + */ + if (gpiod_get_value(batt->rechargeable_gpio)) { + /* 2-cell Li-ion, 7.4V nominal */ + batt->technology = POWER_SUPPLY_TECHNOLOGY_LION; + batt->v_max = 84000000; + batt->v_min = 60000000; + } else { + /* 6x AA Alkaline, 9V nominal */ + batt->technology = POWER_SUPPLY_TECHNOLOGY_UNKNOWN; + batt->v_max = 90000000; + batt->v_min = 48000000; + } + + psy_cfg.of_node = pdev->dev.of_node; + psy_cfg.drv_data = batt; + + batt->psy = devm_power_supply_register(dev, &lego_ev3_battery_desc, + &psy_cfg); + err = PTR_ERR_OR_ZERO(batt->psy); + if (err) { + dev_err(dev, "failed to register power supply\n"); + return err; + } + + return 0; +} + +static const struct of_device_id of_lego_ev3_battery_match[] = { + { .compatible = "lego,ev3-battery", }, + { } +}; +MODULE_DEVICE_TABLE(of, of_lego_ev3_battery_match); + +static struct platform_driver lego_ev3_battery_driver = { + .driver = { + .name = "lego-ev3-battery", + .of_match_table = of_lego_ev3_battery_match, + }, + .probe = lego_ev3_battery_probe, +}; +module_platform_driver(lego_ev3_battery_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David Lechner <david@lechnology.com>"); +MODULE_DESCRIPTION("LEGO MINDSTORMS EV3 Battery Driver"); diff --git a/drivers/power/supply/lp8788-charger.c b/drivers/power/supply/lp8788-charger.c index 509e2b341bd6..677f7c40b25a 100644 --- a/drivers/power/supply/lp8788-charger.c +++ b/drivers/power/supply/lp8788-charger.c @@ -651,7 +651,7 @@ static ssize_t lp8788_show_eoc_time(struct device *dev, { struct lp8788_charger *pchg = dev_get_drvdata(dev); char *stime[] = { "400ms", "5min", "10min", "15min", - "20min", "25min", "30min" "No timeout" }; + "20min", "25min", "30min", "No timeout" }; u8 val; lp8788_read_byte(pchg->lp, LP8788_CHG_EOC, &val); diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c index 4adf2ba021ce..7efb908f4451 100644 --- a/drivers/power/supply/ltc2941-battery-gauge.c +++ b/drivers/power/supply/ltc2941-battery-gauge.c @@ -9,6 +9,7 @@ */ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/swab.h> @@ -61,7 +62,7 @@ struct ltc294x_info { struct power_supply *supply; /* Supply pointer */ struct power_supply_desc supply_desc; /* Supply description */ struct delayed_work work; /* Work scheduler */ - int num_regs; /* Number of registers (chip type) */ + unsigned long num_regs; /* Number of registers (chip type) */ int charge; /* Last charge register content */ int r_sense; /* mOhm */ int Qlsb; /* nAh */ @@ -387,7 +388,7 @@ static int ltc294x_i2c_probe(struct i2c_client *client, np = of_node_get(client->dev.of_node); - info->num_regs = id->driver_data; + info->num_regs = (unsigned long)of_device_get_match_data(&client->dev); info->supply_desc.name = np->name; /* r_sense can be negative, when sense+ is connected to the battery @@ -497,9 +498,23 @@ static const struct i2c_device_id ltc294x_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, ltc294x_i2c_id); +static const struct of_device_id ltc294x_i2c_of_match[] = { + { + .compatible = "lltc,ltc2941", + .data = (void *)LTC2941_NUM_REGS + }, + { + .compatible = "lltc,ltc2943", + .data = (void *)LTC2943_NUM_REGS + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ltc294x_i2c_of_match); + static struct i2c_driver ltc294x_driver = { .driver = { .name = "LTC2941", + .of_match_table = ltc294x_i2c_of_match, .pm = LTC294X_PM_OPS, }, .probe = ltc294x_i2c_probe, diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c index e7c3649b31a0..33c40f79d23d 100644 --- a/drivers/power/supply/max17040_battery.c +++ b/drivers/power/supply/max17040_battery.c @@ -277,9 +277,17 @@ static const struct i2c_device_id max17040_id[] = { }; MODULE_DEVICE_TABLE(i2c, max17040_id); +static const struct of_device_id max17040_of_match[] = { + { .compatible = "maxim,max17040" }, + { .compatible = "maxim,max77836-battery" }, + { }, +}; +MODULE_DEVICE_TABLE(of, max17040_of_match); + static struct i2c_driver max17040_i2c_driver = { .driver = { .name = "max17040", + .of_match_table = max17040_of_match, .pm = MAX17040_PM_OPS, }, .probe = max17040_probe, diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c index 353765a5f44c..15947dbb511e 100644 --- a/drivers/power/supply/sbs-charger.c +++ b/drivers/power/supply/sbs-charger.c @@ -137,10 +137,7 @@ static enum power_supply_property sbs_properties[] = { static bool sbs_readable_reg(struct device *dev, unsigned int reg) { - if (reg < SBS_CHARGER_REG_SPEC_INFO) - return false; - else - return true; + return reg >= SBS_CHARGER_REG_SPEC_INFO; } static bool sbs_volatile_reg(struct device *dev, unsigned int reg) diff --git a/drivers/power/supply/tps65217_charger.c b/drivers/power/supply/tps65217_charger.c index 29b61e81b385..1f5234098aaf 100644 --- a/drivers/power/supply/tps65217_charger.c +++ b/drivers/power/supply/tps65217_charger.c @@ -58,8 +58,6 @@ static int tps65217_config_charger(struct tps65217_charger *charger) { int ret; - dev_dbg(charger->dev, "%s\n", __func__); - /* * tps65217 rev. G, p. 31 (see p. 32 for NTC schematic) * @@ -205,8 +203,6 @@ static int tps65217_charger_probe(struct platform_device *pdev) int ret; int i; - dev_dbg(&pdev->dev, "%s\n", __func__); - charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL); if (!charger) return -ENOMEM; diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index bcd4dc304f27..990ff3d218bc 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -1117,7 +1117,7 @@ fail: return ret; } -static int __exit twl4030_bci_remove(struct platform_device *pdev) +static int twl4030_bci_remove(struct platform_device *pdev) { struct twl4030_bci *bci = platform_get_drvdata(pdev); @@ -1148,11 +1148,11 @@ MODULE_DEVICE_TABLE(of, twl_bci_of_match); static struct platform_driver twl4030_bci_driver = { .probe = twl4030_bci_probe, + .remove = twl4030_bci_remove, .driver = { .name = "twl4030_bci", .of_match_table = of_match_ptr(twl_bci_of_match), }, - .remove = __exit_p(twl4030_bci_remove), }; module_platform_driver(twl4030_bci_driver); diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 6ff61dad5e21..62fed9dc893e 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -183,11 +183,33 @@ static void jsfd_read(char *buf, unsigned long p, size_t togo) { } } -static void jsfd_do_request(struct request_queue *q) +static int jsfd_queue; + +static struct request *jsfd_next_request(void) +{ + struct request_queue *q; + struct request *rq; + int old_pos = jsfd_queue; + + do { + q = jsfd_disk[jsfd_queue]->queue; + if (++jsfd_queue == JSF_MAX) + jsfd_queue = 0; + if (q) { + rq = blk_fetch_request(q); + if (rq) + return rq; + } + } while (jsfd_queue != old_pos); + + return NULL; +} + +static void jsfd_request(void) { struct request *req; - req = blk_fetch_request(q); + req = jsfd_next_request(); while (req) { struct jsfd_part *jdp = req->rq_disk->private_data; unsigned long offset = blk_rq_pos(req) << 9; @@ -211,10 +233,15 @@ static void jsfd_do_request(struct request_queue *q) err = 0; end: if (!__blk_end_request_cur(req, err)) - req = blk_fetch_request(q); + req = jsfd_next_request(); } } +static void jsfd_do_request(struct request_queue *q) +{ + jsfd_request(); +} + /* * The memory devices use the full 32/64 bits of the offset, and so we cannot * check against negative addresses: they are ok. The return value is weird, @@ -544,8 +571,6 @@ static int jsflash_init(void) return 0; } -static struct request_queue *jsf_queue; - static int jsfd_init(void) { static DEFINE_SPINLOCK(lock); @@ -562,6 +587,11 @@ static int jsfd_init(void) struct gendisk *disk = alloc_disk(1); if (!disk) goto out; + disk->queue = blk_init_queue(jsfd_do_request, &lock); + if (!disk->queue) { + put_disk(disk); + goto out; + } jsfd_disk[i] = disk; } @@ -570,13 +600,6 @@ static int jsfd_init(void) goto out; } - jsf_queue = blk_init_queue(jsfd_do_request, &lock); - if (!jsf_queue) { - err = -ENOMEM; - unregister_blkdev(JSFD_MAJOR, "jsfd"); - goto out; - } - for (i = 0; i < JSF_MAX; i++) { struct gendisk *disk = jsfd_disk[i]; if ((i & JSF_PART_MASK) >= JSF_NPART) continue; @@ -589,7 +612,6 @@ static int jsfd_init(void) disk->fops = &jsfd_fops; set_capacity(disk, jdp->dsize >> 9); disk->private_data = jdp; - disk->queue = jsf_queue; add_disk(disk); set_disk_ro(disk, 1); } @@ -619,6 +641,7 @@ static void __exit jsflash_cleanup_module(void) for (i = 0; i < JSF_MAX; i++) { if ((i & JSF_PART_MASK) >= JSF_NPART) continue; del_gendisk(jsfd_disk[i]); + blk_cleanup_queue(jsfd_disk[i]->queue); put_disk(jsfd_disk[i]); } if (jsf0.busy) @@ -628,7 +651,6 @@ static void __exit jsflash_cleanup_module(void) misc_deregister(&jsf_dev); unregister_blkdev(JSFD_MAJOR, "jsfd"); - blk_cleanup_queue(jsf_queue); } module_init(jsflash_init_module); diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index fc2855565a51..93dbe58c47c8 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -166,6 +166,7 @@ scsi_mod-y += scsi_scan.o scsi_sysfs.o scsi_devinfo.o scsi_mod-$(CONFIG_SCSI_NETLINK) += scsi_netlink.o scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o +scsi_mod-$(CONFIG_BLK_DEBUG_FS) += scsi_debugfs.o scsi_mod-y += scsi_trace.o scsi_logging.o scsi_mod-$(CONFIG_PM) += scsi_pm.o scsi_mod-$(CONFIG_SCSI_DH) += scsi_dh.o diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 257bbdd0f0b8..6d7840b096e6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -56,7 +56,7 @@ struct lpfc_sli2_slim; #define LPFC_MAX_SG_SEG_CNT 4096 /* sg element count per scsi cmnd */ #define LPFC_MAX_SGL_SEG_CNT 512 /* SGL element count per scsi cmnd */ #define LPFC_MAX_BPL_SEG_CNT 4096 /* BPL element count per scsi cmnd */ -#define LPFC_MIN_NVME_SEG_CNT 254 +#define LPFC_MAX_NVME_SEG_CNT 128 /* max SGL element cnt per NVME cmnd */ #define LPFC_MAX_SGE_SIZE 0x80000000 /* Maximum data allowed in a SGE */ #define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */ @@ -474,6 +474,8 @@ struct lpfc_vport { unsigned long rcv_buffer_time_stamp; uint32_t vport_flag; #define STATIC_VPORT 1 +#define FAWWPN_SET 2 +#define FAWWPN_PARAM_CHG 4 uint16_t fdmi_num_disc; uint32_t fdmi_hba_mask; @@ -781,6 +783,7 @@ struct lpfc_hba { uint32_t cfg_nvmet_fb_size; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; + uint32_t cfg_nvme_seg_cnt; uint32_t cfg_sg_dma_buf_size; uint64_t cfg_soft_wwnn; uint64_t cfg_soft_wwpn; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 22819afbaef5..513fd07715cd 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -2292,6 +2292,8 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; unsigned int cnt = count; + uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level; + u32 *fawwpn_key = (uint32_t *)&vport->fc_sparam.un.vendorVersion[0]; /* * We're doing a simple sanity check for soft_wwpn setting. @@ -2305,6 +2307,12 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr, * here. The intent is to protect against the random user or * application that is just writing attributes. */ + if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0051 "LPFC_DRIVER_NAME" soft wwpn can not" + " be enabled: fawwpn is enabled\n"); + return -EINVAL; + } /* count may include a LF at end of string */ if (buf[cnt-1] == '\n') @@ -3335,7 +3343,7 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP, * percentage will go to NVME. */ LPFC_ATTR_R(xri_split, 50, 10, 90, - "Division of XRI resources between SCSI and NVME"); + "Division of XRI resources between SCSI and NVME"); /* # lpfc_log_verbose: Only turn this flag on if you are willing to risk being diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 18157d2840a3..a1686c2d863c 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2486,6 +2486,10 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi) mbox, *rpi); else { *rpi = lpfc_sli4_alloc_rpi(phba); + if (*rpi == LPFC_RPI_ALLOC_ERROR) { + mempool_free(mbox, phba->mbox_mem_pool); + return -EBUSY; + } status = lpfc_reg_rpi(phba, phba->pport->vpi, phba->pport->fc_myDID, (uint8_t *)&phba->pport->fc_sparam, diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 54e6ac42fbcd..944b32ca4931 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -24,6 +24,7 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *); struct fc_rport; struct fc_frame_header; +struct lpfc_nvmet_rcv_ctx; void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli_read_link_ste(struct lpfc_hba *); void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); @@ -99,7 +100,7 @@ void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *); int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *, struct lpfc_nodelist *); -void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t); +struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did); struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *); int lpfc_nlp_put(struct lpfc_nodelist *); int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp); @@ -245,6 +246,10 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba); void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab); +void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_dmabuf *mp); +int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, @@ -302,6 +307,8 @@ int lpfc_sli_check_eratt(struct lpfc_hba *); void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *, struct lpfc_sli_ring *, uint32_t); void lpfc_sli4_handle_received_buffer(struct lpfc_hba *, struct hbq_dmabuf *); +void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted); void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index d3e9af983015..1487406aea77 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -537,19 +537,53 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) } } +static void +lpfc_ns_rsp_audit_did(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp = NULL; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + /* + * To conserve rpi's, filter out addresses for other + * vports on the same physical HBAs. + */ + if (Did != vport->fc_myDID && + (!lpfc_find_vport_by_did(phba, Did) || + vport->cfg_peer_port_login)) { + if (!phba->nvmet_support) { + /* FCPI/NVMEI path. Process Did */ + lpfc_prep_node_fc4type(vport, Did, fc4_type); + return; + } + /* NVMET path. NVMET only cares about NVMEI nodes. */ + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_type != NLP_NVME_INITIATOR || + ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) + continue; + spin_lock_irq(shost->host_lock); + if (ndlp->nlp_DID == Did) + ndlp->nlp_flag &= ~NLP_NVMET_RECOV; + else + ndlp->nlp_flag |= NLP_NVMET_RECOV; + spin_unlock_irq(shost->host_lock); + } + } +} + static int lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, uint32_t Size) { - struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ct_request *Response = (struct lpfc_sli_ct_request *) mp->virt; - struct lpfc_nodelist *ndlp = NULL; struct lpfc_dmabuf *mlast, *next_mp; uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType; uint32_t Did, CTentry; int Cnt; struct list_head head; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp = NULL; lpfc_set_disctmo(vport); vport->num_disc_nodes = 0; @@ -574,19 +608,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, /* Get next DID from NameServer List */ CTentry = *ctptr++; Did = ((be32_to_cpu(CTentry)) & Mask_DID); - - ndlp = NULL; - - /* - * Check for rscn processing or not - * To conserve rpi's, filter out addresses for other - * vports on the same physical HBAs. - */ - if ((Did != vport->fc_myDID) && - ((lpfc_find_vport_by_did(phba, Did) == NULL) || - vport->cfg_peer_port_login)) - lpfc_prep_node_fc4type(vport, Did, fc4_type); - + lpfc_ns_rsp_audit_did(vport, Did, fc4_type); if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY))) goto nsout1; @@ -596,6 +618,22 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, } + /* All GID_FT entries processed. If the driver is running in + * in target mode, put impacted nodes into recovery and drop + * the RPI to flush outstanding IO. + */ + if (vport->phba->nvmet_support) { + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (!(ndlp->nlp_flag & NLP_NVMET_RECOV)) + continue; + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RECOVERY); + spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NVMET_RECOV; + spin_lock_irq(shost->host_lock); + } + } + nsout1: list_del(&head); return 0; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 913eed822cb8..fce549a91911 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -745,73 +745,102 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; int len = 0; + int cnt; if (phba->nvmet_support) { if (!phba->targetport) return len; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "\nNVME Targetport Statistics\n"); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "LS: Rcv %08x Drop %08x Abort %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_drop), atomic_read(&tgtp->xmt_ls_abort)); if (atomic_read(&tgtp->rcv_ls_req_in) != atomic_read(&tgtp->rcv_ls_req_out)) { - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "Rcv LS: in %08x != out %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_out)); } - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n", atomic_read(&tgtp->xmt_ls_rsp), atomic_read(&tgtp->xmt_ls_drop), atomic_read(&tgtp->xmt_ls_rsp_cmpl), atomic_read(&tgtp->xmt_ls_rsp_error)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP: Rcv %08x Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), atomic_read(&tgtp->rcv_fcp_cmd_drop)); if (atomic_read(&tgtp->rcv_fcp_cmd_in) != atomic_read(&tgtp->rcv_fcp_cmd_out)) { - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "Rcv FCP: in %08x != out %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), atomic_read(&tgtp->rcv_fcp_cmd_out)); } - len += snprintf(buf+len, size-len, - "FCP Rsp: read %08x readrsp %08x write %08x rsp %08x\n", + len += snprintf(buf + len, size - len, + "FCP Rsp: read %08x readrsp %08x " + "write %08x rsp %08x\n", atomic_read(&tgtp->xmt_fcp_read), atomic_read(&tgtp->xmt_fcp_read_rsp), atomic_read(&tgtp->xmt_fcp_write), atomic_read(&tgtp->xmt_fcp_rsp)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP Rsp: abort %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_abort), atomic_read(&tgtp->xmt_fcp_drop)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP Rsp Cmpl: %08x err %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_rsp_cmpl), atomic_read(&tgtp->xmt_fcp_rsp_error), atomic_read(&tgtp->xmt_fcp_rsp_drop)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "ABORT: Xmt %08x Err %08x Cmpl %08x", atomic_read(&tgtp->xmt_abort_rsp), atomic_read(&tgtp->xmt_abort_rsp_error), atomic_read(&tgtp->xmt_abort_cmpl)); - len += snprintf(buf+len, size-len, "\n"); + len += snprintf(buf + len, size - len, "\n"); + + cnt = 0; + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + cnt++; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + if (cnt) { + len += snprintf(buf + len, size - len, + "ABORT: %d ctx entries\n", cnt); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) + break; + len += snprintf(buf + len, size - len, + "Entry: oxid %x state %x " + "flag %x\n", + ctxp->oxid, ctxp->state, + ctxp->flag); + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + } } else { if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return len; @@ -3128,8 +3157,6 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, datqp->queue_id, datqp->entry_count, datqp->entry_size, datqp->host_index, datqp->hba_index); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); - return len; } @@ -5700,10 +5727,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) #ifdef CONFIG_SCSI_LPFC_DEBUG_FS struct lpfc_hba *phba = vport->phba; - if (vport->disc_trc) { - kfree(vport->disc_trc); - vport->disc_trc = NULL; - } + kfree(vport->disc_trc); + vport->disc_trc = NULL; debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ vport->debug_disc_trc = NULL; @@ -5770,10 +5795,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) debugfs_remove(phba->debug_readRef); /* readRef */ phba->debug_readRef = NULL; - if (phba->slow_ring_trc) { - kfree(phba->slow_ring_trc); - phba->slow_ring_trc = NULL; - } + kfree(phba->slow_ring_trc); + phba->slow_ring_trc = NULL; /* slow_ring_trace */ debugfs_remove(phba->debug_slow_ring_trc); diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index f4ff99d95db3..9d5a379f4b15 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -157,6 +157,7 @@ struct lpfc_node_rrq { #define NLP_LOGO_SND 0x00000100 /* sent LOGO request for this entry */ #define NLP_RNID_SND 0x00000400 /* sent RNID request for this entry */ #define NLP_ELS_SND_MASK 0x000007e0 /* sent ELS request for this entry */ +#define NLP_NVMET_RECOV 0x00001000 /* NVMET auditing node for recovery. */ #define NLP_DEFER_RM 0x00010000 /* Remove this ndlp if no longer used */ #define NLP_DELAY_TMO 0x00020000 /* delay timeout is running for node */ #define NLP_NPR_2B_DISC 0x00040000 /* node is included in num_disc_nodes */ diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index a5ca37e45fb6..67827e397431 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -603,9 +603,11 @@ lpfc_check_clean_addr_bit(struct lpfc_vport *vport, memcmp(&vport->fabric_portname, &sp->portName, sizeof(struct lpfc_name)) || memcmp(&vport->fabric_nodename, &sp->nodeName, - sizeof(struct lpfc_name))) + sizeof(struct lpfc_name)) || + (vport->vport_flag & FAWWPN_PARAM_CHG)) { fabric_param_changed = 1; - + vport->vport_flag &= ~FAWWPN_PARAM_CHG; + } /* * Word 1 Bit 31 in common service parameter is overloaded. * Word 1 Bit 31 in FLOGI request is multiple NPort request @@ -895,10 +897,9 @@ lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * Cannot find existing Fabric ndlp, so allocate a * new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, PT2PT_RemoteID); if (!ndlp) goto fail; - lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -1364,7 +1365,6 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) int lpfc_initial_flogi(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; vport->port_state = LPFC_FLOGI; @@ -1374,10 +1374,9 @@ lpfc_initial_flogi(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Set the node type */ ndlp->nlp_type |= NLP_FABRIC; /* Put ndlp onto node list */ @@ -1418,17 +1417,15 @@ lpfc_initial_flogi(struct lpfc_vport *vport) int lpfc_initial_fdisc(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; /* First look for the Fabric ndlp */ ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Put ndlp onto node list */ lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { @@ -1564,14 +1561,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->active_rrq_pool); return ndlp; } - new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC); + new_ndlp = lpfc_nlp_init(vport, ndlp->nlp_DID); if (!new_ndlp) { if (active_rrqs_xri_bitmap) mempool_free(active_rrqs_xri_bitmap, phba->active_rrq_pool); return ndlp; } - lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID); } else if (!NLP_CHK_NODE_ACT(new_ndlp)) { rc = memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)); @@ -2845,10 +2841,9 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) ndlp = lpfc_findnode_did(vport, nportid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, nportid); if (!ndlp) return 1; - lpfc_nlp_init(vport, ndlp, nportid); lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -2938,10 +2933,9 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) ndlp = lpfc_findnode_did(vport, nportid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, nportid); if (!ndlp) return 1; - lpfc_nlp_init(vport, ndlp, nportid); lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -4403,7 +4397,7 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); memset(pcmd, 0, cmdsize); - *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); + *((uint32_t *)(pcmd)) = elsrspcmd; pcmd += sizeof(uint32_t); /* For PRLI, remainder of payload is PRLI parameter page */ @@ -5867,8 +5861,11 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport) (ndlp->nlp_state == NLP_STE_UNUSED_NODE) || !lpfc_rscn_payload_check(vport, ndlp->nlp_DID)) continue; + + /* NVME Target mode does not do RSCN Recovery. */ if (vport->phba->nvmet_support) continue; + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); lpfc_cancel_retry_delay_tmo(vport, ndlp); @@ -6133,7 +6130,6 @@ int lpfc_els_handle_rscn(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp; - struct lpfc_hba *phba = vport->phba; /* Ignore RSCN if the port is being torn down. */ if (vport->load_flag & FC_UNLOADING) { @@ -6157,22 +6153,16 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, NameServer_DID); if (ndlp && NLP_CHK_NODE_ACT(ndlp) && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { - /* Good ndlp, issue CT Request to NameServer */ + /* Good ndlp, issue CT Request to NameServer. Need to + * know how many gidfts were issued. If none, then just + * flush the RSCN. Otherwise, the outstanding requests + * need to complete. + */ vport->gidft_inp = 0; - if (lpfc_issue_gidft(vport) == 0) - /* Wait for NameServer query cmpl before we can - * continue - */ + if (lpfc_issue_gidft(vport) > 0) return 1; } else { - /* If login to NameServer does not exist, issue one */ - /* Good status, issue PLOGI to NameServer */ - ndlp = lpfc_findnode_did(vport, NameServer_DID); - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) - /* Wait for NameServer login cmpl before we can - continue */ - return 1; - + /* Nameserver login in question. Revalidate. */ if (ndlp) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_PLOGI_ISSUE); @@ -6182,12 +6172,11 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) } ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE; } else { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, NameServer_DID); if (!ndlp) { lpfc_els_flush_rscn(vport); return 0; } - lpfc_nlp_init(vport, ndlp, NameServer_DID); ndlp->nlp_prev_state = ndlp->nlp_state; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); } @@ -7746,11 +7735,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ndlp = lpfc_findnode_did(vport, did); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, did); if (!ndlp) goto dropit; - - lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); newnode = 1; if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) @@ -8193,7 +8180,6 @@ lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, static void lpfc_start_fdmi(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; /* If this is the first time, allocate an ndlp and initialize @@ -8202,9 +8188,8 @@ lpfc_start_fdmi(struct lpfc_vport *vport) */ ndlp = lpfc_findnode_did(vport, FDMI_DID); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, FDMI_DID); if (ndlp) { - lpfc_nlp_init(vport, ndlp, FDMI_DID); ndlp->nlp_type |= NLP_FABRIC; } else { return; @@ -8257,7 +8242,7 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, NameServer_DID); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, NameServer_DID); if (!ndlp) { if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) { lpfc_disc_start(vport); @@ -8268,7 +8253,6 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) "0251 NameServer login: no memory\n"); return; } - lpfc_nlp_init(vport, ndlp, NameServer_DID); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); if (!ndlp) { @@ -8771,7 +8755,7 @@ lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, pcmd += sizeof(uint32_t); /* Node Name */ pcmd += sizeof(uint32_t); /* Node Name */ memcpy(pcmd, &vport->fc_nodename, 8); - + memset(sp->un.vendorVersion, 0, sizeof(sp->un.vendorVersion)); lpfc_set_disctmo(vport); phba->fc_stat.elsXmitFDISC++; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 180b072beef6..0482c5580331 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -3002,6 +3002,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1; struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct serv_parm *sp = &vport->fc_sparam; uint32_t ed_tov; @@ -3031,6 +3032,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } lpfc_update_vport_wwn(vport); + fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn); if (vport->port_type == LPFC_PHYSICAL_PORT) { memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn)); memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn)); @@ -3309,6 +3311,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + uint8_t attn_type; /* Unblock ELS traffic */ pring = lpfc_phba_elsring(phba); @@ -3325,6 +3328,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } la = (struct lpfc_mbx_read_top *) &pmb->u.mb.un.varReadTop; + attn_type = bf_get(lpfc_mbx_read_top_att_type, la); memcpy(&phba->alpa_map[0], mp->virt, 128); @@ -3337,7 +3341,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (phba->fc_eventTag <= la->eventTag) { phba->fc_stat.LinkMultiEvent++; - if (bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) + if (attn_type == LPFC_ATT_LINK_UP) if (phba->fc_eventTag != 0) lpfc_linkdown(phba); } @@ -3353,7 +3357,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } phba->link_events++; - if ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) && + if ((attn_type == LPFC_ATT_LINK_UP) && !(phba->sli.sli_flag & LPFC_MENLO_MAINT)) { phba->fc_stat.LinkUp++; if (phba->link_flag & LS_LOOPBACK_MODE) { @@ -3379,8 +3383,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) phba->wait_4_mlo_maint_flg); } lpfc_mbx_process_link_up(phba, la); - } else if (bf_get(lpfc_mbx_read_top_att_type, la) == - LPFC_ATT_LINK_DOWN) { + } else if (attn_type == LPFC_ATT_LINK_DOWN || + attn_type == LPFC_ATT_UNEXP_WWPN) { phba->fc_stat.LinkDown++; if (phba->link_flag & LS_LOOPBACK_MODE) lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, @@ -3389,6 +3393,14 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) "Data: x%x x%x x%x\n", la->eventTag, phba->fc_eventTag, phba->pport->port_state, vport->fc_flag); + else if (attn_type == LPFC_ATT_UNEXP_WWPN) + lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, + "1313 Link Down UNEXP WWPN Event x%x received " + "Data: x%x x%x x%x x%x x%x\n", + la->eventTag, phba->fc_eventTag, + phba->pport->port_state, vport->fc_flag, + bf_get(lpfc_mbx_read_top_mm, la), + bf_get(lpfc_mbx_read_top_fa, la)); else lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, "1305 Link Down Event x%x received " @@ -3399,8 +3411,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) bf_get(lpfc_mbx_read_top_fa, la)); lpfc_mbx_issue_link_down(phba); } - if ((phba->sli.sli_flag & LPFC_MENLO_MAINT) && - ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP))) { + if (phba->sli.sli_flag & LPFC_MENLO_MAINT && + attn_type == LPFC_ATT_LINK_UP) { if (phba->link_state != LPFC_LINK_DOWN) { phba->fc_stat.LinkDown++; lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, @@ -4136,7 +4148,6 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int old_state, int new_state) { struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - struct lpfc_hba *phba = vport->phba; if (new_state == NLP_STE_UNMAPPED_NODE) { ndlp->nlp_flag &= ~NLP_NODEV_REMOVE; @@ -4155,14 +4166,14 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_unregister_remote_port(ndlp); } - /* Notify the NVME transport of this rport's loss */ - if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && - (vport->phba->nvmet_support == 0) && - ((ndlp->nlp_fc4_type & NLP_FC4_NVME) || - (ndlp->nlp_DID == Fabric_DID))) { + /* Notify the NVME transport of this rport's loss on the + * Initiator. For NVME Target, should upcall transport + * in the else clause when API available. + */ + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) { vport->phba->nport_event_cnt++; - lpfc_nvme_unregister_port(vport, ndlp); + if (vport->phba->nvmet_support == 0) + lpfc_nvme_unregister_port(vport, ndlp); } } @@ -4368,10 +4379,17 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint32_t did; unsigned long flags; unsigned long *active_rrqs_xri_bitmap = NULL; + int rpi = LPFC_RPI_ALLOC_ERROR; if (!ndlp) return NULL; + if (phba->sli_rev == LPFC_SLI_REV4) { + rpi = lpfc_sli4_alloc_rpi(vport->phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) + return NULL; + } + spin_lock_irqsave(&phba->ndlp_lock, flags); /* The ndlp should not be in memory free mode */ if (NLP_CHK_FREE_REQ(ndlp)) { @@ -4381,7 +4399,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, kref_read(&ndlp->kref)); - return NULL; + goto free_rpi; } /* The ndlp should not already be in active mode */ if (NLP_CHK_NODE_ACT(ndlp)) { @@ -4391,7 +4409,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, kref_read(&ndlp->kref)); - return NULL; + goto free_rpi; } /* Keep the original DID */ @@ -4409,7 +4427,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, spin_unlock_irqrestore(&phba->ndlp_lock, flags); if (vport->phba->sli_rev == LPFC_SLI_REV4) { - ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + ndlp->nlp_rpi = rpi; lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0008 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, @@ -4426,6 +4444,11 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "node enable: did:x%x", ndlp->nlp_DID, 0, 0); return ndlp; + +free_rpi: + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli4_free_rpi(vport->phba, rpi); + return NULL; } void @@ -5104,65 +5127,82 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) ndlp = lpfc_findnode_did(vport, did); if (!ndlp) { + if (vport->phba->nvmet_support) + return NULL; if ((vport->fc_flag & FC_RSCN_MODE) != 0 && lpfc_rscn_payload_check(vport, did) == 0) return NULL; - ndlp = (struct lpfc_nodelist *) - mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, did); if (!ndlp) return NULL; - lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); - if (vport->phba->nvmet_support) - return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); return ndlp; } else if (!NLP_CHK_NODE_ACT(ndlp)) { + if (vport->phba->nvmet_support) + return NULL; ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE); if (!ndlp) return NULL; - if (vport->phba->nvmet_support) - return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); return ndlp; } + /* The NVME Target does not want to actively manage an rport. + * The goal is to allow the target to reset its state and clear + * pending IO in preparation for the initiator to recover. + */ if ((vport->fc_flag & FC_RSCN_MODE) && !(vport->fc_flag & FC_NDISC_ACTIVE)) { if (lpfc_rscn_payload_check(vport, did)) { - /* If we've already received a PLOGI from this NPort - * we don't need to try to discover it again. - */ - if (ndlp->nlp_flag & NLP_RCV_PLOGI) - return NULL; /* Since this node is marked for discovery, * delay timeout is not needed. */ lpfc_cancel_retry_delay_tmo(vport, ndlp); + + /* NVME Target mode waits until rport is known to be + * impacted by the RSCN before it transitions. No + * active management - just go to NPR provided the + * node had a valid login. + */ if (vport->phba->nvmet_support) return ndlp; + + /* If we've already received a PLOGI from this NPort + * we don't need to try to discover it again. + */ + if (ndlp->nlp_flag & NLP_RCV_PLOGI) + return NULL; + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); } else ndlp = NULL; } else { - /* If we've already received a PLOGI from this NPort, - * or we are already in the process of discovery on it, - * we don't need to try to discover it again. + /* If the initiator received a PLOGI from this NPort or if the + * initiator is already in the process of discovery on it, + * there's no need to try to discover it again. */ if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE || ndlp->nlp_state == NLP_STE_PLOGI_ISSUE || - ndlp->nlp_flag & NLP_RCV_PLOGI) + (!vport->phba->nvmet_support && + ndlp->nlp_flag & NLP_RCV_PLOGI)) return NULL; - lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + if (vport->phba->nvmet_support) return ndlp; + + /* Moving to NPR state clears unsolicited flags and + * allows for rediscovery + */ + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5887,16 +5927,31 @@ lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi) return NULL; } -void -lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - uint32_t did) +struct lpfc_nodelist * +lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did) { + struct lpfc_nodelist *ndlp; + int rpi = LPFC_RPI_ALLOC_ERROR; + + if (vport->phba->sli_rev == LPFC_SLI_REV4) { + rpi = lpfc_sli4_alloc_rpi(vport->phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) + return NULL; + } + + ndlp = mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + if (vport->phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli4_free_rpi(vport->phba, rpi); + return NULL; + } + memset(ndlp, 0, sizeof (struct lpfc_nodelist)); lpfc_initialize_node(vport, ndlp, did); INIT_LIST_HEAD(&ndlp->nlp_listp); if (vport->phba->sli_rev == LPFC_SLI_REV4) { - ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + ndlp->nlp_rpi = rpi; lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0007 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, @@ -5918,7 +5973,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "node init: did:x%x", ndlp->nlp_DID, 0, 0); - return; + return ndlp; } /* This routine releases all resources associated with a specifc NPort's ndlp diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 15ca21484150..26a5647e057e 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -509,6 +509,8 @@ struct class_parms { uint8_t word3Reserved2; /* Fc Word 3, bit 0: 7 */ }; +#define FAPWWN_KEY_VENDOR 0x42524344 /*valid vendor version fawwpn key*/ + struct serv_parm { /* Structure is in Big Endian format */ struct csp cmn; struct lpfc_name portName; @@ -2885,6 +2887,7 @@ struct lpfc_mbx_read_top { #define LPFC_ATT_RESERVED 0x00 /* Reserved - attType */ #define LPFC_ATT_LINK_UP 0x01 /* Link is up */ #define LPFC_ATT_LINK_DOWN 0x02 /* Link is down */ +#define LPFC_ATT_UNEXP_WWPN 0x06 /* Link is down Unexpected WWWPN */ uint32_t word3; #define lpfc_mbx_read_top_alpa_granted_SHIFT 24 #define lpfc_mbx_read_top_alpa_granted_MASK 0x000000FF diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 15277705cb6b..1d12f2be36bc 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -2720,6 +2720,9 @@ struct lpfc_mbx_request_features { #define lpfc_mbx_rq_ftr_rq_ifip_SHIFT 7 #define lpfc_mbx_rq_ftr_rq_ifip_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_ifip_WORD word2 +#define lpfc_mbx_rq_ftr_rq_iaar_SHIFT 9 +#define lpfc_mbx_rq_ftr_rq_iaar_MASK 0x00000001 +#define lpfc_mbx_rq_ftr_rq_iaar_WORD word2 #define lpfc_mbx_rq_ftr_rq_perfh_SHIFT 11 #define lpfc_mbx_rq_ftr_rq_perfh_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_perfh_WORD word2 @@ -3853,6 +3856,7 @@ struct lpfc_acqe_fc_la { #define LPFC_FC_LA_TYPE_NO_HARD_ALPA 0x3 #define LPFC_FC_LA_TYPE_MDS_LINK_DOWN 0x4 #define LPFC_FC_LA_TYPE_MDS_LOOPBACK 0x5 +#define LPFC_FC_LA_TYPE_UNEXP_WWPN 0x6 #define lpfc_acqe_fc_la_port_type_SHIFT 6 #define lpfc_acqe_fc_la_port_type_MASK 0x00000003 #define lpfc_acqe_fc_la_port_type_WORD word0 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 6cc561b04211..90ae354a9c45 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -42,6 +42,10 @@ #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport_fc.h> +#include <scsi/scsi_tcq.h> +#include <scsi/fc/fc_fs.h> + +#include <linux/nvme-fc-driver.h> #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -52,6 +56,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -335,6 +340,9 @@ lpfc_dump_wakeup_param_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) void lpfc_update_vport_wwn(struct lpfc_vport *vport) { + uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level; + u32 *fawwpn_key = (u32 *)&vport->fc_sparam.un.vendorVersion[0]; + /* If the soft name exists then update it using the service params */ if (vport->phba->cfg_soft_wwnn) u64_to_wwn(vport->phba->cfg_soft_wwnn, @@ -354,9 +362,25 @@ lpfc_update_vport_wwn(struct lpfc_vport *vport) memcpy(&vport->fc_sparam.nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); - if (vport->fc_portname.u.wwn[0] == 0 || vport->phba->cfg_soft_wwpn) + /* + * If the port name has changed, then set the Param changes flag + * to unreg the login + */ + if (vport->fc_portname.u.wwn[0] != 0 && + memcmp(&vport->fc_portname, &vport->fc_sparam.portName, + sizeof(struct lpfc_name))) + vport->vport_flag |= FAWWPN_PARAM_CHG; + + if (vport->fc_portname.u.wwn[0] == 0 || + vport->phba->cfg_soft_wwpn || + (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) || + vport->vport_flag & FAWWPN_SET) { memcpy(&vport->fc_portname, &vport->fc_sparam.portName, sizeof(struct lpfc_name)); + vport->vport_flag &= ~FAWWPN_SET; + if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) + vport->vport_flag |= FAWWPN_SET; + } else memcpy(&vport->fc_sparam.portName, &vport->fc_portname, sizeof(struct lpfc_name)); @@ -1003,8 +1027,10 @@ static int lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_scsi_buf *psb, *psb_next; + struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next; LIST_HEAD(aborts); LIST_HEAD(nvme_aborts); + LIST_HEAD(nvmet_aborts); unsigned long iflag = 0; struct lpfc_sglq *sglq_entry = NULL; @@ -1027,16 +1053,10 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_for_each_entry(sglq_entry, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) sglq_entry->state = SGL_FREED; - list_for_each_entry(sglq_entry, - &phba->sli4_hba.lpfc_abts_nvmet_sgl_list, list) - sglq_entry->state = SGL_FREED; list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list, &phba->sli4_hba.lpfc_els_sgl_list); - if (phba->sli4_hba.nvme_wq) - list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list, - &phba->sli4_hba.lpfc_nvmet_sgl_list); spin_unlock(&phba->sli4_hba.sgl_list_lock); /* abts_scsi_buf_list_lock required because worker thread uses this @@ -1053,6 +1073,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list, &nvme_aborts); + list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + &nvmet_aborts); spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); } @@ -1066,13 +1088,20 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_splice(&aborts, &phba->lpfc_scsi_buf_list_put); spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); - list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { - psb->pCmd = NULL; - psb->status = IOSTAT_SUCCESS; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { + psb->pCmd = NULL; + psb->status = IOSTAT_SUCCESS; + } + spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); + list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); + spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + + list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { + ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + } } - spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); - list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); - spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); lpfc_sli4_free_sp_events(phba); return 0; @@ -2874,34 +2903,38 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) { struct lpfc_nodelist *ndlp, *next_ndlp; struct lpfc_vport **vports; - int i; + int i, rpi; + unsigned long flags; if (phba->sli_rev != LPFC_SLI_REV4) return; vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) { - for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { - if (vports[i]->load_flag & FC_UNLOADING) - continue; + if (vports == NULL) + return; - list_for_each_entry_safe(ndlp, next_ndlp, - &vports[i]->fc_nodes, - nlp_listp) { - if (NLP_CHK_NODE_ACT(ndlp)) { - ndlp->nlp_rpi = - lpfc_sli4_alloc_rpi(phba); - lpfc_printf_vlog(ndlp->vport, KERN_INFO, - LOG_NODE, - "0009 rpi:%x DID:%x " - "flg:%x map:%x %p\n", - ndlp->nlp_rpi, - ndlp->nlp_DID, - ndlp->nlp_flag, - ndlp->nlp_usg_map, - ndlp); - } + for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { + if (vports[i]->load_flag & FC_UNLOADING) + continue; + + list_for_each_entry_safe(ndlp, next_ndlp, + &vports[i]->fc_nodes, + nlp_listp) { + if (!NLP_CHK_NODE_ACT(ndlp)) + continue; + rpi = lpfc_sli4_alloc_rpi(phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) { + spin_lock_irqsave(&phba->ndlp_lock, flags); + NLP_CLR_NODE_ACT(ndlp); + spin_unlock_irqrestore(&phba->ndlp_lock, flags); + continue; } + ndlp->nlp_rpi = rpi; + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + "0009 rpi:%x DID:%x " + "flg:%x map:%x %p\n", ndlp->nlp_rpi, + ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_usg_map, ndlp); } } lpfc_destroy_vport_work_array(phba, vports); @@ -3508,6 +3541,12 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6060 Current allocated SCSI xri-sgl count:%d, " + "maximum SCSI xri count:%d (split:%d)\n", + phba->sli4_hba.scsi_xri_cnt, + phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split); + if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) { /* max scsi xri shrinked below the allocated scsi buffers */ scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt - @@ -4508,9 +4547,15 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) /* Parse and translate link attention fields */ la = (struct lpfc_mbx_read_top *)&pmb->u.mb.un.varReadTop; la->eventTag = acqe_fc->event_tag; - bf_set(lpfc_mbx_read_top_att_type, la, - LPFC_FC_LA_TYPE_LINK_DOWN); + if (phba->sli4_hba.link_state.status == + LPFC_FC_LA_TYPE_UNEXP_WWPN) { + bf_set(lpfc_mbx_read_top_att_type, la, + LPFC_FC_LA_TYPE_UNEXP_WWPN); + } else { + bf_set(lpfc_mbx_read_top_att_type, la, + LPFC_FC_LA_TYPE_LINK_DOWN); + } /* Invoke the mailbox command callback function */ lpfc_mbx_cmpl_read_topology(phba, pmb); @@ -4716,10 +4761,9 @@ lpfc_sli4_perform_vport_cvl(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Set the node type */ ndlp->nlp_type |= NLP_FABRIC; /* Put ndlp onto node list */ @@ -5778,6 +5822,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the Abort nvme buffer list used by driver */ spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); /* Fast-path XRI aborted CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } @@ -5809,6 +5854,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_vfi_blk_list); INIT_LIST_HEAD(&phba->lpfc_vpi_blk_list); + /* Initialize mboxq lists. If the early init routines fail + * these lists need to be correctly initialized. + */ + INIT_LIST_HEAD(&phba->sli.mboxq); + INIT_LIST_HEAD(&phba->sli.mboxq_cmpl); + /* initialize optic_state to 0xFF */ phba->sli4_hba.lnk_info.optic_state = 0xff; @@ -5874,6 +5925,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) "READ_NV, mbxStatus x%x\n", bf_get(lpfc_mqe_command, &mboxq->u.mqe), bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + mempool_free(mboxq, phba->mbox_mem_pool); rc = -EIO; goto out_free_bsmbx; } @@ -6398,7 +6450,7 @@ lpfc_init_sgl_list(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_sgl_list); - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); /* els xri-sgl book keeping */ phba->sli4_hba.els_xri_cnt = 0; @@ -7799,7 +7851,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) /* Create Fast Path FCP WQs */ wqesize = (phba->fcp_embed_io) ? - LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -7830,7 +7882,7 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, io_channel, max; + int idx, io_channel; /* * Create HBA Record arrays. @@ -7991,15 +8043,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) if (lpfc_alloc_nvme_wq_cq(phba, idx)) goto out_error; - /* allocate MRQ CQs */ - max = phba->cfg_nvme_io_channel; - if (max < phba->cfg_nvmet_mrq) - max = phba->cfg_nvmet_mrq; - - for (idx = 0; idx < max; idx++) - if (lpfc_alloc_nvme_wq_cq(phba, idx)) - goto out_error; - if (phba->nvmet_support) { for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { qdesc = lpfc_sli4_queue_alloc(phba, @@ -8221,11 +8264,11 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) /* Release FCP cqs */ lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq, - phba->cfg_fcp_io_channel); + phba->cfg_fcp_io_channel); /* Release FCP wqs */ lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq, - phba->cfg_fcp_io_channel); + phba->cfg_fcp_io_channel); /* Release FCP CQ mapping array */ lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map); @@ -8571,15 +8614,15 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0528 %s not allocated\n", phba->sli4_hba.mbx_cq ? - "Mailbox WQ" : "Mailbox CQ"); + "Mailbox WQ" : "Mailbox CQ"); rc = -ENOMEM; goto out_destroy; } rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], - phba->sli4_hba.mbx_cq, - phba->sli4_hba.mbx_wq, - NULL, 0, LPFC_MBOX); + phba->sli4_hba.mbx_cq, + phba->sli4_hba.mbx_wq, + NULL, 0, LPFC_MBOX); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0529 Failed setup of mailbox WQ/CQ: rc = 0x%x\n", @@ -9934,17 +9977,19 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) { int wait_time = 0; int nvme_xri_cmpl = 1; + int nvmet_xri_cmpl = 1; int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - int nvmet_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvme_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + nvmet_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + } while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl || !nvmet_xri_cmpl) { @@ -9970,9 +10015,12 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1); wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1; } - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvme_xri_cmpl = list_empty( &phba->sli4_hba.lpfc_abts_nvme_buf_list); + nvmet_xri_cmpl = list_empty( + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + } if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = list_empty( @@ -9981,8 +10029,6 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - nvmet_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); } } @@ -10048,9 +10094,14 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Stop kthread signal shall trigger work_done one more time */ kthread_stop(phba->worker_thread); + /* Unset the queues shared with the hardware then release all + * allocated resources. + */ + lpfc_sli4_queue_unset(phba); + lpfc_sli4_queue_destroy(phba); + /* Reset SLI4 HBA FCoE function */ lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); /* Stop the SLI4 device port */ phba->pport->work_port_events = 0; @@ -10306,6 +10357,7 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid) } /* Initialize and populate the iocb list per host */ + error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT); if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -11051,7 +11103,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct lpfc_hba *phba; struct lpfc_vport *vport = NULL; struct Scsi_Host *shost = NULL; - int error; + int error, cnt; uint32_t cfg_mode, intr_mode; /* Allocate memory for HBA structure */ @@ -11085,12 +11137,15 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_unset_pci_mem_s4; } - /* Initialize and populate the iocb list per host */ + cnt = phba->cfg_iocb_cnt * 1024; + if (phba->nvmet_support) + cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq; + /* Initialize and populate the iocb list per host */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d.\n", - phba->cfg_iocb_cnt*1024); - error = lpfc_init_iocb_list(phba, phba->cfg_iocb_cnt*1024); + "2821 initialize iocb list %d total %d\n", + phba->cfg_iocb_cnt, cnt); + error = lpfc_init_iocb_list(phba, cnt); if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -11177,7 +11232,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if ((phba->nvmet_support == 0) && (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { /* Create NVME binding with nvme_fc_transport. This - * ensures the vport is initialized. + * ensures the vport is initialized. If the localport + * create fails, it should not unload the driver to + * support field issues. */ error = lpfc_nvme_create_localport(vport); if (error) { @@ -11185,7 +11242,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) "6004 NVME registration failed, " "error x%x\n", error); - goto out_disable_intr; } } @@ -11984,6 +12040,7 @@ int lpfc_fof_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; + uint32_t wqesize; /* Create FOF EQ */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize, @@ -12004,8 +12061,11 @@ lpfc_fof_queue_create(struct lpfc_hba *phba) phba->sli4_hba.oas_cq = qdesc; /* Create OAS WQ */ - qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize, + wqesize = (phba->fcp_embed_io) ? + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); + if (!qdesc) goto out_error; diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index a928f5187fa4..ce25a18367b5 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -2083,9 +2083,12 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq) if (phba->max_vpi && phba->cfg_enable_npiv) bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1); - if (phba->nvmet_support) + if (phba->nvmet_support) { bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1); - + /* iaab/iaar NOT set for now */ + bf_set(lpfc_mbx_rq_ftr_rq_iaab, &mboxq->u.mqe.un.req_ftrs, 0); + bf_set(lpfc_mbx_rq_ftr_rq_iaar, &mboxq->u.mqe.un.req_ftrs, 0); + } return; } diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 061626bdf701..8777c2d5f50d 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -361,8 +361,12 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, case NLP_STE_PRLI_ISSUE: case NLP_STE_UNMAPPED_NODE: case NLP_STE_MAPPED_NODE: - /* lpfc_plogi_confirm_nport skips fabric did, handle it here */ - if (!(ndlp->nlp_type & NLP_FABRIC)) { + /* For initiators, lpfc_plogi_confirm_nport skips fabric did. + * For target mode, execute implicit logo. + * Fabric nodes go into NPR. + */ + if (!(ndlp->nlp_type & NLP_FABRIC) && + !(phba->nvmet_support)) { lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL); return 1; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0024de1c6c1f..8008c8205fb6 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -401,6 +401,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nodelist *ndlp; struct ulp_bde64 *bpl; struct lpfc_dmabuf *bmp; + uint16_t ntype, nstate; /* there are two dma buf in the request, actually there is one and * the second one is just the start address + cmd size. @@ -417,11 +418,26 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, vport = lport->vport; ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); - if (!ndlp) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6043 Could not find node for DID %x\n", + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6051 DID x%06x not an active rport.\n", pnvme_rport->port_id); - return 1; + return -ENODEV; + } + + /* The remote node has to be a mapped nvme target or an + * unmapped nvme initiator or it's an error. + */ + ntype = ndlp->nlp_type; + nstate = ndlp->nlp_state; + if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) || + (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6088 DID x%06x not ready for " + "IO. State x%x, Type x%x\n", + pnvme_rport->port_id, + ndlp->nlp_state, ndlp->nlp_type); + return -ENODEV; } bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (!bmp) { @@ -456,7 +472,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, - "6051 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " + "6149 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " "rsplen:%d %pad %pad\n", pnvme_lport, pnvme_rport, pnvme_lsreq, pnvme_lsreq->rqstlen, @@ -745,6 +761,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, struct nvme_fc_cmd_iu *cp; struct lpfc_nvme_rport *rport; struct lpfc_nodelist *ndlp; + struct lpfc_nvme_fcpreq_priv *freqpriv; unsigned long flags; uint32_t code; uint16_t cid, sqhd, data; @@ -772,9 +789,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, ndlp = rport->ndlp; if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, - "6061 rport %p, ndlp %p, DID x%06x ndlp " - "not ready.\n", - rport, ndlp, rport->remoteport->port_id); + "6061 rport %p, DID x%06x node not ready.\n", + rport, rport->remoteport->port_id); ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id); if (!ndlp) { @@ -853,15 +869,18 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, break; lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, "6081 NVME Completion Protocol Error: " - "status x%x result x%x placed x%x\n", + "xri %x status x%x result x%x " + "placed x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, lpfc_ncmd->status, lpfc_ncmd->result, wcqe->total_data_placed); break; default: out_err: lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, - "6072 NVME Completion Error: " + "6072 NVME Completion Error: xri %x " "status x%x result x%x placed x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, lpfc_ncmd->status, lpfc_ncmd->result, wcqe->total_data_placed); nCmd->transferred_length = 0; @@ -900,6 +919,8 @@ out_err: phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++; } #endif + freqpriv = nCmd->private; + freqpriv->nvme_buf = NULL; nCmd->done(nCmd); spin_lock_irqsave(&phba->hbalock, flags); @@ -1099,12 +1120,12 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, first_data_sgl = sgl; lpfc_ncmd->seg_cnt = nCmd->sg_cnt; - if (lpfc_ncmd->seg_cnt > phba->cfg_sg_seg_cnt) { + if (lpfc_ncmd->seg_cnt > phba->cfg_nvme_seg_cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6058 Too many sg segments from " "NVME Transport. Max %d, " "nvmeIO sg_cnt %d\n", - phba->cfg_sg_seg_cnt, + phba->cfg_nvme_seg_cnt, lpfc_ncmd->seg_cnt); lpfc_ncmd->seg_cnt = 0; return 1; @@ -1196,6 +1217,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_ncmd; struct lpfc_nvme_rport *rport; struct lpfc_nvme_qhandle *lpfc_queue_info; + struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint64_t start = 0; #endif @@ -1274,7 +1296,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, * Do not let the IO hang out forever. There is no midlayer issuing * an abort so inform the FW of the maximum IO pending time. */ - pnvme_fcreq->private = (void *)lpfc_ncmd; + freqpriv->nvme_buf = lpfc_ncmd; lpfc_ncmd->nvmeCmd = pnvme_fcreq; lpfc_ncmd->nrport = rport; lpfc_ncmd->ndlp = ndlp; @@ -1404,6 +1426,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_nbuf; struct lpfc_iocbq *abts_buf; struct lpfc_iocbq *nvmereq_wqe; + struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private; union lpfc_wqe *abts_wqe; unsigned long flags; int ret_val; @@ -1414,7 +1437,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, phba = vport->phba; /* Announce entry to new IO submit field. */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, "6002 Abort Request to rport DID x%06x " "for nvme_fc_req %p\n", pnvme_rport->port_id, @@ -1444,7 +1467,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* The remote node has to be ready to send an abort. */ if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) && !(ndlp->nlp_type & NLP_NVME_TARGET)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6048 rport %p, DID x%06x not ready for " "IO. State x%x, Type x%x\n", rport, pnvme_rport->port_id, @@ -1459,27 +1482,28 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* driver queued commands are in process of being flushed */ if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6139 Driver in reset cleanup - flushing " "NVME Req now. hba_flag x%x\n", phba->hba_flag); return; } - lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private; + lpfc_nbuf = freqpriv->nvme_buf; if (!lpfc_nbuf) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6140 NVME IO req has no matching lpfc nvme " "io buffer. Skipping abort req.\n"); return; } else if (!lpfc_nbuf->nvmeCmd) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6141 lpfc NVME IO req has no nvme_fcreq " "io buffer. Skipping abort req.\n"); return; } + nvmereq_wqe = &lpfc_nbuf->cur_iocbq; /* * The lpfc_nbuf and the mapped nvme_fcreq in the driver's @@ -1490,23 +1514,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, */ if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6143 NVME req mismatch: " "lpfc_nbuf %p nvmeCmd %p, " - "pnvme_fcreq %p. Skipping Abort\n", + "pnvme_fcreq %p. Skipping Abort xri x%x\n", lpfc_nbuf, lpfc_nbuf->nvmeCmd, - pnvme_fcreq); + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } /* Don't abort IOs no longer on the pending queue. */ - nvmereq_wqe = &lpfc_nbuf->cur_iocbq; if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6142 NVME IO req %p not queued - skipping " - "abort req\n", - pnvme_fcreq); + "abort req xri x%x\n", + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } @@ -1517,21 +1540,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Outstanding abort is in progress */ if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6144 Outstanding NVME I/O Abort Request " "still pending on nvme_fcreq %p, " - "lpfc_ncmd %p\n", - pnvme_fcreq, lpfc_nbuf); + "lpfc_ncmd %p xri x%x\n", + pnvme_fcreq, lpfc_nbuf, + nvmereq_wqe->sli4_xritag); return; } abts_buf = __lpfc_sli_get_iocbq(phba); if (!abts_buf) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6136 No available abort wqes. Skipping " - "Abts req for nvme_fcreq %p.\n", - pnvme_fcreq); + "Abts req for nvme_fcreq %p xri x%x\n", + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } @@ -1580,7 +1604,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf); spin_unlock_irqrestore(&phba->hbalock, flags); if (ret_val == IOCB_ERROR) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6137 Failed abts issue_wqe with status x%x " "for nvme_fcreq %p.\n", ret_val, pnvme_fcreq); @@ -1588,8 +1612,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, return; } - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, - "6138 Transport Abort NVME Request Issued for\n" + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, + "6138 Transport Abort NVME Request Issued for " "ox_id x%x on reqtag x%x\n", nvmereq_wqe->sli4_xritag, abts_buf->iotag); @@ -1618,7 +1642,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = { .local_priv_sz = sizeof(struct lpfc_nvme_lport), .remote_priv_sz = sizeof(struct lpfc_nvme_rport), .lsrqst_priv_sz = 0, - .fcprqst_priv_sz = 0, + .fcprqst_priv_sz = sizeof(struct lpfc_nvme_fcpreq_priv), }; /** @@ -2049,7 +2073,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) if (lpfc_test_rrq_active(phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) continue; - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); found = 1; break; } @@ -2064,7 +2088,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) if (lpfc_test_rrq_active( phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) continue; - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); found = 1; break; } @@ -2092,6 +2116,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) lpfc_ncmd->nonsg_phys = 0; if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6310 XB release deferred for " + "ox_id x%x on reqtag x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, + lpfc_ncmd->cur_iocbq.iotag); + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); lpfc_ncmd->nvmeCmd = NULL; @@ -2142,8 +2172,18 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn); - /* For now need + 1 to get around NVME transport logic */ - lpfc_nvme_template.max_sgl_segments = phba->cfg_sg_seg_cnt + 1; + /* Limit to LPFC_MAX_NVME_SEG_CNT. + * For now need + 1 to get around NVME transport logic. + */ + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_INIT, + "6300 Reducing sg segment cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else { + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel; /* localport is allocated from the stack, but the registration @@ -2249,12 +2289,23 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) void lpfc_nvme_update_localport(struct lpfc_vport *vport) { +#if (IS_ENABLED(CONFIG_NVME_FC)) struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; localport = vport->localport; + if (!localport) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME, + "6710 Update NVME fail. No localport\n"); + return; + } lport = (struct lpfc_nvme_lport *)localport->private; - + if (!lport) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME, + "6171 Update NVME fail. localP %p, No lport\n", + localport); + return; + } lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, "6012 Update NVME lport %p did x%x\n", localport, vport->fc_myDID); @@ -2268,7 +2319,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6030 bound lport %p to DID x%06x\n", lport, localport->port_id); - +#endif } int @@ -2409,6 +2460,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; struct nvme_fc_remote_port *remoteport; + unsigned long wait_tmo; localport = vport->localport; @@ -2451,11 +2503,12 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * before proceeding. This guarantees the transport and driver * have completed the unreg process. */ - ret = wait_for_completion_timeout(&rport->rport_unreg_done, 5); + wait_tmo = msecs_to_jiffies(5000); + ret = wait_for_completion_timeout(&rport->rport_unreg_done, + wait_tmo); if (ret == 0) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6169 Unreg nvme wait failed %d\n", - ret); + "6169 Unreg nvme wait timeout\n"); } } return; @@ -2463,7 +2516,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) input_err: #endif lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6168: State error: lport %p, rport%p FCID x%06x\n", + "6168 State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); } @@ -2494,7 +2547,7 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, &phba->sli4_hba.lpfc_abts_nvme_buf_list, list) { if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) { - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; spin_unlock( @@ -2510,6 +2563,12 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, rxid, 1); lpfc_sli4_abts_err_handler(phba, ndlp, axri); } + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6311 XRI Aborted xri x%x tag x%x " + "released\n", + xri, lpfc_ncmd->cur_iocbq.iotag); + lpfc_release_nvme_buf(phba, lpfc_ncmd); if (rrq_empty) lpfc_worker_wake_up(phba); @@ -2518,4 +2577,8 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, } spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6312 XRI Aborted xri x%x not found\n", xri); + } diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 1347deb8dd6c..ec32f45daa66 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -21,12 +21,7 @@ * included with this package. * ********************************************************************/ -#define LPFC_NVME_MIN_SEGS 16 -#define LPFC_NVME_DEFAULT_SEGS 66 /* 256K IOs - 64 + 2 */ -#define LPFC_NVME_MAX_SEGS 510 -#define LPFC_NVMET_MIN_POSTBUF 16 -#define LPFC_NVMET_DEFAULT_POSTBUF 1024 -#define LPFC_NVMET_MAX_POSTBUF 4096 +#define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVME_WQSIZE 256 #define LPFC_NVME_ERSP_LEN 0x20 @@ -102,3 +97,7 @@ struct lpfc_nvme_buf { uint64_t ts_data_nvme; #endif }; + +struct lpfc_nvme_fcpreq_priv { + struct lpfc_nvme_buf *nvme_buf; +}; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index acba1b67e505..94434e621c33 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -71,6 +71,26 @@ static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, struct lpfc_nvmet_rcv_ctx *, uint32_t, uint16_t); +void +lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) +{ + unsigned long iflag; + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6313 NVMET Defer ctx release xri x%x flg x%x\n", + ctxp->oxid, ctxp->flag); + + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); + if (ctxp->flag & LPFC_NVMET_CTX_RLS) { + spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, + iflag); + return; + } + ctxp->flag |= LPFC_NVMET_CTX_RLS; + list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); +} + /** * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response * @phba: Pointer to HBA context object. @@ -139,6 +159,11 @@ lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, struct lpfc_dmabuf *mp) { if (ctxp) { + if (ctxp->flag) + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6314 rq_post ctx xri x%x flag x%x\n", + ctxp->oxid, ctxp->flag); + if (ctxp->txrdy) { pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, ctxp->txrdy_phys); @@ -337,39 +362,55 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, #endif ctxp = cmdwqe->context2; + ctxp->flag &= ~LPFC_NVMET_IO_INP; + rsp = &ctxp->ctx.fcp_req; op = rsp->op; - ctxp->flag &= ~LPFC_NVMET_IO_INP; status = bf_get(lpfc_wcqe_c_status, wcqe); result = wcqe->parameter; - if (!phba->targetport) - goto out; + if (phba->targetport) + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + else + tgtp = NULL; lpfc_nvmeio_data(phba, "NVMET FCP CMPL: xri x%x op x%x status x%x\n", ctxp->oxid, op, status); - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (status) { rsp->fcp_error = NVME_SC_DATA_XFER_ERROR; rsp->transferred_length = 0; - atomic_inc(&tgtp->xmt_fcp_rsp_error); + if (tgtp) + atomic_inc(&tgtp->xmt_fcp_rsp_error); + + /* pick up SLI4 exhange busy condition */ + if (bf_get(lpfc_wcqe_c_xb, wcqe)) { + ctxp->flag |= LPFC_NVMET_XBUSY; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6315 IO Cmpl XBUSY: xri x%x: %x/%x\n", + ctxp->oxid, status, result); + } else { + ctxp->flag &= ~LPFC_NVMET_XBUSY; + } + } else { rsp->fcp_error = NVME_SC_SUCCESS; if (op == NVMET_FCOP_RSP) rsp->transferred_length = rsp->rsplen; else rsp->transferred_length = rsp->transfer_length; - atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); + if (tgtp) + atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); } -out: if ((op == NVMET_FCOP_READDATA_RSP) || (op == NVMET_FCOP_RSP)) { /* Sanity check */ ctxp->state = LPFC_NVMET_STE_DONE; ctxp->entry_cnt++; + #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { if (rsp->op == NVMET_FCOP_READDATA_RSP) { @@ -408,9 +449,7 @@ out: if (phba->ktime_on) lpfc_nvmet_ktime(phba, ctxp); #endif - /* Let Abort cmpl repost the context */ - if (!(ctxp->flag & LPFC_NVMET_ABORT_OP)) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + /* lpfc_nvmet_xmt_fcp_release() will recycle the context */ } else { ctxp->entry_cnt++; start_clean = offsetof(struct lpfc_iocbq, wqe); @@ -519,7 +558,6 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; struct lpfc_iocbq *nvmewqeq; - unsigned long iflags; int rc; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -544,32 +582,12 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, } #endif - if (rsp->op == NVMET_FCOP_ABORT) { - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6103 Abort op: oxri x%x %d cnt %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - - lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " - "xri x%x state x%x cnt x%x\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - - atomic_inc(&lpfc_nvmep->xmt_fcp_abort); - ctxp->entry_cnt++; - ctxp->flag |= LPFC_NVMET_ABORT_OP; - if (ctxp->flag & LPFC_NVMET_IO_INP) - lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, - ctxp->oxid); - else - lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, - ctxp->oxid); - return 0; - } - /* Sanity check */ - if (ctxp->state == LPFC_NVMET_STE_ABORT) { + if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) || + (ctxp->state == LPFC_NVMET_STE_ABORT)) { atomic_inc(&lpfc_nvmep->xmt_fcp_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6102 Bad state IO x%x aborted\n", + "6102 IO xri x%x aborted\n", ctxp->oxid); rc = -ENXIO; goto aerr; @@ -594,10 +612,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", ctxp->oxid, rsp->op, rsp->rsplen); - /* For now we take hbalock */ - spin_lock_irqsave(&phba->hbalock, iflags); rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); - spin_unlock_irqrestore(&phba->hbalock, iflags); if (rc == WQE_SUCCESS) { ctxp->flag |= LPFC_NVMET_IO_INP; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -634,10 +649,79 @@ lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport) complete(&tport->tport_unreg_done); } +static void +lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *req) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + unsigned long flags; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6103 Abort op: oxri x%x flg x%x cnt %d\n", + ctxp->oxid, ctxp->flag, ctxp->entry_cnt); + + lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " + "xri x%x flg x%x cnt x%x\n", + ctxp->oxid, ctxp->flag, ctxp->entry_cnt); + + atomic_inc(&lpfc_nvmep->xmt_fcp_abort); + ctxp->entry_cnt++; + spin_lock_irqsave(&ctxp->ctxlock, flags); + + /* Since iaab/iaar are NOT set, we need to check + * if the firmware is in process of aborting IO + */ + if (ctxp->flag & LPFC_NVMET_XBUSY) { + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + return; + } + ctxp->flag |= LPFC_NVMET_ABORT_OP; + if (ctxp->flag & LPFC_NVMET_IO_INP) + lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + else + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + spin_unlock_irqrestore(&ctxp->ctxlock, flags); +} + +static void +lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *rsp) +{ + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + unsigned long flags; + bool aborting = false; + + spin_lock_irqsave(&ctxp->ctxlock, flags); + if ((ctxp->flag & LPFC_NVMET_ABORT_OP) || + (ctxp->flag & LPFC_NVMET_XBUSY)) { + aborting = true; + /* let the abort path do the real release */ + lpfc_nvmet_defer_release(phba, ctxp); + } + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + + lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid, + ctxp->state, 0); + + if (aborting) + return; + + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); +} + static struct nvmet_fc_target_template lpfc_tgttemplate = { .targetport_delete = lpfc_nvmet_targetport_delete, .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, .fcp_op = lpfc_nvmet_xmt_fcp_op, + .fcp_abort = lpfc_nvmet_xmt_fcp_abort, + .fcp_req_release = lpfc_nvmet_xmt_fcp_release, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, @@ -666,10 +750,23 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); pinfo.port_id = vport->fc_myDID; + /* Limit to LPFC_MAX_NVME_SEG_CNT. + * For now need + 1 to get around NVME transport logic. + */ + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, + "6400 Reducing sg segment cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else { + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; - lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_OPDONE_IN_ISR; #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, @@ -750,7 +847,120 @@ void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, struct sli4_wcqe_xri_aborted *axri) { - /* TODO: work in progress */ + uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); + uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_nodelist *ndlp; + unsigned long iflag = 0; + int rrq_empty = 0; + bool released = false; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6317 XB aborted xri x%x rxid x%x\n", xri, rxid); + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + continue; + + /* Check if we already received a free context call + * and we have completed processing an abort situation. + */ + if (ctxp->flag & LPFC_NVMET_CTX_RLS && + !(ctxp->flag & LPFC_NVMET_ABORT_OP)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_XBUSY; + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + + rrq_empty = list_empty(&phba->active_rrq_list); + spin_unlock_irqrestore(&phba->hbalock, iflag); + ndlp = lpfc_findnode_did(phba->pport, ctxp->sid); + if (ndlp && NLP_CHK_NODE_ACT(ndlp) && + (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE || + ndlp->nlp_state == NLP_STE_MAPPED_NODE)) { + lpfc_set_rrq_active(phba, ndlp, + ctxp->rqb_buffer->sglq->sli4_lxritag, + rxid, 1); + lpfc_sli4_abts_err_handler(phba, ndlp, axri); + } + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6318 XB aborted %x flg x%x (%x)\n", + ctxp->oxid, ctxp->flag, released); + if (released) + lpfc_nvmet_rq_post(phba, ctxp, + &ctxp->rqb_buffer->hbuf); + if (rrq_empty) + lpfc_worker_wake_up(phba); + return; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); +} + +int +lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr) + +{ +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct nvmefc_tgt_fcp_req *rsp; + uint16_t xri; + unsigned long iflag = 0; + + xri = be16_to_cpu(fc_hdr->fh_ox_id); + + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + continue; + + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); + + spin_lock_irqsave(&ctxp->ctxlock, iflag); + ctxp->flag |= LPFC_NVMET_ABTS_RCV; + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + + lpfc_nvmeio_data(phba, + "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, smp_processor_id(), 0); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6319 NVMET Rcv ABTS:acc xri x%x\n", xri); + + rsp = &ctxp->ctx.fcp_req; + nvmet_fc_rcv_fcp_abort(phba->targetport, rsp); + + /* Respond with BA_ACC accordingly */ + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1); + return 0; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); + + lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, smp_processor_id(), 1); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6320 NVMET Rcv ABTS:rjt xri x%x\n", xri); + + /* Respond with BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 0); +#endif + return 0; } void @@ -940,6 +1150,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->rqb_buffer = nvmebuf; ctxp->entry_cnt = 1; ctxp->flag = 0; + spin_lock_init(&ctxp->ctxlock); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { @@ -962,8 +1173,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, } #endif - lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d from %06x\n", - oxid, size, sid); + lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d CPU %02x\n", + oxid, size, smp_processor_id()); atomic_inc(&tgtp->rcv_fcp_cmd_in); /* @@ -1237,11 +1448,11 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, return NULL; } - if (rsp->sg_cnt > phba->cfg_sg_seg_cnt) { + if (rsp->sg_cnt > phba->cfg_nvme_seg_cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6109 lpfc_nvmet_prep_fcp_wqe: seg cnt err: " - "NPORT x%x oxid:x%x\n", - ctxp->sid, ctxp->oxid); + "NPORT x%x oxid:x%x cnt %d\n", + ctxp->sid, ctxp->oxid, phba->cfg_nvme_seg_cnt); return NULL; } @@ -1593,6 +1804,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; uint32_t status, result; + unsigned long flags; + bool released = false; ctxp = cmdwqe->context2; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -1601,21 +1814,46 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; atomic_inc(&tgtp->xmt_abort_cmpl); + ctxp->state = LPFC_NVMET_STE_DONE; + + /* Check if we already received a free context call + * and we have completed processing an abort situation. + */ + spin_lock_irqsave(&ctxp->ctxlock, flags); + if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && + !(ctxp->flag & LPFC_NVMET_XBUSY)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6165 Abort cmpl: xri x%x WCQE: %08x %08x %08x %08x\n", - ctxp->oxid, wcqe->word0, wcqe->total_data_placed, + "6165 ABORT cmpl: xri x%x flg x%x (%d) " + "WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, ctxp->flag, released, + wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); - ctxp->state = LPFC_NVMET_STE_DONE; - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + /* + * if transport has released ctx, then can reuse it. Otherwise, + * will be recycled by transport release call. + */ + if (released) + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); cmdwqe->context2 = NULL; cmdwqe->context3 = NULL; lpfc_sli_release_iocbq(phba, cmdwqe); + + /* Since iaab/iaar are NOT set, there is no work left. + * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * should have been called already. + */ } /** - * lpfc_nvmet_xmt_fcp_abort_cmp - Completion handler for ABTS + * lpfc_nvmet_unsol_fcp_abort_cmp - Completion handler for ABTS * @phba: Pointer to HBA context object. * @cmdwqe: Pointer to driver command WQE object. * @wcqe: Pointer to driver response CQE object. @@ -1625,12 +1863,14 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, * The function frees memory resources used for the NVME commands. **/ static void -lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - struct lpfc_wcqe_complete *wcqe) +lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) { struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; + unsigned long flags; uint32_t status, result; + bool released = false; ctxp = cmdwqe->context2; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -1639,23 +1879,55 @@ lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; atomic_inc(&tgtp->xmt_abort_cmpl); + if (!ctxp) { + /* if context is clear, related io alrady complete */ + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6070 ABTS cmpl: WCQE: %08x %08x %08x %08x\n", + wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + return; + } + + /* Sanity check */ + if (ctxp->state != LPFC_NVMET_STE_ABORT) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6112 ABTS Wrong state:%d oxid x%x\n", + ctxp->state, ctxp->oxid); + } + + /* Check if we already received a free context call + * and we have completed processing an abort situation. + */ + ctxp->state = LPFC_NVMET_STE_DONE; + spin_lock_irqsave(&ctxp->ctxlock, flags); + if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && + !(ctxp->flag & LPFC_NVMET_XBUSY)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6070 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", - ctxp, wcqe->word0, wcqe->total_data_placed, + "6316 ABTS cmpl xri x%x flg x%x (%x) " + "WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, ctxp->flag, released, + wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); - - if (ctxp) { - /* Sanity check */ - if (ctxp->state != LPFC_NVMET_STE_ABORT) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6112 ABORT Wrong state:%d oxid x%x\n", - ctxp->state, ctxp->oxid); - } - ctxp->state = LPFC_NVMET_STE_DONE; + /* + * if transport has released ctx, then can reuse it. Otherwise, + * will be recycled by transport release call. + */ + if (released) lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; - } + + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + + /* Since iaab/iaar are NOT set, there is no work left. + * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * should have been called already. + */ } /** @@ -1708,10 +1980,14 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6067 Abort: sid %x xri x%x/x%x\n", + "6067 ABTS: sid %x xri x%x/x%x\n", sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq->hba_wqidx = 0; + } ndlp = lpfc_findnode_did(phba->pport, sid); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || @@ -1817,10 +2093,11 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, - "6160 Drop ABTS - wrong NDLP state x%x.\n", + "6160 Drop ABORT - wrong NDLP state x%x.\n", (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1828,9 +2105,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba); if (!ctxp->abort_wqeq) { lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, - "6161 Abort failed: No wqeqs: " + "6161 ABORT failed: No wqeqs: " "xri: x%x\n", ctxp->oxid); /* No failure to an ABTS request. */ + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } abts_wqeq = ctxp->abort_wqeq; @@ -1838,8 +2116,8 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->state = LPFC_NVMET_STE_ABORT; /* Announce entry to new IO submit field. */ - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6162 Abort Request to rport DID x%06x " + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6162 ABORT Request to rport DID x%06x " "for xri x%x x%x\n", ctxp->sid, ctxp->oxid, ctxp->wqeq->sli4_xritag); @@ -1855,6 +2133,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "NVME Req now. hba_flag x%x oxid x%x\n", phba->hba_flag, ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1866,6 +2145,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "still pending on oxid x%x\n", ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1913,9 +2193,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, if (rc == WQE_SUCCESS) return 0; + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; lpfc_sli_release_iocbq(phba, abts_wqeq); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME, - "6166 Failed abts issue_wqe with status x%x " + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6166 Failed ABORT issue_wqe with status x%x " "for oxid x%x.\n", rc, ctxp->oxid); return 1; @@ -1944,8 +2225,8 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, spin_lock_irqsave(&phba->hbalock, flags); abts_wqeq = ctxp->wqeq; - abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_abort_cmp; - abts_wqeq->iocb_cmpl = 0; + abts_wqeq->wqe_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp; + abts_wqeq->iocb_cmpl = NULL; abts_wqeq->iocb_flag |= LPFC_IO_NVMET; rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); @@ -1955,7 +2236,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, } aerr: - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6135 Failed to Issue ABTS for oxid x%x. Status x%x\n", diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index ca96f05c1604..128759fe6650 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -21,9 +21,7 @@ * included with this package. * ********************************************************************/ -#define LPFC_NVMET_MIN_SEGS 16 -#define LPFC_NVMET_DEFAULT_SEGS 64 /* 256K IOs */ -#define LPFC_NVMET_MAX_SEGS 510 +#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVMET_SUCCESS_LEN 12 /* Used for NVME Target */ @@ -77,10 +75,12 @@ struct lpfc_nvmet_rcv_ctx { struct nvmefc_tgt_ls_req ls_req; struct nvmefc_tgt_fcp_req fcp_req; } ctx; + struct list_head list; struct lpfc_hba *phba; struct lpfc_iocbq *wqeq; struct lpfc_iocbq *abort_wqeq; dma_addr_t txrdy_phys; + spinlock_t ctxlock; /* protect flag access */ uint32_t *txrdy; uint32_t sid; uint32_t offset; @@ -97,8 +97,11 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_STE_RSP 4 #define LPFC_NVMET_STE_DONE 5 uint16_t flag; -#define LPFC_NVMET_IO_INP 1 -#define LPFC_NVMET_ABORT_OP 2 +#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ +#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ +#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ +#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ +#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ struct rqb_dmabuf *rqb_buffer; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1c9fa45df7eb..cf19f4976f5f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6338,7 +6338,7 @@ lpfc_sli4_get_allocated_extnts(struct lpfc_hba *phba, uint16_t type, } /** - * lpfc_sli4_repost_sgl_list - Repsot the buffers sgl pages as block + * lpfc_sli4_repost_sgl_list - Repost the buffers sgl pages as block * @phba: pointer to lpfc hba data structure. * @pring: Pointer to driver SLI ring object. * @sgl_list: linked link of sgl buffers to post @@ -13758,7 +13758,10 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue) lpfc_free_rq_buffer(queue->phba, queue); kfree(queue->rqbp); } - kfree(queue->pring); + + if (!list_empty(&queue->wq_list)) + list_del(&queue->wq_list); + kfree(queue); return; } @@ -14738,6 +14741,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, case LPFC_Q_CREATE_VERSION_1: bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); + bf_set(lpfc_mbox_hdr_version, &shdr->request, + LPFC_Q_CREATE_VERSION_1); + switch (wq->entry_size) { default: case 64: @@ -15561,6 +15567,8 @@ lpfc_wq_destroy(struct lpfc_hba *phba, struct lpfc_queue *wq) } /* Remove wq from any list */ list_del_init(&wq->list); + kfree(wq->pring); + wq->pring = NULL; mempool_free(mbox, wq->phba->mbox_mem_pool); return status; } @@ -16513,7 +16521,7 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * This function sends a basic response to a previous unsol sequence abort * event after aborting the sequence handling. **/ -static void +void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, struct fc_frame_header *fc_hdr, bool aborted) { @@ -16534,14 +16542,13 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, sid); if (!ndlp) { lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, "1268 Failed to allocate ndlp for " "oxid:x%x SID:x%x\n", oxid, sid); return; } - lpfc_nlp_init(vport, ndlp, sid); /* Put ndlp onto pport node list */ lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { @@ -16690,6 +16697,11 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, } lpfc_in_buf_free(phba, &dmabuf->dbuf); + if (phba->nvmet_support) { + lpfc_nvmet_rcv_unsol_abort(vport, &fc_hdr); + return; + } + /* Respond with BA_ACC or BA_RJT accordingly */ lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 710458cf11d6..da46471337c8 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -620,7 +620,7 @@ struct lpfc_sli4_hba { struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_nvmet_sgl_list; - struct list_head lpfc_abts_nvmet_sgl_list; + struct list_head lpfc_abts_nvmet_ctx_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index d4e95e28f4e3..1c26dc67151b 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.10" +#define LPFC_DRIVER_VERSION "11.2.0.12" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 9a0339dbc024..c714482bf4c5 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -738,10 +738,9 @@ lpfc_vport_delete(struct fc_vport *fc_vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, allocate one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) goto skip_logo; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Indicate free memory when release */ NLP_SET_FREE_REQ(ndlp); } else { diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 6903f03c88af..8a1b94816419 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -477,7 +477,7 @@ static void _set_error_resid(struct osd_request *or, struct request *req, int error) { or->async_error = error; - or->req_errors = req->errors ? : error; + or->req_errors = scsi_req(req)->result ? : error; or->sense_len = scsi_req(req)->sense_len; if (or->sense_len) memcpy(or->sense, scsi_req(req)->sense, or->sense_len); @@ -489,7 +489,10 @@ static void _set_error_resid(struct osd_request *or, struct request *req, int osd_execute_request(struct osd_request *or) { - int error = blk_execute_rq(or->request->q, NULL, or->request, 0); + int error; + + blk_execute_rq(or->request->q, NULL, or->request, 0); + error = scsi_req(or->request)->result ? -EIO : 0; _set_error_resid(or, or->request, error); return error; @@ -1602,7 +1605,7 @@ static int _init_blk_request(struct osd_request *or, req->rq_flags |= RQF_QUIET; req->timeout = or->timeout; - req->retries = or->retries; + scsi_req(req)->retries = or->retries; if (has_out) { or->out.req = req; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index c47f4b349bac..67cbed92f07d 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -327,7 +327,7 @@ static void osst_end_async(struct request *req, int update) struct osst_tape *STp = SRpnt->stp; struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; - STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors; + STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result; #if DEBUG STp->write_pending = 0; #endif @@ -414,7 +414,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ memcpy(rq->cmd, cmd, rq->cmd_len); req->timeout = timeout; - req->retries = retries; + rq->retries = retries; req->end_io_data = SRpnt; blk_execute_rq_nowait(req->q, NULL, req, 1, osst_end_async); diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 84c9098cc089..b6e40fd4c3c1 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -2553,13 +2553,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) ql_log(ql_log_warn, vha, 0x7089, "mbx abort_command " "failed.\n"); - bsg_job->req->errors = + scsi_req(bsg_job->req)->result = bsg_reply->result = -EIO; } else { ql_dbg(ql_dbg_user, vha, 0x708a, "mbx abort_command " "success.\n"); - bsg_job->req->errors = + scsi_req(bsg_job->req)->result = bsg_reply->result = 0; } spin_lock_irqsave(&ha->hardware_lock, flags); @@ -2570,7 +2570,7 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) } spin_unlock_irqrestore(&ha->hardware_lock, flags); ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n"); - bsg_job->req->errors = bsg_reply->result = -ENXIO; + scsi_req(bsg_job->req)->result = bsg_reply->result = -ENXIO; return 0; done: diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c new file mode 100644 index 000000000000..a97c9507103d --- /dev/null +++ b/drivers/scsi/scsi_debugfs.c @@ -0,0 +1,13 @@ +#include <linux/seq_file.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_dbg.h> +#include "scsi_debugfs.h" + +void scsi_show_rq(struct seq_file *m, struct request *rq) +{ + struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req); + char buf[80]; + + __scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len); + seq_printf(m, ", .cmd=%s", buf); +} diff --git a/drivers/scsi/scsi_debugfs.h b/drivers/scsi/scsi_debugfs.h new file mode 100644 index 000000000000..951b043e82d0 --- /dev/null +++ b/drivers/scsi/scsi_debugfs.h @@ -0,0 +1,4 @@ +struct request; +struct seq_file; + +void scsi_show_rq(struct seq_file *m, struct request *rq); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index f2cafae150bc..2db412dd4b44 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1988,7 +1988,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) req->rq_flags |= RQF_QUIET; req->timeout = 10 * HZ; - req->retries = 5; + rq->retries = 5; blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e5a2d590a104..1c3e87d6c48f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -34,6 +34,7 @@ #include <trace/events/scsi.h> +#include "scsi_debugfs.h" #include "scsi_priv.h" #include "scsi_logging.h" @@ -229,8 +230,8 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) * @rq_flags: flags for ->rq_flags * @resid: optional residual length * - * returns the req->errors value which is the scsi_cmnd result - * field. + * Returns the scsi_cmnd result field if a command was executed, or a negative + * Linux error code if we didn't get that far. */ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, @@ -256,7 +257,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, rq->cmd_len = COMMAND_SIZE(cmd[0]); memcpy(rq->cmd, cmd, rq->cmd_len); - req->retries = retries; + rq->retries = retries; req->timeout = timeout; req->cmd_flags |= flags; req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT; @@ -281,7 +282,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE); if (sshdr) scsi_normalize_sense(rq->sense, rq->sense_len, sshdr); - ret = req->errors; + ret = rq->result; out: blk_put_request(req); @@ -797,8 +798,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) /* * __scsi_error_from_host_byte may have reset the host_byte */ - req->errors = cmd->result; - + scsi_req(req)->result = cmd->result; scsi_req(req)->resid_len = scsi_get_resid(cmd); if (scsi_bidi_cmnd(cmd)) { @@ -835,7 +835,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) /* * Recovered errors need reporting, but they're always treated as * success, so fiddle the result code here. For passthrough requests - * we already took a copy of the original into rq->errors which + * we already took a copy of the original into sreq->result which * is what gets returned to the user */ if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) { @@ -1061,10 +1061,10 @@ int scsi_init_io(struct scsi_cmnd *cmd) struct scsi_device *sdev = cmd->device; struct request *rq = cmd->request; bool is_mq = (rq->mq_ctx != NULL); - int error; + int error = BLKPREP_KILL; if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq))) - return -EINVAL; + goto err_exit; error = scsi_init_sgtable(rq, &cmd->sdb); if (error) @@ -1177,7 +1177,7 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) cmd->cmd_len = scsi_req(req)->cmd_len; cmd->cmnd = scsi_req(req)->cmd; cmd->transfersize = blk_rq_bytes(req); - cmd->allowed = req->retries; + cmd->allowed = scsi_req(req)->retries; return BLKPREP_OK; } @@ -1281,7 +1281,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret) switch (ret) { case BLKPREP_KILL: case BLKPREP_INVALID: - req->errors = DID_NO_CONNECT << 16; + scsi_req(req)->result = DID_NO_CONNECT << 16; /* release the command and kill it */ if (req->special) { struct scsi_cmnd *cmd = req->special; @@ -1905,7 +1905,7 @@ static int scsi_mq_prep_fn(struct request *req) static void scsi_mq_done(struct scsi_cmnd *cmd) { trace_scsi_dispatch_cmd_done(cmd); - blk_mq_complete_request(cmd->request, cmd->request->errors); + blk_mq_complete_request(cmd->request); } static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -2154,10 +2154,13 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) return q; } -static struct blk_mq_ops scsi_mq_ops = { +static const struct blk_mq_ops scsi_mq_ops = { .queue_rq = scsi_queue_rq, .complete = scsi_softirq_done, .timeout = scsi_timeout, +#ifdef CONFIG_BLK_DEBUG_FS + .show_rq = scsi_show_rq, +#endif .init_request = scsi_init_request, .exit_request = scsi_exit_request, .map_queues = scsi_map_queues, diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index cdbb293aca08..9fdbd50c31b4 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -184,9 +184,9 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost, blk_rq_bytes(req->next_rq); handler = to_sas_internal(shost->transportt)->f->smp_handler; ret = handler(shost, rphy, req); - req->errors = ret; + scsi_req(req)->result = ret; - blk_end_request_all(req, ret); + blk_end_request_all(req, 0); spin_lock_irq(q->queue_lock); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 35ad5e8a31ab..0dc95e102e69 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -418,6 +418,46 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(provisioning_mode); +static const char *zeroing_mode[] = { + [SD_ZERO_WRITE] = "write", + [SD_ZERO_WS] = "writesame", + [SD_ZERO_WS16_UNMAP] = "writesame_16_unmap", + [SD_ZERO_WS10_UNMAP] = "writesame_10_unmap", +}; + +static ssize_t +zeroing_mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + + return snprintf(buf, 20, "%s\n", zeroing_mode[sdkp->zeroing_mode]); +} + +static ssize_t +zeroing_mode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (!strncmp(buf, zeroing_mode[SD_ZERO_WRITE], 20)) + sdkp->zeroing_mode = SD_ZERO_WRITE; + else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS], 20)) + sdkp->zeroing_mode = SD_ZERO_WS; + else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS16_UNMAP], 20)) + sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP; + else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS10_UNMAP], 20)) + sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP; + else + return -EINVAL; + + return count; +} +static DEVICE_ATTR_RW(zeroing_mode); + static ssize_t max_medium_access_timeouts_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -496,6 +536,7 @@ static struct attribute *sd_disk_attrs[] = { &dev_attr_app_tag_own.attr, &dev_attr_thin_provisioning.attr, &dev_attr_provisioning_mode.attr, + &dev_attr_zeroing_mode.attr, &dev_attr_max_write_same_blocks.attr, &dev_attr_max_medium_access_timeouts.attr, NULL, @@ -644,26 +685,11 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) unsigned int logical_block_size = sdkp->device->sector_size; unsigned int max_blocks = 0; - q->limits.discard_zeroes_data = 0; - - /* - * When LBPRZ is reported, discard alignment and granularity - * must be fixed to the logical block size. Otherwise the block - * layer will drop misaligned portions of the request which can - * lead to data corruption. If LBPRZ is not set, we honor the - * device preference. - */ - if (sdkp->lbprz) { - q->limits.discard_alignment = 0; - q->limits.discard_granularity = logical_block_size; - } else { - q->limits.discard_alignment = sdkp->unmap_alignment * - logical_block_size; - q->limits.discard_granularity = - max(sdkp->physical_block_size, - sdkp->unmap_granularity * logical_block_size); - } - + q->limits.discard_alignment = + sdkp->unmap_alignment * logical_block_size; + q->limits.discard_granularity = + max(sdkp->physical_block_size, + sdkp->unmap_granularity * logical_block_size); sdkp->provisioning_mode = mode; switch (mode) { @@ -681,19 +707,16 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) case SD_LBP_WS16: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS16_BLOCKS); - q->limits.discard_zeroes_data = sdkp->lbprz; break; case SD_LBP_WS10: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); - q->limits.discard_zeroes_data = sdkp->lbprz; break; case SD_LBP_ZERO: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); - q->limits.discard_zeroes_data = 1; break; } @@ -701,93 +724,122 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } -/** - * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device - * @sdp: scsi device to operate on - * @rq: Request to prepare - * - * Will issue either UNMAP or WRITE SAME(16) depending on preference - * indicated by target device. - **/ -static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd) +static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) { - struct request *rq = cmd->request; struct scsi_device *sdp = cmd->device; - struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); - sector_t sector = blk_rq_pos(rq); - unsigned int nr_sectors = blk_rq_sectors(rq); - unsigned int len; - int ret; + struct request *rq = cmd->request; + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + unsigned int data_len = 24; char *buf; - struct page *page; - - sector >>= ilog2(sdp->sector_size) - 9; - nr_sectors >>= ilog2(sdp->sector_size) - 9; - page = alloc_page(GFP_ATOMIC | __GFP_ZERO); - if (!page) + rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!rq->special_vec.bv_page) return BLKPREP_DEFER; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = data_len; + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; - switch (sdkp->provisioning_mode) { - case SD_LBP_UNMAP: - buf = page_address(page); + cmd->cmd_len = 10; + cmd->cmnd[0] = UNMAP; + cmd->cmnd[8] = 24; - cmd->cmd_len = 10; - cmd->cmnd[0] = UNMAP; - cmd->cmnd[8] = 24; + buf = page_address(rq->special_vec.bv_page); + put_unaligned_be16(6 + 16, &buf[0]); + put_unaligned_be16(16, &buf[2]); + put_unaligned_be64(sector, &buf[8]); + put_unaligned_be32(nr_sectors, &buf[16]); - put_unaligned_be16(6 + 16, &buf[0]); - put_unaligned_be16(16, &buf[2]); - put_unaligned_be64(sector, &buf[8]); - put_unaligned_be32(nr_sectors, &buf[16]); + cmd->allowed = SD_MAX_RETRIES; + cmd->transfersize = data_len; + rq->timeout = SD_TIMEOUT; + scsi_req(rq)->resid_len = data_len; - len = 24; - break; + return scsi_init_io(cmd); +} - case SD_LBP_WS16: - cmd->cmd_len = 16; - cmd->cmnd[0] = WRITE_SAME_16; +static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) +{ + struct scsi_device *sdp = cmd->device; + struct request *rq = cmd->request; + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + u32 data_len = sdp->sector_size; + + rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!rq->special_vec.bv_page) + return BLKPREP_DEFER; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = data_len; + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; + + cmd->cmd_len = 16; + cmd->cmnd[0] = WRITE_SAME_16; + if (unmap) cmd->cmnd[1] = 0x8; /* UNMAP */ - put_unaligned_be64(sector, &cmd->cmnd[2]); - put_unaligned_be32(nr_sectors, &cmd->cmnd[10]); + put_unaligned_be64(sector, &cmd->cmnd[2]); + put_unaligned_be32(nr_sectors, &cmd->cmnd[10]); - len = sdkp->device->sector_size; - break; + cmd->allowed = SD_MAX_RETRIES; + cmd->transfersize = data_len; + rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; + scsi_req(rq)->resid_len = data_len; - case SD_LBP_WS10: - case SD_LBP_ZERO: - cmd->cmd_len = 10; - cmd->cmnd[0] = WRITE_SAME; - if (sdkp->provisioning_mode == SD_LBP_WS10) - cmd->cmnd[1] = 0x8; /* UNMAP */ - put_unaligned_be32(sector, &cmd->cmnd[2]); - put_unaligned_be16(nr_sectors, &cmd->cmnd[7]); + return scsi_init_io(cmd); +} - len = sdkp->device->sector_size; - break; +static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) +{ + struct scsi_device *sdp = cmd->device; + struct request *rq = cmd->request; + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + u32 data_len = sdp->sector_size; - default: - ret = BLKPREP_INVALID; - goto out; - } + rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!rq->special_vec.bv_page) + return BLKPREP_DEFER; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = data_len; + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; - rq->timeout = SD_TIMEOUT; + cmd->cmd_len = 10; + cmd->cmnd[0] = WRITE_SAME; + if (unmap) + cmd->cmnd[1] = 0x8; /* UNMAP */ + put_unaligned_be32(sector, &cmd->cmnd[2]); + put_unaligned_be16(nr_sectors, &cmd->cmnd[7]); - cmd->transfersize = len; cmd->allowed = SD_MAX_RETRIES; + cmd->transfersize = data_len; + rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; + scsi_req(rq)->resid_len = data_len; - rq->special_vec.bv_page = page; - rq->special_vec.bv_offset = 0; - rq->special_vec.bv_len = len; + return scsi_init_io(cmd); +} - rq->rq_flags |= RQF_SPECIAL_PAYLOAD; - scsi_req(rq)->resid_len = len; +static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) +{ + struct request *rq = cmd->request; + struct scsi_device *sdp = cmd->device; + struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + + if (!(rq->cmd_flags & REQ_NOUNMAP)) { + switch (sdkp->zeroing_mode) { + case SD_ZERO_WS16_UNMAP: + return sd_setup_write_same16_cmnd(cmd, true); + case SD_ZERO_WS10_UNMAP: + return sd_setup_write_same10_cmnd(cmd, true); + } + } - ret = scsi_init_io(cmd); -out: - if (ret != BLKPREP_OK) - __free_page(page); - return ret; + if (sdp->no_write_same) + return BLKPREP_INVALID; + if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) + return sd_setup_write_same16_cmnd(cmd, false); + return sd_setup_write_same10_cmnd(cmd, false); } static void sd_config_write_same(struct scsi_disk *sdkp) @@ -816,9 +868,20 @@ static void sd_config_write_same(struct scsi_disk *sdkp) sdkp->max_ws_blocks = 0; } + if (sdkp->lbprz && sdkp->lbpws) + sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP; + else if (sdkp->lbprz && sdkp->lbpws10) + sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP; + else if (sdkp->max_ws_blocks) + sdkp->zeroing_mode = SD_ZERO_WS; + else + sdkp->zeroing_mode = SD_ZERO_WRITE; + out: blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks * (logical_block_size >> 9)); + blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks * + (logical_block_size >> 9)); } /** @@ -1155,7 +1218,20 @@ static int sd_init_command(struct scsi_cmnd *cmd) switch (req_op(rq)) { case REQ_OP_DISCARD: - return sd_setup_discard_cmnd(cmd); + switch (scsi_disk(rq->rq_disk)->provisioning_mode) { + case SD_LBP_UNMAP: + return sd_setup_unmap_cmnd(cmd); + case SD_LBP_WS16: + return sd_setup_write_same16_cmnd(cmd, true); + case SD_LBP_WS10: + return sd_setup_write_same10_cmnd(cmd, true); + case SD_LBP_ZERO: + return sd_setup_write_same10_cmnd(cmd, false); + default: + return BLKPREP_INVALID; + } + case REQ_OP_WRITE_ZEROES: + return sd_setup_write_zeroes_cmnd(cmd); case REQ_OP_WRITE_SAME: return sd_setup_write_same_cmnd(cmd); case REQ_OP_FLUSH: @@ -1795,6 +1871,7 @@ static int sd_done(struct scsi_cmnd *SCpnt) switch (req_op(req)) { case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: case REQ_OP_ZONE_RESET: if (!result) { @@ -2768,7 +2845,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) sd_config_discard(sdkp, SD_LBP_WS16); } else { /* LBP VPD page tells us what to use */ - if (sdkp->lbpu && sdkp->max_unmap_blocks && !sdkp->lbprz) + if (sdkp->lbpu && sdkp->max_unmap_blocks) sd_config_discard(sdkp, SD_LBP_UNMAP); else if (sdkp->lbpws) sd_config_discard(sdkp, SD_LBP_WS16); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 4dac35e96a75..a2c4b5c35379 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -59,6 +59,13 @@ enum { SD_LBP_DISABLE, /* Discard disabled due to failed cmd */ }; +enum { + SD_ZERO_WRITE = 0, /* Use WRITE(10/16) command */ + SD_ZERO_WS, /* Use WRITE SAME(10/16) command */ + SD_ZERO_WS16_UNMAP, /* Use WRITE SAME(16) with UNMAP */ + SD_ZERO_WS10_UNMAP, /* Use WRITE SAME(10) with UNMAP */ +}; + struct scsi_disk { struct scsi_driver *driver; /* always &sd_template */ struct scsi_device *device; @@ -89,6 +96,7 @@ struct scsi_disk { u8 write_prot; u8 protection_type;/* Data Integrity Field */ u8 provisioning_mode; + u8 zeroing_mode; unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */ diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 92620c8ea8ad..1994f7799fce 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -329,6 +329,7 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, switch (req_op(rq)) { case REQ_OP_WRITE: + case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: case REQ_OP_ZONE_RESET: diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 225abaad4d1c..504504beaa5e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1300,7 +1300,7 @@ sg_rq_end_io(struct request *rq, int uptodate) pr_info("%s: device detaching\n", __func__); sense = req->sense; - result = rq->errors; + result = req->result; resid = req->resid_len; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, @@ -1718,7 +1718,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) srp->rq = rq; rq->end_io_data = srp; - rq->retries = SG_DEFAULT_RETRIES; + req->retries = SG_DEFAULT_RETRIES; if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index e5ef78a6848e..1ea34d6f5437 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -480,7 +480,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); atomic64_inc(&STp->stats->write_cnt); - if (req->errors) { + if (scsi_req(req)->result) { atomic64_add(atomic_read(&STp->stats->last_write_size) - STp->buffer->cmdstat.residual, &STp->stats->write_byte_cnt); @@ -494,7 +494,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); atomic64_inc(&STp->stats->read_cnt); - if (req->errors) { + if (scsi_req(req)->result) { atomic64_add(atomic_read(&STp->stats->last_read_size) - STp->buffer->cmdstat.residual, &STp->stats->read_byte_cnt); @@ -518,7 +518,7 @@ static void st_scsi_execute_end(struct request *req, int uptodate) struct scsi_tape *STp = SRpnt->stp; struct bio *tmp; - STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors; + STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result; STp->buffer->cmdstat.residual = rq->resid_len; st_do_stats(STp, req); @@ -579,7 +579,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, memset(rq->cmd, 0, BLK_MAX_CDB); memcpy(rq->cmd, cmd, rq->cmd_len); req->timeout = timeout; - req->retries = retries; + rq->retries = retries; req->end_io_data = SRpnt; blk_execute_rq_nowait(req->q, NULL, req, 1, st_scsi_execute_end); diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h index 8886458748c1..a676bccabd43 100644 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ b/drivers/staging/lustre/lustre/include/lustre_disk.h @@ -133,13 +133,9 @@ struct lustre_sb_info { struct obd_export *lsi_osd_exp; char lsi_osd_type[16]; char lsi_fstype[16]; - struct backing_dev_info lsi_bdi; /* each client mountpoint needs - * own backing_dev_info - */ }; #define LSI_UMOUNT_FAILOVER 0x00200000 -#define LSI_BDI_INITIALIZED 0x00400000 #define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info)) #define s2lsi_nocast(sb) ((sb)->s_fs_info) diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index b229cbc7bb33..d483c44aafe5 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -863,15 +863,6 @@ void ll_lli_init(struct ll_inode_info *lli) mutex_init(&lli->lli_layout_mutex); } -static inline int ll_bdi_register(struct backing_dev_info *bdi) -{ - static atomic_t ll_bdi_num = ATOMIC_INIT(0); - - bdi->name = "lustre"; - return bdi_register(bdi, NULL, "lustre-%d", - atomic_inc_return(&ll_bdi_num)); -} - int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) { struct lustre_profile *lprof = NULL; @@ -881,6 +872,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) char *profilenm = get_profile_name(sb); struct config_llog_instance *cfg; int err; + static atomic_t ll_bdi_num = ATOMIC_INIT(0); CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); @@ -903,16 +895,11 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) if (err) goto out_free; - err = bdi_init(&lsi->lsi_bdi); - if (err) - goto out_free; - lsi->lsi_flags |= LSI_BDI_INITIALIZED; - lsi->lsi_bdi.capabilities = 0; - err = ll_bdi_register(&lsi->lsi_bdi); + err = super_setup_bdi_name(sb, "lustre-%d", + atomic_inc_return(&ll_bdi_num)); if (err) goto out_free; - sb->s_bdi = &lsi->lsi_bdi; /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */ sb->s_d_op = &ll_d_ops; @@ -1033,11 +1020,6 @@ void ll_put_super(struct super_block *sb) if (profilenm) class_del_profile(profilenm); - if (lsi->lsi_flags & LSI_BDI_INITIALIZED) { - bdi_destroy(&lsi->lsi_bdi); - lsi->lsi_flags &= ~LSI_BDI_INITIALIZED; - } - ll_free_sbi(sb); lsi->lsi_llsbi = NULL; diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 344e8448869c..5798810197ec 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -167,10 +167,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( struct iscsi_portal_group *tpg; struct iscsi_tpg_np *tpg_np; char *str, *str2, *ip_str, *port_str; - struct sockaddr_storage sockaddr; - struct sockaddr_in *sock_in; - struct sockaddr_in6 *sock_in6; - unsigned long port; + struct sockaddr_storage sockaddr = { }; int ret; char buf[MAX_PORTAL_LEN + 1]; @@ -182,21 +179,19 @@ static struct se_tpg_np *lio_target_call_addnptotpg( memset(buf, 0, MAX_PORTAL_LEN + 1); snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name); - memset(&sockaddr, 0, sizeof(struct sockaddr_storage)); - str = strstr(buf, "["); if (str) { - const char *end; - str2 = strstr(str, "]"); if (!str2) { pr_err("Unable to locate trailing \"]\"" " in IPv6 iSCSI network portal address\n"); return ERR_PTR(-EINVAL); } - str++; /* Skip over leading "[" */ + + ip_str = str + 1; /* Skip over leading "[" */ *str2 = '\0'; /* Terminate the unbracketed IPv6 address */ str2++; /* Skip over the \0 */ + port_str = strstr(str2, ":"); if (!port_str) { pr_err("Unable to locate \":port\"" @@ -205,23 +200,8 @@ static struct se_tpg_np *lio_target_call_addnptotpg( } *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - - ret = kstrtoul(port_str, 0, &port); - if (ret < 0) { - pr_err("kstrtoul() failed for port_str: %d\n", ret); - return ERR_PTR(ret); - } - sock_in6 = (struct sockaddr_in6 *)&sockaddr; - sock_in6->sin6_family = AF_INET6; - sock_in6->sin6_port = htons((unsigned short)port); - ret = in6_pton(str, -1, - (void *)&sock_in6->sin6_addr.in6_u, -1, &end); - if (ret <= 0) { - pr_err("in6_pton returned: %d\n", ret); - return ERR_PTR(-EINVAL); - } } else { - str = ip_str = &buf[0]; + ip_str = &buf[0]; port_str = strstr(ip_str, ":"); if (!port_str) { pr_err("Unable to locate \":port\"" @@ -230,17 +210,15 @@ static struct se_tpg_np *lio_target_call_addnptotpg( } *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ + } - ret = kstrtoul(port_str, 0, &port); - if (ret < 0) { - pr_err("kstrtoul() failed for port_str: %d\n", ret); - return ERR_PTR(ret); - } - sock_in = (struct sockaddr_in *)&sockaddr; - sock_in->sin_family = AF_INET; - sock_in->sin_port = htons((unsigned short)port); - sock_in->sin_addr.s_addr = in_aton(ip_str); + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, ip_str, + port_str, &sockaddr); + if (ret) { + pr_err("malformed ip/port passed: %s\n", name); + return ERR_PTR(ret); } + tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); ret = iscsit_get_tpg(tpg); if (ret < 0) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index c754ae33bf7b..d2f089cfa9ae 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -851,7 +851,7 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, attrib->unmap_granularity = q->limits.discard_granularity / block_size; attrib->unmap_granularity_alignment = q->limits.discard_alignment / block_size; - attrib->unmap_zeroes_data = q->limits.discard_zeroes_data; + attrib->unmap_zeroes_data = 0; return true; } EXPORT_SYMBOL(target_configure_unmap_from_queue); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 94cda7991e80..a93d94e68ab5 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1008,7 +1008,7 @@ pscsi_execute_cmd(struct se_cmd *cmd) req->timeout = PS_TIMEOUT_DISK; else req->timeout = PS_TIMEOUT_OTHER; - req->retries = PS_RETRY; + scsi_req(req)->retries = PS_RETRY; blk_execute_rq_nowait(pdv->pdv_sd->request_queue, NULL, req, (cmd->sam_task_attr == TCM_HEAD_TAG), @@ -1050,7 +1050,7 @@ static void pscsi_req_done(struct request *req, int uptodate) struct se_cmd *cmd = req->end_io_data; struct pscsi_plugin_task *pt = cmd->priv; - pt->pscsi_result = req->errors; + pt->pscsi_result = scsi_req(req)->result; pt->pscsi_resid = scsi_req(req)->resid_len; cmd->scsi_status = status_byte(pt->pscsi_result) << 1; |