diff options
Diffstat (limited to 'drivers')
202 files changed, 17852 insertions, 3360 deletions
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 59844ee149be..33e609f63585 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id, handler, max_entries); } -static int srat_mem_cnt; - -void __init early_parse_srat(void) +int __init acpi_numa_init(void) { + int cnt = 0; + /* * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= * SRAT cpu entries could have different order with that in MADT. @@ -295,24 +295,21 @@ void __init early_parse_srat(void) /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, 0); + acpi_parse_x2apic_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, 0); - srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, - acpi_parse_memory_affinity, - NR_NODE_MEMBLKS); + acpi_parse_processor_affinity, 0); + cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, + acpi_parse_memory_affinity, + NR_NODE_MEMBLKS); } -} -int __init acpi_numa_init(void) -{ /* SLIT: System Locality Information Table */ acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); acpi_numa_arch_fixup(); - if (srat_mem_cnt < 0) - return srat_mem_cnt; + if (cnt < 0) + return cnt; else if (!parsed_numa_memblks) return -ENOENT; return 0; diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index ab92785f54dc..093c43554963 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -130,7 +130,7 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) writel(value, ahb->regs + offset); } -#ifdef CONFIG_ARCH_TEGRA_3x_SOC +#ifdef CONFIG_TEGRA_IOMMU_SMMU static int tegra_ahb_match_by_smmu(struct device *dev, void *data) { struct tegra_ahb *ahb = dev_get_drvdata(dev); diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index e920cbe519fa..e507ab7df60b 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -62,3 +62,8 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK config ARM_ARCH_TIMER bool + +config CLKSRC_METAG_GENERIC + def_bool y if METAG + help + This option enables support for the Meta per-thread timers. diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 7d671b85a98e..4d8283aec5b5 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o +obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c new file mode 100644 index 000000000000..ade7513a11d1 --- /dev/null +++ b/drivers/clocksource/metag_generic.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2005-2013 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Support for Meta per-thread timers. + * + * Meta hardware threads have 2 timers. The background timer (TXTIMER) is used + * as a free-running time base (hz clocksource), and the interrupt timer + * (TXTIMERI) is used for the timer interrupt (clock event). Both counters + * traditionally count at approximately 1MHz. + */ + +#include <clocksource/metag_generic.h> +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/interrupt.h> + +#include <asm/clock.h> +#include <asm/hwthread.h> +#include <asm/core_reg.h> +#include <asm/metag_mem.h> +#include <asm/tbx.h> + +#define HARDWARE_FREQ 1000000 /* 1MHz */ +#define HARDWARE_DIV 1 /* divide by 1 = 1MHz clock */ +#define HARDWARE_TO_NS_SHIFT 10 /* convert ticks to ns */ + +static unsigned int hwtimer_freq = HARDWARE_FREQ; +static DEFINE_PER_CPU(struct clock_event_device, local_clockevent); +static DEFINE_PER_CPU(char [11], local_clockevent_name); + +static int metag_timer_set_next_event(unsigned long delta, + struct clock_event_device *dev) +{ + __core_reg_set(TXTIMERI, -delta); + return 0; +} + +static void metag_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + /* We should disable the IRQ here */ + break; + + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_UNUSED: + WARN_ON(1); + break; + }; +} + +static cycle_t metag_clocksource_read(struct clocksource *cs) +{ + return __core_reg_get(TXTIMER); +} + +static struct clocksource clocksource_metag = { + .name = "META", + .rating = 200, + .mask = CLOCKSOURCE_MASK(32), + .read = metag_clocksource_read, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static irqreturn_t metag_timer_interrupt(int irq, void *dummy) +{ + struct clock_event_device *evt = &__get_cpu_var(local_clockevent); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static struct irqaction metag_timer_irq = { + .name = "META core timer", + .handler = metag_timer_interrupt, + .flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU, +}; + +unsigned long long sched_clock(void) +{ + unsigned long long ticks = __core_reg_get(TXTIMER); + return ticks << HARDWARE_TO_NS_SHIFT; +} + +static void __cpuinit arch_timer_setup(unsigned int cpu) +{ + unsigned int txdivtime; + struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); + char *name = per_cpu(local_clockevent_name, cpu); + + txdivtime = __core_reg_get(TXDIVTIME); + + txdivtime &= ~TXDIVTIME_DIV_BITS; + txdivtime |= (HARDWARE_DIV & TXDIVTIME_DIV_BITS); + + __core_reg_set(TXDIVTIME, txdivtime); + + sprintf(name, "META %d", cpu); + clk->name = name; + clk->features = CLOCK_EVT_FEAT_ONESHOT, + + clk->rating = 200, + clk->shift = 12, + clk->irq = tbisig_map(TBID_SIGNUM_TRT), + clk->set_mode = metag_timer_set_mode, + clk->set_next_event = metag_timer_set_next_event, + + clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift); + clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk); + clk->min_delta_ns = clockevent_delta2ns(0xf, clk); + clk->cpumask = cpumask_of(cpu); + + clockevents_register_device(clk); + + /* + * For all non-boot CPUs we need to synchronize our free + * running clock (TXTIMER) with the boot CPU's clock. + * + * While this won't be accurate, it should be close enough. + */ + if (cpu) { + unsigned int thread0 = cpu_2_hwthread_id[0]; + unsigned long val; + + val = core_reg_read(TXUCT_ID, TXTIMER_REGNUM, thread0); + __core_reg_set(TXTIMER, val); + } +} + +static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + arch_timer_setup(cpu); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata arch_timer_cpu_nb = { + .notifier_call = arch_timer_cpu_notify, +}; + +int __init metag_generic_timer_init(void) +{ + /* + * On Meta 2 SoCs, the actual frequency of the timer is based on the + * Meta core clock speed divided by an integer, so it is only + * approximately 1MHz. Calculating the real frequency here drastically + * reduces clock skew on these SoCs. + */ +#ifdef CONFIG_METAG_META21 + hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1); +#endif + clocksource_register_hz(&clocksource_metag, hwtimer_freq); + + setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq); + + /* Configure timer on boot CPU */ + arch_timer_setup(smp_processor_id()); + + /* Hook cpu boot to configure other CPU's timers */ + register_cpu_notifier(&arch_timer_cpu_nb); + + return 0; +} diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 51c3ea2ed41a..c599558faeda 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -20,6 +20,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/of.h> +#include <linux/of_dma.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -171,7 +172,13 @@ static void dwc_initialize(struct dw_dma_chan *dwc) if (dwc->initialized == true) return; - if (dws) { + if (dws && dws->cfg_hi == ~0 && dws->cfg_lo == ~0) { + /* autoconfigure based on request line from DT */ + if (dwc->direction == DMA_MEM_TO_DEV) + cfghi = DWC_CFGH_DST_PER(dwc->request_line); + else if (dwc->direction == DMA_DEV_TO_MEM) + cfghi = DWC_CFGH_SRC_PER(dwc->request_line); + } else if (dws) { /* * We need controller-specific data to set up slave * transfers. @@ -1226,49 +1233,64 @@ static void dwc_free_chan_resources(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } -bool dw_dma_generic_filter(struct dma_chan *chan, void *param) +struct dw_dma_filter_args { + struct dw_dma *dw; + unsigned int req; + unsigned int src; + unsigned int dst; +}; + +static bool dw_dma_generic_filter(struct dma_chan *chan, void *param) { + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - static struct dw_dma *last_dw; - static char *last_bus_id; - int i = -1; + struct dw_dma_filter_args *fargs = param; + struct dw_dma_slave *dws = &dwc->slave; - /* - * dmaengine framework calls this routine for all channels of all dma - * controller, until true is returned. If 'param' bus_id is not - * registered with a dma controller (dw), then there is no need of - * running below function for all channels of dw. - * - * This block of code does this by saving the parameters of last - * failure. If dw and param are same, i.e. trying on same dw with - * different channel, return false. - */ - if ((last_dw == dw) && (last_bus_id == param)) - return false; - /* - * Return true: - * - If dw_dma's platform data is not filled with slave info, then all - * dma controllers are fine for transfer. - * - Or if param is NULL - */ - if (!dw->sd || !param) - return true; + /* ensure the device matches our channel */ + if (chan->device != &fargs->dw->dma) + return false; - while (++i < dw->sd_count) { - if (!strcmp(dw->sd[i].bus_id, param)) { - chan->private = &dw->sd[i]; - last_dw = NULL; - last_bus_id = NULL; + dws->dma_dev = dw->dma.dev; + dws->cfg_hi = ~0; + dws->cfg_lo = ~0; + dws->src_master = fargs->src; + dws->dst_master = fargs->dst; - return true; - } - } + dwc->request_line = fargs->req; - last_dw = dw; - last_bus_id = param; - return false; + chan->private = dws; + + return true; +} + +static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dw_dma *dw = ofdma->of_dma_data; + struct dw_dma_filter_args fargs = { + .dw = dw, + }; + dma_cap_mask_t cap; + + if (dma_spec->args_count != 3) + return NULL; + + fargs.req = be32_to_cpup(dma_spec->args+0); + fargs.src = be32_to_cpup(dma_spec->args+1); + fargs.dst = be32_to_cpup(dma_spec->args+2); + + if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS || + fargs.src >= dw->nr_masters || + fargs.dst >= dw->nr_masters)) + return NULL; + + dma_cap_zero(cap); + dma_cap_set(DMA_SLAVE, cap); + + /* TODO: there should be a simpler way to do this */ + return dma_request_channel(cap, dw_dma_generic_filter, &fargs); } -EXPORT_SYMBOL(dw_dma_generic_filter); /* --------------------- Cyclic DMA API extensions -------------------- */ @@ -1554,9 +1576,8 @@ static void dw_dma_off(struct dw_dma *dw) static struct dw_dma_platform_data * dw_dma_parse_dt(struct platform_device *pdev) { - struct device_node *sn, *cn, *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node; struct dw_dma_platform_data *pdata; - struct dw_dma_slave *sd; u32 tmp, arr[4]; if (!np) { @@ -1568,7 +1589,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!pdata) return NULL; - if (of_property_read_u32(np, "nr_channels", &pdata->nr_channels)) + if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels)) return NULL; if (of_property_read_bool(np, "is_private")) @@ -1583,7 +1604,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!of_property_read_u32(np, "block_size", &tmp)) pdata->block_size = tmp; - if (!of_property_read_u32(np, "nr_masters", &tmp)) { + if (!of_property_read_u32(np, "dma-masters", &tmp)) { if (tmp > 4) return NULL; @@ -1595,36 +1616,6 @@ dw_dma_parse_dt(struct platform_device *pdev) for (tmp = 0; tmp < pdata->nr_masters; tmp++) pdata->data_width[tmp] = arr[tmp]; - /* parse slave data */ - sn = of_find_node_by_name(np, "slave_info"); - if (!sn) - return pdata; - - /* calculate number of slaves */ - tmp = of_get_child_count(sn); - if (!tmp) - return NULL; - - sd = devm_kzalloc(&pdev->dev, sizeof(*sd) * tmp, GFP_KERNEL); - if (!sd) - return NULL; - - pdata->sd = sd; - pdata->sd_count = tmp; - - for_each_child_of_node(sn, cn) { - sd->dma_dev = &pdev->dev; - of_property_read_string(cn, "bus_id", &sd->bus_id); - of_property_read_u32(cn, "cfg_hi", &sd->cfg_hi); - of_property_read_u32(cn, "cfg_lo", &sd->cfg_lo); - if (!of_property_read_u32(cn, "src_master", &tmp)) - sd->src_master = tmp; - - if (!of_property_read_u32(cn, "dst_master", &tmp)) - sd->dst_master = tmp; - sd++; - } - return pdata; } #else @@ -1705,8 +1696,6 @@ static int dw_probe(struct platform_device *pdev) clk_prepare_enable(dw->clk); dw->regs = regs; - dw->sd = pdata->sd; - dw->sd_count = pdata->sd_count; /* get hardware configuration parameters */ if (autocfg) { @@ -1837,6 +1826,14 @@ static int dw_probe(struct platform_device *pdev) dma_async_device_register(&dw->dma); + if (pdev->dev.of_node) { + err = of_dma_controller_register(pdev->dev.of_node, + dw_dma_xlate, dw); + if (err && err != -ENODEV) + dev_err(&pdev->dev, + "could not register of_dma_controller\n"); + } + return 0; } @@ -1845,6 +1842,8 @@ static int dw_remove(struct platform_device *pdev) struct dw_dma *dw = platform_get_drvdata(pdev); struct dw_dma_chan *dwc, *_dwc; + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); dw_dma_off(dw); dma_async_device_unregister(&dw->dma); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 88dd8eb31957..cf0ce5c77d60 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -13,6 +13,7 @@ #include <linux/dw_dmac.h> #define DW_DMA_MAX_NR_CHANNELS 8 +#define DW_DMA_MAX_NR_REQUESTS 16 /* flow controller */ enum dw_dma_fc { @@ -211,6 +212,8 @@ struct dw_dma_chan { /* hardware configuration */ unsigned int block_size; bool nollp; + unsigned int request_line; + struct dw_dma_slave slave; /* configuration passed via DMA_SLAVE_CONFIG */ struct dma_slave_config dma_sconfig; @@ -239,10 +242,6 @@ struct dw_dma { struct tasklet_struct tasklet; struct clk *clk; - /* slave information */ - struct dw_dma_slave *sd; - unsigned int sd_count; - u8 all_chan_mask; /* hardware configuration */ diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c index 2d58f939d27f..833dd1afbf46 100644 --- a/drivers/hsi/hsi.c +++ b/drivers/hsi/hsi.c @@ -420,7 +420,7 @@ static int hsi_event_notifier_call(struct notifier_block *nb, /** * hsi_register_port_event - Register a client to receive port events * @cl: HSI client that wants to receive port events - * @cb: Event handler callback + * @handler: Event handler callback * * Clients should register a callback to be able to receive * events from the ports. Registration should happen after diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 53a8600162a5..ff1be167eb04 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -318,7 +318,7 @@ static u32 get_vp_index(uuid_le *type_guid) return 0; } cur_cpu = (++next_vp % max_cpus); - return 0; + return cur_cpu; } /* diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 1c5481da6e4a..731158910c1e 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -272,7 +272,7 @@ u16 hv_signal_event(void *con_id) * retrieve the initialized message and event pages. Otherwise, we create and * initialize the message and event pages. */ -void hv_synic_init(void *irqarg) +void hv_synic_init(void *arg) { u64 version; union hv_synic_simp simp; @@ -281,7 +281,6 @@ void hv_synic_init(void *irqarg) union hv_synic_scontrol sctrl; u64 vp_index; - u32 irq_vector = *((u32 *)(irqarg)); int cpu = smp_processor_id(); if (!hv_context.hypercall_page) @@ -335,7 +334,7 @@ void hv_synic_init(void *irqarg) rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.as_uint64 = 0; - shared_sint.vector = irq_vector; /* HV_SHARED_SINT_IDT_VECTOR + 0x20; */ + shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR; shared_sint.masked = false; shared_sint.auto_eoi = true; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index cf19dfa5ead1..bf421e0efa1e 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -36,6 +36,7 @@ #include <linux/kernel_stat.h> #include <asm/hyperv.h> #include <asm/hypervisor.h> +#include <asm/mshyperv.h> #include "hyperv_vmbus.h" @@ -528,7 +529,6 @@ static void vmbus_flow_handler(unsigned int irq, struct irq_desc *desc) static int vmbus_bus_init(int irq) { int ret; - unsigned int vector; /* Hypervisor initialization...setup hypercall page..etc */ ret = hv_init(); @@ -558,13 +558,16 @@ static int vmbus_bus_init(int irq) */ irq_set_handler(irq, vmbus_flow_handler); - vector = IRQ0_VECTOR + irq; + /* + * Register our interrupt handler. + */ + hv_register_vmbus_handler(irq, vmbus_isr); /* - * Notify the hypervisor of our irq and + * Initialize the per-cpu interrupt state and * connect to the host. */ - on_each_cpu(hv_synic_init, (void *)&vector, 1); + on_each_cpu(hv_synic_init, NULL, 1); ret = vmbus_connect(); if (ret) goto err_irq; diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index e65fbf2cdf71..98e3b87bdf1b 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -2,6 +2,8 @@ obj-$(CONFIG_IRQCHIP) += irqchip.o obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o +obj-$(CONFIG_METAG) += irq-metag-ext.o +obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c new file mode 100644 index 000000000000..92c41ab4dbfd --- /dev/null +++ b/drivers/irqchip/irq-metag-ext.c @@ -0,0 +1,868 @@ +/* + * Meta External interrupt code. + * + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * External interrupts on Meta are configured at two-levels, in the CPU core and + * in the external trigger block. Interrupts from SoC peripherals are + * multiplexed onto a single Meta CPU "trigger" - traditionally it has always + * been trigger 2 (TR2). For info on how de-multiplexing happens check out + * meta_intc_irq_demux(). + */ + +#include <linux/interrupt.h> +#include <linux/irqchip/metag-ext.h> +#include <linux/irqdomain.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> + +#include <asm/irq.h> +#include <asm/hwthread.h> + +#define HWSTAT_STRIDE 8 +#define HWVEC_BLK_STRIDE 0x1000 + +/** + * struct meta_intc_priv - private meta external interrupt data + * @nr_banks: Number of interrupt banks + * @domain: IRQ domain for all banks of external IRQs + * @unmasked: Record of unmasked IRQs + * @levels_altered: Record of altered level bits + */ +struct meta_intc_priv { + unsigned int nr_banks; + struct irq_domain *domain; + + unsigned long unmasked[4]; + +#ifdef CONFIG_METAG_SUSPEND_MEM + unsigned long levels_altered[4]; +#endif +}; + +/* Private data for the one and only external interrupt controller */ +static struct meta_intc_priv meta_intc_priv; + +/** + * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bit offset into the IRQ's bank registers + */ +static unsigned int meta_intc_offset(irq_hw_number_t hw) +{ + return hw & 0x1f; +} + +/** + * meta_intc_bank() - Get the bank number of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bank number indicating which register the IRQ's bits are + */ +static unsigned int meta_intc_bank(irq_hw_number_t hw) +{ + return hw >> 5; +} + +/** + * meta_intc_stat_addr() - Get the address of a HWSTATEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWSTATEXT register containing the status bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWSTATEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_level_addr() - Get the address of a HWLEVELEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWLEVELEXT register containing the sense bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_level_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWLEVELEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_mask_addr() - Get the address of a HWMASKEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWMASKEXT register containing the mask bit for the + * specified hardware IRQ number + */ +static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWMASKEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_vec_addr() - Get the vector address of a hardware interrupt + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWVECEXT register controlling the core trigger to + * vector the IRQ onto + */ +static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWVEC0EXT + + HWVEC_BLK_STRIDE * meta_intc_bank(hw) + + HWVECnEXT_STRIDE * meta_intc_offset(hw)); +} + +/** + * meta_intc_startup_irq() - set up an external irq + * @data: data for the external irq to start up + * + * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and + * unmask irq, both using the appropriate callbacks. + */ +static unsigned int meta_intc_startup_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + int thread = hard_processor_id(); + + /* Perform any necessary acking. */ + if (data->chip->irq_ack) + data->chip->irq_ack(data); + + /* Wire up this interrupt to the core with HWVECxEXT. */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Perform any necessary unmasking. */ + data->chip->irq_unmask(data); + + return 0; +} + +/** + * meta_intc_shutdown_irq() - turn off an external irq + * @data: data for the external irq to turn off + * + * Mask irq using the appropriate callback and stop muxing it onto TR2. + */ +static void meta_intc_shutdown_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + /* Mask the IRQ */ + data->chip->irq_mask(data); + + /* + * Disable the IRQ at the core by removing the interrupt from + * the HW vector mapping. + */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_ack_irq() - acknowledge an external irq + * @data: data for the external irq to ack + * + * Clear down an edge interrupt in the status register. + */ +static void meta_intc_ack_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + + /* Ack the int, if it is still 'on'. + * NOTE - this only works for edge triggered interrupts. + */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * record_irq_is_masked() - record the IRQ masked so it doesn't get handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is masked (by whichever + * callback is used). It records the IRQ masked so that it doesn't get handled + * if it still shows up in the status register. + */ +static void record_irq_is_masked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/** + * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is unmasked (by whichever + * callback is used). It records the IRQ unmasked so that it gets handled if it + * shows up in the status register. + */ +static void record_irq_is_unmasked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/* + * For use by wrapper IRQ drivers + */ + +/** + * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers + * @data: data for the external irq being masked + * + * This should be called by any wrapper IRQ driver mask functions. it doesn't do + * any masking but records the IRQ as masked so that the core code knows the + * mask has taken place. It is the callers responsibility to ensure that the IRQ + * won't trigger an interrupt to the core. + */ +void meta_intc_mask_irq_simple(struct irq_data *data) +{ + record_irq_is_masked(data); +} + +/** + * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers + * @data: data for the external irq being unmasked + * + * This should be called by any wrapper IRQ driver unmask functions. it doesn't + * do any unmasking but records the IRQ as unmasked so that the core code knows + * the unmask has taken place. It is the callers responsibility to ensure that + * the IRQ can now trigger an interrupt to the core. + */ +void meta_intc_unmask_irq_simple(struct irq_data *data) +{ + record_irq_is_unmasked(data); +} + + +/** + * meta_intc_mask_irq() - mask an external irq using HWMASKEXT + * @data: data for the external irq to mask + * + * This is a default implementation of a mask function which makes use of the + * HWMASKEXT registers available in newer versions. + * + * Earlier versions without these registers should use SoC level IRQ masking + * which call the meta_intc_*_simple() functions above, or if that isn't + * available should use the fallback meta_intc_*_nomask() functions below. + */ +static void meta_intc_mask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_masked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) & ~bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT + * @data: data for the external irq to unmask + * + * This is a default implementation of an unmask function which makes use of the + * HWMASKEXT registers available on new versions. It should be paired with + * meta_intc_mask_irq() above. + */ +static void meta_intc_unmask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_unmasked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) | bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring + * @data: data for the external irq to mask + * + * This is the version of the mask function for older versions which don't have + * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is + * unvectored from the core and retriggered if necessary later. + */ +static void meta_intc_mask_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + record_irq_is_masked(data); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to edge interrupts. + */ +static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(stat_addr) & bit) { + metag_out32(bit, stat_addr); + while (!(metag_in32(stat_addr) & bit)) + metag_out32(bit, stat_addr); + } +} + +/** + * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to level interrupts. + */ +static void meta_intc_unmask_level_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Re-trigger interrupt */ + /* Writing a 1 triggers interrupt */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * meta_intc_irq_set_type() - set the type of an external irq + * @data: data for the external irq to set the type of + * @flow_type: new irq flow type + * + * Set the flow type of an external interrupt. This updates the irq chip and irq + * handler depending on whether the irq is edge or level sensitive (the polarity + * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows + * when to trigger. + */ +static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type) +{ +#ifdef CONFIG_METAG_SUSPEND_MEM + struct meta_intc_priv *priv = &meta_intc_priv; +#endif + unsigned int irq = data->irq; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + unsigned long flags; + unsigned int level; + + /* update the chip/handler */ + if (flow_type & IRQ_TYPE_LEVEL_MASK) + __irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip, + handle_level_irq, NULL); + else + __irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip, + handle_edge_irq, NULL); + + /* and clear/set the bit in HWLEVELEXT */ + __global_lock2(flags); + level = metag_in32(level_addr); + if (flow_type & IRQ_TYPE_LEVEL_MASK) + level |= bit; + else + level &= ~bit; + metag_out32(level, level_addr); +#ifdef CONFIG_METAG_SUSPEND_MEM + priv->levels_altered[meta_intc_bank(hw)] |= bit; +#endif + __global_unlock2(flags); + + return 0; +} + +/** + * meta_intc_irq_demux() - external irq de-multiplexer + * @irq: the virtual interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is + * this function's job to demux this irq and figure out exactly which external + * irq needs servicing. + * + * Whilst using TR2 to detect external interrupts is a software convention it is + * (hopefully) unlikely to change. + */ +static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw; + unsigned int bank, irq_no, status; + void __iomem *stat_addr = meta_intc_stat_addr(0); + + /* + * Locate which interrupt has caused our handler to run. + */ + for (bank = 0; bank < priv->nr_banks; ++bank) { + /* Which interrupts are currently pending in this bank? */ +recalculate: + status = metag_in32(stat_addr) & priv->unmasked[bank]; + + for (hw = bank*32; status; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual + * Linux IRQ number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off external interrupts that are + * registered to be handled by the kernel. + * Other external interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } + stat_addr += HWSTAT_STRIDE; + } +} + +#ifdef CONFIG_SMP +/** + * meta_intc_set_affinity() - set the affinity for an interrupt + * @data: data for the external irq to set the affinity of + * @cpumask: cpu mask representing cpus which can handle the interrupt + * @force: whether to force (ignored) + * + * Revector the specified external irq onto a specific cpu's TR2 trigger, so + * that that cpu tends to be the one who handles it. + */ +static int meta_intc_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int cpu, thread; + + /* + * Wire up this interrupt from HWVECxEXT to the Meta core. + * + * Note that we can't wire up HWVECxEXT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + return 0; +} +#else +#define meta_intc_set_affinity NULL +#endif + +#ifdef CONFIG_PM_SLEEP +#define META_INTC_CHIP_FLAGS (IRQCHIP_MASK_ON_SUSPEND \ + | IRQCHIP_SKIP_SET_WAKE) +#else +#define META_INTC_CHIP_FLAGS 0 +#endif + +/* public edge/level irq chips which SoCs can override */ + +struct irq_chip meta_intc_edge_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_ack = meta_intc_ack_irq, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +struct irq_chip meta_intc_level_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +/** + * meta_intc_map() - map an external irq + * @d: irq domain of external trigger block + * @irq: virtual irq number + * @hw: hardware irq number within external trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured, using the HWLEVELEXT registers to determine + * edge/level flow type. These registers will have been set when the irq type is + * set (or set to a default at init time). + */ +static int meta_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + + /* Go by the current sense in the HWLEVELEXT register */ + if (metag_in32(level_addr) & bit) + irq_set_chip_and_handler(irq, &meta_intc_level_chip, + handle_level_irq); + else + irq_set_chip_and_handler(irq, &meta_intc_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops meta_intc_domain_ops = { + .map = meta_intc_map, + .xlate = irq_domain_xlate_twocell, +}; + +#ifdef CONFIG_METAG_SUSPEND_MEM + +/** + * struct meta_intc_context - suspend context + * @levels: State of HWLEVELEXT registers + * @masks: State of HWMASKEXT registers + * @vectors: State of HWVECEXT registers + * @txvecint: State of TxVECINT registers + * + * This structure stores the IRQ state across suspend. + */ +struct meta_intc_context { + u32 levels[4]; + u32 masks[4]; + u8 vectors[4*32]; + + u8 txvecint[4][4]; +}; + +/* suspend context */ +static struct meta_intc_context *meta_intc_context; + +/** + * meta_intc_suspend() - store irq state + * + * To avoid interfering with other threads we only save the IRQ state of IRQs in + * use by Linux. + */ +static int meta_intc_suspend(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit; + + context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (!context) + return -ENOMEM; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* save mapped irqs which are enabled or have actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* save trigger vector */ + context->vectors[hw] = metag_in32(vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + /* save level state if any IRQ levels altered */ + if (priv->levels_altered[bank]) + context->levels[bank] = metag_in32(level_addr); + /* save mask state if any IRQs in use */ + if (mask) + context->masks[bank] = metag_in32(mask_addr); + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* save trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) + for (j = 0; j < 4; ++j) + context->txvecint[i][j] = metag_in32(T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + __global_unlock2(flags); + + meta_intc_context = context; + return 0; +} + +/** + * meta_intc_resume() - restore saved irq state + * + * Restore the saved IRQ state and drop it. + */ +static void meta_intc_resume(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context = meta_intc_context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit, tmp; + + meta_intc_context = NULL; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* restore mapped irqs, enabled or with actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* restore trigger vector */ + metag_out32(context->vectors[hw], vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + if (mask) { + /* restore mask state */ + __global_lock2(flags); + tmp = metag_in32(mask_addr); + tmp = (tmp & ~mask) | (context->masks[bank] & mask); + metag_out32(tmp, mask_addr); + __global_unlock2(flags); + } + + mask = priv->levels_altered[bank]; + if (mask) { + /* restore level state */ + __global_lock2(flags); + tmp = metag_in32(level_addr); + tmp = (tmp & ~mask) | (context->levels[bank] & mask); + metag_out32(tmp, level_addr); + __global_unlock2(flags); + } + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* restore trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + metag_out32(context->txvecint[i][j], + T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + } + } + __global_unlock2(flags); + + kfree(context); +} + +static struct syscore_ops meta_intc_syscore_ops = { + .suspend = meta_intc_suspend, + .resume = meta_intc_resume, +}; + +static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv) +{ + register_syscore_ops(&meta_intc_syscore_ops); +} +#else +#define meta_intc_init_syscore_ops(priv) do {} while (0) +#endif + +/** + * meta_intc_init_cpu() - register with a Meta cpu + * @priv: private interrupt controller data + * @cpu: the CPU to register on + * + * Configure @cpu's TR2 irq so that we can demux external irqs. + */ +static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR2(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_chained_handler(irq, meta_intc_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * meta_intc_no_mask() - indicate lack of HWMASKEXT registers + * + * Called from SoC code (or init code below) to dynamically indicate the lack of + * HWMASKEXT registers (for example depending on some SoC revision register). + * This alters the irq mask and unmask callbacks to use the fallback + * unvectoring/retriggering technique instead of using HWMASKEXT registers. + */ +void __init meta_intc_no_mask(void) +{ + meta_intc_edge_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_edge_chip.irq_unmask = meta_intc_unmask_edge_irq_nomask; + meta_intc_level_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_level_chip.irq_unmask = meta_intc_unmask_level_irq_nomask; +} + +/** + * init_external_IRQ() - initialise the external irq controller + * + * Set up the external irq controller using device tree properties. This is + * called from init_IRQ(). + */ +int __init init_external_IRQ(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + struct device_node *node; + int ret, cpu; + u32 val; + bool no_masks = false; + + node = of_find_compatible_node(NULL, NULL, "img,meta-intc"); + if (!node) + return -ENOENT; + + /* Get number of banks */ + ret = of_property_read_u32(node, "num-banks", &val); + if (ret) { + pr_err("meta-intc: No num-banks property found\n"); + return ret; + } + if (val < 1 || val > 4) { + pr_err("meta-intc: num-banks (%u) out of range\n", val); + return -EINVAL; + } + priv->nr_banks = val; + + /* Are any mask registers present? */ + if (of_get_property(node, "no-mask", NULL)) + no_masks = true; + + /* No HWMASKEXT registers present? */ + if (no_masks) + meta_intc_no_mask(); + + /* Set up an IRQ domain */ + /* + * This is a legacy IRQ domain for now until all the platform setup code + * has been converted to devicetree. + */ + priv->domain = irq_domain_add_linear(node, priv->nr_banks*32, + &meta_intc_domain_ops, priv); + if (unlikely(!priv->domain)) { + pr_err("meta-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR2 for all cpus. */ + for_each_possible_cpu(cpu) + meta_intc_init_cpu(priv, cpu); + + /* Set up system suspend/resume callbacks */ + meta_intc_init_syscore_ops(priv); + + pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n", + priv->nr_banks*32); + + return 0; +} diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c new file mode 100644 index 000000000000..8e94d7a3b20d --- /dev/null +++ b/drivers/irqchip/irq-metag.c @@ -0,0 +1,343 @@ +/* + * Meta internal (HWSTATMETA) interrupt code. + * + * Copyright (C) 2011-2012 Imagination Technologies Ltd. + * + * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c + * The code base could be generalised/merged as a lot of the functionality is + * similar. Until this is done, we try to keep the code simple here. + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irqdomain.h> + +#include <asm/irq.h> +#include <asm/hwthread.h> + +#define PERF0VECINT 0x04820580 +#define PERF1VECINT 0x04820588 +#define PERF0TRIG_OFFSET 16 +#define PERF1TRIG_OFFSET 17 + +/** + * struct metag_internal_irq_priv - private meta internal interrupt data + * @domain: IRQ domain for all internal Meta IRQs (HWSTATMETA) + * @unmasked: Record of unmasked IRQs + */ +struct metag_internal_irq_priv { + struct irq_domain *domain; + + unsigned long unmasked; +}; + +/* Private data for the one and only internal interrupt controller */ +static struct metag_internal_irq_priv metag_internal_irq_priv; + +static unsigned int metag_internal_irq_startup(struct irq_data *data); +static void metag_internal_irq_shutdown(struct irq_data *data); +static void metag_internal_irq_ack(struct irq_data *data); +static void metag_internal_irq_mask(struct irq_data *data); +static void metag_internal_irq_unmask(struct irq_data *data); +#ifdef CONFIG_SMP +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force); +#endif + +static struct irq_chip internal_irq_edge_chip = { + .name = "HWSTATMETA-IRQ", + .irq_startup = metag_internal_irq_startup, + .irq_shutdown = metag_internal_irq_shutdown, + .irq_ack = metag_internal_irq_ack, + .irq_mask = metag_internal_irq_mask, + .irq_unmask = metag_internal_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = metag_internal_irq_set_affinity, +#endif +}; + +/* + * metag_hwvec_addr - get the address of *VECINT regs of irq + * + * This function is a table of supported triggers on HWSTATMETA + * Could do with a structure, but better keep it simple. Changes + * in this code should be rare. + */ +static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw) +{ + void __iomem *addr; + + switch (hw) { + case PERF0TRIG_OFFSET: + addr = (void __iomem *)PERF0VECINT; + break; + case PERF1TRIG_OFFSET: + addr = (void __iomem *)PERF1VECINT; + break; + default: + addr = NULL; + break; + } + return addr; +} + +/* + * metag_internal_startup - setup an internal irq + * @irq: the irq to startup + * + * Multiplex interrupts for @irq onto TR1. Clear any pending + * interrupts. + */ +static unsigned int metag_internal_irq_startup(struct irq_data *data) +{ + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); + + /* Enable the interrupt by unmasking it */ + metag_internal_irq_unmask(data); + + return 0; +} + +/* + * metag_internal_irq_shutdown - turn off the irq + * @irq: the irq number to turn off + * + * Mask @irq and clear any pending interrupts. + * Stop muxing @irq onto TR1. + */ +static void metag_internal_irq_shutdown(struct irq_data *data) +{ + /* Disable the IRQ at the core by masking it. */ + metag_internal_irq_mask(data); + + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); +} + +/* + * metag_internal_irq_ack - acknowledge irq + * @irq: the irq to ack + */ +static void metag_internal_irq_ack(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + + if (metag_in32(HWSTATMETA) & bit) + metag_out32(bit, HWSTATMETA); +} + +/** + * metag_internal_irq_mask() - mask an internal irq by unvectoring + * @data: data for the internal irq to mask + * + * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core + * and retriggered if necessary later. + */ +static void metag_internal_irq_mask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = metag_hwvec_addr(hw); + + clear_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the internal irq to unmask + * + * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the + * core and retriggered if necessary. + */ +static void metag_internal_irq_unmask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + void __iomem *vec_addr = metag_hwvec_addr(hw); + unsigned int thread = hard_processor_id(); + + set_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(HWSTATMETA) & bit) { + metag_out32(bit, HWSTATMETA); + while (!(metag_in32(HWSTATMETA) & bit)) + metag_out32(bit, HWSTATMETA); + } +} + +#ifdef CONFIG_SMP +/* + * metag_internal_irq_set_affinity - set the affinity for an interrupt + */ +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + unsigned int cpu, thread; + irq_hw_number_t hw = data->hwirq; + /* + * Wire up this interrupt from *VECINT to the Meta core. + * + * Note that we can't wire up *VECINT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), + metag_hwvec_addr(hw)); + + return 0; +} +#endif + +/* + * metag_internal_irq_demux - irq de-multiplexer + * @irq: the interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR1 when an interrupt has + * occurred. It is this function's job to demux this irq and + * figure out exactly which trigger needs servicing. + */ +static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc); + irq_hw_number_t hw; + unsigned int irq_no; + u32 status; + +recalculate: + status = metag_in32(HWSTATMETA) & priv->unmasked; + + for (hw = 0; status != 0; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual Linux IRQ + * number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off interrupts that are + * registered to be handled by the kernel. + * Other interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } +} + +/** + * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number. + * @hw: Number of the internal IRQ. Must be in range. + * + * Returns: The virtual IRQ number of the Meta internal IRQ specified by + * @hw. + */ +int internal_irq_map(unsigned int hw) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + if (!priv->domain) + return -ENODEV; + return irq_create_mapping(priv->domain, hw); +} + +/** + * metag_internal_irq_init_cpu - regsister with the Meta cpu + * @cpu: the CPU to register on + * + * Configure @cpu's TR1 irq so that we can demux irqs. + */ +static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv, + int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR1(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_handler_data(irq, priv); + irq_set_chained_handler(irq, metag_internal_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * metag_internal_intc_map() - map an internal irq + * @d: irq domain of internal trigger block + * @irq: virtual irq number + * @hw: hardware irq number within internal trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured. + */ +static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* only register interrupt if it is mapped */ + if (!metag_hwvec_addr(hw)) + return -EINVAL; + + irq_set_chip_and_handler(irq, &internal_irq_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops metag_internal_intc_domain_ops = { + .map = metag_internal_intc_map, +}; + +/** + * metag_internal_irq_register - register internal IRQs + * + * Register the irq chip and handler function for all internal IRQs + */ +int __init init_internal_IRQ(void) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + unsigned int cpu; + + /* Set up an IRQ domain */ + priv->domain = irq_domain_add_linear(NULL, 32, + &metag_internal_intc_domain_ops, + priv); + if (unlikely(!priv->domain)) { + pr_err("meta-internal-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR1 for all cpus. */ + for_each_possible_cpu(cpu) + metag_internal_irq_init_cpu(priv, cpu); + + return 0; +}; diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 91a02eeeb319..e30b490055aa 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -210,7 +210,7 @@ config DM_DEBUG config DM_BUFIO tristate - depends on BLK_DEV_DM && EXPERIMENTAL + depends on BLK_DEV_DM ---help--- This interface allows you to do buffered I/O on a device and acts as a cache, holding recently-read blocks in memory and performing @@ -218,7 +218,7 @@ config DM_BUFIO config DM_BIO_PRISON tristate - depends on BLK_DEV_DM && EXPERIMENTAL + depends on BLK_DEV_DM ---help--- Some bio locking schemes used by other device-mapper targets including thin provisioning. @@ -251,8 +251,8 @@ config DM_SNAPSHOT Allow volume managers to take writable snapshots of a device. config DM_THIN_PROVISIONING - tristate "Thin provisioning target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "Thin provisioning target" + depends on BLK_DEV_DM select DM_PERSISTENT_DATA select DM_BIO_PRISON ---help--- @@ -268,6 +268,37 @@ config DM_DEBUG_BLOCK_STACK_TRACING If unsure, say N. +config DM_CACHE + tristate "Cache target (EXPERIMENTAL)" + depends on BLK_DEV_DM + default n + select DM_PERSISTENT_DATA + select DM_BIO_PRISON + ---help--- + dm-cache attempts to improve performance of a block device by + moving frequently used data to a smaller, higher performance + device. Different 'policy' plugins can be used to change the + algorithms used to select which blocks are promoted, demoted, + cleaned etc. It supports writeback and writethrough modes. + +config DM_CACHE_MQ + tristate "MQ Cache Policy (EXPERIMENTAL)" + depends on DM_CACHE + default y + ---help--- + A cache policy that uses a multiqueue ordered by recent hit + count to select which blocks should be promoted and demoted. + This is meant to be a general purpose policy. It prioritises + reads over writes. + +config DM_CACHE_CLEANER + tristate "Cleaner Cache Policy (EXPERIMENTAL)" + depends on DM_CACHE + default y + ---help--- + A simple cache policy that writes back all data to the + origin. Used when decommissioning a dm-cache. + config DM_MIRROR tristate "Mirror target" depends on BLK_DEV_DM @@ -302,8 +333,8 @@ config DM_RAID in one of the available parity distribution methods. config DM_LOG_USERSPACE - tristate "Mirror userspace logging (EXPERIMENTAL)" - depends on DM_MIRROR && EXPERIMENTAL && NET + tristate "Mirror userspace logging" + depends on DM_MIRROR && NET select CONNECTOR ---help--- The userspace logging module provides a mechanism for @@ -350,8 +381,8 @@ config DM_MULTIPATH_ST If unsure, say N. config DM_DELAY - tristate "I/O delaying target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "I/O delaying target" + depends on BLK_DEV_DM ---help--- A target that delays reads and/or writes and can send them to different devices. Useful for testing. @@ -365,14 +396,14 @@ config DM_UEVENT Generate udev events for DM events. config DM_FLAKEY - tristate "Flakey target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "Flakey target" + depends on BLK_DEV_DM ---help--- A target that intermittently fails I/O for debugging purposes. config DM_VERITY - tristate "Verity target support (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "Verity target support" + depends on BLK_DEV_DM select CRYPTO select CRYPTO_HASH select DM_BUFIO diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 94dce8b49324..7ceeaefc0e95 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -11,6 +11,9 @@ dm-mirror-y += dm-raid1.o dm-log-userspace-y \ += dm-log-userspace-base.o dm-log-userspace-transfer.o dm-thin-pool-y += dm-thin.o dm-thin-metadata.o +dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o +dm-cache-mq-y += dm-cache-policy-mq.o +dm-cache-cleaner-y += dm-cache-policy-cleaner.o md-mod-y += md.o bitmap.o raid456-y += raid5.o @@ -44,6 +47,9 @@ obj-$(CONFIG_DM_ZERO) += dm-zero.o obj-$(CONFIG_DM_RAID) += dm-raid.o obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o obj-$(CONFIG_DM_VERITY) += dm-verity.o +obj-$(CONFIG_DM_CACHE) += dm-cache.o +obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o +obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index d9d3f1c7b662..85f0b7074257 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -14,14 +14,6 @@ /*----------------------------------------------------------------*/ -struct dm_bio_prison_cell { - struct hlist_node list; - struct dm_bio_prison *prison; - struct dm_cell_key key; - struct bio *holder; - struct bio_list bios; -}; - struct dm_bio_prison { spinlock_t lock; mempool_t *cell_pool; @@ -87,6 +79,19 @@ void dm_bio_prison_destroy(struct dm_bio_prison *prison) } EXPORT_SYMBOL_GPL(dm_bio_prison_destroy); +struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp) +{ + return mempool_alloc(prison->cell_pool, gfp); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell); + +void dm_bio_prison_free_cell(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell) +{ + mempool_free(cell, prison->cell_pool); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); + static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) { const unsigned long BIG_PRIME = 4294967291UL; @@ -114,91 +119,95 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, return NULL; } -/* - * This may block if a new cell needs allocating. You must ensure that - * cells will be unlocked even if the calling thread is blocked. - * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. - */ -int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, - struct bio *inmate, struct dm_bio_prison_cell **ref) +static void __setup_new_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *holder, + uint32_t hash, + struct dm_bio_prison_cell *cell) { - int r = 1; - unsigned long flags; - uint32_t hash = hash_key(prison, key); - struct dm_bio_prison_cell *cell, *cell2; - - BUG_ON(hash > prison->nr_buckets); - - spin_lock_irqsave(&prison->lock, flags); - - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - bio_list_add(&cell->bios, inmate); - goto out; - } + memcpy(&cell->key, key, sizeof(cell->key)); + cell->holder = holder; + bio_list_init(&cell->bios); + hlist_add_head(&cell->list, prison->cells + hash); +} - /* - * Allocate a new cell - */ - spin_unlock_irqrestore(&prison->lock, flags); - cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); - spin_lock_irqsave(&prison->lock, flags); +static int __bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + uint32_t hash = hash_key(prison, key); + struct dm_bio_prison_cell *cell; - /* - * We've been unlocked, so we have to double check that - * nobody else has inserted this cell in the meantime. - */ cell = __search_bucket(prison->cells + hash, key); if (cell) { - mempool_free(cell2, prison->cell_pool); - bio_list_add(&cell->bios, inmate); - goto out; + if (inmate) + bio_list_add(&cell->bios, inmate); + *cell_result = cell; + return 1; } - /* - * Use new cell. - */ - cell = cell2; - - cell->prison = prison; - memcpy(&cell->key, key, sizeof(cell->key)); - cell->holder = inmate; - bio_list_init(&cell->bios); - hlist_add_head(&cell->list, prison->cells + hash); + __setup_new_cell(prison, key, inmate, hash, cell_prealloc); + *cell_result = cell_prealloc; + return 0; +} - r = 0; +static int bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + int r; + unsigned long flags; -out: + spin_lock_irqsave(&prison->lock, flags); + r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); spin_unlock_irqrestore(&prison->lock, flags); - *ref = cell; - return r; } + +int dm_bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + return bio_detain(prison, key, inmate, cell_prealloc, cell_result); +} EXPORT_SYMBOL_GPL(dm_bio_detain); +int dm_get_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + return bio_detain(prison, key, NULL, cell_prealloc, cell_result); +} +EXPORT_SYMBOL_GPL(dm_get_cell); + /* * @inmates must have been initialised prior to this call */ -static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +static void __cell_release(struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { - struct dm_bio_prison *prison = cell->prison; - hlist_del(&cell->list); if (inmates) { - bio_list_add(inmates, cell->holder); + if (cell->holder) + bio_list_add(inmates, cell->holder); bio_list_merge(inmates, &cell->bios); } - - mempool_free(cell, prison->cell_pool); } -void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) +void dm_cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *bios) { unsigned long flags; - struct dm_bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); __cell_release(cell, bios); @@ -209,20 +218,18 @@ EXPORT_SYMBOL_GPL(dm_cell_release); /* * Sometimes we don't want the holder, just the additional bios. */ -static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { - struct dm_bio_prison *prison = cell->prison; - hlist_del(&cell->list); bio_list_merge(inmates, &cell->bios); - - mempool_free(cell, prison->cell_pool); } -void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +void dm_cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { unsigned long flags; - struct dm_bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); __cell_release_no_holder(cell, inmates); @@ -230,9 +237,9 @@ void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list } EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); -void dm_cell_error(struct dm_bio_prison_cell *cell) +void dm_cell_error(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell) { - struct dm_bio_prison *prison = cell->prison; struct bio_list bios; struct bio *bio; unsigned long flags; diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 53d1a7a84e2f..3f833190eadf 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -22,7 +22,6 @@ * subsequently unlocked the bios become available. */ struct dm_bio_prison; -struct dm_bio_prison_cell; /* FIXME: this needs to be more abstract */ struct dm_cell_key { @@ -31,21 +30,62 @@ struct dm_cell_key { dm_block_t block; }; +/* + * Treat this as opaque, only in header so callers can manage allocation + * themselves. + */ +struct dm_bio_prison_cell { + struct hlist_node list; + struct dm_cell_key key; + struct bio *holder; + struct bio_list bios; +}; + struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); void dm_bio_prison_destroy(struct dm_bio_prison *prison); /* - * This may block if a new cell needs allocating. You must ensure that - * cells will be unlocked even if the calling thread is blocked. + * These two functions just wrap a mempool. This is a transitory step: + * Eventually all bio prison clients should manage their own cell memory. * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + * Like mempool_alloc(), dm_bio_prison_alloc_cell() can only fail if called + * in interrupt context or passed GFP_NOWAIT. */ -int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, - struct bio *inmate, struct dm_bio_prison_cell **ref); +struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, + gfp_t gfp); +void dm_bio_prison_free_cell(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell); -void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); -void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); -void dm_cell_error(struct dm_bio_prison_cell *cell); +/* + * Creates, or retrieves a cell for the given key. + * + * Returns 1 if pre-existing cell returned, zero if new cell created using + * @cell_prealloc. + */ +int dm_get_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result); + +/* + * An atomic op that combines retrieving a cell, and adding a bio to it. + * + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + */ +int dm_bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result); + +void dm_cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *bios); +void dm_cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates); +void dm_cell_error(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell); /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 93205e32a004..3c955e10a618 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1192,7 +1192,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { - .bi_rw = REQ_FLUSH, + .bi_rw = WRITE_FLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h new file mode 100644 index 000000000000..bed4ad4e1b7c --- /dev/null +++ b/drivers/md/dm-cache-block-types.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_BLOCK_TYPES_H +#define DM_CACHE_BLOCK_TYPES_H + +#include "persistent-data/dm-block-manager.h" + +/*----------------------------------------------------------------*/ + +/* + * It's helpful to get sparse to differentiate between indexes into the + * origin device, indexes into the cache device, and indexes into the + * discard bitset. + */ + +typedef dm_block_t __bitwise__ dm_oblock_t; +typedef uint32_t __bitwise__ dm_cblock_t; +typedef dm_block_t __bitwise__ dm_dblock_t; + +static inline dm_oblock_t to_oblock(dm_block_t b) +{ + return (__force dm_oblock_t) b; +} + +static inline dm_block_t from_oblock(dm_oblock_t b) +{ + return (__force dm_block_t) b; +} + +static inline dm_cblock_t to_cblock(uint32_t b) +{ + return (__force dm_cblock_t) b; +} + +static inline uint32_t from_cblock(dm_cblock_t b) +{ + return (__force uint32_t) b; +} + +static inline dm_dblock_t to_dblock(dm_block_t b) +{ + return (__force dm_dblock_t) b; +} + +static inline dm_block_t from_dblock(dm_dblock_t b) +{ + return (__force dm_block_t) b; +} + +#endif /* DM_CACHE_BLOCK_TYPES_H */ diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c new file mode 100644 index 000000000000..fbd3625f2748 --- /dev/null +++ b/drivers/md/dm-cache-metadata.c @@ -0,0 +1,1146 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-cache-metadata.h" + +#include "persistent-data/dm-array.h" +#include "persistent-data/dm-bitset.h" +#include "persistent-data/dm-space-map.h" +#include "persistent-data/dm-space-map-disk.h" +#include "persistent-data/dm-transaction-manager.h" + +#include <linux/device-mapper.h> + +/*----------------------------------------------------------------*/ + +#define DM_MSG_PREFIX "cache metadata" + +#define CACHE_SUPERBLOCK_MAGIC 06142003 +#define CACHE_SUPERBLOCK_LOCATION 0 +#define CACHE_VERSION 1 +#define CACHE_METADATA_CACHE_SIZE 64 + +/* + * 3 for btree insert + + * 2 for btree lookup used within space map + */ +#define CACHE_MAX_CONCURRENT_LOCKS 5 +#define SPACE_MAP_ROOT_SIZE 128 + +enum superblock_flag_bits { + /* for spotting crashes that would invalidate the dirty bitset */ + CLEAN_SHUTDOWN, +}; + +/* + * Each mapping from cache block -> origin block carries a set of flags. + */ +enum mapping_bits { + /* + * A valid mapping. Because we're using an array we clear this + * flag for an non existant mapping. + */ + M_VALID = 1, + + /* + * The data on the cache is different from that on the origin. + */ + M_DIRTY = 2 +}; + +struct cache_disk_superblock { + __le32 csum; + __le32 flags; + __le64 blocknr; + + __u8 uuid[16]; + __le64 magic; + __le32 version; + + __u8 policy_name[CACHE_POLICY_NAME_SIZE]; + __le32 policy_hint_size; + + __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; + __le64 mapping_root; + __le64 hint_root; + + __le64 discard_root; + __le64 discard_block_size; + __le64 discard_nr_blocks; + + __le32 data_block_size; + __le32 metadata_block_size; + __le32 cache_blocks; + + __le32 compat_flags; + __le32 compat_ro_flags; + __le32 incompat_flags; + + __le32 read_hits; + __le32 read_misses; + __le32 write_hits; + __le32 write_misses; +} __packed; + +struct dm_cache_metadata { + struct block_device *bdev; + struct dm_block_manager *bm; + struct dm_space_map *metadata_sm; + struct dm_transaction_manager *tm; + + struct dm_array_info info; + struct dm_array_info hint_info; + struct dm_disk_bitset discard_info; + + struct rw_semaphore root_lock; + dm_block_t root; + dm_block_t hint_root; + dm_block_t discard_root; + + sector_t discard_block_size; + dm_dblock_t discard_nr_blocks; + + sector_t data_block_size; + dm_cblock_t cache_blocks; + bool changed:1; + bool clean_when_opened:1; + + char policy_name[CACHE_POLICY_NAME_SIZE]; + size_t policy_hint_size; + struct dm_cache_statistics stats; +}; + +/*------------------------------------------------------------------- + * superblock validator + *-----------------------------------------------------------------*/ + +#define SUPERBLOCK_CSUM_XOR 9031977 + +static void sb_prepare_for_write(struct dm_block_validator *v, + struct dm_block *b, + size_t sb_block_size) +{ + struct cache_disk_superblock *disk_super = dm_block_data(b); + + disk_super->blocknr = cpu_to_le64(dm_block_location(b)); + disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags, + sb_block_size - sizeof(__le32), + SUPERBLOCK_CSUM_XOR)); +} + +static int sb_check(struct dm_block_validator *v, + struct dm_block *b, + size_t sb_block_size) +{ + struct cache_disk_superblock *disk_super = dm_block_data(b); + __le32 csum_le; + + if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) { + DMERR("sb_check failed: blocknr %llu: wanted %llu", + le64_to_cpu(disk_super->blocknr), + (unsigned long long)dm_block_location(b)); + return -ENOTBLK; + } + + if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) { + DMERR("sb_check failed: magic %llu: wanted %llu", + le64_to_cpu(disk_super->magic), + (unsigned long long)CACHE_SUPERBLOCK_MAGIC); + return -EILSEQ; + } + + csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags, + sb_block_size - sizeof(__le32), + SUPERBLOCK_CSUM_XOR)); + if (csum_le != disk_super->csum) { + DMERR("sb_check failed: csum %u: wanted %u", + le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum)); + return -EILSEQ; + } + + return 0; +} + +static struct dm_block_validator sb_validator = { + .name = "superblock", + .prepare_for_write = sb_prepare_for_write, + .check = sb_check +}; + +/*----------------------------------------------------------------*/ + +static int superblock_read_lock(struct dm_cache_metadata *cmd, + struct dm_block **sblock) +{ + return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock_zero(struct dm_cache_metadata *cmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock(struct dm_cache_metadata *cmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +/*----------------------------------------------------------------*/ + +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) +{ + int r; + unsigned i; + struct dm_block *b; + __le64 *data_le, zero = cpu_to_le64(0); + unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); + + /* + * We can't use a validator here - it may be all zeroes. + */ + r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b); + if (r) + return r; + + data_le = dm_block_data(b); + *result = 1; + for (i = 0; i < sb_block_size; i++) { + if (data_le[i] != zero) { + *result = 0; + break; + } + } + + return dm_bm_unlock(b); +} + +static void __setup_mapping_info(struct dm_cache_metadata *cmd) +{ + struct dm_btree_value_type vt; + + vt.context = NULL; + vt.size = sizeof(__le64); + vt.inc = NULL; + vt.dec = NULL; + vt.equal = NULL; + dm_array_info_init(&cmd->info, cmd->tm, &vt); + + if (cmd->policy_hint_size) { + vt.size = sizeof(__le32); + dm_array_info_init(&cmd->hint_info, cmd->tm, &vt); + } +} + +static int __write_initial_superblock(struct dm_cache_metadata *cmd) +{ + int r; + struct dm_block *sblock; + size_t metadata_len; + struct cache_disk_superblock *disk_super; + sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; + + /* FIXME: see if we can lose the max sectors limit */ + if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) + bdev_size = DM_CACHE_METADATA_MAX_SECTORS; + + r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + r = dm_tm_pre_commit(cmd->tm); + if (r < 0) + return r; + + r = superblock_lock_zero(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + disk_super->flags = 0; + memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); + disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); + disk_super->version = cpu_to_le32(CACHE_VERSION); + memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE); + disk_super->policy_hint_size = 0; + + r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, + metadata_len); + if (r < 0) + goto bad_locked; + + disk_super->mapping_root = cpu_to_le64(cmd->root); + disk_super->hint_root = cpu_to_le64(cmd->hint_root); + disk_super->discard_root = cpu_to_le64(cmd->discard_root); + disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); + disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); + disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); + disk_super->cache_blocks = cpu_to_le32(0); + memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); + + disk_super->read_hits = cpu_to_le32(0); + disk_super->read_misses = cpu_to_le32(0); + disk_super->write_hits = cpu_to_le32(0); + disk_super->write_misses = cpu_to_le32(0); + + return dm_tm_commit(cmd->tm, sblock); + +bad_locked: + dm_bm_unlock(sblock); + return r; +} + +static int __format_metadata(struct dm_cache_metadata *cmd) +{ + int r; + + r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &cmd->tm, &cmd->metadata_sm); + if (r < 0) { + DMERR("tm_create_with_sm failed"); + return r; + } + + __setup_mapping_info(cmd); + + r = dm_array_empty(&cmd->info, &cmd->root); + if (r < 0) + goto bad; + + dm_disk_bitset_init(cmd->tm, &cmd->discard_info); + + r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); + if (r < 0) + goto bad; + + cmd->discard_block_size = 0; + cmd->discard_nr_blocks = 0; + + r = __write_initial_superblock(cmd); + if (r) + goto bad; + + cmd->clean_when_opened = true; + return 0; + +bad: + dm_tm_destroy(cmd->tm); + dm_sm_destroy(cmd->metadata_sm); + + return r; +} + +static int __check_incompat_features(struct cache_disk_superblock *disk_super, + struct dm_cache_metadata *cmd) +{ + uint32_t features; + + features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; + if (features) { + DMERR("could not access metadata due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + /* + * Check for read-only metadata to skip the following RDWR checks. + */ + if (get_disk_ro(cmd->bdev->bd_disk)) + return 0; + + features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP; + if (features) { + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + return 0; +} + +static int __open_metadata(struct dm_cache_metadata *cmd) +{ + int r; + struct dm_block *sblock; + struct cache_disk_superblock *disk_super; + unsigned long sb_flags; + + r = superblock_read_lock(cmd, &sblock); + if (r < 0) { + DMERR("couldn't read lock superblock"); + return r; + } + + disk_super = dm_block_data(sblock); + + r = __check_incompat_features(disk_super, cmd); + if (r < 0) + goto bad; + + r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + disk_super->metadata_space_map_root, + sizeof(disk_super->metadata_space_map_root), + &cmd->tm, &cmd->metadata_sm); + if (r < 0) { + DMERR("tm_open_with_sm failed"); + goto bad; + } + + __setup_mapping_info(cmd); + dm_disk_bitset_init(cmd->tm, &cmd->discard_info); + sb_flags = le32_to_cpu(disk_super->flags); + cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags); + return dm_bm_unlock(sblock); + +bad: + dm_bm_unlock(sblock); + return r; +} + +static int __open_or_format_metadata(struct dm_cache_metadata *cmd, + bool format_device) +{ + int r, unformatted; + + r = __superblock_all_zeroes(cmd->bm, &unformatted); + if (r) + return r; + + if (unformatted) + return format_device ? __format_metadata(cmd) : -EPERM; + + return __open_metadata(cmd); +} + +static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, + bool may_format_device) +{ + int r; + cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE, + CACHE_METADATA_CACHE_SIZE, + CACHE_MAX_CONCURRENT_LOCKS); + if (IS_ERR(cmd->bm)) { + DMERR("could not create block manager"); + return PTR_ERR(cmd->bm); + } + + r = __open_or_format_metadata(cmd, may_format_device); + if (r) + dm_block_manager_destroy(cmd->bm); + + return r; +} + +static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) +{ + dm_sm_destroy(cmd->metadata_sm); + dm_tm_destroy(cmd->tm); + dm_block_manager_destroy(cmd->bm); +} + +typedef unsigned long (*flags_mutator)(unsigned long); + +static void update_flags(struct cache_disk_superblock *disk_super, + flags_mutator mutator) +{ + uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags)); + disk_super->flags = cpu_to_le32(sb_flags); +} + +static unsigned long set_clean_shutdown(unsigned long flags) +{ + set_bit(CLEAN_SHUTDOWN, &flags); + return flags; +} + +static unsigned long clear_clean_shutdown(unsigned long flags) +{ + clear_bit(CLEAN_SHUTDOWN, &flags); + return flags; +} + +static void read_superblock_fields(struct dm_cache_metadata *cmd, + struct cache_disk_superblock *disk_super) +{ + cmd->root = le64_to_cpu(disk_super->mapping_root); + cmd->hint_root = le64_to_cpu(disk_super->hint_root); + cmd->discard_root = le64_to_cpu(disk_super->discard_root); + cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); + cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); + cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); + cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); + strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); + cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size); + + cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits); + cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses); + cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits); + cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses); + + cmd->changed = false; +} + +/* + * The mutator updates the superblock flags. + */ +static int __begin_transaction_flags(struct dm_cache_metadata *cmd, + flags_mutator mutator) +{ + int r; + struct cache_disk_superblock *disk_super; + struct dm_block *sblock; + + r = superblock_lock(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + update_flags(disk_super, mutator); + read_superblock_fields(cmd, disk_super); + + return dm_bm_flush_and_unlock(cmd->bm, sblock); +} + +static int __begin_transaction(struct dm_cache_metadata *cmd) +{ + int r; + struct cache_disk_superblock *disk_super; + struct dm_block *sblock; + + /* + * We re-read the superblock every time. Shouldn't need to do this + * really. + */ + r = superblock_read_lock(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + read_superblock_fields(cmd, disk_super); + dm_bm_unlock(sblock); + + return 0; +} + +static int __commit_transaction(struct dm_cache_metadata *cmd, + flags_mutator mutator) +{ + int r; + size_t metadata_len; + struct cache_disk_superblock *disk_super; + struct dm_block *sblock; + + /* + * We need to know if the cache_disk_superblock exceeds a 512-byte sector. + */ + BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512); + + r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, + &cmd->discard_root); + if (r) + return r; + + r = dm_tm_pre_commit(cmd->tm); + if (r < 0) + return r; + + r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + r = superblock_lock(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + + if (mutator) + update_flags(disk_super, mutator); + + disk_super->mapping_root = cpu_to_le64(cmd->root); + disk_super->hint_root = cpu_to_le64(cmd->hint_root); + disk_super->discard_root = cpu_to_le64(cmd->discard_root); + disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); + disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); + disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); + strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); + + disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); + disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); + disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits); + disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses); + + r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, + metadata_len); + if (r < 0) { + dm_bm_unlock(sblock); + return r; + } + + return dm_tm_commit(cmd->tm, sblock); +} + +/*----------------------------------------------------------------*/ + +/* + * The mappings are held in a dm-array that has 64-bit values stored in + * little-endian format. The index is the cblock, the high 48bits of the + * value are the oblock and the low 16 bit the flags. + */ +#define FLAGS_MASK ((1 << 16) - 1) + +static __le64 pack_value(dm_oblock_t block, unsigned flags) +{ + uint64_t value = from_oblock(block); + value <<= 16; + value = value | (flags & FLAGS_MASK); + return cpu_to_le64(value); +} + +static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) +{ + uint64_t value = le64_to_cpu(value_le); + uint64_t b = value >> 16; + *block = to_oblock(b); + *flags = value & FLAGS_MASK; +} + +/*----------------------------------------------------------------*/ + +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + int r; + struct dm_cache_metadata *cmd; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) { + DMERR("could not allocate metadata struct"); + return NULL; + } + + init_rwsem(&cmd->root_lock); + cmd->bdev = bdev; + cmd->data_block_size = data_block_size; + cmd->cache_blocks = 0; + cmd->policy_hint_size = policy_hint_size; + cmd->changed = true; + + r = __create_persistent_data_objects(cmd, may_format_device); + if (r) { + kfree(cmd); + return ERR_PTR(r); + } + + r = __begin_transaction_flags(cmd, clear_clean_shutdown); + if (r < 0) { + dm_cache_metadata_close(cmd); + return ERR_PTR(r); + } + + return cmd; +} + +void dm_cache_metadata_close(struct dm_cache_metadata *cmd) +{ + __destroy_persistent_data_objects(cmd); + kfree(cmd); +} + +int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) +{ + int r; + __le64 null_mapping = pack_value(0, 0); + + down_write(&cmd->root_lock); + __dm_bless_for_disk(&null_mapping); + r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), + from_cblock(new_cache_size), + &null_mapping, &cmd->root); + if (!r) + cmd->cache_blocks = new_cache_size; + cmd->changed = true; + up_write(&cmd->root_lock); + + return r; +} + +int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, + sector_t discard_block_size, + dm_dblock_t new_nr_entries) +{ + int r; + + down_write(&cmd->root_lock); + r = dm_bitset_resize(&cmd->discard_info, + cmd->discard_root, + from_dblock(cmd->discard_nr_blocks), + from_dblock(new_nr_entries), + false, &cmd->discard_root); + if (!r) { + cmd->discard_block_size = discard_block_size; + cmd->discard_nr_blocks = new_nr_entries; + } + + cmd->changed = true; + up_write(&cmd->root_lock); + + return r; +} + +static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) +{ + return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, + from_dblock(b), &cmd->discard_root); +} + +static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) +{ + return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, + from_dblock(b), &cmd->discard_root); +} + +static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b, + bool *is_discarded) +{ + return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, + from_dblock(b), &cmd->discard_root, + is_discarded); +} + +static int __discard(struct dm_cache_metadata *cmd, + dm_dblock_t dblock, bool discard) +{ + int r; + + r = (discard ? __set_discard : __clear_discard)(cmd, dblock); + if (r) + return r; + + cmd->changed = true; + return 0; +} + +int dm_cache_set_discard(struct dm_cache_metadata *cmd, + dm_dblock_t dblock, bool discard) +{ + int r; + + down_write(&cmd->root_lock); + r = __discard(cmd, dblock, discard); + up_write(&cmd->root_lock); + + return r; +} + +static int __load_discards(struct dm_cache_metadata *cmd, + load_discard_fn fn, void *context) +{ + int r = 0; + dm_block_t b; + bool discard; + + for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { + dm_dblock_t dblock = to_dblock(b); + + if (cmd->clean_when_opened) { + r = __is_discarded(cmd, dblock, &discard); + if (r) + return r; + } else + discard = false; + + r = fn(context, cmd->discard_block_size, dblock, discard); + if (r) + break; + } + + return r; +} + +int dm_cache_load_discards(struct dm_cache_metadata *cmd, + load_discard_fn fn, void *context) +{ + int r; + + down_read(&cmd->root_lock); + r = __load_discards(cmd, fn, context); + up_read(&cmd->root_lock); + + return r; +} + +dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd) +{ + dm_cblock_t r; + + down_read(&cmd->root_lock); + r = cmd->cache_blocks; + up_read(&cmd->root_lock); + + return r; +} + +static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock) +{ + int r; + __le64 value = pack_value(0, 0); + + __dm_bless_for_disk(&value); + r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), + &value, &cmd->root); + if (r) + return r; + + cmd->changed = true; + return 0; +} + +int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock) +{ + int r; + + down_write(&cmd->root_lock); + r = __remove(cmd, cblock); + up_write(&cmd->root_lock); + + return r; +} + +static int __insert(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, dm_oblock_t oblock) +{ + int r; + __le64 value = pack_value(oblock, M_VALID); + __dm_bless_for_disk(&value); + + r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), + &value, &cmd->root); + if (r) + return r; + + cmd->changed = true; + return 0; +} + +int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, dm_oblock_t oblock) +{ + int r; + + down_write(&cmd->root_lock); + r = __insert(cmd, cblock, oblock); + up_write(&cmd->root_lock); + + return r; +} + +struct thunk { + load_mapping_fn fn; + void *context; + + struct dm_cache_metadata *cmd; + bool respect_dirty_flags; + bool hints_valid; +}; + +static bool hints_array_initialized(struct dm_cache_metadata *cmd) +{ + return cmd->hint_root && cmd->policy_hint_size; +} + +static bool hints_array_available(struct dm_cache_metadata *cmd, + const char *policy_name) +{ + bool policy_names_match = !strncmp(cmd->policy_name, policy_name, + sizeof(cmd->policy_name)); + + return cmd->clean_when_opened && policy_names_match && + hints_array_initialized(cmd); +} + +static int __load_mapping(void *context, uint64_t cblock, void *leaf) +{ + int r = 0; + bool dirty; + __le64 value; + __le32 hint_value = 0; + dm_oblock_t oblock; + unsigned flags; + struct thunk *thunk = context; + struct dm_cache_metadata *cmd = thunk->cmd; + + memcpy(&value, leaf, sizeof(value)); + unpack_value(value, &oblock, &flags); + + if (flags & M_VALID) { + if (thunk->hints_valid) { + r = dm_array_get_value(&cmd->hint_info, cmd->hint_root, + cblock, &hint_value); + if (r && r != -ENODATA) + return r; + } + + dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true; + r = thunk->fn(thunk->context, oblock, to_cblock(cblock), + dirty, le32_to_cpu(hint_value), thunk->hints_valid); + } + + return r; +} + +static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, + load_mapping_fn fn, void *context) +{ + struct thunk thunk; + + thunk.fn = fn; + thunk.context = context; + + thunk.cmd = cmd; + thunk.respect_dirty_flags = cmd->clean_when_opened; + thunk.hints_valid = hints_array_available(cmd, policy_name); + + return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); +} + +int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, + load_mapping_fn fn, void *context) +{ + int r; + + down_read(&cmd->root_lock); + r = __load_mappings(cmd, policy_name, fn, context); + up_read(&cmd->root_lock); + + return r; +} + +static int __dump_mapping(void *context, uint64_t cblock, void *leaf) +{ + int r = 0; + __le64 value; + dm_oblock_t oblock; + unsigned flags; + + memcpy(&value, leaf, sizeof(value)); + unpack_value(value, &oblock, &flags); + + return r; +} + +static int __dump_mappings(struct dm_cache_metadata *cmd) +{ + return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL); +} + +void dm_cache_dump(struct dm_cache_metadata *cmd) +{ + down_read(&cmd->root_lock); + __dump_mappings(cmd); + up_read(&cmd->root_lock); +} + +int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd) +{ + int r; + + down_read(&cmd->root_lock); + r = cmd->changed; + up_read(&cmd->root_lock); + + return r; +} + +static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty) +{ + int r; + unsigned flags; + dm_oblock_t oblock; + __le64 value; + + r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value); + if (r) + return r; + + unpack_value(value, &oblock, &flags); + + if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty)) + /* nothing to be done */ + return 0; + + value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0)); + __dm_bless_for_disk(&value); + + r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), + &value, &cmd->root); + if (r) + return r; + + cmd->changed = true; + return 0; + +} + +int dm_cache_set_dirty(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, bool dirty) +{ + int r; + + down_write(&cmd->root_lock); + r = __dirty(cmd, cblock, dirty); + up_write(&cmd->root_lock); + + return r; +} + +void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats) +{ + down_read(&cmd->root_lock); + memcpy(stats, &cmd->stats, sizeof(*stats)); + up_read(&cmd->root_lock); +} + +void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats) +{ + down_write(&cmd->root_lock); + memcpy(&cmd->stats, stats, sizeof(*stats)); + up_write(&cmd->root_lock); +} + +int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown) +{ + int r; + flags_mutator mutator = (clean_shutdown ? set_clean_shutdown : + clear_clean_shutdown); + + down_write(&cmd->root_lock); + r = __commit_transaction(cmd, mutator); + if (r) + goto out; + + r = __begin_transaction(cmd); + +out: + up_write(&cmd->root_lock); + return r; +} + +int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, + dm_block_t *result) +{ + int r = -EINVAL; + + down_read(&cmd->root_lock); + r = dm_sm_get_nr_free(cmd->metadata_sm, result); + up_read(&cmd->root_lock); + + return r; +} + +int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, + dm_block_t *result) +{ + int r = -EINVAL; + + down_read(&cmd->root_lock); + r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); + up_read(&cmd->root_lock); + + return r; +} + +/*----------------------------------------------------------------*/ + +static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) +{ + int r; + __le32 value; + size_t hint_size; + const char *policy_name = dm_cache_policy_get_name(policy); + + if (!policy_name[0] || + (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) + return -EINVAL; + + if (strcmp(cmd->policy_name, policy_name)) { + strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); + + hint_size = dm_cache_policy_get_hint_size(policy); + if (!hint_size) + return 0; /* short-circuit hints initialization */ + cmd->policy_hint_size = hint_size; + + if (cmd->hint_root) { + r = dm_array_del(&cmd->hint_info, cmd->hint_root); + if (r) + return r; + } + + r = dm_array_empty(&cmd->hint_info, &cmd->hint_root); + if (r) + return r; + + value = cpu_to_le32(0); + __dm_bless_for_disk(&value); + r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0, + from_cblock(cmd->cache_blocks), + &value, &cmd->hint_root); + if (r) + return r; + } + + return 0; +} + +int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) +{ + int r; + + down_write(&cmd->root_lock); + r = begin_hints(cmd, policy); + up_write(&cmd->root_lock); + + return r; +} + +static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, + uint32_t hint) +{ + int r; + __le32 value = cpu_to_le32(hint); + __dm_bless_for_disk(&value); + + r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, + from_cblock(cblock), &value, &cmd->hint_root); + cmd->changed = true; + + return r; +} + +int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, + uint32_t hint) +{ + int r; + + if (!hints_array_initialized(cmd)) + return 0; + + down_write(&cmd->root_lock); + r = save_hint(cmd, cblock, hint); + up_write(&cmd->root_lock); + + return r; +} diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h new file mode 100644 index 000000000000..135864ea0eee --- /dev/null +++ b/drivers/md/dm-cache-metadata.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_METADATA_H +#define DM_CACHE_METADATA_H + +#include "dm-cache-block-types.h" +#include "dm-cache-policy-internal.h" + +/*----------------------------------------------------------------*/ + +#define DM_CACHE_METADATA_BLOCK_SIZE 4096 + +/* FIXME: remove this restriction */ +/* + * The metadata device is currently limited in size. + * + * We have one block of index, which can hold 255 index entries. Each + * index entry contains allocation info about 16k metadata blocks. + */ +#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) + +/* + * A metadata device larger than 16GB triggers a warning. + */ +#define DM_CACHE_METADATA_MAX_SECTORS_WARNING (16 * (1024 * 1024 * 1024 >> SECTOR_SHIFT)) + +/*----------------------------------------------------------------*/ + +/* + * Ext[234]-style compat feature flags. + * + * A new feature which old metadata will still be compatible with should + * define a DM_CACHE_FEATURE_COMPAT_* flag (rarely useful). + * + * A new feature that is not compatible with old code should define a + * DM_CACHE_FEATURE_INCOMPAT_* flag and guard the relevant code with + * that flag. + * + * A new feature that is not compatible with old code accessing the + * metadata RDWR should define a DM_CACHE_FEATURE_RO_COMPAT_* flag and + * guard the relevant code with that flag. + * + * As these various flags are defined they should be added to the + * following masks. + */ +#define DM_CACHE_FEATURE_COMPAT_SUPP 0UL +#define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL +#define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL + +/* + * Reopens or creates a new, empty metadata volume. + * Returns an ERR_PTR on failure. + */ +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size); + +void dm_cache_metadata_close(struct dm_cache_metadata *cmd); + +/* + * The metadata needs to know how many cache blocks there are. We don't + * care about the origin, assuming the core target is giving us valid + * origin blocks to map to. + */ +int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size); +dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd); + +int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, + sector_t discard_block_size, + dm_dblock_t new_nr_entries); + +typedef int (*load_discard_fn)(void *context, sector_t discard_block_size, + dm_dblock_t dblock, bool discarded); +int dm_cache_load_discards(struct dm_cache_metadata *cmd, + load_discard_fn fn, void *context); + +int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_dblock_t dblock, bool discard); + +int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock); +int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock); +int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd); + +typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock, + dm_cblock_t cblock, bool dirty, + uint32_t hint, bool hint_valid); +int dm_cache_load_mappings(struct dm_cache_metadata *cmd, + const char *policy_name, + load_mapping_fn fn, + void *context); + +int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty); + +struct dm_cache_statistics { + uint32_t read_hits; + uint32_t read_misses; + uint32_t write_hits; + uint32_t write_misses; +}; + +void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats); +void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats); + +int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown); + +int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, + dm_block_t *result); + +int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, + dm_block_t *result); + +void dm_cache_dump(struct dm_cache_metadata *cmd); + +/* + * The policy is invited to save a 32bit hint value for every cblock (eg, + * for a hit count). These are stored against the policy name. If + * policies are changed, then hints will be lost. If the machine crashes, + * hints will be lost. + * + * The hints are indexed by the cblock, but many policies will not + * neccessarily have a fast way of accessing efficiently via cblock. So + * rather than querying the policy for each cblock, we let it walk its data + * structures and fill in the hints in whatever order it wishes. + */ + +int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p); + +/* + * requests hints for every cblock and stores in the metadata device. + */ +int dm_cache_save_hint(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, uint32_t hint); + +/*----------------------------------------------------------------*/ + +#endif /* DM_CACHE_METADATA_H */ diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c new file mode 100644 index 000000000000..cc05d70b3cb8 --- /dev/null +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -0,0 +1,464 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * writeback cache policy supporting flushing out dirty cache blocks. + * + * This file is released under the GPL. + */ + +#include "dm-cache-policy.h" +#include "dm.h" + +#include <linux/hash.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +/*----------------------------------------------------------------*/ + +#define DM_MSG_PREFIX "cache cleaner" +#define CLEANER_VERSION "1.0.0" + +/* Cache entry struct. */ +struct wb_cache_entry { + struct list_head list; + struct hlist_node hlist; + + dm_oblock_t oblock; + dm_cblock_t cblock; + bool dirty:1; + bool pending:1; +}; + +struct hash { + struct hlist_head *table; + dm_block_t hash_bits; + unsigned nr_buckets; +}; + +struct policy { + struct dm_cache_policy policy; + spinlock_t lock; + + struct list_head free; + struct list_head clean; + struct list_head clean_pending; + struct list_head dirty; + + /* + * We know exactly how many cblocks will be needed, + * so we can allocate them up front. + */ + dm_cblock_t cache_size, nr_cblocks_allocated; + struct wb_cache_entry *cblocks; + struct hash chash; +}; + +/*----------------------------------------------------------------------------*/ + +/* + * Low-level functions. + */ +static unsigned next_power(unsigned n, unsigned min) +{ + return roundup_pow_of_two(max(n, min)); +} + +static struct policy *to_policy(struct dm_cache_policy *p) +{ + return container_of(p, struct policy, policy); +} + +static struct list_head *list_pop(struct list_head *q) +{ + struct list_head *r = q->next; + + list_del(r); + + return r; +} + +/*----------------------------------------------------------------------------*/ + +/* Allocate/free various resources. */ +static int alloc_hash(struct hash *hash, unsigned elts) +{ + hash->nr_buckets = next_power(elts >> 4, 16); + hash->hash_bits = ffs(hash->nr_buckets) - 1; + hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets); + + return hash->table ? 0 : -ENOMEM; +} + +static void free_hash(struct hash *hash) +{ + vfree(hash->table); +} + +static int alloc_cache_blocks_with_hash(struct policy *p, dm_cblock_t cache_size) +{ + int r = -ENOMEM; + + p->cblocks = vzalloc(sizeof(*p->cblocks) * from_cblock(cache_size)); + if (p->cblocks) { + unsigned u = from_cblock(cache_size); + + while (u--) + list_add(&p->cblocks[u].list, &p->free); + + p->nr_cblocks_allocated = 0; + + /* Cache entries hash. */ + r = alloc_hash(&p->chash, from_cblock(cache_size)); + if (r) + vfree(p->cblocks); + } + + return r; +} + +static void free_cache_blocks_and_hash(struct policy *p) +{ + free_hash(&p->chash); + vfree(p->cblocks); +} + +static struct wb_cache_entry *alloc_cache_entry(struct policy *p) +{ + struct wb_cache_entry *e; + + BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size)); + + e = list_entry(list_pop(&p->free), struct wb_cache_entry, list); + p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1); + + return e; +} + +/*----------------------------------------------------------------------------*/ + +/* Hash functions (lookup, insert, remove). */ +static struct wb_cache_entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock) +{ + struct hash *hash = &p->chash; + unsigned h = hash_64(from_oblock(oblock), hash->hash_bits); + struct wb_cache_entry *cur; + struct hlist_head *bucket = &hash->table[h]; + + hlist_for_each_entry(cur, bucket, hlist) { + if (cur->oblock == oblock) { + /* Move upfront bucket for faster access. */ + hlist_del(&cur->hlist); + hlist_add_head(&cur->hlist, bucket); + return cur; + } + } + + return NULL; +} + +static void insert_cache_hash_entry(struct policy *p, struct wb_cache_entry *e) +{ + unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits); + + hlist_add_head(&e->hlist, &p->chash.table[h]); +} + +static void remove_cache_hash_entry(struct wb_cache_entry *e) +{ + hlist_del(&e->hlist); +} + +/* Public interface (see dm-cache-policy.h */ +static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + result->op = POLICY_MISS; + + if (can_block) + spin_lock_irqsave(&p->lock, flags); + + else if (!spin_trylock_irqsave(&p->lock, flags)) + return -EWOULDBLOCK; + + e = lookup_cache_entry(p, oblock); + if (e) { + result->op = POLICY_HIT; + result->cblock = e->cblock; + + } + + spin_unlock_irqrestore(&p->lock, flags); + + return 0; +} + +static int wb_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + int r; + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + if (!spin_trylock_irqsave(&p->lock, flags)) + return -EWOULDBLOCK; + + e = lookup_cache_entry(p, oblock); + if (e) { + *cblock = e->cblock; + r = 0; + + } else + r = -ENOENT; + + spin_unlock_irqrestore(&p->lock, flags); + + return r; +} + +static void __set_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock, bool set) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + + e = lookup_cache_entry(p, oblock); + BUG_ON(!e); + + if (set) { + if (!e->dirty) { + e->dirty = true; + list_move(&e->list, &p->dirty); + } + + } else { + if (e->dirty) { + e->pending = false; + e->dirty = false; + list_move(&e->list, &p->clean); + } + } +} + +static void wb_set_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + __set_clear_dirty(pe, oblock, true); + spin_unlock_irqrestore(&p->lock, flags); +} + +static void wb_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + __set_clear_dirty(pe, oblock, false); + spin_unlock_irqrestore(&p->lock, flags); +} + +static void add_cache_entry(struct policy *p, struct wb_cache_entry *e) +{ + insert_cache_hash_entry(p, e); + if (e->dirty) + list_add(&e->list, &p->dirty); + else + list_add(&e->list, &p->clean); +} + +static int wb_load_mapping(struct dm_cache_policy *pe, + dm_oblock_t oblock, dm_cblock_t cblock, + uint32_t hint, bool hint_valid) +{ + int r; + struct policy *p = to_policy(pe); + struct wb_cache_entry *e = alloc_cache_entry(p); + + if (e) { + e->cblock = cblock; + e->oblock = oblock; + e->dirty = false; /* blocks default to clean */ + add_cache_entry(p, e); + r = 0; + + } else + r = -ENOMEM; + + return r; +} + +static void wb_destroy(struct dm_cache_policy *pe) +{ + struct policy *p = to_policy(pe); + + free_cache_blocks_and_hash(p); + kfree(p); +} + +static struct wb_cache_entry *__wb_force_remove_mapping(struct policy *p, dm_oblock_t oblock) +{ + struct wb_cache_entry *r = lookup_cache_entry(p, oblock); + + BUG_ON(!r); + + remove_cache_hash_entry(r); + list_del(&r->list); + + return r; +} + +static void wb_remove_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + e = __wb_force_remove_mapping(p, oblock); + list_add_tail(&e->list, &p->free); + BUG_ON(!from_cblock(p->nr_cblocks_allocated)); + p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) - 1); + spin_unlock_irqrestore(&p->lock, flags); +} + +static void wb_force_mapping(struct dm_cache_policy *pe, + dm_oblock_t current_oblock, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + e = __wb_force_remove_mapping(p, current_oblock); + e->oblock = oblock; + add_cache_entry(p, e); + spin_unlock_irqrestore(&p->lock, flags); +} + +static struct wb_cache_entry *get_next_dirty_entry(struct policy *p) +{ + struct list_head *l; + struct wb_cache_entry *r; + + if (list_empty(&p->dirty)) + return NULL; + + l = list_pop(&p->dirty); + r = container_of(l, struct wb_cache_entry, list); + list_add(l, &p->clean_pending); + + return r; +} + +static int wb_writeback_work(struct dm_cache_policy *pe, + dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + int r = -ENOENT; + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + + e = get_next_dirty_entry(p); + if (e) { + *oblock = e->oblock; + *cblock = e->cblock; + r = 0; + } + + spin_unlock_irqrestore(&p->lock, flags); + + return r; +} + +static dm_cblock_t wb_residency(struct dm_cache_policy *pe) +{ + return to_policy(pe)->nr_cblocks_allocated; +} + +/* Init the policy plugin interface function pointers. */ +static void init_policy_functions(struct policy *p) +{ + p->policy.destroy = wb_destroy; + p->policy.map = wb_map; + p->policy.lookup = wb_lookup; + p->policy.set_dirty = wb_set_dirty; + p->policy.clear_dirty = wb_clear_dirty; + p->policy.load_mapping = wb_load_mapping; + p->policy.walk_mappings = NULL; + p->policy.remove_mapping = wb_remove_mapping; + p->policy.writeback_work = wb_writeback_work; + p->policy.force_mapping = wb_force_mapping; + p->policy.residency = wb_residency; + p->policy.tick = NULL; +} + +static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + int r; + struct policy *p = kzalloc(sizeof(*p), GFP_KERNEL); + + if (!p) + return NULL; + + init_policy_functions(p); + INIT_LIST_HEAD(&p->free); + INIT_LIST_HEAD(&p->clean); + INIT_LIST_HEAD(&p->clean_pending); + INIT_LIST_HEAD(&p->dirty); + + p->cache_size = cache_size; + spin_lock_init(&p->lock); + + /* Allocate cache entry structs and add them to free list. */ + r = alloc_cache_blocks_with_hash(p, cache_size); + if (!r) + return &p->policy; + + kfree(p); + + return NULL; +} +/*----------------------------------------------------------------------------*/ + +static struct dm_cache_policy_type wb_policy_type = { + .name = "cleaner", + .hint_size = 0, + .owner = THIS_MODULE, + .create = wb_create +}; + +static int __init wb_init(void) +{ + int r = dm_cache_policy_register(&wb_policy_type); + + if (r < 0) + DMERR("register failed %d", r); + else + DMINFO("version " CLEANER_VERSION " loaded"); + + return r; +} + +static void __exit wb_exit(void) +{ + dm_cache_policy_unregister(&wb_policy_type); +} + +module_init(wb_init); +module_exit(wb_exit); + +MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("cleaner cache policy"); diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h new file mode 100644 index 000000000000..52a75beeced5 --- /dev/null +++ b/drivers/md/dm-cache-policy-internal.h @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_POLICY_INTERNAL_H +#define DM_CACHE_POLICY_INTERNAL_H + +#include "dm-cache-policy.h" + +/*----------------------------------------------------------------*/ + +/* + * Little inline functions that simplify calling the policy methods. + */ +static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result); +} + +static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + BUG_ON(!p->lookup); + return p->lookup(p, oblock, cblock); +} + +static inline void policy_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + if (p->set_dirty) + p->set_dirty(p, oblock); +} + +static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + if (p->clear_dirty) + p->clear_dirty(p, oblock); +} + +static inline int policy_load_mapping(struct dm_cache_policy *p, + dm_oblock_t oblock, dm_cblock_t cblock, + uint32_t hint, bool hint_valid) +{ + return p->load_mapping(p, oblock, cblock, hint, hint_valid); +} + +static inline int policy_walk_mappings(struct dm_cache_policy *p, + policy_walk_fn fn, void *context) +{ + return p->walk_mappings ? p->walk_mappings(p, fn, context) : 0; +} + +static inline int policy_writeback_work(struct dm_cache_policy *p, + dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + return p->writeback_work ? p->writeback_work(p, oblock, cblock) : -ENOENT; +} + +static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + return p->remove_mapping(p, oblock); +} + +static inline void policy_force_mapping(struct dm_cache_policy *p, + dm_oblock_t current_oblock, dm_oblock_t new_oblock) +{ + return p->force_mapping(p, current_oblock, new_oblock); +} + +static inline dm_cblock_t policy_residency(struct dm_cache_policy *p) +{ + return p->residency(p); +} + +static inline void policy_tick(struct dm_cache_policy *p) +{ + if (p->tick) + return p->tick(p); +} + +static inline int policy_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen) +{ + ssize_t sz = 0; + if (p->emit_config_values) + return p->emit_config_values(p, result, maxlen); + + DMEMIT("0"); + return 0; +} + +static inline int policy_set_config_value(struct dm_cache_policy *p, + const char *key, const char *value) +{ + return p->set_config_value ? p->set_config_value(p, key, value) : -EINVAL; +} + +/*----------------------------------------------------------------*/ + +/* + * Creates a new cache policy given a policy name, a cache size, an origin size and the block size. + */ +struct dm_cache_policy *dm_cache_policy_create(const char *name, dm_cblock_t cache_size, + sector_t origin_size, sector_t block_size); + +/* + * Destroys the policy. This drops references to the policy module as well + * as calling it's destroy method. So always use this rather than calling + * the policy->destroy method directly. + */ +void dm_cache_policy_destroy(struct dm_cache_policy *p); + +/* + * In case we've forgotten. + */ +const char *dm_cache_policy_get_name(struct dm_cache_policy *p); + +size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p); + +/*----------------------------------------------------------------*/ + +#endif /* DM_CACHE_POLICY_INTERNAL_H */ diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c new file mode 100644 index 000000000000..964153255076 --- /dev/null +++ b/drivers/md/dm-cache-policy-mq.c @@ -0,0 +1,1195 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#include "dm-cache-policy.h" +#include "dm.h" + +#include <linux/hash.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#define DM_MSG_PREFIX "cache-policy-mq" +#define MQ_VERSION "1.0.0" + +static struct kmem_cache *mq_entry_cache; + +/*----------------------------------------------------------------*/ + +static unsigned next_power(unsigned n, unsigned min) +{ + return roundup_pow_of_two(max(n, min)); +} + +/*----------------------------------------------------------------*/ + +static unsigned long *alloc_bitset(unsigned nr_entries) +{ + size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG); + return vzalloc(s); +} + +static void free_bitset(unsigned long *bits) +{ + vfree(bits); +} + +/*----------------------------------------------------------------*/ + +/* + * Large, sequential ios are probably better left on the origin device since + * spindles tend to have good bandwidth. + * + * The io_tracker tries to spot when the io is in one of these sequential + * modes. + * + * Two thresholds to switch between random and sequential io mode are defaulting + * as follows and can be adjusted via the constructor and message interfaces. + */ +#define RANDOM_THRESHOLD_DEFAULT 4 +#define SEQUENTIAL_THRESHOLD_DEFAULT 512 + +enum io_pattern { + PATTERN_SEQUENTIAL, + PATTERN_RANDOM +}; + +struct io_tracker { + enum io_pattern pattern; + + unsigned nr_seq_samples; + unsigned nr_rand_samples; + unsigned thresholds[2]; + + dm_oblock_t last_end_oblock; +}; + +static void iot_init(struct io_tracker *t, + int sequential_threshold, int random_threshold) +{ + t->pattern = PATTERN_RANDOM; + t->nr_seq_samples = 0; + t->nr_rand_samples = 0; + t->last_end_oblock = 0; + t->thresholds[PATTERN_RANDOM] = random_threshold; + t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold; +} + +static enum io_pattern iot_pattern(struct io_tracker *t) +{ + return t->pattern; +} + +static void iot_update_stats(struct io_tracker *t, struct bio *bio) +{ + if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1) + t->nr_seq_samples++; + else { + /* + * Just one non-sequential IO is enough to reset the + * counters. + */ + if (t->nr_seq_samples) { + t->nr_seq_samples = 0; + t->nr_rand_samples = 0; + } + + t->nr_rand_samples++; + } + + t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1); +} + +static void iot_check_for_pattern_switch(struct io_tracker *t) +{ + switch (t->pattern) { + case PATTERN_SEQUENTIAL: + if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) { + t->pattern = PATTERN_RANDOM; + t->nr_seq_samples = t->nr_rand_samples = 0; + } + break; + + case PATTERN_RANDOM: + if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) { + t->pattern = PATTERN_SEQUENTIAL; + t->nr_seq_samples = t->nr_rand_samples = 0; + } + break; + } +} + +static void iot_examine_bio(struct io_tracker *t, struct bio *bio) +{ + iot_update_stats(t, bio); + iot_check_for_pattern_switch(t); +} + +/*----------------------------------------------------------------*/ + + +/* + * This queue is divided up into different levels. Allowing us to push + * entries to the back of any of the levels. Think of it as a partially + * sorted queue. + */ +#define NR_QUEUE_LEVELS 16u + +struct queue { + struct list_head qs[NR_QUEUE_LEVELS]; +}; + +static void queue_init(struct queue *q) +{ + unsigned i; + + for (i = 0; i < NR_QUEUE_LEVELS; i++) + INIT_LIST_HEAD(q->qs + i); +} + +/* + * Insert an entry to the back of the given level. + */ +static void queue_push(struct queue *q, unsigned level, struct list_head *elt) +{ + list_add_tail(elt, q->qs + level); +} + +static void queue_remove(struct list_head *elt) +{ + list_del(elt); +} + +/* + * Shifts all regions down one level. This has no effect on the order of + * the queue. + */ +static void queue_shift_down(struct queue *q) +{ + unsigned level; + + for (level = 1; level < NR_QUEUE_LEVELS; level++) + list_splice_init(q->qs + level, q->qs + level - 1); +} + +/* + * Gives us the oldest entry of the lowest popoulated level. If the first + * level is emptied then we shift down one level. + */ +static struct list_head *queue_pop(struct queue *q) +{ + unsigned level; + struct list_head *r; + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + if (!list_empty(q->qs + level)) { + r = q->qs[level].next; + list_del(r); + + /* have we just emptied the bottom level? */ + if (level == 0 && list_empty(q->qs)) + queue_shift_down(q); + + return r; + } + + return NULL; +} + +static struct list_head *list_pop(struct list_head *lh) +{ + struct list_head *r = lh->next; + + BUG_ON(!r); + list_del_init(r); + + return r; +} + +/*----------------------------------------------------------------*/ + +/* + * Describes a cache entry. Used in both the cache and the pre_cache. + */ +struct entry { + struct hlist_node hlist; + struct list_head list; + dm_oblock_t oblock; + dm_cblock_t cblock; /* valid iff in_cache */ + + /* + * FIXME: pack these better + */ + bool in_cache:1; + unsigned hit_count; + unsigned generation; + unsigned tick; +}; + +struct mq_policy { + struct dm_cache_policy policy; + + /* protects everything */ + struct mutex lock; + dm_cblock_t cache_size; + struct io_tracker tracker; + + /* + * We maintain two queues of entries. The cache proper contains + * the currently active mappings. Whereas the pre_cache tracks + * blocks that are being hit frequently and potential candidates + * for promotion to the cache. + */ + struct queue pre_cache; + struct queue cache; + + /* + * Keeps track of time, incremented by the core. We use this to + * avoid attributing multiple hits within the same tick. + * + * Access to tick_protected should be done with the spin lock held. + * It's copied to tick at the start of the map function (within the + * mutex). + */ + spinlock_t tick_lock; + unsigned tick_protected; + unsigned tick; + + /* + * A count of the number of times the map function has been called + * and found an entry in the pre_cache or cache. Currently used to + * calculate the generation. + */ + unsigned hit_count; + + /* + * A generation is a longish period that is used to trigger some + * book keeping effects. eg, decrementing hit counts on entries. + * This is needed to allow the cache to evolve as io patterns + * change. + */ + unsigned generation; + unsigned generation_period; /* in lookups (will probably change) */ + + /* + * Entries in the pre_cache whose hit count passes the promotion + * threshold move to the cache proper. Working out the correct + * value for the promotion_threshold is crucial to this policy. + */ + unsigned promote_threshold; + + /* + * We need cache_size entries for the cache, and choose to have + * cache_size entries for the pre_cache too. One motivation for + * using the same size is to make the hit counts directly + * comparable between pre_cache and cache. + */ + unsigned nr_entries; + unsigned nr_entries_allocated; + struct list_head free; + + /* + * Cache blocks may be unallocated. We store this info in a + * bitset. + */ + unsigned long *allocation_bitset; + unsigned nr_cblocks_allocated; + unsigned find_free_nr_words; + unsigned find_free_last_word; + + /* + * The hash table allows us to quickly find an entry by origin + * block. Both pre_cache and cache entries are in here. + */ + unsigned nr_buckets; + dm_block_t hash_bits; + struct hlist_head *table; +}; + +/*----------------------------------------------------------------*/ +/* Free/alloc mq cache entry structures. */ +static void takeout_queue(struct list_head *lh, struct queue *q) +{ + unsigned level; + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + list_splice(q->qs + level, lh); +} + +static void free_entries(struct mq_policy *mq) +{ + struct entry *e, *tmp; + + takeout_queue(&mq->free, &mq->pre_cache); + takeout_queue(&mq->free, &mq->cache); + + list_for_each_entry_safe(e, tmp, &mq->free, list) + kmem_cache_free(mq_entry_cache, e); +} + +static int alloc_entries(struct mq_policy *mq, unsigned elts) +{ + unsigned u = mq->nr_entries; + + INIT_LIST_HEAD(&mq->free); + mq->nr_entries_allocated = 0; + + while (u--) { + struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL); + + if (!e) { + free_entries(mq); + return -ENOMEM; + } + + + list_add(&e->list, &mq->free); + } + + return 0; +} + +/*----------------------------------------------------------------*/ + +/* + * Simple hash table implementation. Should replace with the standard hash + * table that's making its way upstream. + */ +static void hash_insert(struct mq_policy *mq, struct entry *e) +{ + unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits); + + hlist_add_head(&e->hlist, mq->table + h); +} + +static struct entry *hash_lookup(struct mq_policy *mq, dm_oblock_t oblock) +{ + unsigned h = hash_64(from_oblock(oblock), mq->hash_bits); + struct hlist_head *bucket = mq->table + h; + struct entry *e; + + hlist_for_each_entry(e, bucket, hlist) + if (e->oblock == oblock) { + hlist_del(&e->hlist); + hlist_add_head(&e->hlist, bucket); + return e; + } + + return NULL; +} + +static void hash_remove(struct entry *e) +{ + hlist_del(&e->hlist); +} + +/*----------------------------------------------------------------*/ + +/* + * Allocates a new entry structure. The memory is allocated in one lump, + * so we just handing it out here. Returns NULL if all entries have + * already been allocated. Cannot fail otherwise. + */ +static struct entry *alloc_entry(struct mq_policy *mq) +{ + struct entry *e; + + if (mq->nr_entries_allocated >= mq->nr_entries) { + BUG_ON(!list_empty(&mq->free)); + return NULL; + } + + e = list_entry(list_pop(&mq->free), struct entry, list); + INIT_LIST_HEAD(&e->list); + INIT_HLIST_NODE(&e->hlist); + + mq->nr_entries_allocated++; + return e; +} + +/*----------------------------------------------------------------*/ + +/* + * Mark cache blocks allocated or not in the bitset. + */ +static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock) +{ + BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); + BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset)); + + set_bit(from_cblock(cblock), mq->allocation_bitset); + mq->nr_cblocks_allocated++; +} + +static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock) +{ + BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); + BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset)); + + clear_bit(from_cblock(cblock), mq->allocation_bitset); + mq->nr_cblocks_allocated--; +} + +static bool any_free_cblocks(struct mq_policy *mq) +{ + return mq->nr_cblocks_allocated < from_cblock(mq->cache_size); +} + +/* + * Fills result out with a cache block that isn't in use, or return + * -ENOSPC. This does _not_ mark the cblock as allocated, the caller is + * reponsible for that. + */ +static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end, + dm_cblock_t *result, unsigned *last_word) +{ + int r = -ENOSPC; + unsigned w; + + for (w = begin; w < end; w++) { + /* + * ffz is undefined if no zero exists + */ + if (mq->allocation_bitset[w] != ~0UL) { + *last_word = w; + *result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w])); + if (from_cblock(*result) < from_cblock(mq->cache_size)) + r = 0; + + break; + } + } + + return r; +} + +static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result) +{ + int r; + + if (!any_free_cblocks(mq)) + return -ENOSPC; + + r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word); + if (r == -ENOSPC && mq->find_free_last_word) + r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word); + + return r; +} + +/*----------------------------------------------------------------*/ + +/* + * Now we get to the meat of the policy. This section deals with deciding + * when to to add entries to the pre_cache and cache, and move between + * them. + */ + +/* + * The queue level is based on the log2 of the hit count. + */ +static unsigned queue_level(struct entry *e) +{ + return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u); +} + +/* + * Inserts the entry into the pre_cache or the cache. Ensures the cache + * block is marked as allocated if necc. Inserts into the hash table. Sets the + * tick which records when the entry was last moved about. + */ +static void push(struct mq_policy *mq, struct entry *e) +{ + e->tick = mq->tick; + hash_insert(mq, e); + + if (e->in_cache) { + alloc_cblock(mq, e->cblock); + queue_push(&mq->cache, queue_level(e), &e->list); + } else + queue_push(&mq->pre_cache, queue_level(e), &e->list); +} + +/* + * Removes an entry from pre_cache or cache. Removes from the hash table. + * Frees off the cache block if necc. + */ +static void del(struct mq_policy *mq, struct entry *e) +{ + queue_remove(&e->list); + hash_remove(e); + if (e->in_cache) + free_cblock(mq, e->cblock); +} + +/* + * Like del, except it removes the first entry in the queue (ie. the least + * recently used). + */ +static struct entry *pop(struct mq_policy *mq, struct queue *q) +{ + struct entry *e = container_of(queue_pop(q), struct entry, list); + + if (e) { + hash_remove(e); + + if (e->in_cache) + free_cblock(mq, e->cblock); + } + + return e; +} + +/* + * Has this entry already been updated? + */ +static bool updated_this_tick(struct mq_policy *mq, struct entry *e) +{ + return mq->tick == e->tick; +} + +/* + * The promotion threshold is adjusted every generation. As are the counts + * of the entries. + * + * At the moment the threshold is taken by averaging the hit counts of some + * of the entries in the cache (the first 20 entries of the first level). + * + * We can be much cleverer than this though. For example, each promotion + * could bump up the threshold helping to prevent churn. Much more to do + * here. + */ + +#define MAX_TO_AVERAGE 20 + +static void check_generation(struct mq_policy *mq) +{ + unsigned total = 0, nr = 0, count = 0, level; + struct list_head *head; + struct entry *e; + + if ((mq->hit_count >= mq->generation_period) && + (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) { + + mq->hit_count = 0; + mq->generation++; + + for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) { + head = mq->cache.qs + level; + list_for_each_entry(e, head, list) { + nr++; + total += e->hit_count; + + if (++count >= MAX_TO_AVERAGE) + break; + } + } + + mq->promote_threshold = nr ? total / nr : 1; + if (mq->promote_threshold * nr < total) + mq->promote_threshold++; + } +} + +/* + * Whenever we use an entry we bump up it's hit counter, and push it to the + * back to it's current level. + */ +static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e) +{ + if (updated_this_tick(mq, e)) + return; + + e->hit_count++; + mq->hit_count++; + check_generation(mq); + + /* generation adjustment, to stop the counts increasing forever. */ + /* FIXME: divide? */ + /* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */ + e->generation = mq->generation; + + del(mq, e); + push(mq, e); +} + +/* + * Demote the least recently used entry from the cache to the pre_cache. + * Returns the new cache entry to use, and the old origin block it was + * mapped to. + * + * We drop the hit count on the demoted entry back to 1 to stop it bouncing + * straight back into the cache if it's subsequently hit. There are + * various options here, and more experimentation would be good: + * + * - just forget about the demoted entry completely (ie. don't insert it + into the pre_cache). + * - divide the hit count rather that setting to some hard coded value. + * - set the hit count to a hard coded value other than 1, eg, is it better + * if it goes in at level 2? + */ +static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) +{ + dm_cblock_t result; + struct entry *demoted = pop(mq, &mq->cache); + + BUG_ON(!demoted); + result = demoted->cblock; + *oblock = demoted->oblock; + demoted->in_cache = false; + demoted->hit_count = 1; + push(mq, demoted); + + return result; +} + +/* + * We modify the basic promotion_threshold depending on the specific io. + * + * If the origin block has been discarded then there's no cost to copy it + * to the cache. + * + * We bias towards reads, since they can be demoted at no cost if they + * haven't been dirtied. + */ +#define DISCARDED_PROMOTE_THRESHOLD 1 +#define READ_PROMOTE_THRESHOLD 4 +#define WRITE_PROMOTE_THRESHOLD 8 + +static unsigned adjusted_promote_threshold(struct mq_policy *mq, + bool discarded_oblock, int data_dir) +{ + if (discarded_oblock && any_free_cblocks(mq) && data_dir == WRITE) + /* + * We don't need to do any copying at all, so give this a + * very low threshold. In practice this only triggers + * during initial population after a format. + */ + return DISCARDED_PROMOTE_THRESHOLD; + + return data_dir == READ ? + (mq->promote_threshold + READ_PROMOTE_THRESHOLD) : + (mq->promote_threshold + WRITE_PROMOTE_THRESHOLD); +} + +static bool should_promote(struct mq_policy *mq, struct entry *e, + bool discarded_oblock, int data_dir) +{ + return e->hit_count >= + adjusted_promote_threshold(mq, discarded_oblock, data_dir); +} + +static int cache_entry_found(struct mq_policy *mq, + struct entry *e, + struct policy_result *result) +{ + requeue_and_update_tick(mq, e); + + if (e->in_cache) { + result->op = POLICY_HIT; + result->cblock = e->cblock; + } + + return 0; +} + +/* + * Moves and entry from the pre_cache to the cache. The main work is + * finding which cache block to use. + */ +static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, + struct policy_result *result) +{ + dm_cblock_t cblock; + + if (find_free_cblock(mq, &cblock) == -ENOSPC) { + result->op = POLICY_REPLACE; + cblock = demote_cblock(mq, &result->old_oblock); + } else + result->op = POLICY_NEW; + + result->cblock = e->cblock = cblock; + + del(mq, e); + e->in_cache = true; + push(mq, e); + + return 0; +} + +static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, + bool can_migrate, bool discarded_oblock, + int data_dir, struct policy_result *result) +{ + int r = 0; + bool updated = updated_this_tick(mq, e); + + requeue_and_update_tick(mq, e); + + if ((!discarded_oblock && updated) || + !should_promote(mq, e, discarded_oblock, data_dir)) + result->op = POLICY_MISS; + else if (!can_migrate) + r = -EWOULDBLOCK; + else + r = pre_cache_to_cache(mq, e, result); + + return r; +} + +static void insert_in_pre_cache(struct mq_policy *mq, + dm_oblock_t oblock) +{ + struct entry *e = alloc_entry(mq); + + if (!e) + /* + * There's no spare entry structure, so we grab the least + * used one from the pre_cache. + */ + e = pop(mq, &mq->pre_cache); + + if (unlikely(!e)) { + DMWARN("couldn't pop from pre cache"); + return; + } + + e->in_cache = false; + e->oblock = oblock; + e->hit_count = 1; + e->generation = mq->generation; + push(mq, e); +} + +static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, + struct policy_result *result) +{ + struct entry *e; + dm_cblock_t cblock; + + if (find_free_cblock(mq, &cblock) == -ENOSPC) { + result->op = POLICY_MISS; + insert_in_pre_cache(mq, oblock); + return; + } + + e = alloc_entry(mq); + if (unlikely(!e)) { + result->op = POLICY_MISS; + return; + } + + e->oblock = oblock; + e->cblock = cblock; + e->in_cache = true; + e->hit_count = 1; + e->generation = mq->generation; + push(mq, e); + + result->op = POLICY_NEW; + result->cblock = e->cblock; +} + +static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, + bool can_migrate, bool discarded_oblock, + int data_dir, struct policy_result *result) +{ + if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) { + if (can_migrate) + insert_in_cache(mq, oblock, result); + else + return -EWOULDBLOCK; + } else { + insert_in_pre_cache(mq, oblock); + result->op = POLICY_MISS; + } + + return 0; +} + +/* + * Looks the oblock up in the hash table, then decides whether to put in + * pre_cache, or cache etc. + */ +static int map(struct mq_policy *mq, dm_oblock_t oblock, + bool can_migrate, bool discarded_oblock, + int data_dir, struct policy_result *result) +{ + int r = 0; + struct entry *e = hash_lookup(mq, oblock); + + if (e && e->in_cache) + r = cache_entry_found(mq, e, result); + else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) + result->op = POLICY_MISS; + else if (e) + r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, + data_dir, result); + else + r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, + data_dir, result); + + if (r == -EWOULDBLOCK) + result->op = POLICY_MISS; + + return r; +} + +/*----------------------------------------------------------------*/ + +/* + * Public interface, via the policy struct. See dm-cache-policy.h for a + * description of these. + */ + +static struct mq_policy *to_mq_policy(struct dm_cache_policy *p) +{ + return container_of(p, struct mq_policy, policy); +} + +static void mq_destroy(struct dm_cache_policy *p) +{ + struct mq_policy *mq = to_mq_policy(p); + + free_bitset(mq->allocation_bitset); + kfree(mq->table); + free_entries(mq); + kfree(mq); +} + +static void copy_tick(struct mq_policy *mq) +{ + unsigned long flags; + + spin_lock_irqsave(&mq->tick_lock, flags); + mq->tick = mq->tick_protected; + spin_unlock_irqrestore(&mq->tick_lock, flags); +} + +static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + int r; + struct mq_policy *mq = to_mq_policy(p); + + result->op = POLICY_MISS; + + if (can_block) + mutex_lock(&mq->lock); + else if (!mutex_trylock(&mq->lock)) + return -EWOULDBLOCK; + + copy_tick(mq); + + iot_examine_bio(&mq->tracker, bio); + r = map(mq, oblock, can_migrate, discarded_oblock, + bio_data_dir(bio), result); + + mutex_unlock(&mq->lock); + + return r; +} + +static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + int r; + struct mq_policy *mq = to_mq_policy(p); + struct entry *e; + + if (!mutex_trylock(&mq->lock)) + return -EWOULDBLOCK; + + e = hash_lookup(mq, oblock); + if (e && e->in_cache) { + *cblock = e->cblock; + r = 0; + } else + r = -ENOENT; + + mutex_unlock(&mq->lock); + + return r; +} + +static int mq_load_mapping(struct dm_cache_policy *p, + dm_oblock_t oblock, dm_cblock_t cblock, + uint32_t hint, bool hint_valid) +{ + struct mq_policy *mq = to_mq_policy(p); + struct entry *e; + + e = alloc_entry(mq); + if (!e) + return -ENOMEM; + + e->cblock = cblock; + e->oblock = oblock; + e->in_cache = true; + e->hit_count = hint_valid ? hint : 1; + e->generation = mq->generation; + push(mq, e); + + return 0; +} + +static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, + void *context) +{ + struct mq_policy *mq = to_mq_policy(p); + int r = 0; + struct entry *e; + unsigned level; + + mutex_lock(&mq->lock); + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + list_for_each_entry(e, &mq->cache.qs[level], list) { + r = fn(context, e->cblock, e->oblock, e->hit_count); + if (r) + goto out; + } + +out: + mutex_unlock(&mq->lock); + + return r; +} + +static void remove_mapping(struct mq_policy *mq, dm_oblock_t oblock) +{ + struct entry *e = hash_lookup(mq, oblock); + + BUG_ON(!e || !e->in_cache); + + del(mq, e); + e->in_cache = false; + push(mq, e); +} + +static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + remove_mapping(mq, oblock); + mutex_unlock(&mq->lock); +} + +static void force_mapping(struct mq_policy *mq, + dm_oblock_t current_oblock, dm_oblock_t new_oblock) +{ + struct entry *e = hash_lookup(mq, current_oblock); + + BUG_ON(!e || !e->in_cache); + + del(mq, e); + e->oblock = new_oblock; + push(mq, e); +} + +static void mq_force_mapping(struct dm_cache_policy *p, + dm_oblock_t current_oblock, dm_oblock_t new_oblock) +{ + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + force_mapping(mq, current_oblock, new_oblock); + mutex_unlock(&mq->lock); +} + +static dm_cblock_t mq_residency(struct dm_cache_policy *p) +{ + struct mq_policy *mq = to_mq_policy(p); + + /* FIXME: lock mutex, not sure we can block here */ + return to_cblock(mq->nr_cblocks_allocated); +} + +static void mq_tick(struct dm_cache_policy *p) +{ + struct mq_policy *mq = to_mq_policy(p); + unsigned long flags; + + spin_lock_irqsave(&mq->tick_lock, flags); + mq->tick_protected++; + spin_unlock_irqrestore(&mq->tick_lock, flags); +} + +static int mq_set_config_value(struct dm_cache_policy *p, + const char *key, const char *value) +{ + struct mq_policy *mq = to_mq_policy(p); + enum io_pattern pattern; + unsigned long tmp; + + if (!strcasecmp(key, "random_threshold")) + pattern = PATTERN_RANDOM; + else if (!strcasecmp(key, "sequential_threshold")) + pattern = PATTERN_SEQUENTIAL; + else + return -EINVAL; + + if (kstrtoul(value, 10, &tmp)) + return -EINVAL; + + mq->tracker.thresholds[pattern] = tmp; + + return 0; +} + +static int mq_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen) +{ + ssize_t sz = 0; + struct mq_policy *mq = to_mq_policy(p); + + DMEMIT("4 random_threshold %u sequential_threshold %u", + mq->tracker.thresholds[PATTERN_RANDOM], + mq->tracker.thresholds[PATTERN_SEQUENTIAL]); + + return 0; +} + +/* Init the policy plugin interface function pointers. */ +static void init_policy_functions(struct mq_policy *mq) +{ + mq->policy.destroy = mq_destroy; + mq->policy.map = mq_map; + mq->policy.lookup = mq_lookup; + mq->policy.load_mapping = mq_load_mapping; + mq->policy.walk_mappings = mq_walk_mappings; + mq->policy.remove_mapping = mq_remove_mapping; + mq->policy.writeback_work = NULL; + mq->policy.force_mapping = mq_force_mapping; + mq->policy.residency = mq_residency; + mq->policy.tick = mq_tick; + mq->policy.emit_config_values = mq_emit_config_values; + mq->policy.set_config_value = mq_set_config_value; +} + +static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + int r; + struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL); + + if (!mq) + return NULL; + + init_policy_functions(mq); + iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT); + + mq->cache_size = cache_size; + mq->tick_protected = 0; + mq->tick = 0; + mq->hit_count = 0; + mq->generation = 0; + mq->promote_threshold = 0; + mutex_init(&mq->lock); + spin_lock_init(&mq->tick_lock); + mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG); + mq->find_free_last_word = 0; + + queue_init(&mq->pre_cache); + queue_init(&mq->cache); + mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); + + mq->nr_entries = 2 * from_cblock(cache_size); + r = alloc_entries(mq, mq->nr_entries); + if (r) + goto bad_cache_alloc; + + mq->nr_entries_allocated = 0; + mq->nr_cblocks_allocated = 0; + + mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); + mq->hash_bits = ffs(mq->nr_buckets) - 1; + mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL); + if (!mq->table) + goto bad_alloc_table; + + mq->allocation_bitset = alloc_bitset(from_cblock(cache_size)); + if (!mq->allocation_bitset) + goto bad_alloc_bitset; + + return &mq->policy; + +bad_alloc_bitset: + kfree(mq->table); +bad_alloc_table: + free_entries(mq); +bad_cache_alloc: + kfree(mq); + + return NULL; +} + +/*----------------------------------------------------------------*/ + +static struct dm_cache_policy_type mq_policy_type = { + .name = "mq", + .hint_size = 4, + .owner = THIS_MODULE, + .create = mq_create +}; + +static struct dm_cache_policy_type default_policy_type = { + .name = "default", + .hint_size = 4, + .owner = THIS_MODULE, + .create = mq_create +}; + +static int __init mq_init(void) +{ + int r; + + mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry", + sizeof(struct entry), + __alignof__(struct entry), + 0, NULL); + if (!mq_entry_cache) + goto bad; + + r = dm_cache_policy_register(&mq_policy_type); + if (r) { + DMERR("register failed %d", r); + goto bad_register_mq; + } + + r = dm_cache_policy_register(&default_policy_type); + if (!r) { + DMINFO("version " MQ_VERSION " loaded"); + return 0; + } + + DMERR("register failed (as default) %d", r); + + dm_cache_policy_unregister(&mq_policy_type); +bad_register_mq: + kmem_cache_destroy(mq_entry_cache); +bad: + return -ENOMEM; +} + +static void __exit mq_exit(void) +{ + dm_cache_policy_unregister(&mq_policy_type); + dm_cache_policy_unregister(&default_policy_type); + + kmem_cache_destroy(mq_entry_cache); +} + +module_init(mq_init); +module_exit(mq_exit); + +MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("mq cache policy"); + +MODULE_ALIAS("dm-cache-default"); diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c new file mode 100644 index 000000000000..2cbf5fdaac52 --- /dev/null +++ b/drivers/md/dm-cache-policy.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#include "dm-cache-policy-internal.h" +#include "dm.h" + +#include <linux/module.h> +#include <linux/slab.h> + +/*----------------------------------------------------------------*/ + +#define DM_MSG_PREFIX "cache-policy" + +static DEFINE_SPINLOCK(register_lock); +static LIST_HEAD(register_list); + +static struct dm_cache_policy_type *__find_policy(const char *name) +{ + struct dm_cache_policy_type *t; + + list_for_each_entry(t, ®ister_list, list) + if (!strcmp(t->name, name)) + return t; + + return NULL; +} + +static struct dm_cache_policy_type *__get_policy_once(const char *name) +{ + struct dm_cache_policy_type *t = __find_policy(name); + + if (t && !try_module_get(t->owner)) { + DMWARN("couldn't get module %s", name); + t = ERR_PTR(-EINVAL); + } + + return t; +} + +static struct dm_cache_policy_type *get_policy_once(const char *name) +{ + struct dm_cache_policy_type *t; + + spin_lock(®ister_lock); + t = __get_policy_once(name); + spin_unlock(®ister_lock); + + return t; +} + +static struct dm_cache_policy_type *get_policy(const char *name) +{ + struct dm_cache_policy_type *t; + + t = get_policy_once(name); + if (IS_ERR(t)) + return NULL; + + if (t) + return t; + + request_module("dm-cache-%s", name); + + t = get_policy_once(name); + if (IS_ERR(t)) + return NULL; + + return t; +} + +static void put_policy(struct dm_cache_policy_type *t) +{ + module_put(t->owner); +} + +int dm_cache_policy_register(struct dm_cache_policy_type *type) +{ + int r; + + /* One size fits all for now */ + if (type->hint_size != 0 && type->hint_size != 4) { + DMWARN("hint size must be 0 or 4 but %llu supplied.", (unsigned long long) type->hint_size); + return -EINVAL; + } + + spin_lock(®ister_lock); + if (__find_policy(type->name)) { + DMWARN("attempt to register policy under duplicate name %s", type->name); + r = -EINVAL; + } else { + list_add(&type->list, ®ister_list); + r = 0; + } + spin_unlock(®ister_lock); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_register); + +void dm_cache_policy_unregister(struct dm_cache_policy_type *type) +{ + spin_lock(®ister_lock); + list_del_init(&type->list); + spin_unlock(®ister_lock); +} +EXPORT_SYMBOL_GPL(dm_cache_policy_unregister); + +struct dm_cache_policy *dm_cache_policy_create(const char *name, + dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + struct dm_cache_policy *p = NULL; + struct dm_cache_policy_type *type; + + type = get_policy(name); + if (!type) { + DMWARN("unknown policy type"); + return NULL; + } + + p = type->create(cache_size, origin_size, cache_block_size); + if (!p) { + put_policy(type); + return NULL; + } + p->private = type; + + return p; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_create); + +void dm_cache_policy_destroy(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + p->destroy(p); + put_policy(t); +} +EXPORT_SYMBOL_GPL(dm_cache_policy_destroy); + +const char *dm_cache_policy_get_name(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + return t->name; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_get_name); + +size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + return t->hint_size; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_get_hint_size); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h new file mode 100644 index 000000000000..f0f51b260544 --- /dev/null +++ b/drivers/md/dm-cache-policy.h @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_POLICY_H +#define DM_CACHE_POLICY_H + +#include "dm-cache-block-types.h" + +#include <linux/device-mapper.h> + +/*----------------------------------------------------------------*/ + +/* FIXME: make it clear which methods are optional. Get debug policy to + * double check this at start. + */ + +/* + * The cache policy makes the important decisions about which blocks get to + * live on the faster cache device. + * + * When the core target has to remap a bio it calls the 'map' method of the + * policy. This returns an instruction telling the core target what to do. + * + * POLICY_HIT: + * That block is in the cache. Remap to the cache and carry on. + * + * POLICY_MISS: + * This block is on the origin device. Remap and carry on. + * + * POLICY_NEW: + * This block is currently on the origin device, but the policy wants to + * move it. The core should: + * + * - hold any further io to this origin block + * - copy the origin to the given cache block + * - release all the held blocks + * - remap the original block to the cache + * + * POLICY_REPLACE: + * This block is currently on the origin device. The policy wants to + * move it to the cache, with the added complication that the destination + * cache block needs a writeback first. The core should: + * + * - hold any further io to this origin block + * - hold any further io to the origin block that's being written back + * - writeback + * - copy new block to cache + * - release held blocks + * - remap bio to cache and reissue. + * + * Should the core run into trouble while processing a POLICY_NEW or + * POLICY_REPLACE instruction it will roll back the policies mapping using + * remove_mapping() or force_mapping(). These methods must not fail. This + * approach avoids having transactional semantics in the policy (ie, the + * core informing the policy when a migration is complete), and hence makes + * it easier to write new policies. + * + * In general policy methods should never block, except in the case of the + * map function when can_migrate is set. So be careful to implement using + * bounded, preallocated memory. + */ +enum policy_operation { + POLICY_HIT, + POLICY_MISS, + POLICY_NEW, + POLICY_REPLACE +}; + +/* + * This is the instruction passed back to the core target. + */ +struct policy_result { + enum policy_operation op; + dm_oblock_t old_oblock; /* POLICY_REPLACE */ + dm_cblock_t cblock; /* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */ +}; + +typedef int (*policy_walk_fn)(void *context, dm_cblock_t cblock, + dm_oblock_t oblock, uint32_t hint); + +/* + * The cache policy object. Just a bunch of methods. It is envisaged that + * this structure will be embedded in a bigger, policy specific structure + * (ie. use container_of()). + */ +struct dm_cache_policy { + + /* + * FIXME: make it clear which methods are optional, and which may + * block. + */ + + /* + * Destroys this object. + */ + void (*destroy)(struct dm_cache_policy *p); + + /* + * See large comment above. + * + * oblock - the origin block we're interested in. + * + * can_block - indicates whether the current thread is allowed to + * block. -EWOULDBLOCK returned if it can't and would. + * + * can_migrate - gives permission for POLICY_NEW or POLICY_REPLACE + * instructions. If denied and the policy would have + * returned one of these instructions it should + * return -EWOULDBLOCK. + * + * discarded_oblock - indicates whether the whole origin block is + * in a discarded state (FIXME: better to tell the + * policy about this sooner, so it can recycle that + * cache block if it wants.) + * bio - the bio that triggered this call. + * result - gets filled in with the instruction. + * + * May only return 0, or -EWOULDBLOCK (if !can_migrate) + */ + int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result); + + /* + * Sometimes we want to see if a block is in the cache, without + * triggering any update of stats. (ie. it's not a real hit). + * + * Must not block. + * + * Returns 1 iff in cache, 0 iff not, < 0 on error (-EWOULDBLOCK + * would be typical). + */ + int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock); + + /* + * oblock must be a mapped block. Must not block. + */ + void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); + void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); + + /* + * Called when a cache target is first created. Used to load a + * mapping from the metadata device into the policy. + */ + int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock, + dm_cblock_t cblock, uint32_t hint, bool hint_valid); + + int (*walk_mappings)(struct dm_cache_policy *p, policy_walk_fn fn, + void *context); + + /* + * Override functions used on the error paths of the core target. + * They must succeed. + */ + void (*remove_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock); + void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock, + dm_oblock_t new_oblock); + + int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock); + + + /* + * How full is the cache? + */ + dm_cblock_t (*residency)(struct dm_cache_policy *p); + + /* + * Because of where we sit in the block layer, we can be asked to + * map a lot of little bios that are all in the same block (no + * queue merging has occurred). To stop the policy being fooled by + * these the core target sends regular tick() calls to the policy. + * The policy should only count an entry as hit once per tick. + */ + void (*tick)(struct dm_cache_policy *p); + + /* + * Configuration. + */ + int (*emit_config_values)(struct dm_cache_policy *p, + char *result, unsigned maxlen); + int (*set_config_value)(struct dm_cache_policy *p, + const char *key, const char *value); + + /* + * Book keeping ptr for the policy register, not for general use. + */ + void *private; +}; + +/*----------------------------------------------------------------*/ + +/* + * We maintain a little register of the different policy types. + */ +#define CACHE_POLICY_NAME_SIZE 16 + +struct dm_cache_policy_type { + /* For use by the register code only. */ + struct list_head list; + + /* + * Policy writers should fill in these fields. The name field is + * what gets passed on the target line to select your policy. + */ + char name[CACHE_POLICY_NAME_SIZE]; + + /* + * Policies may store a hint for each each cache block. + * Currently the size of this hint must be 0 or 4 bytes but we + * expect to relax this in future. + */ + size_t hint_size; + + struct module *owner; + struct dm_cache_policy *(*create)(dm_cblock_t cache_size, + sector_t origin_size, + sector_t block_size); +}; + +int dm_cache_policy_register(struct dm_cache_policy_type *type); +void dm_cache_policy_unregister(struct dm_cache_policy_type *type); + +/*----------------------------------------------------------------*/ + +#endif /* DM_CACHE_POLICY_H */ diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c new file mode 100644 index 000000000000..0f4e84b15c30 --- /dev/null +++ b/drivers/md/dm-cache-target.c @@ -0,0 +1,2584 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison.h" +#include "dm-cache-metadata.h" + +#include <linux/dm-io.h> +#include <linux/dm-kcopyd.h> +#include <linux/init.h> +#include <linux/mempool.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#define DM_MSG_PREFIX "cache" + +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle, + "A percentage of time allocated for copying to and/or from cache"); + +/*----------------------------------------------------------------*/ + +/* + * Glossary: + * + * oblock: index of an origin block + * cblock: index of a cache block + * promotion: movement of a block from origin to cache + * demotion: movement of a block from cache to origin + * migration: movement of a block between the origin and cache device, + * either direction + */ + +/*----------------------------------------------------------------*/ + +static size_t bitset_size_in_bytes(unsigned nr_entries) +{ + return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG); +} + +static unsigned long *alloc_bitset(unsigned nr_entries) +{ + size_t s = bitset_size_in_bytes(nr_entries); + return vzalloc(s); +} + +static void clear_bitset(void *bitset, unsigned nr_entries) +{ + size_t s = bitset_size_in_bytes(nr_entries); + memset(bitset, 0, s); +} + +static void free_bitset(unsigned long *bits) +{ + vfree(bits); +} + +/*----------------------------------------------------------------*/ + +#define PRISON_CELLS 1024 +#define MIGRATION_POOL_SIZE 128 +#define COMMIT_PERIOD HZ +#define MIGRATION_COUNT_WINDOW 10 + +/* + * The block size of the device holding cache data must be >= 32KB + */ +#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT) + +/* + * FIXME: the cache is read/write for the time being. + */ +enum cache_mode { + CM_WRITE, /* metadata may be changed */ + CM_READ_ONLY, /* metadata may not be changed */ +}; + +struct cache_features { + enum cache_mode mode; + bool write_through:1; +}; + +struct cache_stats { + atomic_t read_hit; + atomic_t read_miss; + atomic_t write_hit; + atomic_t write_miss; + atomic_t demotion; + atomic_t promotion; + atomic_t copies_avoided; + atomic_t cache_cell_clash; + atomic_t commit_count; + atomic_t discard_count; +}; + +struct cache { + struct dm_target *ti; + struct dm_target_callbacks callbacks; + + /* + * Metadata is written to this device. + */ + struct dm_dev *metadata_dev; + + /* + * The slower of the two data devices. Typically a spindle. + */ + struct dm_dev *origin_dev; + + /* + * The faster of the two data devices. Typically an SSD. + */ + struct dm_dev *cache_dev; + + /* + * Cache features such as write-through. + */ + struct cache_features features; + + /* + * Size of the origin device in _complete_ blocks and native sectors. + */ + dm_oblock_t origin_blocks; + sector_t origin_sectors; + + /* + * Size of the cache device in blocks. + */ + dm_cblock_t cache_size; + + /* + * Fields for converting from sectors to blocks. + */ + uint32_t sectors_per_block; + int sectors_per_block_shift; + + struct dm_cache_metadata *cmd; + + spinlock_t lock; + struct bio_list deferred_bios; + struct bio_list deferred_flush_bios; + struct list_head quiesced_migrations; + struct list_head completed_migrations; + struct list_head need_commit_migrations; + sector_t migration_threshold; + atomic_t nr_migrations; + wait_queue_head_t migration_wait; + + /* + * cache_size entries, dirty if set + */ + dm_cblock_t nr_dirty; + unsigned long *dirty_bitset; + + /* + * origin_blocks entries, discarded if set. + */ + sector_t discard_block_size; /* a power of 2 times sectors per block */ + dm_dblock_t discard_nr_blocks; + unsigned long *discard_bitset; + + struct dm_kcopyd_client *copier; + struct workqueue_struct *wq; + struct work_struct worker; + + struct delayed_work waker; + unsigned long last_commit_jiffies; + + struct dm_bio_prison *prison; + struct dm_deferred_set *all_io_ds; + + mempool_t *migration_pool; + struct dm_cache_migration *next_migration; + + struct dm_cache_policy *policy; + unsigned policy_nr_args; + + bool need_tick_bio:1; + bool sized:1; + bool quiescing:1; + bool commit_requested:1; + bool loaded_mappings:1; + bool loaded_discards:1; + + struct cache_stats stats; + + /* + * Rather than reconstructing the table line for the status we just + * save it and regurgitate. + */ + unsigned nr_ctr_args; + const char **ctr_args; +}; + +struct per_bio_data { + bool tick:1; + unsigned req_nr:2; + struct dm_deferred_entry *all_io_entry; +}; + +struct dm_cache_migration { + struct list_head list; + struct cache *cache; + + unsigned long start_jiffies; + dm_oblock_t old_oblock; + dm_oblock_t new_oblock; + dm_cblock_t cblock; + + bool err:1; + bool writeback:1; + bool demote:1; + bool promote:1; + + struct dm_bio_prison_cell *old_ocell; + struct dm_bio_prison_cell *new_ocell; +}; + +/* + * Processing a bio in the worker thread may require these memory + * allocations. We prealloc to avoid deadlocks (the same worker thread + * frees them back to the mempool). + */ +struct prealloc { + struct dm_cache_migration *mg; + struct dm_bio_prison_cell *cell1; + struct dm_bio_prison_cell *cell2; +}; + +static void wake_worker(struct cache *cache) +{ + queue_work(cache->wq, &cache->worker); +} + +/*----------------------------------------------------------------*/ + +static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache) +{ + /* FIXME: change to use a local slab. */ + return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT); +} + +static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell) +{ + dm_bio_prison_free_cell(cache->prison, cell); +} + +static int prealloc_data_structs(struct cache *cache, struct prealloc *p) +{ + if (!p->mg) { + p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + if (!p->mg) + return -ENOMEM; + } + + if (!p->cell1) { + p->cell1 = alloc_prison_cell(cache); + if (!p->cell1) + return -ENOMEM; + } + + if (!p->cell2) { + p->cell2 = alloc_prison_cell(cache); + if (!p->cell2) + return -ENOMEM; + } + + return 0; +} + +static void prealloc_free_structs(struct cache *cache, struct prealloc *p) +{ + if (p->cell2) + free_prison_cell(cache, p->cell2); + + if (p->cell1) + free_prison_cell(cache, p->cell1); + + if (p->mg) + mempool_free(p->mg, cache->migration_pool); +} + +static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) +{ + struct dm_cache_migration *mg = p->mg; + + BUG_ON(!mg); + p->mg = NULL; + + return mg; +} + +/* + * You must have a cell within the prealloc struct to return. If not this + * function will BUG() rather than returning NULL. + */ +static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p) +{ + struct dm_bio_prison_cell *r = NULL; + + if (p->cell1) { + r = p->cell1; + p->cell1 = NULL; + + } else if (p->cell2) { + r = p->cell2; + p->cell2 = NULL; + } else + BUG(); + + return r; +} + +/* + * You can't have more than two cells in a prealloc struct. BUG() will be + * called if you try and overfill. + */ +static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell) +{ + if (!p->cell2) + p->cell2 = cell; + + else if (!p->cell1) + p->cell1 = cell; + + else + BUG(); +} + +/*----------------------------------------------------------------*/ + +static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) +{ + key->virtual = 0; + key->dev = 0; + key->block = from_oblock(oblock); +} + +/* + * The caller hands in a preallocated cell, and a free function for it. + * The cell will be freed if there's an error, or if it wasn't used because + * a cell with that key already exists. + */ +typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); + +static int bio_detain(struct cache *cache, dm_oblock_t oblock, + struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, + cell_free_fn free_fn, void *free_context, + struct dm_bio_prison_cell **cell_result) +{ + int r; + struct dm_cell_key key; + + build_key(oblock, &key); + r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); + if (r) + free_fn(free_context, cell_prealloc); + + return r; +} + +static int get_cell(struct cache *cache, + dm_oblock_t oblock, + struct prealloc *structs, + struct dm_bio_prison_cell **cell_result) +{ + int r; + struct dm_cell_key key; + struct dm_bio_prison_cell *cell_prealloc; + + cell_prealloc = prealloc_get_cell(structs); + + build_key(oblock, &key); + r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); + if (r) + prealloc_put_cell(structs, cell_prealloc); + + return r; +} + + /*----------------------------------------------------------------*/ + +static bool is_dirty(struct cache *cache, dm_cblock_t b) +{ + return test_bit(from_cblock(b), cache->dirty_bitset); +} + +static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) +{ + if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { + cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1); + policy_set_dirty(cache->policy, oblock); + } +} + +static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) +{ + if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { + policy_clear_dirty(cache->policy, oblock); + cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1); + if (!from_cblock(cache->nr_dirty)) + dm_table_event(cache->ti->table); + } +} + +/*----------------------------------------------------------------*/ +static bool block_size_is_power_of_two(struct cache *cache) +{ + return cache->sectors_per_block_shift >= 0; +} + +static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) +{ + sector_t discard_blocks = cache->discard_block_size; + dm_block_t b = from_oblock(oblock); + + if (!block_size_is_power_of_two(cache)) + (void) sector_div(discard_blocks, cache->sectors_per_block); + else + discard_blocks >>= cache->sectors_per_block_shift; + + (void) sector_div(b, discard_blocks); + + return to_dblock(b); +} + +static void set_discard(struct cache *cache, dm_dblock_t b) +{ + unsigned long flags; + + atomic_inc(&cache->stats.discard_count); + + spin_lock_irqsave(&cache->lock, flags); + set_bit(from_dblock(b), cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void clear_discard(struct cache *cache, dm_dblock_t b) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + clear_bit(from_dblock(b), cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); +} + +static bool is_discarded(struct cache *cache, dm_dblock_t b) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + r = test_bit(from_dblock(b), cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); + + return r; +} + +static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + r = test_bit(from_dblock(oblock_to_dblock(cache, b)), + cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); + + return r; +} + +/*----------------------------------------------------------------*/ + +static void load_stats(struct cache *cache) +{ + struct dm_cache_statistics stats; + + dm_cache_metadata_get_stats(cache->cmd, &stats); + atomic_set(&cache->stats.read_hit, stats.read_hits); + atomic_set(&cache->stats.read_miss, stats.read_misses); + atomic_set(&cache->stats.write_hit, stats.write_hits); + atomic_set(&cache->stats.write_miss, stats.write_misses); +} + +static void save_stats(struct cache *cache) +{ + struct dm_cache_statistics stats; + + stats.read_hits = atomic_read(&cache->stats.read_hit); + stats.read_misses = atomic_read(&cache->stats.read_miss); + stats.write_hits = atomic_read(&cache->stats.write_hit); + stats.write_misses = atomic_read(&cache->stats.write_miss); + + dm_cache_metadata_set_stats(cache->cmd, &stats); +} + +/*---------------------------------------------------------------- + * Per bio data + *--------------------------------------------------------------*/ +static struct per_bio_data *get_per_bio_data(struct bio *bio) +{ + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + BUG_ON(!pb); + return pb; +} + +static struct per_bio_data *init_per_bio_data(struct bio *bio) +{ + struct per_bio_data *pb = get_per_bio_data(bio); + + pb->tick = false; + pb->req_nr = dm_bio_get_target_bio_nr(bio); + pb->all_io_entry = NULL; + + return pb; +} + +/*---------------------------------------------------------------- + * Remapping + *--------------------------------------------------------------*/ +static void remap_to_origin(struct cache *cache, struct bio *bio) +{ + bio->bi_bdev = cache->origin_dev->bdev; +} + +static void remap_to_cache(struct cache *cache, struct bio *bio, + dm_cblock_t cblock) +{ + sector_t bi_sector = bio->bi_sector; + + bio->bi_bdev = cache->cache_dev->bdev; + if (!block_size_is_power_of_two(cache)) + bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) + + sector_div(bi_sector, cache->sectors_per_block); + else + bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) | + (bi_sector & (cache->sectors_per_block - 1)); +} + +static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + struct per_bio_data *pb = get_per_bio_data(bio); + + spin_lock_irqsave(&cache->lock, flags); + if (cache->need_tick_bio && + !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) { + pb->tick = true; + cache->need_tick_bio = false; + } + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, + dm_oblock_t oblock) +{ + check_if_tick_bio_needed(cache, bio); + remap_to_origin(cache, bio); + if (bio_data_dir(bio) == WRITE) + clear_discard(cache, oblock_to_dblock(cache, oblock)); +} + +static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, dm_cblock_t cblock) +{ + remap_to_cache(cache, bio, cblock); + if (bio_data_dir(bio) == WRITE) { + set_dirty(cache, oblock, cblock); + clear_discard(cache, oblock_to_dblock(cache, oblock)); + } +} + +static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) +{ + sector_t block_nr = bio->bi_sector; + + if (!block_size_is_power_of_two(cache)) + (void) sector_div(block_nr, cache->sectors_per_block); + else + block_nr >>= cache->sectors_per_block_shift; + + return to_oblock(block_nr); +} + +static int bio_triggers_commit(struct cache *cache, struct bio *bio) +{ + return bio->bi_rw & (REQ_FLUSH | REQ_FUA); +} + +static void issue(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + + if (!bio_triggers_commit(cache, bio)) { + generic_make_request(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in do_worker(). + */ + spin_lock_irqsave(&cache->lock, flags); + cache->commit_requested = true; + bio_list_add(&cache->deferred_flush_bios, bio); + spin_unlock_irqrestore(&cache->lock, flags); +} + +/*---------------------------------------------------------------- + * Migration processing + * + * Migration covers moving data from the origin device to the cache, or + * vice versa. + *--------------------------------------------------------------*/ +static void free_migration(struct dm_cache_migration *mg) +{ + mempool_free(mg, mg->cache->migration_pool); +} + +static void inc_nr_migrations(struct cache *cache) +{ + atomic_inc(&cache->nr_migrations); +} + +static void dec_nr_migrations(struct cache *cache) +{ + atomic_dec(&cache->nr_migrations); + + /* + * Wake the worker in case we're suspending the target. + */ + wake_up(&cache->migration_wait); +} + +static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, + bool holder) +{ + (holder ? dm_cell_release : dm_cell_release_no_holder) + (cache->prison, cell, &cache->deferred_bios); + free_prison_cell(cache, cell); +} + +static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, + bool holder) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + __cell_defer(cache, cell, holder); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void cleanup_migration(struct dm_cache_migration *mg) +{ + dec_nr_migrations(mg->cache); + free_migration(mg); +} + +static void migration_failure(struct dm_cache_migration *mg) +{ + struct cache *cache = mg->cache; + + if (mg->writeback) { + DMWARN_LIMIT("writeback failed; couldn't copy block"); + set_dirty(cache, mg->old_oblock, mg->cblock); + cell_defer(cache, mg->old_ocell, false); + + } else if (mg->demote) { + DMWARN_LIMIT("demotion failed; couldn't copy block"); + policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); + + cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1); + if (mg->promote) + cell_defer(cache, mg->new_ocell, 1); + } else { + DMWARN_LIMIT("promotion failed; couldn't copy block"); + policy_remove_mapping(cache->policy, mg->new_oblock); + cell_defer(cache, mg->new_ocell, 1); + } + + cleanup_migration(mg); +} + +static void migration_success_pre_commit(struct dm_cache_migration *mg) +{ + unsigned long flags; + struct cache *cache = mg->cache; + + if (mg->writeback) { + cell_defer(cache, mg->old_ocell, false); + clear_dirty(cache, mg->old_oblock, mg->cblock); + cleanup_migration(mg); + return; + + } else if (mg->demote) { + if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) { + DMWARN_LIMIT("demotion failed; couldn't update on disk metadata"); + policy_force_mapping(cache->policy, mg->new_oblock, + mg->old_oblock); + if (mg->promote) + cell_defer(cache, mg->new_ocell, true); + cleanup_migration(mg); + return; + } + } else { + if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { + DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); + policy_remove_mapping(cache->policy, mg->new_oblock); + cleanup_migration(mg); + return; + } + } + + spin_lock_irqsave(&cache->lock, flags); + list_add_tail(&mg->list, &cache->need_commit_migrations); + cache->commit_requested = true; + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void migration_success_post_commit(struct dm_cache_migration *mg) +{ + unsigned long flags; + struct cache *cache = mg->cache; + + if (mg->writeback) { + DMWARN("writeback unexpectedly triggered commit"); + return; + + } else if (mg->demote) { + cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1); + + if (mg->promote) { + mg->demote = false; + + spin_lock_irqsave(&cache->lock, flags); + list_add_tail(&mg->list, &cache->quiesced_migrations); + spin_unlock_irqrestore(&cache->lock, flags); + + } else + cleanup_migration(mg); + + } else { + cell_defer(cache, mg->new_ocell, true); + clear_dirty(cache, mg->new_oblock, mg->cblock); + cleanup_migration(mg); + } +} + +static void copy_complete(int read_err, unsigned long write_err, void *context) +{ + unsigned long flags; + struct dm_cache_migration *mg = (struct dm_cache_migration *) context; + struct cache *cache = mg->cache; + + if (read_err || write_err) + mg->err = true; + + spin_lock_irqsave(&cache->lock, flags); + list_add_tail(&mg->list, &cache->completed_migrations); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void issue_copy_real(struct dm_cache_migration *mg) +{ + int r; + struct dm_io_region o_region, c_region; + struct cache *cache = mg->cache; + + o_region.bdev = cache->origin_dev->bdev; + o_region.count = cache->sectors_per_block; + + c_region.bdev = cache->cache_dev->bdev; + c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block; + c_region.count = cache->sectors_per_block; + + if (mg->writeback || mg->demote) { + /* demote */ + o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block; + r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg); + } else { + /* promote */ + o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block; + r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); + } + + if (r < 0) + migration_failure(mg); +} + +static void avoid_copy(struct dm_cache_migration *mg) +{ + atomic_inc(&mg->cache->stats.copies_avoided); + migration_success_pre_commit(mg); +} + +static void issue_copy(struct dm_cache_migration *mg) +{ + bool avoid; + struct cache *cache = mg->cache; + + if (mg->writeback || mg->demote) + avoid = !is_dirty(cache, mg->cblock) || + is_discarded_oblock(cache, mg->old_oblock); + else + avoid = is_discarded_oblock(cache, mg->new_oblock); + + avoid ? avoid_copy(mg) : issue_copy_real(mg); +} + +static void complete_migration(struct dm_cache_migration *mg) +{ + if (mg->err) + migration_failure(mg); + else + migration_success_pre_commit(mg); +} + +static void process_migrations(struct cache *cache, struct list_head *head, + void (*fn)(struct dm_cache_migration *)) +{ + unsigned long flags; + struct list_head list; + struct dm_cache_migration *mg, *tmp; + + INIT_LIST_HEAD(&list); + spin_lock_irqsave(&cache->lock, flags); + list_splice_init(head, &list); + spin_unlock_irqrestore(&cache->lock, flags); + + list_for_each_entry_safe(mg, tmp, &list, list) + fn(mg); +} + +static void __queue_quiesced_migration(struct dm_cache_migration *mg) +{ + list_add_tail(&mg->list, &mg->cache->quiesced_migrations); +} + +static void queue_quiesced_migration(struct dm_cache_migration *mg) +{ + unsigned long flags; + struct cache *cache = mg->cache; + + spin_lock_irqsave(&cache->lock, flags); + __queue_quiesced_migration(mg); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void queue_quiesced_migrations(struct cache *cache, struct list_head *work) +{ + unsigned long flags; + struct dm_cache_migration *mg, *tmp; + + spin_lock_irqsave(&cache->lock, flags); + list_for_each_entry_safe(mg, tmp, work, list) + __queue_quiesced_migration(mg); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void check_for_quiesced_migrations(struct cache *cache, + struct per_bio_data *pb) +{ + struct list_head work; + + if (!pb->all_io_entry) + return; + + INIT_LIST_HEAD(&work); + if (pb->all_io_entry) + dm_deferred_entry_dec(pb->all_io_entry, &work); + + if (!list_empty(&work)) + queue_quiesced_migrations(cache, &work); +} + +static void quiesce_migration(struct dm_cache_migration *mg) +{ + if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list)) + queue_quiesced_migration(mg); +} + +static void promote(struct cache *cache, struct prealloc *structs, + dm_oblock_t oblock, dm_cblock_t cblock, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = false; + mg->demote = false; + mg->promote = true; + mg->cache = cache; + mg->new_oblock = oblock; + mg->cblock = cblock; + mg->old_ocell = NULL; + mg->new_ocell = cell; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + +static void writeback(struct cache *cache, struct prealloc *structs, + dm_oblock_t oblock, dm_cblock_t cblock, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = true; + mg->demote = false; + mg->promote = false; + mg->cache = cache; + mg->old_oblock = oblock; + mg->cblock = cblock; + mg->old_ocell = cell; + mg->new_ocell = NULL; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + +static void demote_then_promote(struct cache *cache, struct prealloc *structs, + dm_oblock_t old_oblock, dm_oblock_t new_oblock, + dm_cblock_t cblock, + struct dm_bio_prison_cell *old_ocell, + struct dm_bio_prison_cell *new_ocell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = false; + mg->demote = true; + mg->promote = true; + mg->cache = cache; + mg->old_oblock = old_oblock; + mg->new_oblock = new_oblock; + mg->cblock = cblock; + mg->old_ocell = old_ocell; + mg->new_ocell = new_ocell; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + +/*---------------------------------------------------------------- + * bio processing + *--------------------------------------------------------------*/ +static void defer_bio(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + bio_list_add(&cache->deferred_bios, bio); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void process_flush_bio(struct cache *cache, struct bio *bio) +{ + struct per_bio_data *pb = get_per_bio_data(bio); + + BUG_ON(bio->bi_size); + if (!pb->req_nr) + remap_to_origin(cache, bio); + else + remap_to_cache(cache, bio, 0); + + issue(cache, bio); +} + +/* + * People generally discard large parts of a device, eg, the whole device + * when formatting. Splitting these large discards up into cache block + * sized ios and then quiescing (always neccessary for discard) takes too + * long. + * + * We keep it simple, and allow any size of discard to come in, and just + * mark off blocks on the discard bitset. No passdown occurs! + * + * To implement passdown we need to change the bio_prison such that a cell + * can have a key that spans many blocks. + */ +static void process_discard_bio(struct cache *cache, struct bio *bio) +{ + dm_block_t start_block = dm_sector_div_up(bio->bi_sector, + cache->discard_block_size); + dm_block_t end_block = bio->bi_sector + bio_sectors(bio); + dm_block_t b; + + (void) sector_div(end_block, cache->discard_block_size); + + for (b = start_block; b < end_block; b++) + set_discard(cache, to_dblock(b)); + + bio_endio(bio, 0); +} + +static bool spare_migration_bandwidth(struct cache *cache) +{ + sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * + cache->sectors_per_block; + return current_volume < cache->migration_threshold; +} + +static bool is_writethrough_io(struct cache *cache, struct bio *bio, + dm_cblock_t cblock) +{ + return bio_data_dir(bio) == WRITE && + cache->features.write_through && !is_dirty(cache, cblock); +} + +static void inc_hit_counter(struct cache *cache, struct bio *bio) +{ + atomic_inc(bio_data_dir(bio) == READ ? + &cache->stats.read_hit : &cache->stats.write_hit); +} + +static void inc_miss_counter(struct cache *cache, struct bio *bio) +{ + atomic_inc(bio_data_dir(bio) == READ ? + &cache->stats.read_miss : &cache->stats.write_miss); +} + +static void process_bio(struct cache *cache, struct prealloc *structs, + struct bio *bio) +{ + int r; + bool release_cell = true; + dm_oblock_t block = get_bio_block(cache, bio); + struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; + struct policy_result lookup_result; + struct per_bio_data *pb = get_per_bio_data(bio); + bool discarded_block = is_discarded_oblock(cache, block); + bool can_migrate = discarded_block || spare_migration_bandwidth(cache); + + /* + * Check to see if that block is currently migrating. + */ + cell_prealloc = prealloc_get_cell(structs); + r = bio_detain(cache, block, bio, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + structs, &new_ocell); + if (r > 0) + return; + + r = policy_map(cache->policy, block, true, can_migrate, discarded_block, + bio, &lookup_result); + + if (r == -EWOULDBLOCK) + /* migration has been denied */ + lookup_result.op = POLICY_MISS; + + switch (lookup_result.op) { + case POLICY_HIT: + inc_hit_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (is_writethrough_io(cache, bio, lookup_result.cblock)) { + /* + * No need to mark anything dirty in write through mode. + */ + pb->req_nr == 0 ? + remap_to_cache(cache, bio, lookup_result.cblock) : + remap_to_origin_clear_discard(cache, bio, block); + } else + remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + + issue(cache, bio); + break; + + case POLICY_MISS: + inc_miss_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (pb->req_nr != 0) { + /* + * This is a duplicate writethrough io that is no + * longer needed because the block has been demoted. + */ + bio_endio(bio, 0); + } else { + remap_to_origin_clear_discard(cache, bio, block); + issue(cache, bio); + } + break; + + case POLICY_NEW: + atomic_inc(&cache->stats.promotion); + promote(cache, structs, block, lookup_result.cblock, new_ocell); + release_cell = false; + break; + + case POLICY_REPLACE: + cell_prealloc = prealloc_get_cell(structs); + r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + structs, &old_ocell); + if (r > 0) { + /* + * We have to be careful to avoid lock inversion of + * the cells. So we back off, and wait for the + * old_ocell to become free. + */ + policy_force_mapping(cache->policy, block, + lookup_result.old_oblock); + atomic_inc(&cache->stats.cache_cell_clash); + break; + } + atomic_inc(&cache->stats.demotion); + atomic_inc(&cache->stats.promotion); + + demote_then_promote(cache, structs, lookup_result.old_oblock, + block, lookup_result.cblock, + old_ocell, new_ocell); + release_cell = false; + break; + + default: + DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__, + (unsigned) lookup_result.op); + bio_io_error(bio); + } + + if (release_cell) + cell_defer(cache, new_ocell, false); +} + +static int need_commit_due_to_time(struct cache *cache) +{ + return jiffies < cache->last_commit_jiffies || + jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; +} + +static int commit_if_needed(struct cache *cache) +{ + if (dm_cache_changed_this_transaction(cache->cmd) && + (cache->commit_requested || need_commit_due_to_time(cache))) { + atomic_inc(&cache->stats.commit_count); + cache->last_commit_jiffies = jiffies; + cache->commit_requested = false; + return dm_cache_commit(cache->cmd, false); + } + + return 0; +} + +static void process_deferred_bios(struct cache *cache) +{ + unsigned long flags; + struct bio_list bios; + struct bio *bio; + struct prealloc structs; + + memset(&structs, 0, sizeof(structs)); + bio_list_init(&bios); + + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&bios, &cache->deferred_bios); + bio_list_init(&cache->deferred_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + while (!bio_list_empty(&bios)) { + /* + * If we've got no free migration structs, and processing + * this bio might require one, we pause until there are some + * prepared mappings to process. + */ + if (prealloc_data_structs(cache, &structs)) { + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&cache->deferred_bios, &bios); + spin_unlock_irqrestore(&cache->lock, flags); + break; + } + + bio = bio_list_pop(&bios); + + if (bio->bi_rw & REQ_FLUSH) + process_flush_bio(cache, bio); + else if (bio->bi_rw & REQ_DISCARD) + process_discard_bio(cache, bio); + else + process_bio(cache, &structs, bio); + } + + prealloc_free_structs(cache, &structs); +} + +static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) +{ + unsigned long flags; + struct bio_list bios; + struct bio *bio; + + bio_list_init(&bios); + + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&bios, &cache->deferred_flush_bios); + bio_list_init(&cache->deferred_flush_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + while ((bio = bio_list_pop(&bios))) + submit_bios ? generic_make_request(bio) : bio_io_error(bio); +} + +static void writeback_some_dirty_blocks(struct cache *cache) +{ + int r = 0; + dm_oblock_t oblock; + dm_cblock_t cblock; + struct prealloc structs; + struct dm_bio_prison_cell *old_ocell; + + memset(&structs, 0, sizeof(structs)); + + while (spare_migration_bandwidth(cache)) { + if (prealloc_data_structs(cache, &structs)) + break; + + r = policy_writeback_work(cache->policy, &oblock, &cblock); + if (r) + break; + + r = get_cell(cache, oblock, &structs, &old_ocell); + if (r) { + policy_set_dirty(cache->policy, oblock); + break; + } + + writeback(cache, &structs, oblock, cblock, old_ocell); + } + + prealloc_free_structs(cache, &structs); +} + +/*---------------------------------------------------------------- + * Main worker loop + *--------------------------------------------------------------*/ +static void start_quiescing(struct cache *cache) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + cache->quiescing = 1; + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void stop_quiescing(struct cache *cache) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + cache->quiescing = 0; + spin_unlock_irqrestore(&cache->lock, flags); +} + +static bool is_quiescing(struct cache *cache) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + r = cache->quiescing; + spin_unlock_irqrestore(&cache->lock, flags); + + return r; +} + +static void wait_for_migrations(struct cache *cache) +{ + wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); +} + +static void stop_worker(struct cache *cache) +{ + cancel_delayed_work(&cache->waker); + flush_workqueue(cache->wq); +} + +static void requeue_deferred_io(struct cache *cache) +{ + struct bio *bio; + struct bio_list bios; + + bio_list_init(&bios); + bio_list_merge(&bios, &cache->deferred_bios); + bio_list_init(&cache->deferred_bios); + + while ((bio = bio_list_pop(&bios))) + bio_endio(bio, DM_ENDIO_REQUEUE); +} + +static int more_work(struct cache *cache) +{ + if (is_quiescing(cache)) + return !list_empty(&cache->quiesced_migrations) || + !list_empty(&cache->completed_migrations) || + !list_empty(&cache->need_commit_migrations); + else + return !bio_list_empty(&cache->deferred_bios) || + !bio_list_empty(&cache->deferred_flush_bios) || + !list_empty(&cache->quiesced_migrations) || + !list_empty(&cache->completed_migrations) || + !list_empty(&cache->need_commit_migrations); +} + +static void do_worker(struct work_struct *ws) +{ + struct cache *cache = container_of(ws, struct cache, worker); + + do { + if (!is_quiescing(cache)) + process_deferred_bios(cache); + + process_migrations(cache, &cache->quiesced_migrations, issue_copy); + process_migrations(cache, &cache->completed_migrations, complete_migration); + + writeback_some_dirty_blocks(cache); + + if (commit_if_needed(cache)) { + process_deferred_flush_bios(cache, false); + + /* + * FIXME: rollback metadata or just go into a + * failure mode and error everything + */ + } else { + process_deferred_flush_bios(cache, true); + process_migrations(cache, &cache->need_commit_migrations, + migration_success_post_commit); + } + } while (more_work(cache)); +} + +/* + * We want to commit periodically so that not too much + * unwritten metadata builds up. + */ +static void do_waker(struct work_struct *ws) +{ + struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); + wake_worker(cache); + queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); +} + +/*----------------------------------------------------------------*/ + +static int is_congested(struct dm_dev *dev, int bdi_bits) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + return bdi_congested(&q->backing_dev_info, bdi_bits); +} + +static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) +{ + struct cache *cache = container_of(cb, struct cache, callbacks); + + return is_congested(cache->origin_dev, bdi_bits) || + is_congested(cache->cache_dev, bdi_bits); +} + +/*---------------------------------------------------------------- + * Target methods + *--------------------------------------------------------------*/ + +/* + * This function gets called on the error paths of the constructor, so we + * have to cope with a partially initialised struct. + */ +static void destroy(struct cache *cache) +{ + unsigned i; + + if (cache->next_migration) + mempool_free(cache->next_migration, cache->migration_pool); + + if (cache->migration_pool) + mempool_destroy(cache->migration_pool); + + if (cache->all_io_ds) + dm_deferred_set_destroy(cache->all_io_ds); + + if (cache->prison) + dm_bio_prison_destroy(cache->prison); + + if (cache->wq) + destroy_workqueue(cache->wq); + + if (cache->dirty_bitset) + free_bitset(cache->dirty_bitset); + + if (cache->discard_bitset) + free_bitset(cache->discard_bitset); + + if (cache->copier) + dm_kcopyd_client_destroy(cache->copier); + + if (cache->cmd) + dm_cache_metadata_close(cache->cmd); + + if (cache->metadata_dev) + dm_put_device(cache->ti, cache->metadata_dev); + + if (cache->origin_dev) + dm_put_device(cache->ti, cache->origin_dev); + + if (cache->cache_dev) + dm_put_device(cache->ti, cache->cache_dev); + + if (cache->policy) + dm_cache_policy_destroy(cache->policy); + + for (i = 0; i < cache->nr_ctr_args ; i++) + kfree(cache->ctr_args[i]); + kfree(cache->ctr_args); + + kfree(cache); +} + +static void cache_dtr(struct dm_target *ti) +{ + struct cache *cache = ti->private; + + destroy(cache); +} + +static sector_t get_dev_size(struct dm_dev *dev) +{ + return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; +} + +/*----------------------------------------------------------------*/ + +/* + * Construct a cache device mapping. + * + * cache <metadata dev> <cache dev> <origin dev> <block size> + * <#feature args> [<feature arg>]* + * <policy> <#policy args> [<policy arg>]* + * + * metadata dev : fast device holding the persistent metadata + * cache dev : fast device holding cached data blocks + * origin dev : slow device holding original data blocks + * block size : cache unit size in sectors + * + * #feature args : number of feature arguments passed + * feature args : writethrough. (The default is writeback.) + * + * policy : the replacement policy to use + * #policy args : an even number of policy arguments corresponding + * to key/value pairs passed to the policy + * policy args : key/value pairs passed to the policy + * E.g. 'sequential_threshold 1024' + * See cache-policies.txt for details. + * + * Optional feature arguments are: + * writethrough : write through caching that prohibits cache block + * content from being different from origin block content. + * Without this argument, the default behaviour is to write + * back cache block contents later for performance reasons, + * so they may differ from the corresponding origin blocks. + */ +struct cache_args { + struct dm_target *ti; + + struct dm_dev *metadata_dev; + + struct dm_dev *cache_dev; + sector_t cache_sectors; + + struct dm_dev *origin_dev; + sector_t origin_sectors; + + uint32_t block_size; + + const char *policy_name; + int policy_argc; + const char **policy_argv; + + struct cache_features features; +}; + +static void destroy_cache_args(struct cache_args *ca) +{ + if (ca->metadata_dev) + dm_put_device(ca->ti, ca->metadata_dev); + + if (ca->cache_dev) + dm_put_device(ca->ti, ca->cache_dev); + + if (ca->origin_dev) + dm_put_device(ca->ti, ca->origin_dev); + + kfree(ca); +} + +static bool at_least_one_arg(struct dm_arg_set *as, char **error) +{ + if (!as->argc) { + *error = "Insufficient args"; + return false; + } + + return true; +} + +static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + int r; + sector_t metadata_dev_size; + char b[BDEVNAME_SIZE]; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, + &ca->metadata_dev); + if (r) { + *error = "Error opening metadata device"; + return r; + } + + metadata_dev_size = get_dev_size(ca->metadata_dev); + if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING) + DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", + bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); + + return 0; +} + +static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + int r; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, + &ca->cache_dev); + if (r) { + *error = "Error opening cache device"; + return r; + } + ca->cache_sectors = get_dev_size(ca->cache_dev); + + return 0; +} + +static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + int r; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, + &ca->origin_dev); + if (r) { + *error = "Error opening origin device"; + return r; + } + + ca->origin_sectors = get_dev_size(ca->origin_dev); + if (ca->ti->len > ca->origin_sectors) { + *error = "Device size larger than cached device"; + return -EINVAL; + } + + return 0; +} + +static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + unsigned long tmp; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + if (kstrtoul(dm_shift_arg(as), 10, &tmp) || !tmp || + tmp < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || + tmp & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { + *error = "Invalid data block size"; + return -EINVAL; + } + + if (tmp > ca->cache_sectors) { + *error = "Data block size is larger than the cache device"; + return -EINVAL; + } + + ca->block_size = tmp; + + return 0; +} + +static void init_features(struct cache_features *cf) +{ + cf->mode = CM_WRITE; + cf->write_through = false; +} + +static int parse_features(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + static struct dm_arg _args[] = { + {0, 1, "Invalid number of cache feature arguments"}, + }; + + int r; + unsigned argc; + const char *arg; + struct cache_features *cf = &ca->features; + + init_features(cf); + + r = dm_read_arg_group(_args, as, &argc, error); + if (r) + return -EINVAL; + + while (argc--) { + arg = dm_shift_arg(as); + + if (!strcasecmp(arg, "writeback")) + cf->write_through = false; + + else if (!strcasecmp(arg, "writethrough")) + cf->write_through = true; + + else { + *error = "Unrecognised cache feature requested"; + return -EINVAL; + } + } + + return 0; +} + +static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + static struct dm_arg _args[] = { + {0, 1024, "Invalid number of policy arguments"}, + }; + + int r; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + ca->policy_name = dm_shift_arg(as); + + r = dm_read_arg_group(_args, as, &ca->policy_argc, error); + if (r) + return -EINVAL; + + ca->policy_argv = (const char **)as->argv; + dm_consume_args(as, ca->policy_argc); + + return 0; +} + +static int parse_cache_args(struct cache_args *ca, int argc, char **argv, + char **error) +{ + int r; + struct dm_arg_set as; + + as.argc = argc; + as.argv = argv; + + r = parse_metadata_dev(ca, &as, error); + if (r) + return r; + + r = parse_cache_dev(ca, &as, error); + if (r) + return r; + + r = parse_origin_dev(ca, &as, error); + if (r) + return r; + + r = parse_block_size(ca, &as, error); + if (r) + return r; + + r = parse_features(ca, &as, error); + if (r) + return r; + + r = parse_policy(ca, &as, error); + if (r) + return r; + + return 0; +} + +/*----------------------------------------------------------------*/ + +static struct kmem_cache *migration_cache; + +static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv) +{ + int r = 0; + + if (argc & 1) { + DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs."); + return -EINVAL; + } + + while (argc) { + r = policy_set_config_value(p, argv[0], argv[1]); + if (r) { + DMWARN("policy_set_config_value failed: key = '%s', value = '%s'", + argv[0], argv[1]); + return r; + } + + argc -= 2; + argv += 2; + } + + return r; +} + +static int create_cache_policy(struct cache *cache, struct cache_args *ca, + char **error) +{ + int r; + + cache->policy = dm_cache_policy_create(ca->policy_name, + cache->cache_size, + cache->origin_sectors, + cache->sectors_per_block); + if (!cache->policy) { + *error = "Error creating cache's policy"; + return -ENOMEM; + } + + r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); + if (r) + dm_cache_policy_destroy(cache->policy); + + return r; +} + +/* + * We want the discard block size to be a power of two, at least the size + * of the cache block size, and have no more than 2^14 discard blocks + * across the origin. + */ +#define MAX_DISCARD_BLOCKS (1 << 14) + +static bool too_many_discard_blocks(sector_t discard_block_size, + sector_t origin_size) +{ + (void) sector_div(origin_size, discard_block_size); + + return origin_size > MAX_DISCARD_BLOCKS; +} + +static sector_t calculate_discard_block_size(sector_t cache_block_size, + sector_t origin_size) +{ + sector_t discard_block_size; + + discard_block_size = roundup_pow_of_two(cache_block_size); + + if (origin_size) + while (too_many_discard_blocks(discard_block_size, origin_size)) + discard_block_size *= 2; + + return discard_block_size; +} + +#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) + +static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio); + +static int cache_create(struct cache_args *ca, struct cache **result) +{ + int r = 0; + char **error = &ca->ti->error; + struct cache *cache; + struct dm_target *ti = ca->ti; + dm_block_t origin_blocks; + struct dm_cache_metadata *cmd; + bool may_format = ca->features.mode == CM_WRITE; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return -ENOMEM; + + cache->ti = ca->ti; + ti->private = cache; + ti->per_bio_data_size = sizeof(struct per_bio_data); + ti->num_flush_bios = 2; + ti->flush_supported = true; + + ti->num_discard_bios = 1; + ti->discards_supported = true; + ti->discard_zeroes_data_unsupported = true; + + memcpy(&cache->features, &ca->features, sizeof(cache->features)); + + if (cache->features.write_through) + ti->num_write_bios = cache_num_write_bios; + + cache->callbacks.congested_fn = cache_is_congested; + dm_table_add_target_callbacks(ti->table, &cache->callbacks); + + cache->metadata_dev = ca->metadata_dev; + cache->origin_dev = ca->origin_dev; + cache->cache_dev = ca->cache_dev; + + ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; + + /* FIXME: factor out this whole section */ + origin_blocks = cache->origin_sectors = ca->origin_sectors; + (void) sector_div(origin_blocks, ca->block_size); + cache->origin_blocks = to_oblock(origin_blocks); + + cache->sectors_per_block = ca->block_size; + if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { + r = -EINVAL; + goto bad; + } + + if (ca->block_size & (ca->block_size - 1)) { + dm_block_t cache_size = ca->cache_sectors; + + cache->sectors_per_block_shift = -1; + (void) sector_div(cache_size, ca->block_size); + cache->cache_size = to_cblock(cache_size); + } else { + cache->sectors_per_block_shift = __ffs(ca->block_size); + cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); + } + + r = create_cache_policy(cache, ca, error); + if (r) + goto bad; + cache->policy_nr_args = ca->policy_argc; + + cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, + ca->block_size, may_format, + dm_cache_policy_get_hint_size(cache->policy)); + if (IS_ERR(cmd)) { + *error = "Error creating metadata object"; + r = PTR_ERR(cmd); + goto bad; + } + cache->cmd = cmd; + + spin_lock_init(&cache->lock); + bio_list_init(&cache->deferred_bios); + bio_list_init(&cache->deferred_flush_bios); + INIT_LIST_HEAD(&cache->quiesced_migrations); + INIT_LIST_HEAD(&cache->completed_migrations); + INIT_LIST_HEAD(&cache->need_commit_migrations); + cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; + atomic_set(&cache->nr_migrations, 0); + init_waitqueue_head(&cache->migration_wait); + + cache->nr_dirty = 0; + cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); + if (!cache->dirty_bitset) { + *error = "could not allocate dirty bitset"; + goto bad; + } + clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); + + cache->discard_block_size = + calculate_discard_block_size(cache->sectors_per_block, + cache->origin_sectors); + cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); + cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); + if (!cache->discard_bitset) { + *error = "could not allocate discard bitset"; + goto bad; + } + clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); + + cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); + if (IS_ERR(cache->copier)) { + *error = "could not create kcopyd client"; + r = PTR_ERR(cache->copier); + goto bad; + } + + cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); + if (!cache->wq) { + *error = "could not create workqueue for metadata object"; + goto bad; + } + INIT_WORK(&cache->worker, do_worker); + INIT_DELAYED_WORK(&cache->waker, do_waker); + cache->last_commit_jiffies = jiffies; + + cache->prison = dm_bio_prison_create(PRISON_CELLS); + if (!cache->prison) { + *error = "could not create bio prison"; + goto bad; + } + + cache->all_io_ds = dm_deferred_set_create(); + if (!cache->all_io_ds) { + *error = "could not create all_io deferred set"; + goto bad; + } + + cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, + migration_cache); + if (!cache->migration_pool) { + *error = "Error creating cache's migration mempool"; + goto bad; + } + + cache->next_migration = NULL; + + cache->need_tick_bio = true; + cache->sized = false; + cache->quiescing = false; + cache->commit_requested = false; + cache->loaded_mappings = false; + cache->loaded_discards = false; + + load_stats(cache); + + atomic_set(&cache->stats.demotion, 0); + atomic_set(&cache->stats.promotion, 0); + atomic_set(&cache->stats.copies_avoided, 0); + atomic_set(&cache->stats.cache_cell_clash, 0); + atomic_set(&cache->stats.commit_count, 0); + atomic_set(&cache->stats.discard_count, 0); + + *result = cache; + return 0; + +bad: + destroy(cache); + return r; +} + +static int copy_ctr_args(struct cache *cache, int argc, const char **argv) +{ + unsigned i; + const char **copy; + + copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); + if (!copy) + return -ENOMEM; + for (i = 0; i < argc; i++) { + copy[i] = kstrdup(argv[i], GFP_KERNEL); + if (!copy[i]) { + while (i--) + kfree(copy[i]); + kfree(copy); + return -ENOMEM; + } + } + + cache->nr_ctr_args = argc; + cache->ctr_args = copy; + + return 0; +} + +static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + int r = -EINVAL; + struct cache_args *ca; + struct cache *cache = NULL; + + ca = kzalloc(sizeof(*ca), GFP_KERNEL); + if (!ca) { + ti->error = "Error allocating memory for cache"; + return -ENOMEM; + } + ca->ti = ti; + + r = parse_cache_args(ca, argc, argv, &ti->error); + if (r) + goto out; + + r = cache_create(ca, &cache); + + r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); + if (r) { + destroy(cache); + goto out; + } + + ti->private = cache; + +out: + destroy_cache_args(ca); + return r; +} + +static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio) +{ + int r; + struct cache *cache = ti->private; + dm_oblock_t block = get_bio_block(cache, bio); + dm_cblock_t cblock; + + r = policy_lookup(cache->policy, block, &cblock); + if (r < 0) + return 2; /* assume the worst */ + + return (!r && !is_dirty(cache, cblock)) ? 2 : 1; +} + +static int cache_map(struct dm_target *ti, struct bio *bio) +{ + struct cache *cache = ti->private; + + int r; + dm_oblock_t block = get_bio_block(cache, bio); + bool can_migrate = false; + bool discarded_block; + struct dm_bio_prison_cell *cell; + struct policy_result lookup_result; + struct per_bio_data *pb; + + if (from_oblock(block) > from_oblock(cache->origin_blocks)) { + /* + * This can only occur if the io goes to a partial block at + * the end of the origin device. We don't cache these. + * Just remap to the origin and carry on. + */ + remap_to_origin_clear_discard(cache, bio, block); + return DM_MAPIO_REMAPPED; + } + + pb = init_per_bio_data(bio); + + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { + defer_bio(cache, bio); + return DM_MAPIO_SUBMITTED; + } + + /* + * Check to see if that block is currently migrating. + */ + cell = alloc_prison_cell(cache); + if (!cell) { + defer_bio(cache, bio); + return DM_MAPIO_SUBMITTED; + } + + r = bio_detain(cache, block, bio, cell, + (cell_free_fn) free_prison_cell, + cache, &cell); + if (r) { + if (r < 0) + defer_bio(cache, bio); + + return DM_MAPIO_SUBMITTED; + } + + discarded_block = is_discarded_oblock(cache, block); + + r = policy_map(cache->policy, block, false, can_migrate, discarded_block, + bio, &lookup_result); + if (r == -EWOULDBLOCK) { + cell_defer(cache, cell, true); + return DM_MAPIO_SUBMITTED; + + } else if (r) { + DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } + + switch (lookup_result.op) { + case POLICY_HIT: + inc_hit_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (is_writethrough_io(cache, bio, lookup_result.cblock)) { + /* + * No need to mark anything dirty in write through mode. + */ + pb->req_nr == 0 ? + remap_to_cache(cache, bio, lookup_result.cblock) : + remap_to_origin_clear_discard(cache, bio, block); + cell_defer(cache, cell, false); + } else { + remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + cell_defer(cache, cell, false); + } + break; + + case POLICY_MISS: + inc_miss_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (pb->req_nr != 0) { + /* + * This is a duplicate writethrough io that is no + * longer needed because the block has been demoted. + */ + bio_endio(bio, 0); + cell_defer(cache, cell, false); + return DM_MAPIO_SUBMITTED; + } else { + remap_to_origin_clear_discard(cache, bio, block); + cell_defer(cache, cell, false); + } + break; + + default: + DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, + (unsigned) lookup_result.op); + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } + + return DM_MAPIO_REMAPPED; +} + +static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) +{ + struct cache *cache = ti->private; + unsigned long flags; + struct per_bio_data *pb = get_per_bio_data(bio); + + if (pb->tick) { + policy_tick(cache->policy); + + spin_lock_irqsave(&cache->lock, flags); + cache->need_tick_bio = true; + spin_unlock_irqrestore(&cache->lock, flags); + } + + check_for_quiesced_migrations(cache, pb); + + return 0; +} + +static int write_dirty_bitset(struct cache *cache) +{ + unsigned i, r; + + for (i = 0; i < from_cblock(cache->cache_size); i++) { + r = dm_cache_set_dirty(cache->cmd, to_cblock(i), + is_dirty(cache, to_cblock(i))); + if (r) + return r; + } + + return 0; +} + +static int write_discard_bitset(struct cache *cache) +{ + unsigned i, r; + + r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, + cache->discard_nr_blocks); + if (r) { + DMERR("could not resize on-disk discard bitset"); + return r; + } + + for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { + r = dm_cache_set_discard(cache->cmd, to_dblock(i), + is_discarded(cache, to_dblock(i))); + if (r) + return r; + } + + return 0; +} + +static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, + uint32_t hint) +{ + struct cache *cache = context; + return dm_cache_save_hint(cache->cmd, cblock, hint); +} + +static int write_hints(struct cache *cache) +{ + int r; + + r = dm_cache_begin_hints(cache->cmd, cache->policy); + if (r) { + DMERR("dm_cache_begin_hints failed"); + return r; + } + + r = policy_walk_mappings(cache->policy, save_hint, cache); + if (r) + DMERR("policy_walk_mappings failed"); + + return r; +} + +/* + * returns true on success + */ +static bool sync_metadata(struct cache *cache) +{ + int r1, r2, r3, r4; + + r1 = write_dirty_bitset(cache); + if (r1) + DMERR("could not write dirty bitset"); + + r2 = write_discard_bitset(cache); + if (r2) + DMERR("could not write discard bitset"); + + save_stats(cache); + + r3 = write_hints(cache); + if (r3) + DMERR("could not write hints"); + + /* + * If writing the above metadata failed, we still commit, but don't + * set the clean shutdown flag. This will effectively force every + * dirty bit to be set on reload. + */ + r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3); + if (r4) + DMERR("could not write cache metadata. Data loss may occur."); + + return !r1 && !r2 && !r3 && !r4; +} + +static void cache_postsuspend(struct dm_target *ti) +{ + struct cache *cache = ti->private; + + start_quiescing(cache); + wait_for_migrations(cache); + stop_worker(cache); + requeue_deferred_io(cache); + stop_quiescing(cache); + + (void) sync_metadata(cache); +} + +static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, + bool dirty, uint32_t hint, bool hint_valid) +{ + int r; + struct cache *cache = context; + + r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); + if (r) + return r; + + if (dirty) + set_dirty(cache, oblock, cblock); + else + clear_dirty(cache, oblock, cblock); + + return 0; +} + +static int load_discard(void *context, sector_t discard_block_size, + dm_dblock_t dblock, bool discard) +{ + struct cache *cache = context; + + /* FIXME: handle mis-matched block size */ + + if (discard) + set_discard(cache, dblock); + else + clear_discard(cache, dblock); + + return 0; +} + +static int cache_preresume(struct dm_target *ti) +{ + int r = 0; + struct cache *cache = ti->private; + sector_t actual_cache_size = get_dev_size(cache->cache_dev); + (void) sector_div(actual_cache_size, cache->sectors_per_block); + + /* + * Check to see if the cache has resized. + */ + if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) { + cache->cache_size = to_cblock(actual_cache_size); + + r = dm_cache_resize(cache->cmd, cache->cache_size); + if (r) { + DMERR("could not resize cache metadata"); + return r; + } + + cache->sized = true; + } + + if (!cache->loaded_mappings) { + r = dm_cache_load_mappings(cache->cmd, + dm_cache_policy_get_name(cache->policy), + load_mapping, cache); + if (r) { + DMERR("could not load cache mappings"); + return r; + } + + cache->loaded_mappings = true; + } + + if (!cache->loaded_discards) { + r = dm_cache_load_discards(cache->cmd, load_discard, cache); + if (r) { + DMERR("could not load origin discards"); + return r; + } + + cache->loaded_discards = true; + } + + return r; +} + +static void cache_resume(struct dm_target *ti) +{ + struct cache *cache = ti->private; + + cache->need_tick_bio = true; + do_waker(&cache->waker.work); +} + +/* + * Status format: + * + * <#used metadata blocks>/<#total metadata blocks> + * <#read hits> <#read misses> <#write hits> <#write misses> + * <#demotions> <#promotions> <#blocks in cache> <#dirty> + * <#features> <features>* + * <#core args> <core args> + * <#policy args> <policy args>* + */ +static void cache_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) +{ + int r = 0; + unsigned i; + ssize_t sz = 0; + dm_block_t nr_free_blocks_metadata = 0; + dm_block_t nr_blocks_metadata = 0; + char buf[BDEVNAME_SIZE]; + struct cache *cache = ti->private; + dm_cblock_t residency; + + switch (type) { + case STATUSTYPE_INFO: + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { + r = dm_cache_commit(cache->cmd, false); + if (r) + DMERR("could not commit metadata for accurate status"); + } + + r = dm_cache_get_free_metadata_block_count(cache->cmd, + &nr_free_blocks_metadata); + if (r) { + DMERR("could not get metadata free block count"); + goto err; + } + + r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); + if (r) { + DMERR("could not get metadata device size"); + goto err; + } + + residency = policy_residency(cache->policy); + + DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ", + (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), + (unsigned long long)nr_blocks_metadata, + (unsigned) atomic_read(&cache->stats.read_hit), + (unsigned) atomic_read(&cache->stats.read_miss), + (unsigned) atomic_read(&cache->stats.write_hit), + (unsigned) atomic_read(&cache->stats.write_miss), + (unsigned) atomic_read(&cache->stats.demotion), + (unsigned) atomic_read(&cache->stats.promotion), + (unsigned long long) from_cblock(residency), + cache->nr_dirty); + + if (cache->features.write_through) + DMEMIT("1 writethrough "); + else + DMEMIT("0 "); + + DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); + if (sz < maxlen) { + r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz); + if (r) + DMERR("policy_emit_config_values returned %d", r); + } + + break; + + case STATUSTYPE_TABLE: + format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); + DMEMIT("%s ", buf); + format_dev_t(buf, cache->cache_dev->bdev->bd_dev); + DMEMIT("%s ", buf); + format_dev_t(buf, cache->origin_dev->bdev->bd_dev); + DMEMIT("%s", buf); + + for (i = 0; i < cache->nr_ctr_args - 1; i++) + DMEMIT(" %s", cache->ctr_args[i]); + if (cache->nr_ctr_args) + DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); + } + + return; + +err: + DMEMIT("Error"); +} + +#define NOT_CORE_OPTION 1 + +static int process_config_option(struct cache *cache, char **argv) +{ + unsigned long tmp; + + if (!strcasecmp(argv[0], "migration_threshold")) { + if (kstrtoul(argv[1], 10, &tmp)) + return -EINVAL; + + cache->migration_threshold = tmp; + return 0; + } + + return NOT_CORE_OPTION; +} + +/* + * Supports <key> <value>. + * + * The key migration_threshold is supported by the cache target core. + */ +static int cache_message(struct dm_target *ti, unsigned argc, char **argv) +{ + int r; + struct cache *cache = ti->private; + + if (argc != 2) + return -EINVAL; + + r = process_config_option(cache, argv); + if (r == NOT_CORE_OPTION) + return policy_set_config_value(cache->policy, argv[0], argv[1]); + + return r; +} + +static int cache_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + int r = 0; + struct cache *cache = ti->private; + + r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); + if (!r) + r = fn(ti, cache->origin_dev, 0, ti->len, data); + + return r; +} + +/* + * We assume I/O is going to the origin (which is the volume + * more likely to have restrictions e.g. by being striped). + * (Looking up the exact location of the data would be expensive + * and could always be out of date by the time the bio is submitted.) + */ +static int cache_bvec_merge(struct dm_target *ti, + struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct cache *cache = ti->private; + struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = cache->origin_dev->bdev; + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + +static void set_discard_limits(struct cache *cache, struct queue_limits *limits) +{ + /* + * FIXME: these limits may be incompatible with the cache device + */ + limits->max_discard_sectors = cache->discard_block_size * 1024; + limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; +} + +static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct cache *cache = ti->private; + + blk_limits_io_min(limits, 0); + blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); + set_discard_limits(cache, limits); +} + +/*----------------------------------------------------------------*/ + +static struct target_type cache_target = { + .name = "cache", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = cache_ctr, + .dtr = cache_dtr, + .map = cache_map, + .end_io = cache_end_io, + .postsuspend = cache_postsuspend, + .preresume = cache_preresume, + .resume = cache_resume, + .status = cache_status, + .message = cache_message, + .iterate_devices = cache_iterate_devices, + .merge = cache_bvec_merge, + .io_hints = cache_io_hints, +}; + +static int __init dm_cache_init(void) +{ + int r; + + r = dm_register_target(&cache_target); + if (r) { + DMERR("cache target registration failed: %d", r); + return r; + } + + migration_cache = KMEM_CACHE(dm_cache_migration, 0); + if (!migration_cache) { + dm_unregister_target(&cache_target); + return -ENOMEM; + } + + return 0; +} + +static void __exit dm_cache_exit(void) +{ + dm_unregister_target(&cache_target); + kmem_cache_destroy(migration_cache); +} + +module_init(dm_cache_init); +module_exit(dm_cache_exit); + +MODULE_DESCRIPTION(DM_NAME " cache target"); +MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f7369f9d8595..13c15480d940 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1234,20 +1234,6 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) return 0; } -/* - * Encode key into its hex representation - */ -static void crypt_encode_key(char *hex, u8 *key, unsigned int size) -{ - unsigned int i; - - for (i = 0; i < size; i++) { - sprintf(hex, "%02x", *key); - hex += 2; - key++; - } -} - static void crypt_free_tfms(struct crypt_config *cc) { unsigned i; @@ -1651,7 +1637,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (opt_params == 1 && opt_string && !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; else if (opt_params) { ret = -EINVAL; ti->error = "Invalid feature arguments"; @@ -1679,7 +1665,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->discard_zeroes_data_unsupported = true; return 0; @@ -1717,11 +1703,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -static int crypt_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void crypt_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; - unsigned int sz = 0; + unsigned i, sz = 0; switch (type) { case STATUSTYPE_INFO: @@ -1731,27 +1717,20 @@ static int crypt_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s ", cc->cipher_string); - if (cc->key_size > 0) { - if ((maxlen - sz) < ((cc->key_size << 1) + 1)) - return -ENOMEM; - - crypt_encode_key(result + sz, cc->key, cc->key_size); - sz += cc->key_size << 1; - } else { - if (sz >= maxlen) - return -ENOMEM; - result[sz++] = '-'; - } + if (cc->key_size > 0) + for (i = 0; i < cc->key_size; i++) + DMEMIT("%02x", cc->key[i]); + else + DMEMIT("-"); DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - if (ti->num_discard_requests) + if (ti->num_discard_bios) DMEMIT(" 1 allow_discards"); break; } - return 0; } static void crypt_postsuspend(struct dm_target *ti) @@ -1845,7 +1824,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 12, 0}, + .version = {1, 12, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index cc1bd048acb2..496d5f3646a5 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -198,8 +198,8 @@ out: mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->private = dc; return 0; @@ -293,8 +293,8 @@ static int delay_map(struct dm_target *ti, struct bio *bio) return delay_bio(dc, dc->read_delay, bio); } -static int delay_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void delay_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; @@ -314,8 +314,6 @@ static int delay_status(struct dm_target *ti, status_type_t type, dc->write_delay); break; } - - return 0; } static int delay_iterate_devices(struct dm_target *ti, @@ -337,7 +335,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 2, 0}, + .version = {1, 2, 1}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 9721f2ffb1a2..7fcf21cb4ff8 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -216,8 +216,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; @@ -337,8 +337,8 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) return error; } -static int flakey_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void flakey_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = ti->private; @@ -368,7 +368,6 @@ static int flakey_status(struct dm_target *ti, status_type_t type, break; } - return 0; } static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) @@ -411,7 +410,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 3, 0}, + .version = {1, 3, 1}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 0666b5d14b88..aa04f0224642 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1067,6 +1067,7 @@ static void retrieve_status(struct dm_table *table, num_targets = dm_table_get_num_targets(table); for (i = 0; i < num_targets; i++) { struct dm_target *ti = dm_table_get_target(table, i); + size_t l; remaining = len - (outptr - outbuf); if (remaining <= sizeof(struct dm_target_spec)) { @@ -1093,14 +1094,17 @@ static void retrieve_status(struct dm_table *table, if (ti->type->status) { if (param->flags & DM_NOFLUSH_FLAG) status_flags |= DM_STATUS_NOFLUSH_FLAG; - if (ti->type->status(ti, type, status_flags, outptr, remaining)) { - param->flags |= DM_BUFFER_FULL_FLAG; - break; - } + ti->type->status(ti, type, status_flags, outptr, remaining); } else outptr[0] = '\0'; - outptr += strlen(outptr) + 1; + l = strlen(outptr) + 1; + if (l == remaining) { + param->flags |= DM_BUFFER_FULL_FLAG; + break; + } + + outptr += l; used = param->data_start + (outptr - outbuf); outptr = align_ptr(outptr); @@ -1410,6 +1414,22 @@ static int table_status(struct dm_ioctl *param, size_t param_size) return 0; } +static bool buffer_test_overflow(char *result, unsigned maxlen) +{ + return !maxlen || strlen(result) + 1 >= maxlen; +} + +/* + * Process device-mapper dependent messages. + * Returns a number <= 1 if message was processed by device mapper. + * Returns 2 if message should be delivered to the target. + */ +static int message_for_md(struct mapped_device *md, unsigned argc, char **argv, + char *result, unsigned maxlen) +{ + return 2; +} + /* * Pass a message to the target that's at the supplied device offset. */ @@ -1421,6 +1441,8 @@ static int target_message(struct dm_ioctl *param, size_t param_size) struct dm_table *table; struct dm_target *ti; struct dm_target_msg *tmsg = (void *) param + param->data_start; + size_t maxlen; + char *result = get_result_buffer(param, param_size, &maxlen); md = find_device(param); if (!md) @@ -1444,6 +1466,10 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out_argv; } + r = message_for_md(md, argc, argv, result, maxlen); + if (r <= 1) + goto out_argv; + table = dm_get_live_table(md); if (!table) goto out_argv; @@ -1469,44 +1495,68 @@ static int target_message(struct dm_ioctl *param, size_t param_size) out_argv: kfree(argv); out: - param->data_size = 0; + if (r >= 0) + __dev_status(md, param); + + if (r == 1) { + param->flags |= DM_DATA_OUT_FLAG; + if (buffer_test_overflow(result, maxlen)) + param->flags |= DM_BUFFER_FULL_FLAG; + else + param->data_size = param->data_start + strlen(result) + 1; + r = 0; + } + dm_put(md); return r; } +/* + * The ioctl parameter block consists of two parts, a dm_ioctl struct + * followed by a data buffer. This flag is set if the second part, + * which has a variable size, is not used by the function processing + * the ioctl. + */ +#define IOCTL_FLAGS_NO_PARAMS 1 + /*----------------------------------------------------------------- * Implementation of open/close/ioctl on the special char * device. *---------------------------------------------------------------*/ -static ioctl_fn lookup_ioctl(unsigned int cmd) +static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) { static struct { int cmd; + int flags; ioctl_fn fn; } _ioctls[] = { - {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ - {DM_REMOVE_ALL_CMD, remove_all}, - {DM_LIST_DEVICES_CMD, list_devices}, - - {DM_DEV_CREATE_CMD, dev_create}, - {DM_DEV_REMOVE_CMD, dev_remove}, - {DM_DEV_RENAME_CMD, dev_rename}, - {DM_DEV_SUSPEND_CMD, dev_suspend}, - {DM_DEV_STATUS_CMD, dev_status}, - {DM_DEV_WAIT_CMD, dev_wait}, - - {DM_TABLE_LOAD_CMD, table_load}, - {DM_TABLE_CLEAR_CMD, table_clear}, - {DM_TABLE_DEPS_CMD, table_deps}, - {DM_TABLE_STATUS_CMD, table_status}, - - {DM_LIST_VERSIONS_CMD, list_versions}, - - {DM_TARGET_MSG_CMD, target_message}, - {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry} + {DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */ + {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all}, + {DM_LIST_DEVICES_CMD, 0, list_devices}, + + {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create}, + {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove}, + {DM_DEV_RENAME_CMD, 0, dev_rename}, + {DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend}, + {DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status}, + {DM_DEV_WAIT_CMD, 0, dev_wait}, + + {DM_TABLE_LOAD_CMD, 0, table_load}, + {DM_TABLE_CLEAR_CMD, IOCTL_FLAGS_NO_PARAMS, table_clear}, + {DM_TABLE_DEPS_CMD, 0, table_deps}, + {DM_TABLE_STATUS_CMD, 0, table_status}, + + {DM_LIST_VERSIONS_CMD, 0, list_versions}, + + {DM_TARGET_MSG_CMD, 0, target_message}, + {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry} }; - return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; + if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) + return NULL; + + *ioctl_flags = _ioctls[cmd].flags; + return _ioctls[cmd].fn; } /* @@ -1543,7 +1593,8 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */ +#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */ #define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) @@ -1551,66 +1602,80 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla if (param_flags & DM_WIPE_BUFFER) memset(param, 0, param_size); + if (param_flags & DM_PARAMS_KMALLOC) + kfree(param); if (param_flags & DM_PARAMS_VMALLOC) vfree(param); - else - kfree(param); } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, + int ioctl_flags, + struct dm_ioctl **param, int *param_flags) { - struct dm_ioctl tmp, *dmi; + struct dm_ioctl *dmi; int secure_data; + const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); - if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data))) + if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; - if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data))) + if (param_kernel->data_size < minimum_data_size) return -EINVAL; - secure_data = tmp.flags & DM_SECURE_DATA_FLAG; + secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG; *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) { + dmi = param_kernel; + dmi->data_size = minimum_data_size; + goto data_copied; + } + /* * Try to avoid low memory issues when a device is suspended. * Use kmalloc() rather than vmalloc() when we can. */ dmi = NULL; - if (tmp.data_size <= KMALLOC_MAX_SIZE) - dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (param_kernel->data_size <= KMALLOC_MAX_SIZE) { + dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (dmi) + *param_flags |= DM_PARAMS_KMALLOC; + } if (!dmi) { - dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); - *param_flags |= DM_PARAMS_VMALLOC; + dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + if (dmi) + *param_flags |= DM_PARAMS_VMALLOC; } if (!dmi) { - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) return -EFAULT; return -ENOMEM; } - if (copy_from_user(dmi, user, tmp.data_size)) + if (copy_from_user(dmi, user, param_kernel->data_size)) goto bad; +data_copied: /* * Abort if something changed the ioctl data while it was being copied. */ - if (dmi->data_size != tmp.data_size) { + if (dmi->data_size != param_kernel->data_size) { DMERR("rejecting ioctl: data size modified while processing parameters"); goto bad; } /* Wipe the user buffer so we do not return it to userspace */ - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; *param = dmi; return 0; bad: - free_params(dmi, tmp.data_size, *param_flags); + free_params(dmi, param_kernel->data_size, *param_flags); return -EFAULT; } @@ -1621,6 +1686,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) param->flags &= ~DM_BUFFER_FULL_FLAG; param->flags &= ~DM_UEVENT_GENERATED_FLAG; param->flags &= ~DM_SECURE_DATA_FLAG; + param->flags &= ~DM_DATA_OUT_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || @@ -1648,11 +1714,13 @@ static int validate_params(uint cmd, struct dm_ioctl *param) static int ctl_ioctl(uint command, struct dm_ioctl __user *user) { int r = 0; + int ioctl_flags; int param_flags; unsigned int cmd; struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; size_t input_param_size; + struct dm_ioctl param_kernel; /* only root can play with this */ if (!capable(CAP_SYS_ADMIN)) @@ -1677,7 +1745,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) if (cmd == DM_VERSION_CMD) return 0; - fn = lookup_ioctl(cmd); + fn = lookup_ioctl(cmd, &ioctl_flags); if (!fn) { DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); return -ENOTTY; @@ -1686,7 +1754,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) /* * Copy the parameters into kernel space. */ - r = copy_params(user, ¶m, ¶m_flags); + r = copy_params(user, ¶m_kernel, ioctl_flags, ¶m, ¶m_flags); if (r) return r; @@ -1699,6 +1767,10 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) param->data_size = sizeof(*param); r = fn(param, input_param_size); + if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) && + unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS)) + DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd); + /* * Copy the results back to userland. */ diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 68c02673263b..d581fe5d2faf 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/delay.h> #include <linux/device-mapper.h> #include <linux/dm-kcopyd.h> @@ -51,6 +52,8 @@ struct dm_kcopyd_client { struct workqueue_struct *kcopyd_wq; struct work_struct kcopyd_work; + struct dm_kcopyd_throttle *throttle; + /* * We maintain three lists of jobs: * @@ -68,6 +71,117 @@ struct dm_kcopyd_client { static struct page_list zero_page_list; +static DEFINE_SPINLOCK(throttle_spinlock); + +/* + * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. + * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided + * by 2. + */ +#define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ + +/* + * Sleep this number of milliseconds. + * + * The value was decided experimentally. + * Smaller values seem to cause an increased copy rate above the limit. + * The reason for this is unknown but possibly due to jiffies rounding errors + * or read/write cache inside the disk. + */ +#define SLEEP_MSEC 100 + +/* + * Maximum number of sleep events. There is a theoretical livelock if more + * kcopyd clients do work simultaneously which this limit avoids. + */ +#define MAX_SLEEPS 10 + +static void io_job_start(struct dm_kcopyd_throttle *t) +{ + unsigned throttle, now, difference; + int slept = 0, skew; + + if (unlikely(!t)) + return; + +try_again: + spin_lock_irq(&throttle_spinlock); + + throttle = ACCESS_ONCE(t->throttle); + + if (likely(throttle >= 100)) + goto skip_limit; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + if (t->num_io_jobs) + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + + if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { + int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); + t->total_period >>= shift; + t->io_period >>= shift; + } + + skew = t->io_period - throttle * t->total_period / 100; + + if (unlikely(skew > 0) && slept < MAX_SLEEPS) { + slept++; + spin_unlock_irq(&throttle_spinlock); + msleep(SLEEP_MSEC); + goto try_again; + } + +skip_limit: + t->num_io_jobs++; + + spin_unlock_irq(&throttle_spinlock); +} + +static void io_job_finish(struct dm_kcopyd_throttle *t) +{ + unsigned long flags; + + if (unlikely(!t)) + return; + + spin_lock_irqsave(&throttle_spinlock, flags); + + t->num_io_jobs--; + + if (likely(ACCESS_ONCE(t->throttle) >= 100)) + goto skip_limit; + + if (!t->num_io_jobs) { + unsigned now, difference; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + } + +skip_limit: + spin_unlock_irqrestore(&throttle_spinlock, flags); +} + + static void wake(struct dm_kcopyd_client *kc) { queue_work(kc->kcopyd_wq, &kc->kcopyd_work); @@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context) struct kcopyd_job *job = (struct kcopyd_job *) context; struct dm_kcopyd_client *kc = job->kc; + io_job_finish(kc->throttle); + if (error) { if (job->rw & WRITE) job->write_err |= error; @@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job) .client = job->kc->io_client, }; + io_job_start(job->kc->throttle); + if (job->rw == READ) r = dm_io(&io_req, 1, &job->source, NULL); else @@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -struct dm_kcopyd_client *dm_kcopyd_client_create(void) +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) { int r = -ENOMEM; struct dm_kcopyd_client *kc; @@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void) INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); + kc->throttle = throttle; kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); if (!kc->job_pool) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 328cad5617ab..4f99d267340c 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -53,9 +53,9 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - ti->num_write_same_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; + ti->num_write_same_bios = 1; ti->private = lc; return 0; @@ -95,8 +95,8 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -static int linear_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void linear_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -110,7 +110,6 @@ static int linear_status(struct dm_target *ti, status_type_t type, (unsigned long long)lc->start); break; } - return 0; } static int linear_ioctl(struct dm_target *ti, unsigned int cmd, @@ -155,7 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 2, 0}, + .version = {1, 2, 1}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 573bd04591bf..51bb81676be3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -905,8 +905,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; return 0; @@ -1378,8 +1378,8 @@ static void multipath_resume(struct dm_target *ti) * [priority selector-name num_ps_args [ps_args]* * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ -static int multipath_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void multipath_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; @@ -1485,8 +1485,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type, } spin_unlock_irqrestore(&m->lock, flags); - - return 0; } static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) @@ -1695,7 +1693,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 5, 0}, + .version = {1, 5, 1}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9e58dbd8d8cb..9a01d1e4c783 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1151,7 +1151,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; mutex_lock(&rs->md.reconfig_mutex); ret = md_run(&rs->md); @@ -1201,8 +1201,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -static int raid_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void raid_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ @@ -1344,8 +1344,6 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" -"); } } - - return 0; } static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) @@ -1405,7 +1403,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 4, 1}, + .version = {1, 4, 2}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fa519185ebba..d053098c6a91 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -82,6 +82,9 @@ struct mirror_set { struct mirror mirror[0]; }; +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle, + "A percentage of time allocated for raid resynchronization"); + static void wakeup_mirrord(void *context) { struct mirror_set *ms = context; @@ -1072,8 +1075,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto err_free_context; - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; @@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - ms->kcopyd_client = dm_kcopyd_client_create(); + ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(ms->kcopyd_client)) { r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; @@ -1347,8 +1350,8 @@ static char device_status_char(struct mirror *m) } -static int mirror_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void mirror_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; @@ -1383,8 +1386,6 @@ static int mirror_status(struct dm_target *ti, status_type_t type, if (ms->features & DM_RAID1_HANDLE_ERRORS) DMEMIT(" 1 handle_errors"); } - - return 0; } static int mirror_iterate_devices(struct dm_target *ti, @@ -1403,7 +1404,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 13, 1}, + .version = {1, 13, 2}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 10079e07edf4..c0e07026a8d1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -124,6 +124,9 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + struct dm_dev *dm_snap_origin(struct dm_snapshot *s) { return s->origin; @@ -1037,7 +1040,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) int i; int r = -EINVAL; char *origin_path, *cow_path; - unsigned args_used, num_flush_requests = 1; + unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; if (argc != 4) { @@ -1047,7 +1050,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (dm_target_is_snapshot_merge(ti)) { - num_flush_requests = 2; + num_flush_bios = 2; origin_mode = FMODE_WRITE; } @@ -1108,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - s->kcopyd_client = dm_kcopyd_client_create(); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; @@ -1127,7 +1130,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->tracked_chunk_lock); ti->private = s; - ti->num_flush_requests = num_flush_requests; + ti->num_flush_bios = num_flush_bios; ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ @@ -1691,7 +1694,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); if (bio->bi_rw & REQ_FLUSH) { - if (!dm_bio_get_target_request_nr(bio)) + if (!dm_bio_get_target_bio_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; @@ -1836,8 +1839,8 @@ static void snapshot_merge_resume(struct dm_target *ti) start_merge(s); } -static int snapshot_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void snapshot_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -1883,8 +1886,6 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, maxlen - sz); break; } - - return 0; } static int snapshot_iterate_devices(struct dm_target *ti, @@ -2104,7 +2105,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = dev; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; return 0; } @@ -2138,8 +2139,8 @@ static void origin_resume(struct dm_target *ti) ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; @@ -2152,8 +2153,6 @@ static int origin_status(struct dm_target *ti, status_type_t type, snprintf(result, maxlen, "%s", dev->name); break; } - - return 0; } static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, @@ -2180,7 +2179,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 8, 0}, + .version = {1, 8, 1}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2193,7 +2192,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 11, 0}, + .version = {1, 11, 1}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2306,3 +2305,5 @@ module_exit(dm_snapshot_exit); MODULE_DESCRIPTION(DM_NAME " snapshot target"); MODULE_AUTHOR("Joe Thornber"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("dm-snapshot-origin"); +MODULE_ALIAS("dm-snapshot-merge"); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index c89cde86d400..d8837d313f54 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -160,9 +160,9 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) return r; - ti->num_flush_requests = stripes; - ti->num_discard_requests = stripes; - ti->num_write_same_requests = stripes; + ti->num_flush_bios = stripes; + ti->num_discard_bios = stripes; + ti->num_write_same_bios = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -276,19 +276,19 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; - unsigned target_request_nr; + unsigned target_bio_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; return DM_MAPIO_REMAPPED; } if (unlikely(bio->bi_rw & REQ_DISCARD) || unlikely(bio->bi_rw & REQ_WRITE_SAME)) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_range(sc, bio, target_request_nr); + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + return stripe_map_range(sc, bio, target_bio_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); @@ -312,8 +312,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) * */ -static int stripe_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; @@ -340,7 +340,6 @@ static int stripe_status(struct dm_target *ti, status_type_t type, (unsigned long long)sc->stripe[i].physical_start); break; } - return 0; } static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) @@ -428,7 +427,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static struct target_type stripe_target = { .name = "striped", - .version = {1, 5, 0}, + .version = {1, 5, 1}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index daf25d0890b3..e50dad0c65f4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -217,7 +217,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (alloc_targets(t, num_targets)) { kfree(t); - t = NULL; return -ENOMEM; } @@ -823,8 +822,8 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests && tgt->discards_supported) - DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", + if (!tgt->num_discard_bios && tgt->discards_supported) + DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.", dm_device_name(t->md), type); return 0; @@ -1360,7 +1359,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_flush_requests) + if (!ti->num_flush_bios) continue; if (ti->flush_supported) @@ -1439,7 +1438,7 @@ static bool dm_table_supports_write_same(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_write_same_requests) + if (!ti->num_write_same_bios) return false; if (!ti->type->iterate_devices || @@ -1657,7 +1656,7 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_discard_requests) + if (!ti->num_discard_bios) continue; if (ti->discards_supported) diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 617d21a77256..37ba5db71cd9 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -116,7 +116,7 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) /* * Return error for discards instead of -EOPNOTSUPP */ - tt->num_discard_requests = 1; + tt->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 4d6e85367b84..00cee02f8fc9 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -280,7 +280,7 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t) *t = v & ((1 << 24) - 1); } -static void data_block_inc(void *context, void *value_le) +static void data_block_inc(void *context, const void *value_le) { struct dm_space_map *sm = context; __le64 v_le; @@ -292,7 +292,7 @@ static void data_block_inc(void *context, void *value_le) dm_sm_inc_block(sm, b); } -static void data_block_dec(void *context, void *value_le) +static void data_block_dec(void *context, const void *value_le) { struct dm_space_map *sm = context; __le64 v_le; @@ -304,7 +304,7 @@ static void data_block_dec(void *context, void *value_le) dm_sm_dec_block(sm, b); } -static int data_block_equal(void *context, void *value1_le, void *value2_le) +static int data_block_equal(void *context, const void *value1_le, const void *value2_le) { __le64 v1_le, v2_le; uint64_t b1, b2; @@ -318,7 +318,7 @@ static int data_block_equal(void *context, void *value1_le, void *value2_le) return b1 == b2; } -static void subtree_inc(void *context, void *value) +static void subtree_inc(void *context, const void *value) { struct dm_btree_info *info = context; __le64 root_le; @@ -329,7 +329,7 @@ static void subtree_inc(void *context, void *value) dm_tm_inc(info->tm, root); } -static void subtree_dec(void *context, void *value) +static void subtree_dec(void *context, const void *value) { struct dm_btree_info *info = context; __le64 root_le; @@ -341,7 +341,7 @@ static void subtree_dec(void *context, void *value) DMERR("btree delete failed\n"); } -static int subtree_equal(void *context, void *value1_le, void *value2_le) +static int subtree_equal(void *context, const void *value1_le, const void *value2_le) { __le64 v1_le, v2_le; memcpy(&v1_le, value1_le, sizeof(v1_le)); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 5409607d4875..009339d62828 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -26,6 +26,9 @@ #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + /* * The block size of the device holding pool data must be * between 64KB and 1GB. @@ -227,6 +230,78 @@ struct thin_c { /*----------------------------------------------------------------*/ /* + * wake_worker() is used when new work is queued and when pool_resume is + * ready to continue deferred IO processing. + */ +static void wake_worker(struct pool *pool) +{ + queue_work(pool->wq, &pool->worker); +} + +/*----------------------------------------------------------------*/ + +static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio, + struct dm_bio_prison_cell **cell_result) +{ + int r; + struct dm_bio_prison_cell *cell_prealloc; + + /* + * Allocate a cell from the prison's mempool. + * This might block but it can't fail. + */ + cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO); + + r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result); + if (r) + /* + * We reused an old cell; we can get rid of + * the new one. + */ + dm_bio_prison_free_cell(pool->prison, cell_prealloc); + + return r; +} + +static void cell_release(struct pool *pool, + struct dm_bio_prison_cell *cell, + struct bio_list *bios) +{ + dm_cell_release(pool->prison, cell, bios); + dm_bio_prison_free_cell(pool->prison, cell); +} + +static void cell_release_no_holder(struct pool *pool, + struct dm_bio_prison_cell *cell, + struct bio_list *bios) +{ + dm_cell_release_no_holder(pool->prison, cell, bios); + dm_bio_prison_free_cell(pool->prison, cell); +} + +static void cell_defer_no_holder_no_free(struct thin_c *tc, + struct dm_bio_prison_cell *cell) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios); + spin_unlock_irqrestore(&pool->lock, flags); + + wake_worker(pool); +} + +static void cell_error(struct pool *pool, + struct dm_bio_prison_cell *cell) +{ + dm_cell_error(pool->prison, cell); + dm_bio_prison_free_cell(pool->prison, cell); +} + +/*----------------------------------------------------------------*/ + +/* * A global list of pools that uses a struct mapped_device as a key. */ static struct dm_thin_pool_table { @@ -330,14 +405,20 @@ static void requeue_io(struct thin_c *tc) * target. */ +static bool block_size_is_power_of_two(struct pool *pool) +{ + return pool->sectors_per_block_shift >= 0; +} + static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { + struct pool *pool = tc->pool; sector_t block_nr = bio->bi_sector; - if (tc->pool->sectors_per_block_shift < 0) - (void) sector_div(block_nr, tc->pool->sectors_per_block); + if (block_size_is_power_of_two(pool)) + block_nr >>= pool->sectors_per_block_shift; else - block_nr >>= tc->pool->sectors_per_block_shift; + (void) sector_div(block_nr, pool->sectors_per_block); return block_nr; } @@ -348,12 +429,12 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) sector_t bi_sector = bio->bi_sector; bio->bi_bdev = tc->pool_dev->bdev; - if (tc->pool->sectors_per_block_shift < 0) - bio->bi_sector = (block * pool->sectors_per_block) + - sector_div(bi_sector, pool->sectors_per_block); - else + if (block_size_is_power_of_two(pool)) bio->bi_sector = (block << pool->sectors_per_block_shift) | (bi_sector & (pool->sectors_per_block - 1)); + else + bio->bi_sector = (block * pool->sectors_per_block) + + sector_div(bi_sector, pool->sectors_per_block); } static void remap_to_origin(struct thin_c *tc, struct bio *bio) @@ -420,15 +501,6 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio, issue(tc, bio); } -/* - * wake_worker() is used when new work is queued and when pool_resume is - * ready to continue deferred IO processing. - */ -static void wake_worker(struct pool *pool) -{ - queue_work(pool->wq, &pool->worker); -} - /*----------------------------------------------------------------*/ /* @@ -515,14 +587,14 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) unsigned long flags; spin_lock_irqsave(&pool->lock, flags); - dm_cell_release(cell, &pool->deferred_bios); + cell_release(pool, cell, &pool->deferred_bios); spin_unlock_irqrestore(&tc->pool->lock, flags); wake_worker(pool); } /* - * Same as cell_defer except it omits the original holder of the cell. + * Same as cell_defer above, except it omits the original holder of the cell. */ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { @@ -530,7 +602,7 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c unsigned long flags; spin_lock_irqsave(&pool->lock, flags); - dm_cell_release_no_holder(cell, &pool->deferred_bios); + cell_release_no_holder(pool, cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); wake_worker(pool); @@ -540,13 +612,15 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { if (m->bio) m->bio->bi_end_io = m->saved_bi_end_io; - dm_cell_error(m->cell); + cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); } + static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + struct pool *pool = tc->pool; struct bio *bio; int r; @@ -555,7 +629,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) bio->bi_end_io = m->saved_bi_end_io; if (m->err) { - dm_cell_error(m->cell); + cell_error(pool, m->cell); goto out; } @@ -567,7 +641,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { DMERR_LIMIT("dm_thin_insert_block() failed"); - dm_cell_error(m->cell); + cell_error(pool, m->cell); goto out; } @@ -585,7 +659,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) out: list_del(&m->list); - mempool_free(m, tc->pool->mapping_pool); + mempool_free(m, pool->mapping_pool); } static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) @@ -736,7 +810,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, if (r < 0) { mempool_free(m, pool->mapping_pool); DMERR_LIMIT("dm_kcopyd_copy() failed"); - dm_cell_error(cell); + cell_error(pool, cell); } } } @@ -802,7 +876,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, if (r < 0) { mempool_free(m, pool->mapping_pool); DMERR_LIMIT("dm_kcopyd_zero() failed"); - dm_cell_error(cell); + cell_error(pool, cell); } } } @@ -908,13 +982,13 @@ static void retry_on_resume(struct bio *bio) spin_unlock_irqrestore(&pool->lock, flags); } -static void no_space(struct dm_bio_prison_cell *cell) +static void no_space(struct pool *pool, struct dm_bio_prison_cell *cell) { struct bio *bio; struct bio_list bios; bio_list_init(&bios); - dm_cell_release(cell, &bios); + cell_release(pool, cell, &bios); while ((bio = bio_list_pop(&bios))) retry_on_resume(bio); @@ -932,7 +1006,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) struct dm_thin_new_mapping *m; build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell)) + if (bio_detain(tc->pool, &key, bio, &cell)) return; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); @@ -944,7 +1018,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) * on this block. */ build_data_key(tc->td, lookup_result.block, &key2); - if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { + if (bio_detain(tc->pool, &key2, bio, &cell2)) { cell_defer_no_holder(tc, cell); break; } @@ -1020,13 +1094,13 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, break; case -ENOSPC: - no_space(cell); + no_space(tc->pool, cell); break; default: DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", __func__, r); - dm_cell_error(cell); + cell_error(tc->pool, cell); break; } } @@ -1044,7 +1118,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, * of being broken so we have nothing further to do here. */ build_data_key(tc->td, lookup_result->block, &key); - if (dm_bio_detain(pool->prison, &key, bio, &cell)) + if (bio_detain(pool, &key, bio, &cell)) return; if (bio_data_dir(bio) == WRITE && bio->bi_size) @@ -1065,12 +1139,13 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block { int r; dm_block_t data_block; + struct pool *pool = tc->pool; /* * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - inc_all_io_entry(tc->pool, bio); + inc_all_io_entry(pool, bio); cell_defer_no_holder(tc, cell); remap_and_issue(tc, bio, 0); @@ -1097,14 +1172,14 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block break; case -ENOSPC: - no_space(cell); + no_space(pool, cell); break; default: DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", __func__, r); - set_pool_mode(tc->pool, PM_READ_ONLY); - dm_cell_error(cell); + set_pool_mode(pool, PM_READ_ONLY); + cell_error(pool, cell); break; } } @@ -1112,6 +1187,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block static void process_bio(struct thin_c *tc, struct bio *bio) { int r; + struct pool *pool = tc->pool; dm_block_t block = get_bio_block(tc, bio); struct dm_bio_prison_cell *cell; struct dm_cell_key key; @@ -1122,7 +1198,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) * being provisioned so we have nothing further to do here. */ build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell)) + if (bio_detain(pool, &key, bio, &cell)) return; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); @@ -1130,9 +1206,9 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - cell_defer_no_holder(tc, cell); + cell_defer_no_holder(tc, cell); /* FIXME: pass this cell into process_shared? */ } else { - inc_all_io_entry(tc->pool, bio); + inc_all_io_entry(pool, bio); cell_defer_no_holder(tc, cell); remap_and_issue(tc, bio, lookup_result.block); @@ -1141,7 +1217,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - inc_all_io_entry(tc->pool, bio); + inc_all_io_entry(pool, bio); cell_defer_no_holder(tc, cell); remap_to_origin_and_issue(tc, bio); @@ -1378,7 +1454,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) dm_block_t block = get_bio_block(tc, bio); struct dm_thin_device *td = tc->td; struct dm_thin_lookup_result result; - struct dm_bio_prison_cell *cell1, *cell2; + struct dm_bio_prison_cell cell1, cell2; + struct dm_bio_prison_cell *cell_result; struct dm_cell_key key; thin_hook_bio(tc, bio); @@ -1420,18 +1497,18 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) } build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1)) + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) return DM_MAPIO_SUBMITTED; build_data_key(tc->td, result.block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { - cell_defer_no_holder(tc, cell1); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) { + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } inc_all_io_entry(tc->pool, bio); - cell_defer_no_holder(tc, cell2); - cell_defer_no_holder(tc, cell1); + cell_defer_no_holder_no_free(tc, &cell2); + cell_defer_no_holder_no_free(tc, &cell1); remap(tc, bio, result.block); return DM_MAPIO_REMAPPED; @@ -1636,7 +1713,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_prison; } - pool->copier = dm_kcopyd_client_create(); + pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(pool->copier)) { r = PTR_ERR(pool->copier); *error = "Error creating pool's kcopyd client"; @@ -1938,7 +2015,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; /* * Only need to enable discards if the pool should pass @@ -1946,7 +2023,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * processing will cause mappings to be removed from the btree. */ if (pf.discard_enabled && pf.discard_passdown) { - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; /* * Setting 'discards_supported' circumvents the normal @@ -2299,8 +2376,8 @@ static void emit_flags(struct pool_features *pf, char *result, * <transaction id> <used metadata sectors>/<total metadata sectors> * <used data sectors>/<total data sectors> <held metadata root> */ -static int pool_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void pool_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int r; unsigned sz = 0; @@ -2326,32 +2403,41 @@ static int pool_status(struct dm_target *ti, status_type_t type, if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) (void) commit_or_fallback(pool); - r = dm_pool_get_metadata_transaction_id(pool->pmd, - &transaction_id); - if (r) - return r; + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); + if (r) { + DMERR("dm_pool_get_metadata_transaction_id returned %d", r); + goto err; + } - r = dm_pool_get_free_metadata_block_count(pool->pmd, - &nr_free_blocks_metadata); - if (r) - return r; + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata); + if (r) { + DMERR("dm_pool_get_free_metadata_block_count returned %d", r); + goto err; + } r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_metadata_dev_size returned %d", r); + goto err; + } - r = dm_pool_get_free_block_count(pool->pmd, - &nr_free_blocks_data); - if (r) - return r; + r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data); + if (r) { + DMERR("dm_pool_get_free_block_count returned %d", r); + goto err; + } r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_data_dev_size returned %d", r); + goto err; + } r = dm_pool_get_metadata_snap(pool->pmd, &held_root); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_metadata_snap returned %d", r); + goto err; + } DMEMIT("%llu %llu/%llu %llu/%llu ", (unsigned long long)transaction_id, @@ -2388,8 +2474,10 @@ static int pool_status(struct dm_target *ti, status_type_t type, emit_flags(&pt->requested_pf, result, sz, maxlen); break; } + return; - return 0; +err: + DMEMIT("Error"); } static int pool_iterate_devices(struct dm_target *ti, @@ -2414,11 +2502,6 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static bool block_size_is_power_of_two(struct pool *pool) -{ - return pool->sectors_per_block_shift >= 0; -} - static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { struct pool *pool = pt->pool; @@ -2432,15 +2515,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) if (pt->adjusted_pf.discard_passdown) { data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; limits->discard_granularity = data_limits->discard_granularity; - } else if (block_size_is_power_of_two(pool)) + } else limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - else - /* - * Use largest power of 2 that is a factor of sectors_per_block - * but at least DATA_DEV_BLOCK_SIZE_MIN_SECTORS. - */ - limits->discard_granularity = max(1 << (ffs(pool->sectors_per_block) - 1), - DATA_DEV_BLOCK_SIZE_MIN_SECTORS) << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2468,7 +2544,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 6, 0}, + .version = {1, 6, 1}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2588,17 +2664,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (r) goto bad_thin_open; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->flush_supported = true; ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; ti->discard_zeroes_data_unsupported = true; - /* Discard requests must be split on a block boundary */ - ti->split_discard_requests = true; + /* Discard bios must be split on a block boundary */ + ti->split_discard_bios = true; } dm_put(pool_md); @@ -2676,8 +2752,8 @@ static void thin_postsuspend(struct dm_target *ti) /* * <nr mapped sectors> <highest mapped sector> */ -static int thin_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void thin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; @@ -2687,7 +2763,7 @@ static int thin_status(struct dm_target *ti, status_type_t type, if (get_pool_mode(tc->pool) == PM_FAIL) { DMEMIT("Fail"); - return 0; + return; } if (!tc->td) @@ -2696,12 +2772,16 @@ static int thin_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: r = dm_thin_get_mapped_count(tc->td, &mapped); - if (r) - return r; + if (r) { + DMERR("dm_thin_get_mapped_count returned %d", r); + goto err; + } r = dm_thin_get_highest_mapped_block(tc->td, &highest); - if (r < 0) - return r; + if (r < 0) { + DMERR("dm_thin_get_highest_mapped_block returned %d", r); + goto err; + } DMEMIT("%llu ", mapped * tc->pool->sectors_per_block); if (r) @@ -2721,7 +2801,10 @@ static int thin_status(struct dm_target *ti, status_type_t type, } } - return 0; + return; + +err: + DMEMIT("Error"); } static int thin_iterate_devices(struct dm_target *ti, @@ -2748,7 +2831,7 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 7, 0}, + .version = {1, 7, 1}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 52cde982164a..6ad538375c3c 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -508,8 +508,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio) /* * Status: V (valid) or C (corruption found) */ -static int verity_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void verity_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; @@ -540,8 +540,6 @@ static int verity_status(struct dm_target *ti, status_type_t type, DMEMIT("%02x", v->salt[x]); break; } - - return 0; } static int verity_ioctl(struct dm_target *ti, unsigned cmd, @@ -860,7 +858,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 1, 0}, + .version = {1, 1, 1}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 69a5c3b3b340..c99003e0d47a 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -25,7 +25,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * Silently drop discards, avoiding -EOPNOTSUPP. */ - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index bb2cd3ce9b0f..7e469260fe5e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -163,7 +163,6 @@ struct mapped_device { * io objects are allocated from here. */ mempool_t *io_pool; - mempool_t *tio_pool; struct bio_set *bs; @@ -197,7 +196,6 @@ struct mapped_device { */ struct dm_md_mempools { mempool_t *io_pool; - mempool_t *tio_pool; struct bio_set *bs; }; @@ -205,12 +203,6 @@ struct dm_md_mempools { static struct kmem_cache *_io_cache; static struct kmem_cache *_rq_tio_cache; -/* - * Unused now, and needs to be deleted. But since io_pool is overloaded and it's - * still used for _io_cache, I'm leaving this for a later cleanup - */ -static struct kmem_cache *_rq_bio_info_cache; - static int __init local_init(void) { int r = -ENOMEM; @@ -224,13 +216,9 @@ static int __init local_init(void) if (!_rq_tio_cache) goto out_free_io_cache; - _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); - if (!_rq_bio_info_cache) - goto out_free_rq_tio_cache; - r = dm_uevent_init(); if (r) - goto out_free_rq_bio_info_cache; + goto out_free_rq_tio_cache; _major = major; r = register_blkdev(_major, _name); @@ -244,8 +232,6 @@ static int __init local_init(void) out_uevent_exit: dm_uevent_exit(); -out_free_rq_bio_info_cache: - kmem_cache_destroy(_rq_bio_info_cache); out_free_rq_tio_cache: kmem_cache_destroy(_rq_tio_cache); out_free_io_cache: @@ -256,7 +242,6 @@ out_free_io_cache: static void local_exit(void) { - kmem_cache_destroy(_rq_bio_info_cache); kmem_cache_destroy(_rq_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); @@ -448,12 +433,12 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, gfp_t gfp_mask) { - return mempool_alloc(md->tio_pool, gfp_mask); + return mempool_alloc(md->io_pool, gfp_mask); } static void free_rq_tio(struct dm_rq_target_io *tio) { - mempool_free(tio, tio->md->tio_pool); + mempool_free(tio, tio->md->io_pool); } static int md_in_flight(struct mapped_device *md) @@ -985,12 +970,13 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); -static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) +static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct mapped_device *md; struct bio *clone = &tio->clone; + struct dm_target *ti = tio->ti; clone->bi_end_io = clone_endio; clone->bi_private = tio; @@ -1031,32 +1017,54 @@ struct clone_info { unsigned short idx; }; +static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) +{ + bio->bi_sector = sector; + bio->bi_size = to_bytes(len); +} + +static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count) +{ + bio->bi_idx = idx; + bio->bi_vcnt = idx + bv_count; + bio->bi_flags &= ~(1 << BIO_SEG_VALID); +} + +static void clone_bio_integrity(struct bio *bio, struct bio *clone, + unsigned short idx, unsigned len, unsigned offset, + unsigned trim) +{ + if (!bio_integrity(bio)) + return; + + bio_integrity_clone(clone, bio, GFP_NOIO); + + if (trim) + bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len); +} + /* * Creates a little bio that just does part of a bvec. */ -static void split_bvec(struct dm_target_io *tio, struct bio *bio, - sector_t sector, unsigned short idx, unsigned int offset, - unsigned int len, struct bio_set *bs) +static void clone_split_bio(struct dm_target_io *tio, struct bio *bio, + sector_t sector, unsigned short idx, + unsigned offset, unsigned len) { struct bio *clone = &tio->clone; struct bio_vec *bv = bio->bi_io_vec + idx; *clone->bi_io_vec = *bv; - clone->bi_sector = sector; + bio_setup_sector(clone, sector, len); + clone->bi_bdev = bio->bi_bdev; clone->bi_rw = bio->bi_rw; clone->bi_vcnt = 1; - clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; clone->bi_io_vec->bv_len = clone->bi_size; clone->bi_flags |= 1 << BIO_CLONED; - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, offset), len); - } + clone_bio_integrity(bio, clone, idx, len, offset, 1); } /* @@ -1064,29 +1072,23 @@ static void split_bvec(struct dm_target_io *tio, struct bio *bio, */ static void clone_bio(struct dm_target_io *tio, struct bio *bio, sector_t sector, unsigned short idx, - unsigned short bv_count, unsigned int len, - struct bio_set *bs) + unsigned short bv_count, unsigned len) { struct bio *clone = &tio->clone; + unsigned trim = 0; __bio_clone(clone, bio); - clone->bi_sector = sector; - clone->bi_idx = idx; - clone->bi_vcnt = idx + bv_count; - clone->bi_size = to_bytes(len); - clone->bi_flags &= ~(1 << BIO_SEG_VALID); - - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); - - if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, 0), len); - } + bio_setup_sector(clone, sector, len); + bio_setup_bv(clone, idx, bv_count); + + if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) + trim = 1; + clone_bio_integrity(bio, clone, idx, len, 0, trim); } static struct dm_target_io *alloc_tio(struct clone_info *ci, - struct dm_target *ti, int nr_iovecs) + struct dm_target *ti, int nr_iovecs, + unsigned target_bio_nr) { struct dm_target_io *tio; struct bio *clone; @@ -1097,96 +1099,104 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); - tio->target_request_nr = 0; + tio->target_bio_nr = target_bio_nr; return tio; } -static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, - unsigned request_nr, sector_t len) +static void __clone_and_map_simple_bio(struct clone_info *ci, + struct dm_target *ti, + unsigned target_bio_nr, sector_t len) { - struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); + struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; - tio->target_request_nr = request_nr; - /* * Discard requests require the bio's inline iovecs be initialized. * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush * and discard, so no need for concern about wasted bvec allocations. */ - __bio_clone(clone, ci->bio); - if (len) { - clone->bi_sector = ci->sector; - clone->bi_size = to_bytes(len); - } + if (len) + bio_setup_sector(clone, ci->sector, len); - __map_bio(ti, tio); + __map_bio(tio); } -static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, - unsigned num_requests, sector_t len) +static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, + unsigned num_bios, sector_t len) { - unsigned request_nr; + unsigned target_bio_nr; - for (request_nr = 0; request_nr < num_requests; request_nr++) - __issue_target_request(ci, ti, request_nr, len); + for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++) + __clone_and_map_simple_bio(ci, ti, target_bio_nr, len); } -static int __clone_and_map_empty_flush(struct clone_info *ci) +static int __send_empty_flush(struct clone_info *ci) { unsigned target_nr = 0; struct dm_target *ti; BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __issue_target_requests(ci, ti, ti->num_flush_requests, 0); + __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); return 0; } -/* - * Perform all io with a single clone. - */ -static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) +static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, + sector_t sector, int nr_iovecs, + unsigned short idx, unsigned short bv_count, + unsigned offset, unsigned len, + unsigned split_bvec) { struct bio *bio = ci->bio; struct dm_target_io *tio; + unsigned target_bio_nr; + unsigned num_target_bios = 1; + + /* + * Does the target want to receive duplicate copies of the bio? + */ + if (bio_data_dir(bio) == WRITE && ti->num_write_bios) + num_target_bios = ti->num_write_bios(ti, bio); - tio = alloc_tio(ci, ti, bio->bi_max_vecs); - clone_bio(tio, bio, ci->sector, ci->idx, bio->bi_vcnt - ci->idx, - ci->sector_count, ci->md->bs); - __map_bio(ti, tio); - ci->sector_count = 0; + for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { + tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); + if (split_bvec) + clone_split_bio(tio, bio, sector, idx, offset, len); + else + clone_bio(tio, bio, sector, idx, bv_count, len); + __map_bio(tio); + } } -typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); +typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); -static unsigned get_num_discard_requests(struct dm_target *ti) +static unsigned get_num_discard_bios(struct dm_target *ti) { - return ti->num_discard_requests; + return ti->num_discard_bios; } -static unsigned get_num_write_same_requests(struct dm_target *ti) +static unsigned get_num_write_same_bios(struct dm_target *ti) { - return ti->num_write_same_requests; + return ti->num_write_same_bios; } typedef bool (*is_split_required_fn)(struct dm_target *ti); static bool is_split_required_for_discard(struct dm_target *ti) { - return ti->split_discard_requests; + return ti->split_discard_bios; } -static int __clone_and_map_changing_extent_only(struct clone_info *ci, - get_num_requests_fn get_num_requests, - is_split_required_fn is_split_required) +static int __send_changing_extent_only(struct clone_info *ci, + get_num_bios_fn get_num_bios, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; - unsigned num_requests; + unsigned num_bios; do { ti = dm_table_find_target(ci->map, ci->sector); @@ -1199,8 +1209,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, * reconfiguration might also have changed that since the * check was performed. */ - num_requests = get_num_requests ? get_num_requests(ti) : 0; - if (!num_requests) + num_bios = get_num_bios ? get_num_bios(ti) : 0; + if (!num_bios) return -EOPNOTSUPP; if (is_split_required && !is_split_required(ti)) @@ -1208,7 +1218,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, else len = min(ci->sector_count, max_io_len(ci->sector, ti)); - __issue_target_requests(ci, ti, num_requests, len); + __send_duplicate_bios(ci, ti, num_bios, len); ci->sector += len; } while (ci->sector_count -= len); @@ -1216,108 +1226,129 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, return 0; } -static int __clone_and_map_discard(struct clone_info *ci) +static int __send_discard(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, - is_split_required_for_discard); + return __send_changing_extent_only(ci, get_num_discard_bios, + is_split_required_for_discard); } -static int __clone_and_map_write_same(struct clone_info *ci) +static int __send_write_same(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); + return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); } -static int __clone_and_map(struct clone_info *ci) +/* + * Find maximum number of sectors / bvecs we can process with a single bio. + */ +static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx) { struct bio *bio = ci->bio; - struct dm_target *ti; - sector_t len = 0, max; - struct dm_target_io *tio; - - if (unlikely(bio->bi_rw & REQ_DISCARD)) - return __clone_and_map_discard(ci); - else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) - return __clone_and_map_write_same(ci); + sector_t bv_len, total_len = 0; - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; + for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) { + bv_len = to_sector(bio->bi_io_vec[*idx].bv_len); - max = max_io_len(ci->sector, ti); + if (bv_len > max) + break; - if (ci->sector_count <= max) { - /* - * Optimise for the simple case where we can do all of - * the remaining io with a single clone. - */ - __clone_and_map_simple(ci, ti); + max -= bv_len; + total_len += bv_len; + } - } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { - /* - * There are some bvecs that don't span targets. - * Do as many of these as possible. - */ - int i; - sector_t remaining = max; - sector_t bv_len; + return total_len; +} - for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { - bv_len = to_sector(bio->bi_io_vec[i].bv_len); +static int __split_bvec_across_targets(struct clone_info *ci, + struct dm_target *ti, sector_t max) +{ + struct bio *bio = ci->bio; + struct bio_vec *bv = bio->bi_io_vec + ci->idx; + sector_t remaining = to_sector(bv->bv_len); + unsigned offset = 0; + sector_t len; - if (bv_len > remaining) - break; + do { + if (offset) { + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - remaining -= bv_len; - len += bv_len; + max = max_io_len(ci->sector, ti); } - tio = alloc_tio(ci, ti, bio->bi_max_vecs); - clone_bio(tio, bio, ci->sector, ci->idx, i - ci->idx, len, - ci->md->bs); - __map_bio(ti, tio); + len = min(remaining, max); + + __clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0, + bv->bv_offset + offset, len, 1); ci->sector += len; ci->sector_count -= len; - ci->idx = i; + offset += to_bytes(len); + } while (remaining -= len); - } else { - /* - * Handle a bvec that must be split between two or more targets. - */ - struct bio_vec *bv = bio->bi_io_vec + ci->idx; - sector_t remaining = to_sector(bv->bv_len); - unsigned int offset = 0; + ci->idx++; + + return 0; +} + +/* + * Select the correct strategy for processing a non-flush bio. + */ +static int __split_and_process_non_flush(struct clone_info *ci) +{ + struct bio *bio = ci->bio; + struct dm_target *ti; + sector_t len, max; + int idx; + + if (unlikely(bio->bi_rw & REQ_DISCARD)) + return __send_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __send_write_same(ci); - do { - if (offset) { - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - max = max_io_len(ci->sector, ti); - } + max = max_io_len(ci->sector, ti); - len = min(remaining, max); + /* + * Optimise for the simple case where we can do all of + * the remaining io with a single clone. + */ + if (ci->sector_count <= max) { + __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, + ci->idx, bio->bi_vcnt - ci->idx, 0, + ci->sector_count, 0); + ci->sector_count = 0; + return 0; + } - tio = alloc_tio(ci, ti, 1); - split_bvec(tio, bio, ci->sector, ci->idx, - bv->bv_offset + offset, len, ci->md->bs); + /* + * There are some bvecs that don't span targets. + * Do as many of these as possible. + */ + if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { + len = __len_within_target(ci, max, &idx); - __map_bio(ti, tio); + __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, + ci->idx, idx - ci->idx, 0, len, 0); - ci->sector += len; - ci->sector_count -= len; - offset += to_bytes(len); - } while (remaining -= len); + ci->sector += len; + ci->sector_count -= len; + ci->idx = idx; - ci->idx++; + return 0; } - return 0; + /* + * Handle a bvec that must be split between two or more targets. + */ + return __split_bvec_across_targets(ci, ti, max); } /* - * Split the bio into several clones and submit it to targets. + * Entry point to split a bio into clones and submit them to the targets. */ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) { @@ -1341,16 +1372,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.idx = bio->bi_idx; start_io_acct(ci.io); + if (bio->bi_rw & REQ_FLUSH) { ci.bio = &ci.md->flush_bio; ci.sector_count = 0; - error = __clone_and_map_empty_flush(&ci); + error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ } else { ci.bio = bio; ci.sector_count = bio_sectors(bio); while (ci.sector_count && !error) - error = __clone_and_map(&ci); + error = __split_and_process_non_flush(&ci); } /* drop the extra reference count */ @@ -1923,8 +1955,6 @@ static void free_dev(struct mapped_device *md) unlock_fs(md); bdput(md->bdev); destroy_workqueue(md->wq); - if (md->tio_pool) - mempool_destroy(md->tio_pool); if (md->io_pool) mempool_destroy(md->io_pool); if (md->bs) @@ -1947,24 +1977,33 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { - /* - * The md already has necessary mempools. Reload just the - * bioset because front_pad may have changed because - * a different table was loaded. - */ - bioset_free(md->bs); - md->bs = p->bs; - p->bs = NULL; + if (md->io_pool && md->bs) { + /* The md already has necessary mempools. */ + if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { + /* + * Reload bioset because front_pad may have changed + * because a different table was loaded. + */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; + } else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) { + /* + * There's no need to reload with request-based dm + * because the size of front_pad doesn't change. + * Note for future: If you are to reload bioset, + * prep-ed requests in the queue may refer + * to bio from the old bioset, so you must walk + * through the queue to unprep. + */ + } goto out; } - BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); + BUG_ON(!p || md->io_pool || md->bs); md->io_pool = p->io_pool; p->io_pool = NULL; - md->tio_pool = p->tio_pool; - p->tio_pool = NULL; md->bs = p->bs; p->bs = NULL; @@ -2395,7 +2434,7 @@ static void dm_queue_flush(struct mapped_device *md) */ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { - struct dm_table *live_map, *map = ERR_PTR(-EINVAL); + struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; @@ -2418,10 +2457,12 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) dm_table_put(live_map); } - r = dm_calculate_queue_limits(table, &limits); - if (r) { - map = ERR_PTR(r); - goto out; + if (!live_map) { + r = dm_calculate_queue_limits(table, &limits); + if (r) { + map = ERR_PTR(r); + goto out; + } } map = __bind(md, table, &limits); @@ -2719,52 +2760,42 @@ EXPORT_SYMBOL_GPL(dm_noflush_suspending); struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { - struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); - unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; + struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); + struct kmem_cache *cachep; + unsigned int pool_size; + unsigned int front_pad; if (!pools) return NULL; - per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + if (type == DM_TYPE_BIO_BASED) { + cachep = _io_cache; + pool_size = 16; + front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); + } else if (type == DM_TYPE_REQUEST_BASED) { + cachep = _rq_tio_cache; + pool_size = MIN_IOS; + front_pad = offsetof(struct dm_rq_clone_bio_info, clone); + /* per_bio_data_size is not used. See __bind_mempools(). */ + WARN_ON(per_bio_data_size != 0); + } else + goto out; - pools->io_pool = (type == DM_TYPE_BIO_BASED) ? - mempool_create_slab_pool(MIN_IOS, _io_cache) : - mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); + pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep); if (!pools->io_pool) - goto free_pools_and_out; - - pools->tio_pool = NULL; - if (type == DM_TYPE_REQUEST_BASED) { - pools->tio_pool = mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); - if (!pools->tio_pool) - goto free_io_pool_and_out; - } + goto out; - pools->bs = (type == DM_TYPE_BIO_BASED) ? - bioset_create(pool_size, - per_bio_data_size + offsetof(struct dm_target_io, clone)) : - bioset_create(pool_size, - offsetof(struct dm_rq_clone_bio_info, clone)); + pools->bs = bioset_create(pool_size, front_pad); if (!pools->bs) - goto free_tio_pool_and_out; + goto out; if (integrity && bioset_integrity_create(pools->bs, pool_size)) - goto free_bioset_and_out; + goto out; return pools; -free_bioset_and_out: - bioset_free(pools->bs); - -free_tio_pool_and_out: - if (pools->tio_pool) - mempool_destroy(pools->tio_pool); - -free_io_pool_and_out: - mempool_destroy(pools->io_pool); - -free_pools_and_out: - kfree(pools); +out: + dm_free_md_mempools(pools); return NULL; } @@ -2777,9 +2808,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) if (pools->io_pool) mempool_destroy(pools->io_pool); - if (pools->tio_pool) - mempool_destroy(pools->tio_pool); - if (pools->bs) bioset_free(pools->bs); diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index ceb359050a59..19b268795415 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -1,6 +1,6 @@ config DM_PERSISTENT_DATA tristate - depends on BLK_DEV_DM && EXPERIMENTAL + depends on BLK_DEV_DM select LIBCRC32C select DM_BUFIO ---help--- diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index d8e7cb767c1e..ff528792c358 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o dm-persistent-data-objs := \ + dm-array.o \ + dm-bitset.o \ dm-block-manager.o \ dm-space-map-common.o \ dm-space-map-disk.o \ diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c new file mode 100644 index 000000000000..172147eb1d40 --- /dev/null +++ b/drivers/md/persistent-data/dm-array.c @@ -0,0 +1,808 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-array.h" +#include "dm-space-map.h" +#include "dm-transaction-manager.h" + +#include <linux/export.h> +#include <linux/device-mapper.h> + +#define DM_MSG_PREFIX "array" + +/*----------------------------------------------------------------*/ + +/* + * The array is implemented as a fully populated btree, which points to + * blocks that contain the packed values. This is more space efficient + * than just using a btree since we don't store 1 key per value. + */ +struct array_block { + __le32 csum; + __le32 max_entries; + __le32 nr_entries; + __le32 value_size; + __le64 blocknr; /* Block this node is supposed to live in. */ +} __packed; + +/*----------------------------------------------------------------*/ + +/* + * Validator methods. As usual we calculate a checksum, and also write the + * block location into the header (paranoia about ssds remapping areas by + * mistake). + */ +#define CSUM_XOR 595846735 + +static void array_block_prepare_for_write(struct dm_block_validator *v, + struct dm_block *b, + size_t size_of_block) +{ + struct array_block *bh_le = dm_block_data(b); + + bh_le->blocknr = cpu_to_le64(dm_block_location(b)); + bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, + size_of_block - sizeof(__le32), + CSUM_XOR)); +} + +static int array_block_check(struct dm_block_validator *v, + struct dm_block *b, + size_t size_of_block) +{ + struct array_block *bh_le = dm_block_data(b); + __le32 csum_disk; + + if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) { + DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu", + (unsigned long long) le64_to_cpu(bh_le->blocknr), + (unsigned long long) dm_block_location(b)); + return -ENOTBLK; + } + + csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, + size_of_block - sizeof(__le32), + CSUM_XOR)); + if (csum_disk != bh_le->csum) { + DMERR_LIMIT("array_block_check failed: csum %u != wanted %u", + (unsigned) le32_to_cpu(csum_disk), + (unsigned) le32_to_cpu(bh_le->csum)); + return -EILSEQ; + } + + return 0; +} + +static struct dm_block_validator array_validator = { + .name = "array", + .prepare_for_write = array_block_prepare_for_write, + .check = array_block_check +}; + +/*----------------------------------------------------------------*/ + +/* + * Functions for manipulating the array blocks. + */ + +/* + * Returns a pointer to a value within an array block. + * + * index - The index into _this_ specific block. + */ +static void *element_at(struct dm_array_info *info, struct array_block *ab, + unsigned index) +{ + unsigned char *entry = (unsigned char *) (ab + 1); + + entry += index * info->value_type.size; + + return entry; +} + +/* + * Utility function that calls one of the value_type methods on every value + * in an array block. + */ +static void on_entries(struct dm_array_info *info, struct array_block *ab, + void (*fn)(void *, const void *)) +{ + unsigned i, nr_entries = le32_to_cpu(ab->nr_entries); + + for (i = 0; i < nr_entries; i++) + fn(info->value_type.context, element_at(info, ab, i)); +} + +/* + * Increment every value in an array block. + */ +static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab) +{ + struct dm_btree_value_type *vt = &info->value_type; + + if (vt->inc) + on_entries(info, ab, vt->inc); +} + +/* + * Decrement every value in an array block. + */ +static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab) +{ + struct dm_btree_value_type *vt = &info->value_type; + + if (vt->dec) + on_entries(info, ab, vt->dec); +} + +/* + * Each array block can hold this many values. + */ +static uint32_t calc_max_entries(size_t value_size, size_t size_of_block) +{ + return (size_of_block - sizeof(struct array_block)) / value_size; +} + +/* + * Allocate a new array block. The caller will need to unlock block. + */ +static int alloc_ablock(struct dm_array_info *info, size_t size_of_block, + uint32_t max_entries, + struct dm_block **block, struct array_block **ab) +{ + int r; + + r = dm_tm_new_block(info->btree_info.tm, &array_validator, block); + if (r) + return r; + + (*ab) = dm_block_data(*block); + (*ab)->max_entries = cpu_to_le32(max_entries); + (*ab)->nr_entries = cpu_to_le32(0); + (*ab)->value_size = cpu_to_le32(info->value_type.size); + + return 0; +} + +/* + * Pad an array block out with a particular value. Every instance will + * cause an increment of the value_type. new_nr must always be more than + * the current number of entries. + */ +static void fill_ablock(struct dm_array_info *info, struct array_block *ab, + const void *value, unsigned new_nr) +{ + unsigned i; + uint32_t nr_entries; + struct dm_btree_value_type *vt = &info->value_type; + + BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); + BUG_ON(new_nr < le32_to_cpu(ab->nr_entries)); + + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = nr_entries; i < new_nr; i++) { + if (vt->inc) + vt->inc(vt->context, value); + memcpy(element_at(info, ab, i), value, vt->size); + } + ab->nr_entries = cpu_to_le32(new_nr); +} + +/* + * Remove some entries from the back of an array block. Every value + * removed will be decremented. new_nr must be <= the current number of + * entries. + */ +static void trim_ablock(struct dm_array_info *info, struct array_block *ab, + unsigned new_nr) +{ + unsigned i; + uint32_t nr_entries; + struct dm_btree_value_type *vt = &info->value_type; + + BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); + BUG_ON(new_nr > le32_to_cpu(ab->nr_entries)); + + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = nr_entries; i > new_nr; i--) + if (vt->dec) + vt->dec(vt->context, element_at(info, ab, i - 1)); + ab->nr_entries = cpu_to_le32(new_nr); +} + +/* + * Read locks a block, and coerces it to an array block. The caller must + * unlock 'block' when finished. + */ +static int get_ablock(struct dm_array_info *info, dm_block_t b, + struct dm_block **block, struct array_block **ab) +{ + int r; + + r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block); + if (r) + return r; + + *ab = dm_block_data(*block); + return 0; +} + +/* + * Unlocks an array block. + */ +static int unlock_ablock(struct dm_array_info *info, struct dm_block *block) +{ + return dm_tm_unlock(info->btree_info.tm, block); +} + +/*----------------------------------------------------------------*/ + +/* + * Btree manipulation. + */ + +/* + * Looks up an array block in the btree, and then read locks it. + * + * index is the index of the index of the array_block, (ie. the array index + * / max_entries). + */ +static int lookup_ablock(struct dm_array_info *info, dm_block_t root, + unsigned index, struct dm_block **block, + struct array_block **ab) +{ + int r; + uint64_t key = index; + __le64 block_le; + + r = dm_btree_lookup(&info->btree_info, root, &key, &block_le); + if (r) + return r; + + return get_ablock(info, le64_to_cpu(block_le), block, ab); +} + +/* + * Insert an array block into the btree. The block is _not_ unlocked. + */ +static int insert_ablock(struct dm_array_info *info, uint64_t index, + struct dm_block *block, dm_block_t *root) +{ + __le64 block_le = cpu_to_le64(dm_block_location(block)); + + __dm_bless_for_disk(block_le); + return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root); +} + +/* + * Looks up an array block in the btree. Then shadows it, and updates the + * btree to point to this new shadow. 'root' is an input/output parameter + * for both the current root block, and the new one. + */ +static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, + unsigned index, struct dm_block **block, + struct array_block **ab) +{ + int r, inc; + uint64_t key = index; + dm_block_t b; + __le64 block_le; + + /* + * lookup + */ + r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le); + if (r) + return r; + b = le64_to_cpu(block_le); + + /* + * shadow + */ + r = dm_tm_shadow_block(info->btree_info.tm, b, + &array_validator, block, &inc); + if (r) + return r; + + *ab = dm_block_data(*block); + if (inc) + inc_ablock_entries(info, *ab); + + /* + * Reinsert. + * + * The shadow op will often be a noop. Only insert if it really + * copied data. + */ + if (dm_block_location(*block) != b) + r = insert_ablock(info, index, *block, root); + + return r; +} + +/* + * Allocate an new array block, and fill it with some values. + */ +static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block, + uint32_t max_entries, + unsigned block_index, uint32_t nr, + const void *value, dm_block_t *root) +{ + int r; + struct dm_block *block; + struct array_block *ab; + + r = alloc_ablock(info, size_of_block, max_entries, &block, &ab); + if (r) + return r; + + fill_ablock(info, ab, value, nr); + r = insert_ablock(info, block_index, block, root); + unlock_ablock(info, block); + + return r; +} + +static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block, + unsigned begin_block, unsigned end_block, + unsigned max_entries, const void *value, + dm_block_t *root) +{ + int r = 0; + + for (; !r && begin_block != end_block; begin_block++) + r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root); + + return r; +} + +/* + * There are a bunch of functions involved with resizing an array. This + * structure holds information that commonly needed by them. Purely here + * to reduce parameter count. + */ +struct resize { + /* + * Describes the array. + */ + struct dm_array_info *info; + + /* + * The current root of the array. This gets updated. + */ + dm_block_t root; + + /* + * Metadata block size. Used to calculate the nr entries in an + * array block. + */ + size_t size_of_block; + + /* + * Maximum nr entries in an array block. + */ + unsigned max_entries; + + /* + * nr of completely full blocks in the array. + * + * 'old' refers to before the resize, 'new' after. + */ + unsigned old_nr_full_blocks, new_nr_full_blocks; + + /* + * Number of entries in the final block. 0 iff only full blocks in + * the array. + */ + unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block; + + /* + * The default value used when growing the array. + */ + const void *value; +}; + +/* + * Removes a consecutive set of array blocks from the btree. The values + * in block are decremented as a side effect of the btree remove. + * + * begin_index - the index of the first array block to remove. + * end_index - the one-past-the-end value. ie. this block is not removed. + */ +static int drop_blocks(struct resize *resize, unsigned begin_index, + unsigned end_index) +{ + int r; + + while (begin_index != end_index) { + uint64_t key = begin_index++; + r = dm_btree_remove(&resize->info->btree_info, resize->root, + &key, &resize->root); + if (r) + return r; + } + + return 0; +} + +/* + * Calculates how many blocks are needed for the array. + */ +static unsigned total_nr_blocks_needed(unsigned nr_full_blocks, + unsigned nr_entries_in_last_block) +{ + return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0); +} + +/* + * Shrink an array. + */ +static int shrink(struct resize *resize) +{ + int r; + unsigned begin, end; + struct dm_block *block; + struct array_block *ab; + + /* + * Lose some blocks from the back? + */ + if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) { + begin = total_nr_blocks_needed(resize->new_nr_full_blocks, + resize->new_nr_entries_in_last_block); + end = total_nr_blocks_needed(resize->old_nr_full_blocks, + resize->old_nr_entries_in_last_block); + + r = drop_blocks(resize, begin, end); + if (r) + return r; + } + + /* + * Trim the new tail block + */ + if (resize->new_nr_entries_in_last_block) { + r = shadow_ablock(resize->info, &resize->root, + resize->new_nr_full_blocks, &block, &ab); + if (r) + return r; + + trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block); + unlock_ablock(resize->info, block); + } + + return 0; +} + +/* + * Grow an array. + */ +static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries) +{ + int r; + struct dm_block *block; + struct array_block *ab; + + r = shadow_ablock(resize->info, &resize->root, + resize->old_nr_full_blocks, &block, &ab); + if (r) + return r; + + fill_ablock(resize->info, ab, resize->value, new_nr_entries); + unlock_ablock(resize->info, block); + + return r; +} + +static int grow_add_tail_block(struct resize *resize) +{ + return insert_new_ablock(resize->info, resize->size_of_block, + resize->max_entries, + resize->new_nr_full_blocks, + resize->new_nr_entries_in_last_block, + resize->value, &resize->root); +} + +static int grow_needs_more_blocks(struct resize *resize) +{ + int r; + + if (resize->old_nr_entries_in_last_block > 0) { + r = grow_extend_tail_block(resize, resize->max_entries); + if (r) + return r; + } + + r = insert_full_ablocks(resize->info, resize->size_of_block, + resize->old_nr_full_blocks, + resize->new_nr_full_blocks, + resize->max_entries, resize->value, + &resize->root); + if (r) + return r; + + if (resize->new_nr_entries_in_last_block) + r = grow_add_tail_block(resize); + + return r; +} + +static int grow(struct resize *resize) +{ + if (resize->new_nr_full_blocks > resize->old_nr_full_blocks) + return grow_needs_more_blocks(resize); + + else if (resize->old_nr_entries_in_last_block) + return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block); + + else + return grow_add_tail_block(resize); +} + +/*----------------------------------------------------------------*/ + +/* + * These are the value_type functions for the btree elements, which point + * to array blocks. + */ +static void block_inc(void *context, const void *value) +{ + __le64 block_le; + struct dm_array_info *info = context; + + memcpy(&block_le, value, sizeof(block_le)); + dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le)); +} + +static void block_dec(void *context, const void *value) +{ + int r; + uint64_t b; + __le64 block_le; + uint32_t ref_count; + struct dm_block *block; + struct array_block *ab; + struct dm_array_info *info = context; + + memcpy(&block_le, value, sizeof(block_le)); + b = le64_to_cpu(block_le); + + r = dm_tm_ref(info->btree_info.tm, b, &ref_count); + if (r) { + DMERR_LIMIT("couldn't get reference count for block %llu", + (unsigned long long) b); + return; + } + + if (ref_count == 1) { + /* + * We're about to drop the last reference to this ablock. + * So we need to decrement the ref count of the contents. + */ + r = get_ablock(info, b, &block, &ab); + if (r) { + DMERR_LIMIT("couldn't get array block %llu", + (unsigned long long) b); + return; + } + + dec_ablock_entries(info, ab); + unlock_ablock(info, block); + } + + dm_tm_dec(info->btree_info.tm, b); +} + +static int block_equal(void *context, const void *value1, const void *value2) +{ + return !memcmp(value1, value2, sizeof(__le64)); +} + +/*----------------------------------------------------------------*/ + +void dm_array_info_init(struct dm_array_info *info, + struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt) +{ + struct dm_btree_value_type *bvt = &info->btree_info.value_type; + + memcpy(&info->value_type, vt, sizeof(info->value_type)); + info->btree_info.tm = tm; + info->btree_info.levels = 1; + + bvt->context = info; + bvt->size = sizeof(__le64); + bvt->inc = block_inc; + bvt->dec = block_dec; + bvt->equal = block_equal; +} +EXPORT_SYMBOL_GPL(dm_array_info_init); + +int dm_array_empty(struct dm_array_info *info, dm_block_t *root) +{ + return dm_btree_empty(&info->btree_info, root); +} +EXPORT_SYMBOL_GPL(dm_array_empty); + +static int array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) +{ + int r; + struct resize resize; + + if (old_size == new_size) + return 0; + + resize.info = info; + resize.root = root; + resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + resize.max_entries = calc_max_entries(info->value_type.size, + resize.size_of_block); + + resize.old_nr_full_blocks = old_size / resize.max_entries; + resize.old_nr_entries_in_last_block = old_size % resize.max_entries; + resize.new_nr_full_blocks = new_size / resize.max_entries; + resize.new_nr_entries_in_last_block = new_size % resize.max_entries; + resize.value = value; + + r = ((new_size > old_size) ? grow : shrink)(&resize); + if (r) + return r; + + *new_root = resize.root; + return 0; +} + +int dm_array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) + __dm_written_to_disk(value) +{ + int r = array_resize(info, root, old_size, new_size, value, new_root); + __dm_unbless_for_disk(value); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_resize); + +int dm_array_del(struct dm_array_info *info, dm_block_t root) +{ + return dm_btree_del(&info->btree_info, root); +} +EXPORT_SYMBOL_GPL(dm_array_del); + +int dm_array_get_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, void *value_le) +{ + int r; + struct dm_block *block; + struct array_block *ab; + size_t size_of_block; + unsigned entry, max_entries; + + size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + max_entries = calc_max_entries(info->value_type.size, size_of_block); + + r = lookup_ablock(info, root, index / max_entries, &block, &ab); + if (r) + return r; + + entry = index % max_entries; + if (entry >= le32_to_cpu(ab->nr_entries)) + r = -ENODATA; + else + memcpy(value_le, element_at(info, ab, entry), + info->value_type.size); + + unlock_ablock(info, block); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_get_value); + +static int array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) +{ + int r; + struct dm_block *block; + struct array_block *ab; + size_t size_of_block; + unsigned max_entries; + unsigned entry; + void *old_value; + struct dm_btree_value_type *vt = &info->value_type; + + size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + max_entries = calc_max_entries(info->value_type.size, size_of_block); + + r = shadow_ablock(info, &root, index / max_entries, &block, &ab); + if (r) + return r; + *new_root = root; + + entry = index % max_entries; + if (entry >= le32_to_cpu(ab->nr_entries)) { + r = -ENODATA; + goto out; + } + + old_value = element_at(info, ab, entry); + if (vt->dec && + (!vt->equal || !vt->equal(vt->context, old_value, value))) { + vt->dec(vt->context, old_value); + if (vt->inc) + vt->inc(vt->context, value); + } + + memcpy(old_value, value, info->value_type.size); + +out: + unlock_ablock(info, block); + return r; +} + +int dm_array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) + __dm_written_to_disk(value) +{ + int r; + + r = array_set_value(info, root, index, value, new_root); + __dm_unbless_for_disk(value); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_set_value); + +struct walk_info { + struct dm_array_info *info; + int (*fn)(void *context, uint64_t key, void *leaf); + void *context; +}; + +static int walk_ablock(void *context, uint64_t *keys, void *leaf) +{ + struct walk_info *wi = context; + + int r; + unsigned i; + __le64 block_le; + unsigned nr_entries, max_entries; + struct dm_block *block; + struct array_block *ab; + + memcpy(&block_le, leaf, sizeof(block_le)); + r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab); + if (r) + return r; + + max_entries = le32_to_cpu(ab->max_entries); + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = 0; i < nr_entries; i++) { + r = wi->fn(wi->context, keys[0] * max_entries + i, + element_at(wi->info, ab, i)); + + if (r) + break; + } + + unlock_ablock(wi->info, block); + return r; +} + +int dm_array_walk(struct dm_array_info *info, dm_block_t root, + int (*fn)(void *, uint64_t key, void *leaf), + void *context) +{ + struct walk_info wi; + + wi.info = info; + wi.fn = fn; + wi.context = context; + + return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi); +} +EXPORT_SYMBOL_GPL(dm_array_walk); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h new file mode 100644 index 000000000000..ea177d6fa58f --- /dev/null +++ b/drivers/md/persistent-data/dm-array.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ +#ifndef _LINUX_DM_ARRAY_H +#define _LINUX_DM_ARRAY_H + +#include "dm-btree.h" + +/*----------------------------------------------------------------*/ + +/* + * The dm-array is a persistent version of an array. It packs the data + * more efficiently than a btree which will result in less disk space use, + * and a performance boost. The element get and set operations are still + * O(ln(n)), but with a much smaller constant. + * + * The value type structure is reused from the btree type to support proper + * reference counting of values. + * + * The arrays implicitly know their length, and bounds are checked for + * lookups and updated. It doesn't store this in an accessible place + * because it would waste a whole metadata block. Make sure you store the + * size along with the array root in your encompassing data. + * + * Array entries are indexed via an unsigned integer starting from zero. + * Arrays are not sparse; if you resize an array to have 'n' entries then + * 'n - 1' will be the last valid index. + * + * Typical use: + * + * a) initialise a dm_array_info structure. This describes the array + * values and ties it into a specific transaction manager. It holds no + * instance data; the same info can be used for many similar arrays if + * you wish. + * + * b) Get yourself a root. The root is the index of a block of data on the + * disk that holds a particular instance of an array. You may have a + * pre existing root in your metadata that you wish to use, or you may + * want to create a brand new, empty array with dm_array_empty(). + * + * Like the other data structures in this library, dm_array objects are + * immutable between transactions. Update functions will return you the + * root for a _new_ array. If you've incremented the old root, via + * dm_tm_inc(), before calling the update function you may continue to use + * it in parallel with the new root. + * + * c) resize an array with dm_array_resize(). + * + * d) Get a value from the array with dm_array_get_value(). + * + * e) Set a value in the array with dm_array_set_value(). + * + * f) Walk an array of values in index order with dm_array_walk(). More + * efficient than making many calls to dm_array_get_value(). + * + * g) Destroy the array with dm_array_del(). This tells the transaction + * manager that you're no longer using this data structure so it can + * recycle it's blocks. (dm_array_dec() would be a better name for it, + * but del is in keeping with dm_btree_del()). + */ + +/* + * Describes an array. Don't initialise this structure yourself, use the + * init function below. + */ +struct dm_array_info { + struct dm_transaction_manager *tm; + struct dm_btree_value_type value_type; + struct dm_btree_info btree_info; +}; + +/* + * Sets up a dm_array_info structure. You don't need to do anything with + * this structure when you finish using it. + * + * info - the structure being filled in. + * tm - the transaction manager that should supervise this structure. + * vt - describes the leaf values. + */ +void dm_array_info_init(struct dm_array_info *info, + struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt); + +/* + * Create an empty, zero length array. + * + * info - describes the array + * root - on success this will be filled out with the root block + */ +int dm_array_empty(struct dm_array_info *info, dm_block_t *root); + +/* + * Resizes the array. + * + * info - describes the array + * root - the root block of the array on disk + * old_size - the caller is responsible for remembering the size of + * the array + * new_size - can be bigger or smaller than old_size + * value - if we're growing the array the new entries will have this value + * new_root - on success, points to the new root block + * + * If growing the inc function for 'value' will be called the appropriate + * number of times. So if the caller is holding a reference they may want + * to drop it. + */ +int dm_array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) + __dm_written_to_disk(value); + +/* + * Frees a whole array. The value_type's decrement operation will be called + * for all values in the array + */ +int dm_array_del(struct dm_array_info *info, dm_block_t root); + +/* + * Lookup a value in the array + * + * info - describes the array + * root - root block of the array + * index - array index + * value - the value to be read. Will be in on-disk format of course. + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_array_get_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, void *value); + +/* + * Set an entry in the array. + * + * info - describes the array + * root - root block of the array + * index - array index + * value - value to be written to disk. Make sure you confirm the value is + * in on-disk format with__dm_bless_for_disk() before calling. + * new_root - the new root block + * + * The old value being overwritten will be decremented, the new value + * incremented. + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) + __dm_written_to_disk(value); + +/* + * Walk through all the entries in an array. + * + * info - describes the array + * root - root block of the array + * fn - called back for every element + * context - passed to the callback + */ +int dm_array_walk(struct dm_array_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t key, void *leaf), + void *context); + +/*----------------------------------------------------------------*/ + +#endif /* _LINUX_DM_ARRAY_H */ diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c new file mode 100644 index 000000000000..cd9a86d4cdf0 --- /dev/null +++ b/drivers/md/persistent-data/dm-bitset.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-bitset.h" +#include "dm-transaction-manager.h" + +#include <linux/export.h> +#include <linux/device-mapper.h> + +#define DM_MSG_PREFIX "bitset" +#define BITS_PER_ARRAY_ENTRY 64 + +/*----------------------------------------------------------------*/ + +static struct dm_btree_value_type bitset_bvt = { + .context = NULL, + .size = sizeof(__le64), + .inc = NULL, + .dec = NULL, + .equal = NULL, +}; + +/*----------------------------------------------------------------*/ + +void dm_disk_bitset_init(struct dm_transaction_manager *tm, + struct dm_disk_bitset *info) +{ + dm_array_info_init(&info->array_info, tm, &bitset_bvt); + info->current_index_set = false; +} +EXPORT_SYMBOL_GPL(dm_disk_bitset_init); + +int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root) +{ + return dm_array_empty(&info->array_info, root); +} +EXPORT_SYMBOL_GPL(dm_bitset_empty); + +int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root, + uint32_t old_nr_entries, uint32_t new_nr_entries, + bool default_value, dm_block_t *new_root) +{ + uint32_t old_blocks = dm_div_up(old_nr_entries, BITS_PER_ARRAY_ENTRY); + uint32_t new_blocks = dm_div_up(new_nr_entries, BITS_PER_ARRAY_ENTRY); + __le64 value = default_value ? cpu_to_le64(~0) : cpu_to_le64(0); + + __dm_bless_for_disk(&value); + return dm_array_resize(&info->array_info, root, old_blocks, new_blocks, + &value, new_root); +} +EXPORT_SYMBOL_GPL(dm_bitset_resize); + +int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root) +{ + return dm_array_del(&info->array_info, root); +} +EXPORT_SYMBOL_GPL(dm_bitset_del); + +int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, + dm_block_t *new_root) +{ + int r; + __le64 value; + + if (!info->current_index_set) + return 0; + + value = cpu_to_le64(info->current_bits); + + __dm_bless_for_disk(&value); + r = dm_array_set_value(&info->array_info, root, info->current_index, + &value, new_root); + if (r) + return r; + + info->current_index_set = false; + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_flush); + +static int read_bits(struct dm_disk_bitset *info, dm_block_t root, + uint32_t array_index) +{ + int r; + __le64 value; + + r = dm_array_get_value(&info->array_info, root, array_index, &value); + if (r) + return r; + + info->current_bits = le64_to_cpu(value); + info->current_index_set = true; + info->current_index = array_index; + return 0; +} + +static int get_array_entry(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned array_index = index / BITS_PER_ARRAY_ENTRY; + + if (info->current_index_set) { + if (info->current_index == array_index) + return 0; + + r = dm_bitset_flush(info, root, new_root); + if (r) + return r; + } + + return read_bits(info, root, array_index); +} + +int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + set_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_set_bit); + +int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + clear_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_clear_bit); + +int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root, bool *result) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + *result = test_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_test_bit); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h new file mode 100644 index 000000000000..e1b9bea14aa1 --- /dev/null +++ b/drivers/md/persistent-data/dm-bitset.h @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ +#ifndef _LINUX_DM_BITSET_H +#define _LINUX_DM_BITSET_H + +#include "dm-array.h" + +/*----------------------------------------------------------------*/ + +/* + * This bitset type is a thin wrapper round a dm_array of 64bit words. It + * uses a tiny, one word cache to reduce the number of array lookups and so + * increase performance. + * + * Like the dm-array that it's based on, the caller needs to keep track of + * the size of the bitset separately. The underlying dm-array implicitly + * knows how many words it's storing and will return -ENODATA if you try + * and access an out of bounds word. However, an out of bounds bit in the + * final word will _not_ be detected, you have been warned. + * + * Bits are indexed from zero. + + * Typical use: + * + * a) Initialise a dm_disk_bitset structure with dm_disk_bitset_init(). + * This describes the bitset and includes the cache. It's not called it + * dm_bitset_info in line with other data structures because it does + * include instance data. + * + * b) Get yourself a root. The root is the index of a block of data on the + * disk that holds a particular instance of an bitset. You may have a + * pre existing root in your metadata that you wish to use, or you may + * want to create a brand new, empty bitset with dm_bitset_empty(). + * + * Like the other data structures in this library, dm_bitset objects are + * immutable between transactions. Update functions will return you the + * root for a _new_ array. If you've incremented the old root, via + * dm_tm_inc(), before calling the update function you may continue to use + * it in parallel with the new root. + * + * Even read operations may trigger the cache to be flushed and as such + * return a root for a new, updated bitset. + * + * c) resize a bitset with dm_bitset_resize(). + * + * d) Set a bit with dm_bitset_set_bit(). + * + * e) Clear a bit with dm_bitset_clear_bit(). + * + * f) Test a bit with dm_bitset_test_bit(). + * + * g) Flush all updates from the cache with dm_bitset_flush(). + * + * h) Destroy the bitset with dm_bitset_del(). This tells the transaction + * manager that you're no longer using this data structure so it can + * recycle it's blocks. (dm_bitset_dec() would be a better name for it, + * but del is in keeping with dm_btree_del()). + */ + +/* + * Opaque object. Unlike dm_array_info, you should have one of these per + * bitset. Initialise with dm_disk_bitset_init(). + */ +struct dm_disk_bitset { + struct dm_array_info array_info; + + uint32_t current_index; + uint64_t current_bits; + + bool current_index_set:1; +}; + +/* + * Sets up a dm_disk_bitset structure. You don't need to do anything with + * this structure when you finish using it. + * + * tm - the transaction manager that should supervise this structure + * info - the structure being initialised + */ +void dm_disk_bitset_init(struct dm_transaction_manager *tm, + struct dm_disk_bitset *info); + +/* + * Create an empty, zero length bitset. + * + * info - describes the bitset + * new_root - on success, points to the new root block + */ +int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root); + +/* + * Resize the bitset. + * + * info - describes the bitset + * old_root - the root block of the array on disk + * old_nr_entries - the number of bits in the old bitset + * new_nr_entries - the number of bits you want in the new bitset + * default_value - the value for any new bits + * new_root - on success, points to the new root block + */ +int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t old_root, + uint32_t old_nr_entries, uint32_t new_nr_entries, + bool default_value, dm_block_t *new_root); + +/* + * Frees the bitset. + */ +int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root); + +/* + * Set a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root); + +/* + * Clears a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root); + +/* + * Tests a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block (cached values may have been written) + * result - the bit value you're after + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root, bool *result); + +/* + * Flush any cached changes to disk. + * + * info - describes the bitset + * root - the root block of the bitset + * new_root - on success, points to the new root block + */ +int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, + dm_block_t *new_root); + +/*----------------------------------------------------------------*/ + +#endif /* _LINUX_DM_BITSET_H */ diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 28c3ed072a79..81b513890e2b 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -613,6 +613,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, return dm_bufio_write_dirty_buffers(bm->bufio); } +EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); void dm_bm_set_read_only(struct dm_block_manager *bm) { diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index accbb05f17b6..37d367bb9aa8 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -64,6 +64,7 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); +void ro_pop(struct ro_spine *s); struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index f199a0c4ed04..cf9fd676ae44 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -164,6 +164,13 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } +void ro_pop(struct ro_spine *s) +{ + BUG_ON(!s->count); + --s->count; + unlock_block(s->info, s->nodes[s->count]); +} + struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 4caf66918cdb..35865425e4b4 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -807,3 +807,55 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, return r ? r : count; } EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); + +/* + * FIXME: We shouldn't use a recursive algorithm when we have limited stack + * space. Also this only works for single level trees. + */ +static int walk_node(struct ro_spine *s, dm_block_t block, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) +{ + int r; + unsigned i, nr; + struct btree_node *n; + uint64_t keys; + + r = ro_step(s, block); + n = ro_node(s); + + nr = le32_to_cpu(n->header.nr_entries); + for (i = 0; i < nr; i++) { + if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) { + r = walk_node(s, value64(n, i), fn, context); + if (r) + goto out; + } else { + keys = le64_to_cpu(*key_ptr(n, i)); + r = fn(context, &keys, value_ptr(n, i)); + if (r) + goto out; + } + } + +out: + ro_pop(s); + return r; +} + +int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) +{ + int r; + struct ro_spine spine; + + BUG_ON(info->levels > 1); + + init_ro_spine(&spine, info); + r = walk_node(&spine, root, fn, context); + exit_ro_spine(&spine); + + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_walk); diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index a2cd50441ca1..8672d159e0b5 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -58,21 +58,21 @@ struct dm_btree_value_type { * somewhere.) This method is _not_ called for insertion of a new * value: It is assumed the ref count is already 1. */ - void (*inc)(void *context, void *value); + void (*inc)(void *context, const void *value); /* * This value is being deleted. The btree takes care of freeing * the memory pointed to by @value. Often the del function just * needs to decrement a reference count somewhere. */ - void (*dec)(void *context, void *value); + void (*dec)(void *context, const void *value); /* * A test for equality between two values. When a value is * overwritten with a new one, the old one has the dec method * called _unless_ the new and old value are deemed equal. */ - int (*equal)(void *context, void *value1, void *value2); + int (*equal)(void *context, const void *value1, const void *value2); }; /* @@ -142,4 +142,13 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, uint64_t *result_keys); +/* + * Iterate through the a btree, calling fn() on each entry. + * It only works for single level trees and is internally recursive, so + * monitor stack usage carefully. + */ +int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context); + #endif /* _LINUX_DM_BTREE_H */ diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 3aa9a969b373..36f5d52775a9 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -103,6 +103,7 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/module.h> +#include <asm/sections.h> #define v1printk(a...) do { \ if (verbose) \ @@ -222,6 +223,7 @@ static unsigned long lookup_addr(char *arg) addr = (unsigned long)do_fork; else if (!strcmp(arg, "hw_break_val")) addr = (unsigned long)&hw_break_val; + addr = (unsigned long) dereference_function_descriptor((void *)addr); return addr; } diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 03f2eb5627ec..557bec599f4f 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -74,8 +74,8 @@ config MTD_REDBOOT_PARTS_READONLY endif # MTD_REDBOOT_PARTS config MTD_CMDLINE_PARTS - bool "Command line partition table parsing" - depends on MTD = "y" + tristate "Command line partition table parsing" + depends on MTD ---help--- Allow generic configuration of the MTD partition tables via the kernel command line. Multiple flash resources are supported for hardware where diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c index 7c057a05adb6..ddc0a4287a4b 100644 --- a/drivers/mtd/ar7part.c +++ b/drivers/mtd/ar7part.c @@ -142,7 +142,13 @@ static int __init ar7_parser_init(void) return register_mtd_parser(&ar7_parser); } +static void __exit ar7_parser_exit(void) +{ + deregister_mtd_parser(&ar7_parser); +} + module_init(ar7_parser_init); +module_exit(ar7_parser_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR( "Felix Fietkau <nbd@openwrt.org>, " diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index e06d782489a6..63feb75cc8e0 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -14,17 +14,11 @@ #include <linux/slab.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> /* 10 parts were found on sflash on Netgear WNDR4500 */ #define BCM47XXPART_MAX_PARTS 12 -/* - * Amount of bytes we read when analyzing each block of flash memory. - * Set it big enough to allow detecting partition and reading important data. - */ -#define BCM47XXPART_BYTES_TO_READ 0x404 - /* Magics */ #define BOARD_DATA_MAGIC 0x5246504D /* MPFR */ #define POT_MAGIC1 0x54544f50 /* POTT */ @@ -59,13 +53,21 @@ static int bcm47xxpart_parse(struct mtd_info *master, uint32_t *buf; size_t bytes_read; uint32_t offset; - uint32_t blocksize = 0x10000; + uint32_t blocksize = master->erasesize; struct trx_header *trx; + int trx_part = -1; + int last_trx_part = -1; + int max_bytes_to_read = 0x8004; + + if (blocksize <= 0x10000) + blocksize = 0x10000; + if (blocksize == 0x20000) + max_bytes_to_read = 0x18004; /* Alloc */ parts = kzalloc(sizeof(struct mtd_partition) * BCM47XXPART_MAX_PARTS, GFP_KERNEL); - buf = kzalloc(BCM47XXPART_BYTES_TO_READ, GFP_KERNEL); + buf = kzalloc(max_bytes_to_read, GFP_KERNEL); /* Parse block by block looking for magics */ for (offset = 0; offset <= master->size - blocksize; @@ -80,7 +82,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Read beginning of the block */ - if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ, + if (mtd_read(master, offset, max_bytes_to_read, &bytes_read, (uint8_t *)buf) < 0) { pr_err("mtd_read error while parsing (offset: 0x%X)!\n", offset); @@ -95,9 +97,16 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Standard NVRAM */ - if (buf[0x000 / 4] == NVRAM_HEADER) { + if (buf[0x000 / 4] == NVRAM_HEADER || + buf[0x1000 / 4] == NVRAM_HEADER || + buf[0x8000 / 4] == NVRAM_HEADER || + (blocksize == 0x20000 && ( + buf[0x10000 / 4] == NVRAM_HEADER || + buf[0x11000 / 4] == NVRAM_HEADER || + buf[0x18000 / 4] == NVRAM_HEADER))) { bcm47xxpart_add_part(&parts[curr_part++], "nvram", offset, 0); + offset = rounddown(offset, blocksize); continue; } @@ -131,6 +140,10 @@ static int bcm47xxpart_parse(struct mtd_info *master, if (buf[0x000 / 4] == TRX_MAGIC) { trx = (struct trx_header *)buf; + trx_part = curr_part; + bcm47xxpart_add_part(&parts[curr_part++], "firmware", + offset, 0); + i = 0; /* We have LZMA loader if offset[2] points to sth */ if (trx->offset[2]) { @@ -154,6 +167,8 @@ static int bcm47xxpart_parse(struct mtd_info *master, offset + trx->offset[i], 0); i++; + last_trx_part = curr_part - 1; + /* * We have whole TRX scanned, skip to the next part. Use * roundown (not roundup), as the loop will increase @@ -169,11 +184,15 @@ static int bcm47xxpart_parse(struct mtd_info *master, * Assume that partitions end at the beginning of the one they are * followed by. */ - for (i = 0; i < curr_part - 1; i++) - parts[i].size = parts[i + 1].offset - parts[i].offset; - if (curr_part > 0) - parts[curr_part - 1].size = - master->size - parts[curr_part - 1].offset; + for (i = 0; i < curr_part; i++) { + u64 next_part_offset = (i < curr_part - 1) ? + parts[i + 1].offset : master->size; + + parts[i].size = next_part_offset - parts[i].offset; + if (i == last_trx_part && trx_part >= 0) + parts[trx_part].size = next_part_offset - + parts[trx_part].offset; + } *pparts = parts; return curr_part; diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index b86197286f24..fff665d59a0d 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -33,6 +33,8 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/reboot.h> +#include <linux/of.h> +#include <linux/of_platform.h> #include <linux/mtd/map.h> #include <linux/mtd/mtd.h> #include <linux/mtd/cfi.h> @@ -74,6 +76,10 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +static int cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +static int cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +static int cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len); + static struct mtd_chip_driver cfi_amdstd_chipdrv = { .probe = NULL, /* Not usable directly */ .destroy = cfi_amdstd_destroy, @@ -496,6 +502,7 @@ static void cfi_fixup_m29ew_delay_after_resume(struct cfi_private *cfi) struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) { struct cfi_private *cfi = map->fldrv_priv; + struct device_node __maybe_unused *np = map->device_node; struct mtd_info *mtd; int i; @@ -570,6 +577,17 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) cfi_tell_features(extp); #endif +#ifdef CONFIG_OF + if (np && of_property_read_bool( + np, "use-advanced-sector-protection") + && extp->BlkProtUnprot == 8) { + printk(KERN_INFO " Advanced Sector Protection (PPB Locking) supported\n"); + mtd->_lock = cfi_ppb_lock; + mtd->_unlock = cfi_ppb_unlock; + mtd->_is_locked = cfi_ppb_is_locked; + } +#endif + bootloc = extp->TopBottom; if ((bootloc < 2) || (bootloc > 5)) { printk(KERN_WARNING "%s: CFI contains unrecognised boot " @@ -2172,6 +2190,205 @@ static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL); } +/* + * Advanced Sector Protection - PPB (Persistent Protection Bit) locking + */ + +struct ppb_lock { + struct flchip *chip; + loff_t offset; + int locked; +}; + +#define MAX_SECTORS 512 + +#define DO_XXLOCK_ONEBLOCK_LOCK ((void *)1) +#define DO_XXLOCK_ONEBLOCK_UNLOCK ((void *)2) +#define DO_XXLOCK_ONEBLOCK_GETLOCK ((void *)3) + +static int __maybe_unused do_ppb_xxlock(struct map_info *map, + struct flchip *chip, + unsigned long adr, int len, void *thunk) +{ + struct cfi_private *cfi = map->fldrv_priv; + unsigned long timeo; + int ret; + + mutex_lock(&chip->mutex); + ret = get_chip(map, chip, adr + chip->start, FL_LOCKING); + if (ret) { + mutex_unlock(&chip->mutex); + return ret; + } + + pr_debug("MTD %s(): XXLOCK 0x%08lx len %d\n", __func__, adr, len); + + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, + cfi->device_type, NULL); + /* PPB entry command */ + cfi_send_gen_cmd(0xC0, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + + if (thunk == DO_XXLOCK_ONEBLOCK_LOCK) { + chip->state = FL_LOCKING; + map_write(map, CMD(0xA0), chip->start + adr); + map_write(map, CMD(0x00), chip->start + adr); + } else if (thunk == DO_XXLOCK_ONEBLOCK_UNLOCK) { + /* + * Unlocking of one specific sector is not supported, so we + * have to unlock all sectors of this device instead + */ + chip->state = FL_UNLOCKING; + map_write(map, CMD(0x80), chip->start); + map_write(map, CMD(0x30), chip->start); + } else if (thunk == DO_XXLOCK_ONEBLOCK_GETLOCK) { + chip->state = FL_JEDEC_QUERY; + /* Return locked status: 0->locked, 1->unlocked */ + ret = !cfi_read_query(map, adr); + } else + BUG(); + + /* + * Wait for some time as unlocking of all sectors takes quite long + */ + timeo = jiffies + msecs_to_jiffies(2000); /* 2s max (un)locking */ + for (;;) { + if (chip_ready(map, adr)) + break; + + if (time_after(jiffies, timeo)) { + printk(KERN_ERR "Waiting for chip to be ready timed out.\n"); + ret = -EIO; + break; + } + + UDELAY(map, chip, adr, 1); + } + + /* Exit BC commands */ + map_write(map, CMD(0x90), chip->start); + map_write(map, CMD(0x00), chip->start); + + chip->state = FL_READY; + put_chip(map, chip, adr + chip->start); + mutex_unlock(&chip->mutex); + + return ret; +} + +static int __maybe_unused cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs, + uint64_t len) +{ + return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len, + DO_XXLOCK_ONEBLOCK_LOCK); +} + +static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, + uint64_t len) +{ + struct mtd_erase_region_info *regions = mtd->eraseregions; + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + struct ppb_lock *sect; + unsigned long adr; + loff_t offset; + uint64_t length; + int chipnum; + int i; + int sectors; + int ret; + + /* + * PPB unlocking always unlocks all sectors of the flash chip. + * We need to re-lock all previously locked sectors. So lets + * first check the locking status of all sectors and save + * it for future use. + */ + sect = kzalloc(MAX_SECTORS * sizeof(struct ppb_lock), GFP_KERNEL); + if (!sect) + return -ENOMEM; + + /* + * This code to walk all sectors is a slightly modified version + * of the cfi_varsize_frob() code. + */ + i = 0; + chipnum = 0; + adr = 0; + sectors = 0; + offset = 0; + length = mtd->size; + + while (length) { + int size = regions[i].erasesize; + + /* + * Only test sectors that shall not be unlocked. The other + * sectors shall be unlocked, so lets keep their locking + * status at "unlocked" (locked=0) for the final re-locking. + */ + if ((adr < ofs) || (adr >= (ofs + len))) { + sect[sectors].chip = &cfi->chips[chipnum]; + sect[sectors].offset = offset; + sect[sectors].locked = do_ppb_xxlock( + map, &cfi->chips[chipnum], adr, 0, + DO_XXLOCK_ONEBLOCK_GETLOCK); + } + + adr += size; + offset += size; + length -= size; + + if (offset == regions[i].offset + size * regions[i].numblocks) + i++; + + if (adr >> cfi->chipshift) { + adr = 0; + chipnum++; + + if (chipnum >= cfi->numchips) + break; + } + + sectors++; + if (sectors >= MAX_SECTORS) { + printk(KERN_ERR "Only %d sectors for PPB locking supported!\n", + MAX_SECTORS); + kfree(sect); + return -EINVAL; + } + } + + /* Now unlock the whole chip */ + ret = cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len, + DO_XXLOCK_ONEBLOCK_UNLOCK); + if (ret) { + kfree(sect); + return ret; + } + + /* + * PPB unlocking always unlocks all sectors of the flash chip. + * We need to re-lock all previously locked sectors. + */ + for (i = 0; i < sectors; i++) { + if (sect[i].locked) + do_ppb_xxlock(map, sect[i].chip, sect[i].offset, 0, + DO_XXLOCK_ONEBLOCK_LOCK); + } + + kfree(sect); + return ret; +} + +static int __maybe_unused cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs, + uint64_t len) +{ + return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len, + DO_XXLOCK_ONEBLOCK_GETLOCK) ? 1 : 0; +} static void cfi_amdstd_sync (struct mtd_info *mtd) { diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c index c533f27d863f..721caebbc5cc 100644 --- a/drivers/mtd/cmdlinepart.c +++ b/drivers/mtd/cmdlinepart.c @@ -22,11 +22,22 @@ * * mtdparts=<mtddef>[;<mtddef] * <mtddef> := <mtd-id>:<partdef>[,<partdef>] - * where <mtd-id> is the name from the "cat /proc/mtd" command - * <partdef> := <size>[@offset][<name>][ro][lk] + * <partdef> := <size>[@<offset>][<name>][ro][lk] * <mtd-id> := unique name used in mapping driver/device (mtd->name) * <size> := standard linux memsize OR "-" to denote all remaining space + * size is automatically truncated at end of device + * if specified or trucated size is 0 the part is skipped + * <offset> := standard linux memsize + * if omitted the part will immediately follow the previous part + * or 0 if the first part * <name> := '(' NAME ')' + * NAME will appear in /proc/mtd + * + * <size> and <offset> can be specified such that the parts are out of order + * in physical memory and may even overlap. + * + * The parts are assigned MTD numbers in the order they are specified in the + * command line regardless of their order in physical memory. * * Examples: * @@ -70,6 +81,7 @@ struct cmdline_mtd_partition { static struct cmdline_mtd_partition *partitions; /* the command line passed to mtdpart_setup() */ +static char *mtdparts; static char *cmdline; static int cmdline_parsed; @@ -330,6 +342,14 @@ static int parse_cmdline_partitions(struct mtd_info *master, if (part->parts[i].size == SIZE_REMAINING) part->parts[i].size = master->size - offset; + if (offset + part->parts[i].size > master->size) { + printk(KERN_WARNING ERRP + "%s: partitioning exceeds flash size, truncating\n", + part->mtd_id); + part->parts[i].size = master->size - offset; + } + offset += part->parts[i].size; + if (part->parts[i].size == 0) { printk(KERN_WARNING ERRP "%s: skipping zero sized partition\n", @@ -337,16 +357,8 @@ static int parse_cmdline_partitions(struct mtd_info *master, part->num_parts--; memmove(&part->parts[i], &part->parts[i + 1], sizeof(*part->parts) * (part->num_parts - i)); - continue; - } - - if (offset + part->parts[i].size > master->size) { - printk(KERN_WARNING ERRP - "%s: partitioning exceeds flash size, truncating\n", - part->mtd_id); - part->parts[i].size = master->size - offset; + i--; } - offset += part->parts[i].size; } *pparts = kmemdup(part->parts, sizeof(*part->parts) * part->num_parts, @@ -365,7 +377,7 @@ static int parse_cmdline_partitions(struct mtd_info *master, * * This function needs to be visible for bootloaders. */ -static int mtdpart_setup(char *s) +static int __init mtdpart_setup(char *s) { cmdline = s; return 1; @@ -381,10 +393,21 @@ static struct mtd_part_parser cmdline_parser = { static int __init cmdline_parser_init(void) { + if (mtdparts) + mtdpart_setup(mtdparts); return register_mtd_parser(&cmdline_parser); } +static void __exit cmdline_parser_exit(void) +{ + deregister_mtd_parser(&cmdline_parser); +} + module_init(cmdline_parser_init); +module_exit(cmdline_parser_exit); + +MODULE_PARM_DESC(mtdparts, "Partitioning specification"); +module_param(mtdparts, charp, 0); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marius Groeger <mag@sysgo.de>"); diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index 395733a30ef4..369a1943ca25 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -17,8 +17,10 @@ obj-$(CONFIG_MTD_LART) += lart.o obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o obj-$(CONFIG_MTD_M25P80) += m25p80.o +obj-$(CONFIG_MTD_NAND_OMAP_BCH) += elm.o obj-$(CONFIG_MTD_SPEAR_SMI) += spear_smi.o obj-$(CONFIG_MTD_SST25L) += sst25l.o obj-$(CONFIG_MTD_BCM47XXSFLASH) += bcm47xxsflash.o -CFLAGS_docg3.o += -I$(src)
\ No newline at end of file + +CFLAGS_docg3.o += -I$(src) diff --git a/drivers/mtd/devices/bcm47xxsflash.c b/drivers/mtd/devices/bcm47xxsflash.c index 4714584aa993..95266285acb1 100644 --- a/drivers/mtd/devices/bcm47xxsflash.c +++ b/drivers/mtd/devices/bcm47xxsflash.c @@ -5,6 +5,8 @@ #include <linux/platform_device.h> #include <linux/bcma/bcma.h> +#include "bcm47xxsflash.h" + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serial flash driver for BCMA bus"); @@ -13,26 +15,28 @@ static const char *probes[] = { "bcm47xxpart", NULL }; static int bcm47xxsflash_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { - struct bcma_sflash *sflash = mtd->priv; + struct bcm47xxsflash *b47s = mtd->priv; /* Check address range */ if ((from + len) > mtd->size) return -EINVAL; - memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(sflash->window + from), + memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(b47s->window + from), len); + *retlen = len; return len; } -static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash, - struct mtd_info *mtd) +static void bcm47xxsflash_fill_mtd(struct bcm47xxsflash *b47s) { - mtd->priv = sflash; + struct mtd_info *mtd = &b47s->mtd; + + mtd->priv = b47s; mtd->name = "bcm47xxsflash"; mtd->owner = THIS_MODULE; mtd->type = MTD_ROM; - mtd->size = sflash->size; + mtd->size = b47s->size; mtd->_read = bcm47xxsflash_read; /* TODO: implement writing support and verify/change following code */ @@ -40,19 +44,30 @@ static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash, mtd->writebufsize = mtd->writesize = 1; } -static int bcm47xxsflash_probe(struct platform_device *pdev) +/************************************************** + * BCMA + **************************************************/ + +static int bcm47xxsflash_bcma_probe(struct platform_device *pdev) { struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev); + struct bcm47xxsflash *b47s; int err; - sflash->mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL); - if (!sflash->mtd) { + b47s = kzalloc(sizeof(*b47s), GFP_KERNEL); + if (!b47s) { err = -ENOMEM; goto out; } - bcm47xxsflash_fill_mtd(sflash, sflash->mtd); + sflash->priv = b47s; - err = mtd_device_parse_register(sflash->mtd, probes, NULL, NULL, 0); + b47s->window = sflash->window; + b47s->blocksize = sflash->blocksize; + b47s->numblocks = sflash->numblocks; + b47s->size = sflash->size; + bcm47xxsflash_fill_mtd(b47s); + + err = mtd_device_parse_register(&b47s->mtd, probes, NULL, NULL, 0); if (err) { pr_err("Failed to register MTD device: %d\n", err); goto err_dev_reg; @@ -61,34 +76,40 @@ static int bcm47xxsflash_probe(struct platform_device *pdev) return 0; err_dev_reg: - kfree(sflash->mtd); + kfree(&b47s->mtd); out: return err; } -static int bcm47xxsflash_remove(struct platform_device *pdev) +static int bcm47xxsflash_bcma_remove(struct platform_device *pdev) { struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev); + struct bcm47xxsflash *b47s = sflash->priv; - mtd_device_unregister(sflash->mtd); - kfree(sflash->mtd); + mtd_device_unregister(&b47s->mtd); + kfree(b47s); return 0; } static struct platform_driver bcma_sflash_driver = { - .remove = bcm47xxsflash_remove, + .probe = bcm47xxsflash_bcma_probe, + .remove = bcm47xxsflash_bcma_remove, .driver = { .name = "bcma_sflash", .owner = THIS_MODULE, }, }; +/************************************************** + * Init + **************************************************/ + static int __init bcm47xxsflash_init(void) { int err; - err = platform_driver_probe(&bcma_sflash_driver, bcm47xxsflash_probe); + err = platform_driver_register(&bcma_sflash_driver); if (err) pr_err("Failed to register BCMA serial flash driver: %d\n", err); diff --git a/drivers/mtd/devices/bcm47xxsflash.h b/drivers/mtd/devices/bcm47xxsflash.h new file mode 100644 index 000000000000..ebf6f710e23c --- /dev/null +++ b/drivers/mtd/devices/bcm47xxsflash.h @@ -0,0 +1,15 @@ +#ifndef __BCM47XXSFLASH_H +#define __BCM47XXSFLASH_H + +#include <linux/mtd/mtd.h> + +struct bcm47xxsflash { + u32 window; + u32 blocksize; + u16 numblocks; + u32 size; + + struct mtd_info mtd; +}; + +#endif /* BCM47XXSFLASH */ diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c new file mode 100644 index 000000000000..2ec5da9ee248 --- /dev/null +++ b/drivers/mtd/devices/elm.c @@ -0,0 +1,404 @@ +/* + * Error Location Module + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/pm_runtime.h> +#include <linux/platform_data/elm.h> + +#define ELM_IRQSTATUS 0x018 +#define ELM_IRQENABLE 0x01c +#define ELM_LOCATION_CONFIG 0x020 +#define ELM_PAGE_CTRL 0x080 +#define ELM_SYNDROME_FRAGMENT_0 0x400 +#define ELM_SYNDROME_FRAGMENT_6 0x418 +#define ELM_LOCATION_STATUS 0x800 +#define ELM_ERROR_LOCATION_0 0x880 + +/* ELM Interrupt Status Register */ +#define INTR_STATUS_PAGE_VALID BIT(8) + +/* ELM Interrupt Enable Register */ +#define INTR_EN_PAGE_MASK BIT(8) + +/* ELM Location Configuration Register */ +#define ECC_BCH_LEVEL_MASK 0x3 + +/* ELM syndrome */ +#define ELM_SYNDROME_VALID BIT(16) + +/* ELM_LOCATION_STATUS Register */ +#define ECC_CORRECTABLE_MASK BIT(8) +#define ECC_NB_ERRORS_MASK 0x1f + +/* ELM_ERROR_LOCATION_0-15 Registers */ +#define ECC_ERROR_LOCATION_MASK 0x1fff + +#define ELM_ECC_SIZE 0x7ff + +#define SYNDROME_FRAGMENT_REG_SIZE 0x40 +#define ERROR_LOCATION_SIZE 0x100 + +struct elm_info { + struct device *dev; + void __iomem *elm_base; + struct completion elm_completion; + struct list_head list; + enum bch_ecc bch_type; +}; + +static LIST_HEAD(elm_devices); + +static void elm_write_reg(struct elm_info *info, int offset, u32 val) +{ + writel(val, info->elm_base + offset); +} + +static u32 elm_read_reg(struct elm_info *info, int offset) +{ + return readl(info->elm_base + offset); +} + +/** + * elm_config - Configure ELM module + * @dev: ELM device + * @bch_type: Type of BCH ecc + */ +void elm_config(struct device *dev, enum bch_ecc bch_type) +{ + u32 reg_val; + struct elm_info *info = dev_get_drvdata(dev); + + reg_val = (bch_type & ECC_BCH_LEVEL_MASK) | (ELM_ECC_SIZE << 16); + elm_write_reg(info, ELM_LOCATION_CONFIG, reg_val); + info->bch_type = bch_type; +} +EXPORT_SYMBOL(elm_config); + +/** + * elm_configure_page_mode - Enable/Disable page mode + * @info: elm info + * @index: index number of syndrome fragment vector + * @enable: enable/disable flag for page mode + * + * Enable page mode for syndrome fragment index + */ +static void elm_configure_page_mode(struct elm_info *info, int index, + bool enable) +{ + u32 reg_val; + + reg_val = elm_read_reg(info, ELM_PAGE_CTRL); + if (enable) + reg_val |= BIT(index); /* enable page mode */ + else + reg_val &= ~BIT(index); /* disable page mode */ + + elm_write_reg(info, ELM_PAGE_CTRL, reg_val); +} + +/** + * elm_load_syndrome - Load ELM syndrome reg + * @info: elm info + * @err_vec: elm error vectors + * @ecc: buffer with calculated ecc + * + * Load syndrome fragment registers with calculated ecc in reverse order. + */ +static void elm_load_syndrome(struct elm_info *info, + struct elm_errorvec *err_vec, u8 *ecc) +{ + int i, offset; + u32 val; + + for (i = 0; i < ERROR_VECTOR_MAX; i++) { + + /* Check error reported */ + if (err_vec[i].error_reported) { + elm_configure_page_mode(info, i, true); + offset = ELM_SYNDROME_FRAGMENT_0 + + SYNDROME_FRAGMENT_REG_SIZE * i; + + /* BCH8 */ + if (info->bch_type) { + + /* syndrome fragment 0 = ecc[9-12B] */ + val = cpu_to_be32(*(u32 *) &ecc[9]); + elm_write_reg(info, offset, val); + + /* syndrome fragment 1 = ecc[5-8B] */ + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[5]); + elm_write_reg(info, offset, val); + + /* syndrome fragment 2 = ecc[1-4B] */ + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[1]); + elm_write_reg(info, offset, val); + + /* syndrome fragment 3 = ecc[0B] */ + offset += 4; + val = ecc[0]; + elm_write_reg(info, offset, val); + } else { + /* syndrome fragment 0 = ecc[20-52b] bits */ + val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) | + ((ecc[2] & 0xf) << 28); + elm_write_reg(info, offset, val); + + /* syndrome fragment 1 = ecc[0-20b] bits */ + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12; + elm_write_reg(info, offset, val); + } + } + + /* Update ecc pointer with ecc byte size */ + ecc += info->bch_type ? BCH8_SIZE : BCH4_SIZE; + } +} + +/** + * elm_start_processing - start elm syndrome processing + * @info: elm info + * @err_vec: elm error vectors + * + * Set syndrome valid bit for syndrome fragment registers for which + * elm syndrome fragment registers are loaded. This enables elm module + * to start processing syndrome vectors. + */ +static void elm_start_processing(struct elm_info *info, + struct elm_errorvec *err_vec) +{ + int i, offset; + u32 reg_val; + + /* + * Set syndrome vector valid, so that ELM module + * will process it for vectors error is reported + */ + for (i = 0; i < ERROR_VECTOR_MAX; i++) { + if (err_vec[i].error_reported) { + offset = ELM_SYNDROME_FRAGMENT_6 + + SYNDROME_FRAGMENT_REG_SIZE * i; + reg_val = elm_read_reg(info, offset); + reg_val |= ELM_SYNDROME_VALID; + elm_write_reg(info, offset, reg_val); + } + } +} + +/** + * elm_error_correction - locate correctable error position + * @info: elm info + * @err_vec: elm error vectors + * + * On completion of processing by elm module, error location status + * register updated with correctable/uncorrectable error information. + * In case of correctable errors, number of errors located from + * elm location status register & read the positions from + * elm error location register. + */ +static void elm_error_correction(struct elm_info *info, + struct elm_errorvec *err_vec) +{ + int i, j, errors = 0; + int offset; + u32 reg_val; + + for (i = 0; i < ERROR_VECTOR_MAX; i++) { + + /* Check error reported */ + if (err_vec[i].error_reported) { + offset = ELM_LOCATION_STATUS + ERROR_LOCATION_SIZE * i; + reg_val = elm_read_reg(info, offset); + + /* Check correctable error or not */ + if (reg_val & ECC_CORRECTABLE_MASK) { + offset = ELM_ERROR_LOCATION_0 + + ERROR_LOCATION_SIZE * i; + + /* Read count of correctable errors */ + err_vec[i].error_count = reg_val & + ECC_NB_ERRORS_MASK; + + /* Update the error locations in error vector */ + for (j = 0; j < err_vec[i].error_count; j++) { + + reg_val = elm_read_reg(info, offset); + err_vec[i].error_loc[j] = reg_val & + ECC_ERROR_LOCATION_MASK; + + /* Update error location register */ + offset += 4; + } + + errors += err_vec[i].error_count; + } else { + err_vec[i].error_uncorrectable = true; + } + + /* Clearing interrupts for processed error vectors */ + elm_write_reg(info, ELM_IRQSTATUS, BIT(i)); + + /* Disable page mode */ + elm_configure_page_mode(info, i, false); + } + } +} + +/** + * elm_decode_bch_error_page - Locate error position + * @dev: device pointer + * @ecc_calc: calculated ECC bytes from GPMC + * @err_vec: elm error vectors + * + * Called with one or more error reported vectors & vectors with + * error reported is updated in err_vec[].error_reported + */ +void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, + struct elm_errorvec *err_vec) +{ + struct elm_info *info = dev_get_drvdata(dev); + u32 reg_val; + + /* Enable page mode interrupt */ + reg_val = elm_read_reg(info, ELM_IRQSTATUS); + elm_write_reg(info, ELM_IRQSTATUS, reg_val & INTR_STATUS_PAGE_VALID); + elm_write_reg(info, ELM_IRQENABLE, INTR_EN_PAGE_MASK); + + /* Load valid ecc byte to syndrome fragment register */ + elm_load_syndrome(info, err_vec, ecc_calc); + + /* Enable syndrome processing for which syndrome fragment is updated */ + elm_start_processing(info, err_vec); + + /* Wait for ELM module to finish locating error correction */ + wait_for_completion(&info->elm_completion); + + /* Disable page mode interrupt */ + reg_val = elm_read_reg(info, ELM_IRQENABLE); + elm_write_reg(info, ELM_IRQENABLE, reg_val & ~INTR_EN_PAGE_MASK); + elm_error_correction(info, err_vec); +} +EXPORT_SYMBOL(elm_decode_bch_error_page); + +static irqreturn_t elm_isr(int this_irq, void *dev_id) +{ + u32 reg_val; + struct elm_info *info = dev_id; + + reg_val = elm_read_reg(info, ELM_IRQSTATUS); + + /* All error vectors processed */ + if (reg_val & INTR_STATUS_PAGE_VALID) { + elm_write_reg(info, ELM_IRQSTATUS, + reg_val & INTR_STATUS_PAGE_VALID); + complete(&info->elm_completion); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static int elm_probe(struct platform_device *pdev) +{ + int ret = 0; + struct resource *res, *irq; + struct elm_info *info; + + info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); + if (!info) { + dev_err(&pdev->dev, "failed to allocate memory\n"); + return -ENOMEM; + } + + info->dev = &pdev->dev; + + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!irq) { + dev_err(&pdev->dev, "no irq resource defined\n"); + return -ENODEV; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "no memory resource defined\n"); + return -ENODEV; + } + + info->elm_base = devm_request_and_ioremap(&pdev->dev, res); + if (!info->elm_base) + return -EADDRNOTAVAIL; + + ret = devm_request_irq(&pdev->dev, irq->start, elm_isr, 0, + pdev->name, info); + if (ret) { + dev_err(&pdev->dev, "failure requesting irq %i\n", irq->start); + return ret; + } + + pm_runtime_enable(&pdev->dev); + if (pm_runtime_get_sync(&pdev->dev)) { + ret = -EINVAL; + pm_runtime_disable(&pdev->dev); + dev_err(&pdev->dev, "can't enable clock\n"); + return ret; + } + + init_completion(&info->elm_completion); + INIT_LIST_HEAD(&info->list); + list_add(&info->list, &elm_devices); + platform_set_drvdata(pdev, info); + return ret; +} + +static int elm_remove(struct platform_device *pdev) +{ + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + platform_set_drvdata(pdev, NULL); + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id elm_of_match[] = { + { .compatible = "ti,am3352-elm" }, + {}, +}; +MODULE_DEVICE_TABLE(of, elm_of_match); +#endif + +static struct platform_driver elm_driver = { + .driver = { + .name = "elm", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(elm_of_match), + }, + .probe = elm_probe, + .remove = elm_remove, +}; + +module_platform_driver(elm_driver); + +MODULE_DESCRIPTION("ELM driver for BCH error correction"); +MODULE_AUTHOR("Texas Instruments"); +MODULE_ALIAS("platform: elm"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 4eeeb2d7f6ea..5b6b0728be21 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -565,6 +565,96 @@ time_out: return ret; } +static int m25p80_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct m25p *flash = mtd_to_m25p(mtd); + uint32_t offset = ofs; + uint8_t status_old, status_new; + int res = 0; + + mutex_lock(&flash->lock); + /* Wait until finished previous command */ + if (wait_till_ready(flash)) { + res = 1; + goto err; + } + + status_old = read_sr(flash); + + if (offset < flash->mtd.size-(flash->mtd.size/2)) + status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0; + else if (offset < flash->mtd.size-(flash->mtd.size/4)) + status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1; + else if (offset < flash->mtd.size-(flash->mtd.size/8)) + status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0; + else if (offset < flash->mtd.size-(flash->mtd.size/16)) + status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2; + else if (offset < flash->mtd.size-(flash->mtd.size/32)) + status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0; + else if (offset < flash->mtd.size-(flash->mtd.size/64)) + status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1; + else + status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0; + + /* Only modify protection if it will not unlock other areas */ + if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) > + (status_old&(SR_BP2|SR_BP1|SR_BP0))) { + write_enable(flash); + if (write_sr(flash, status_new) < 0) { + res = 1; + goto err; + } + } + +err: mutex_unlock(&flash->lock); + return res; +} + +static int m25p80_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct m25p *flash = mtd_to_m25p(mtd); + uint32_t offset = ofs; + uint8_t status_old, status_new; + int res = 0; + + mutex_lock(&flash->lock); + /* Wait until finished previous command */ + if (wait_till_ready(flash)) { + res = 1; + goto err; + } + + status_old = read_sr(flash); + + if (offset+len > flash->mtd.size-(flash->mtd.size/64)) + status_new = status_old & ~(SR_BP2|SR_BP1|SR_BP0); + else if (offset+len > flash->mtd.size-(flash->mtd.size/32)) + status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0; + else if (offset+len > flash->mtd.size-(flash->mtd.size/16)) + status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1; + else if (offset+len > flash->mtd.size-(flash->mtd.size/8)) + status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0; + else if (offset+len > flash->mtd.size-(flash->mtd.size/4)) + status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2; + else if (offset+len > flash->mtd.size-(flash->mtd.size/2)) + status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0; + else + status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1; + + /* Only modify protection if it will not lock other areas */ + if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) < + (status_old&(SR_BP2|SR_BP1|SR_BP0))) { + write_enable(flash); + if (write_sr(flash, status_new) < 0) { + res = 1; + goto err; + } + } + +err: mutex_unlock(&flash->lock); + return res; +} + /****************************************************************************/ /* @@ -642,6 +732,10 @@ static const struct spi_device_id m25p_ids[] = { /* Everspin */ { "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2) }, + /* GigaDevice */ + { "gd25q32", INFO(0xc84016, 0, 64 * 1024, 64, SECT_4K) }, + { "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) }, + /* Intel/Numonyx -- xxxs33b */ { "160s33b", INFO(0x898911, 0, 64 * 1024, 32, 0) }, { "320s33b", INFO(0x898912, 0, 64 * 1024, 64, 0) }, @@ -899,6 +993,12 @@ static int m25p_probe(struct spi_device *spi) flash->mtd._erase = m25p80_erase; flash->mtd._read = m25p80_read; + /* flash protection support for STmicro chips */ + if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) { + flash->mtd._lock = m25p80_lock; + flash->mtd._unlock = m25p80_unlock; + } + /* sst flash chips use AAI word program */ if (JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) flash->mtd._write = sst_write; diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 62ba82c396c2..3ed17c4d4358 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -429,7 +429,7 @@ config MTD_GPIO_ADDR config MTD_UCLINUX bool "Generic uClinux RAM/ROM filesystem support" - depends on MTD_RAM=y && (!MMU || COLDFIRE) + depends on (MTD_RAM=y || MTD_ROM=y) && (!MMU || COLDFIRE) help Map driver to support image based filesystems for uClinux. diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 7901d72c9242..363939dfad05 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -68,9 +68,6 @@ static int of_flash_remove(struct platform_device *dev) kfree(info->list[i].res); } } - - kfree(info); - return 0; } @@ -199,8 +196,9 @@ static int of_flash_probe(struct platform_device *dev) map_indirect = of_property_read_bool(dp, "no-unaligned-direct-access"); err = -ENOMEM; - info = kzalloc(sizeof(struct of_flash) + - sizeof(struct of_flash_list) * count, GFP_KERNEL); + info = devm_kzalloc(&dev->dev, + sizeof(struct of_flash) + + sizeof(struct of_flash_list) * count, GFP_KERNEL); if (!info) goto err_flash_remove; @@ -241,6 +239,7 @@ static int of_flash_probe(struct platform_device *dev) info->list[i].map.phys = res.start; info->list[i].map.size = res_size; info->list[i].map.bankwidth = be32_to_cpup(width); + info->list[i].map.device_node = dp; err = -ENOMEM; info->list[i].map.virt = ioremap(info->list[i].map.phys, diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c index 299bf88a6f41..c1af83db5202 100644 --- a/drivers/mtd/maps/uclinux.c +++ b/drivers/mtd/maps/uclinux.c @@ -23,12 +23,26 @@ /****************************************************************************/ +#ifdef CONFIG_MTD_ROM +#define MAP_NAME "rom" +#else +#define MAP_NAME "ram" +#endif + +/* + * Blackfin uses uclinux_ram_map during startup, so it must not be static. + * Provide a dummy declaration to make sparse happy. + */ +extern struct map_info uclinux_ram_map; + struct map_info uclinux_ram_map = { - .name = "RAM", - .phys = (unsigned long)__bss_stop, + .name = MAP_NAME, .size = 0, }; +static unsigned long physaddr = -1; +module_param(physaddr, ulong, S_IRUGO); + static struct mtd_info *uclinux_ram_mtdinfo; /****************************************************************************/ @@ -60,11 +74,17 @@ static int __init uclinux_mtd_init(void) struct map_info *mapp; mapp = &uclinux_ram_map; + + if (physaddr == -1) + mapp->phys = (resource_size_t)__bss_stop; + else + mapp->phys = physaddr; + if (!mapp->size) mapp->size = PAGE_ALIGN(ntohl(*((unsigned long *)(mapp->phys + 8)))); mapp->bankwidth = 4; - printk("uclinux[mtd]: RAM probe address=0x%x size=0x%x\n", + printk("uclinux[mtd]: probe address=0x%x size=0x%x\n", (int) mapp->phys, (int) mapp->size); /* @@ -82,7 +102,7 @@ static int __init uclinux_mtd_init(void) simple_map_init(mapp); - mtd = do_map_probe("map_ram", mapp); + mtd = do_map_probe("map_" MAP_NAME, mapp); if (!mtd) { printk("uclinux[mtd]: failed to find a mapping?\n"); return(-ENXIO); @@ -118,6 +138,6 @@ module_exit(uclinux_mtd_cleanup); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Greg Ungerer <gerg@snapgear.com>"); -MODULE_DESCRIPTION("Generic RAM based MTD for uClinux"); +MODULE_DESCRIPTION("Generic MTD for uClinux"); /****************************************************************************/ diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index c516a9408087..ffcbcca2fd2d 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -101,6 +101,8 @@ struct atmel_nand_host { u8 pmecc_corr_cap; u16 pmecc_sector_size; u32 pmecc_lookup_table_offset; + u32 pmecc_lookup_table_offset_512; + u32 pmecc_lookup_table_offset_1024; int pmecc_bytes_per_sector; int pmecc_sector_number; @@ -908,6 +910,84 @@ static void atmel_pmecc_core_init(struct mtd_info *mtd) pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE); } +/* + * Get ECC requirement in ONFI parameters, returns -1 if ONFI + * parameters is not supported. + * return 0 if success to get the ECC requirement. + */ +static int get_onfi_ecc_param(struct nand_chip *chip, + int *ecc_bits, int *sector_size) +{ + *ecc_bits = *sector_size = 0; + + if (chip->onfi_params.ecc_bits == 0xff) + /* TODO: the sector_size and ecc_bits need to be find in + * extended ecc parameter, currently we don't support it. + */ + return -1; + + *ecc_bits = chip->onfi_params.ecc_bits; + + /* The default sector size (ecc codeword size) is 512 */ + *sector_size = 512; + + return 0; +} + +/* + * Get ecc requirement from ONFI parameters ecc requirement. + * If pmecc-cap, pmecc-sector-size in DTS are not specified, this function + * will set them according to ONFI ecc requirement. Otherwise, use the + * value in DTS file. + * return 0 if success. otherwise return error code. + */ +static int pmecc_choose_ecc(struct atmel_nand_host *host, + int *cap, int *sector_size) +{ + /* Get ECC requirement from ONFI parameters */ + *cap = *sector_size = 0; + if (host->nand_chip.onfi_version) { + if (!get_onfi_ecc_param(&host->nand_chip, cap, sector_size)) + dev_info(host->dev, "ONFI params, minimum required ECC: %d bits in %d bytes\n", + *cap, *sector_size); + else + dev_info(host->dev, "NAND chip ECC reqirement is in Extended ONFI parameter, we don't support yet.\n"); + } else { + dev_info(host->dev, "NAND chip is not ONFI compliant, assume ecc_bits is 2 in 512 bytes"); + } + if (*cap == 0 && *sector_size == 0) { + *cap = 2; + *sector_size = 512; + } + + /* If dts file doesn't specify then use the one in ONFI parameters */ + if (host->pmecc_corr_cap == 0) { + /* use the most fitable ecc bits (the near bigger one ) */ + if (*cap <= 2) + host->pmecc_corr_cap = 2; + else if (*cap <= 4) + host->pmecc_corr_cap = 4; + else if (*cap < 8) + host->pmecc_corr_cap = 8; + else if (*cap < 12) + host->pmecc_corr_cap = 12; + else if (*cap < 24) + host->pmecc_corr_cap = 24; + else + return -EINVAL; + } + if (host->pmecc_sector_size == 0) { + /* use the most fitable sector size (the near smaller one ) */ + if (*sector_size >= 1024) + host->pmecc_sector_size = 1024; + else if (*sector_size >= 512) + host->pmecc_sector_size = 512; + else + return -EINVAL; + } + return 0; +} + static int __init atmel_pmecc_nand_init_params(struct platform_device *pdev, struct atmel_nand_host *host) { @@ -916,8 +996,22 @@ static int __init atmel_pmecc_nand_init_params(struct platform_device *pdev, struct resource *regs, *regs_pmerr, *regs_rom; int cap, sector_size, err_no; + err_no = pmecc_choose_ecc(host, &cap, §or_size); + if (err_no) { + dev_err(host->dev, "The NAND flash's ECC requirement are not support!"); + return err_no; + } + + if (cap != host->pmecc_corr_cap || + sector_size != host->pmecc_sector_size) + dev_info(host->dev, "WARNING: Be Caution! Using different PMECC parameters from Nand ONFI ECC reqirement.\n"); + cap = host->pmecc_corr_cap; sector_size = host->pmecc_sector_size; + host->pmecc_lookup_table_offset = (sector_size == 512) ? + host->pmecc_lookup_table_offset_512 : + host->pmecc_lookup_table_offset_1024; + dev_info(host->dev, "Initialize PMECC params, cap: %d, sector: %d\n", cap, sector_size); @@ -1215,7 +1309,7 @@ static void atmel_nand_hwctl(struct mtd_info *mtd, int mode) static int atmel_of_init_port(struct atmel_nand_host *host, struct device_node *np) { - u32 val, table_offset; + u32 val; u32 offset[2]; int ecc_mode; struct atmel_nand_data *board = &host->board; @@ -1259,42 +1353,41 @@ static int atmel_of_init_port(struct atmel_nand_host *host, /* use PMECC, get correction capability, sector size and lookup * table offset. + * If correction bits and sector size are not specified, then find + * them from NAND ONFI parameters. */ - if (of_property_read_u32(np, "atmel,pmecc-cap", &val) != 0) { - dev_err(host->dev, "Cannot decide PMECC Capability\n"); - return -EINVAL; - } else if ((val != 2) && (val != 4) && (val != 8) && (val != 12) && - (val != 24)) { - dev_err(host->dev, - "Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n", - val); - return -EINVAL; + if (of_property_read_u32(np, "atmel,pmecc-cap", &val) == 0) { + if ((val != 2) && (val != 4) && (val != 8) && (val != 12) && + (val != 24)) { + dev_err(host->dev, + "Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n", + val); + return -EINVAL; + } + host->pmecc_corr_cap = (u8)val; } - host->pmecc_corr_cap = (u8)val; - if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) != 0) { - dev_err(host->dev, "Cannot decide PMECC Sector Size\n"); - return -EINVAL; - } else if ((val != 512) && (val != 1024)) { - dev_err(host->dev, - "Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n", - val); - return -EINVAL; + if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) == 0) { + if ((val != 512) && (val != 1024)) { + dev_err(host->dev, + "Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n", + val); + return -EINVAL; + } + host->pmecc_sector_size = (u16)val; } - host->pmecc_sector_size = (u16)val; if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset", offset, 2) != 0) { dev_err(host->dev, "Cannot get PMECC lookup table offset\n"); return -EINVAL; } - table_offset = host->pmecc_sector_size == 512 ? offset[0] : offset[1]; - - if (!table_offset) { + if (!offset[0] && !offset[1]) { dev_err(host->dev, "Invalid PMECC lookup table offset\n"); return -EINVAL; } - host->pmecc_lookup_table_offset = table_offset; + host->pmecc_lookup_table_offset_512 = offset[0]; + host->pmecc_lookup_table_offset_1024 = offset[1]; return 0; } diff --git a/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h index 0bdb2ce4da75..c005a62330b1 100644 --- a/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h +++ b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h @@ -1,6 +1,10 @@ #ifndef __BCM47XXNFLASH_H #define __BCM47XXNFLASH_H +#ifndef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#endif + #include <linux/mtd/mtd.h> #include <linux/mtd/nand.h> diff --git a/drivers/mtd/nand/bcm47xxnflash/main.c b/drivers/mtd/nand/bcm47xxnflash/main.c index 8363a9a5fa3f..7bae569fdc79 100644 --- a/drivers/mtd/nand/bcm47xxnflash/main.c +++ b/drivers/mtd/nand/bcm47xxnflash/main.c @@ -9,14 +9,14 @@ * */ +#include "bcm47xxnflash.h" + #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/platform_device.h> #include <linux/bcma/bcma.h> -#include "bcm47xxnflash.h" - MODULE_DESCRIPTION("NAND flash driver for BCMA bus"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("RafaÅ‚ MiÅ‚ecki"); @@ -77,6 +77,7 @@ static int bcm47xxnflash_remove(struct platform_device *pdev) } static struct platform_driver bcm47xxnflash_driver = { + .probe = bcm47xxnflash_probe, .remove = bcm47xxnflash_remove, .driver = { .name = "bcma_nflash", @@ -88,13 +89,10 @@ static int __init bcm47xxnflash_init(void) { int err; - /* - * Platform device "bcma_nflash" exists on SoCs and is registered very - * early, it won't be added during runtime (use platform_driver_probe). - */ - err = platform_driver_probe(&bcm47xxnflash_driver, bcm47xxnflash_probe); + err = platform_driver_register(&bcm47xxnflash_driver); if (err) - pr_err("Failed to register serial flash driver: %d\n", err); + pr_err("Failed to register bcm47xx nand flash driver: %d\n", + err); return err; } diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c index 595de4012e71..b2ab373c9eef 100644 --- a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c +++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c @@ -9,13 +9,13 @@ * */ +#include "bcm47xxnflash.h" + #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/bcma/bcma.h> -#include "bcm47xxnflash.h" - /* Broadcom uses 1'000'000 but it seems to be too many. Tests on WNDR4500 has * shown ~1000 retries as maxiumum. */ #define NFLASH_READY_RETRIES 10000 diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c index feae55c7b880..94e17af8e450 100644 --- a/drivers/mtd/nand/davinci_nand.c +++ b/drivers/mtd/nand/davinci_nand.c @@ -606,7 +606,7 @@ static int __init nand_davinci_probe(struct platform_device *pdev) if (pdev->id < 0 || pdev->id > 3) return -ENODEV; - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); if (!info) { dev_err(&pdev->dev, "unable to allocate memory\n"); ret = -ENOMEM; @@ -623,11 +623,11 @@ static int __init nand_davinci_probe(struct platform_device *pdev) goto err_nomem; } - vaddr = ioremap(res1->start, resource_size(res1)); - base = ioremap(res2->start, resource_size(res2)); + vaddr = devm_request_and_ioremap(&pdev->dev, res1); + base = devm_request_and_ioremap(&pdev->dev, res2); if (!vaddr || !base) { dev_err(&pdev->dev, "ioremap failed\n"); - ret = -EINVAL; + ret = -EADDRNOTAVAIL; goto err_ioremap; } @@ -717,7 +717,7 @@ static int __init nand_davinci_probe(struct platform_device *pdev) } info->chip.ecc.mode = ecc_mode; - info->clk = clk_get(&pdev->dev, "aemif"); + info->clk = devm_clk_get(&pdev->dev, "aemif"); if (IS_ERR(info->clk)) { ret = PTR_ERR(info->clk); dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret); @@ -845,8 +845,6 @@ err_timing: clk_disable_unprepare(info->clk); err_clk_enable: - clk_put(info->clk); - spin_lock_irq(&davinci_nand_lock); if (ecc_mode == NAND_ECC_HW_SYNDROME) ecc4_busy = false; @@ -855,13 +853,7 @@ err_clk_enable: err_ecc: err_clk: err_ioremap: - if (base) - iounmap(base); - if (vaddr) - iounmap(vaddr); - err_nomem: - kfree(info); return ret; } @@ -874,15 +866,9 @@ static int __exit nand_davinci_remove(struct platform_device *pdev) ecc4_busy = false; spin_unlock_irq(&davinci_nand_lock); - iounmap(info->base); - iounmap(info->vaddr); - nand_release(&info->mtd); clk_disable_unprepare(info->clk); - clk_put(info->clk); - - kfree(info); return 0; } diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index ad6222627fed..f1f7f12ab501 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -176,8 +176,8 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob) ifc_nand_ctrl->page = page_addr; /* Program ROW0/COL0 */ - out_be32(&ifc->ifc_nand.row0, page_addr); - out_be32(&ifc->ifc_nand.col0, (oob ? IFC_NAND_COL_MS : 0) | column); + iowrite32be(page_addr, &ifc->ifc_nand.row0); + iowrite32be((oob ? IFC_NAND_COL_MS : 0) | column, &ifc->ifc_nand.col0); buf_num = page_addr & priv->bufnum_mask; @@ -239,18 +239,19 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int i; /* set the chip select for NAND Transaction */ - out_be32(&ifc->ifc_nand.nand_csel, priv->bank << IFC_NAND_CSEL_SHIFT); + iowrite32be(priv->bank << IFC_NAND_CSEL_SHIFT, + &ifc->ifc_nand.nand_csel); dev_vdbg(priv->dev, "%s: fir0=%08x fcr0=%08x\n", __func__, - in_be32(&ifc->ifc_nand.nand_fir0), - in_be32(&ifc->ifc_nand.nand_fcr0)); + ioread32be(&ifc->ifc_nand.nand_fir0), + ioread32be(&ifc->ifc_nand.nand_fcr0)); ctrl->nand_stat = 0; /* start read/write seq */ - out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT); + iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); /* wait for command complete flag or timeout */ wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, @@ -273,7 +274,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int sector_end = sector + chip->ecc.steps - 1; for (i = sector / 4; i <= sector_end / 4; i++) - eccstat[i] = in_be32(&ifc->ifc_nand.nand_eccstat[i]); + eccstat[i] = ioread32be(&ifc->ifc_nand.nand_eccstat[i]); for (i = sector; i <= sector_end; i++) { errors = check_read_ecc(mtd, ctrl, eccstat, i); @@ -313,31 +314,33 @@ static void fsl_ifc_do_read(struct nand_chip *chip, /* Program FIR/IFC_NAND_FCR0 for Small/Large page */ if (mtd->writesize > 512) { - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir1, 0x0); - - out_be32(&ifc->ifc_nand.nand_fcr0, - (NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) | - (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT)); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(0x0, &ifc->ifc_nand.nand_fir1); + + iowrite32be((NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) | + (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT), + &ifc->ifc_nand.nand_fcr0); } else { - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir1, 0x0); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(0x0, &ifc->ifc_nand.nand_fir1); if (oob) - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_READOOB << IFC_NAND_FCR0_CMD0_SHIFT); + iowrite32be(NAND_CMD_READOOB << + IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); else - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT); + iowrite32be(NAND_CMD_READ0 << + IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); } } @@ -357,7 +360,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, switch (command) { /* READ0 read the entire buffer to use hardware ECC. */ case NAND_CMD_READ0: - out_be32(&ifc->ifc_nand.nand_fbcr, 0); + iowrite32be(0, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, page_addr, 0); ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize; @@ -372,7 +375,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* READOOB reads only the OOB because no ECC is performed. */ case NAND_CMD_READOOB: - out_be32(&ifc->ifc_nand.nand_fbcr, mtd->oobsize - column); + iowrite32be(mtd->oobsize - column, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, column, page_addr, 1); ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize; @@ -388,19 +391,19 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, if (command == NAND_CMD_PARAM) timing = IFC_FIR_OP_RBCD; - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | - (timing << IFC_NAND_FIR0_OP2_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, - command << IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.row3, column); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (timing << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(command << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(column, &ifc->ifc_nand.row3); /* * although currently it's 8 bytes for READID, we always read * the maximum 256 bytes(for PARAM) */ - out_be32(&ifc->ifc_nand.nand_fbcr, 256); + iowrite32be(256, &ifc->ifc_nand.nand_fbcr); ifc_nand_ctrl->read_bytes = 256; set_addr(mtd, 0, 0, 0); @@ -415,16 +418,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* ERASE2 uses the block and page address from ERASE1 */ case NAND_CMD_ERASE2: - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT)); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); - out_be32(&ifc->ifc_nand.nand_fcr0, - (NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) | - (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT)); + iowrite32be((NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) | + (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT), + &ifc->ifc_nand.nand_fcr0); - out_be32(&ifc->ifc_nand.nand_fbcr, 0); + iowrite32be(0, &ifc->ifc_nand.nand_fbcr); ifc_nand_ctrl->read_bytes = 0; fsl_ifc_run_command(mtd); return; @@ -440,26 +443,28 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, (NAND_CMD_SEQIN << IFC_NAND_FCR0_CMD0_SHIFT) | (NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD1_SHIFT); - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_CW1 << IFC_NAND_FIR0_OP4_SHIFT)); + iowrite32be( + (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_CW1 << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); } else { nand_fcr0 = ((NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD1_SHIFT) | (NAND_CMD_SEQIN << IFC_NAND_FCR0_CMD2_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir1, - (IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT)); + iowrite32be( + (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT, + &ifc->ifc_nand.nand_fir1); if (column >= mtd->writesize) nand_fcr0 |= @@ -474,7 +479,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, column -= mtd->writesize; ifc_nand_ctrl->oob = 1; } - out_be32(&ifc->ifc_nand.nand_fcr0, nand_fcr0); + iowrite32be(nand_fcr0, &ifc->ifc_nand.nand_fcr0); set_addr(mtd, column, page_addr, ifc_nand_ctrl->oob); return; } @@ -482,10 +487,11 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* PAGEPROG reuses all of the setup from SEQIN and adds the length */ case NAND_CMD_PAGEPROG: { if (ifc_nand_ctrl->oob) { - out_be32(&ifc->ifc_nand.nand_fbcr, - ifc_nand_ctrl->index - ifc_nand_ctrl->column); + iowrite32be(ifc_nand_ctrl->index - + ifc_nand_ctrl->column, + &ifc->ifc_nand.nand_fbcr); } else { - out_be32(&ifc->ifc_nand.nand_fbcr, 0); + iowrite32be(0, &ifc->ifc_nand.nand_fbcr); } fsl_ifc_run_command(mtd); @@ -493,12 +499,12 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, } case NAND_CMD_STATUS: - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.nand_fbcr, 1); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(1, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, 0, 0); ifc_nand_ctrl->read_bytes = 1; @@ -512,10 +518,10 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, return; case NAND_CMD_RESET: - out_be32(&ifc->ifc_nand.nand_fir0, - IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT); - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT); + iowrite32be(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT, + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); fsl_ifc_run_command(mtd); return; @@ -639,18 +645,18 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) u32 nand_fsr; /* Use READ_STATUS command, but wait for the device to be ready */ - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, NAND_CMD_STATUS << - IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.nand_fbcr, 1); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(1, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, 0, 0); ifc_nand_ctrl->read_bytes = 1; fsl_ifc_run_command(mtd); - nand_fsr = in_be32(&ifc->ifc_nand.nand_fsr); + nand_fsr = ioread32be(&ifc->ifc_nand.nand_fsr); /* * The chip always seems to report that it is @@ -744,34 +750,34 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) uint32_t cs = priv->bank; /* Save CSOR and CSOR_ext */ - csor = in_be32(&ifc->csor_cs[cs].csor); - csor_ext = in_be32(&ifc->csor_cs[cs].csor_ext); + csor = ioread32be(&ifc->csor_cs[cs].csor); + csor_ext = ioread32be(&ifc->csor_cs[cs].csor_ext); /* chage PageSize 8K and SpareSize 1K*/ csor_8k = (csor & ~(CSOR_NAND_PGS_MASK)) | 0x0018C000; - out_be32(&ifc->csor_cs[cs].csor, csor_8k); - out_be32(&ifc->csor_cs[cs].csor_ext, 0x0000400); + iowrite32be(csor_8k, &ifc->csor_cs[cs].csor); + iowrite32be(0x0000400, &ifc->csor_cs[cs].csor_ext); /* READID */ - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.row3, 0x0); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(0x0, &ifc->ifc_nand.row3); - out_be32(&ifc->ifc_nand.nand_fbcr, 0x0); + iowrite32be(0x0, &ifc->ifc_nand.nand_fbcr); /* Program ROW0/COL0 */ - out_be32(&ifc->ifc_nand.row0, 0x0); - out_be32(&ifc->ifc_nand.col0, 0x0); + iowrite32be(0x0, &ifc->ifc_nand.row0); + iowrite32be(0x0, &ifc->ifc_nand.col0); /* set the chip select for NAND Transaction */ - out_be32(&ifc->ifc_nand.nand_csel, cs << IFC_NAND_CSEL_SHIFT); + iowrite32be(cs << IFC_NAND_CSEL_SHIFT, &ifc->ifc_nand.nand_csel); /* start read seq */ - out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT); + iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); /* wait for command complete flag or timeout */ wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, @@ -781,8 +787,8 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n"); /* Restore CSOR and CSOR_ext */ - out_be32(&ifc->csor_cs[cs].csor, csor); - out_be32(&ifc->csor_cs[cs].csor_ext, csor_ext); + iowrite32be(csor, &ifc->csor_cs[cs].csor); + iowrite32be(csor_ext, &ifc->csor_cs[cs].csor_ext); } static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) @@ -799,7 +805,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) /* fill in nand_chip structure */ /* set up function call table */ - if ((in_be32(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16) + if ((ioread32be(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16) chip->read_byte = fsl_ifc_read_byte16; else chip->read_byte = fsl_ifc_read_byte; @@ -813,13 +819,13 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->bbt_td = &bbt_main_descr; chip->bbt_md = &bbt_mirror_descr; - out_be32(&ifc->ifc_nand.ncfgr, 0x0); + iowrite32be(0x0, &ifc->ifc_nand.ncfgr); /* set up nand options */ chip->bbt_options = NAND_BBT_USE_FLASH; - if (in_be32(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) { + if (ioread32be(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) { chip->read_byte = fsl_ifc_read_byte16; chip->options |= NAND_BUSWIDTH_16; } else { @@ -832,7 +838,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.read_page = fsl_ifc_read_page; chip->ecc.write_page = fsl_ifc_write_page; - csor = in_be32(&ifc->csor_cs[priv->bank].csor); + csor = ioread32be(&ifc->csor_cs[priv->bank].csor); /* Hardware generates ECC per 512 Bytes */ chip->ecc.size = 512; @@ -884,7 +890,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.mode = NAND_ECC_SOFT; } - ver = in_be32(&ifc->ifc_rev); + ver = ioread32be(&ifc->ifc_rev); if (ver == FSL_IFC_V1_1_0) fsl_ifc_sram_init(priv); @@ -910,7 +916,7 @@ static int fsl_ifc_chip_remove(struct fsl_ifc_mtd *priv) static int match_bank(struct fsl_ifc_regs __iomem *ifc, int bank, phys_addr_t addr) { - u32 cspr = in_be32(&ifc->cspr_cs[bank].cspr); + u32 cspr = ioread32be(&ifc->cspr_cs[bank].cspr); if (!(cspr & CSPR_V)) return 0; @@ -997,17 +1003,16 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) dev_set_drvdata(priv->dev, priv); - out_be32(&ifc->ifc_nand.nand_evter_en, - IFC_NAND_EVTER_EN_OPC_EN | - IFC_NAND_EVTER_EN_FTOER_EN | - IFC_NAND_EVTER_EN_WPER_EN); + iowrite32be(IFC_NAND_EVTER_EN_OPC_EN | + IFC_NAND_EVTER_EN_FTOER_EN | + IFC_NAND_EVTER_EN_WPER_EN, + &ifc->ifc_nand.nand_evter_en); /* enable NAND Machine Interrupts */ - out_be32(&ifc->ifc_nand.nand_evter_intr_en, - IFC_NAND_EVTER_INTR_OPCIR_EN | - IFC_NAND_EVTER_INTR_FTOERIR_EN | - IFC_NAND_EVTER_INTR_WPERIR_EN); - + iowrite32be(IFC_NAND_EVTER_INTR_OPCIR_EN | + IFC_NAND_EVTER_INTR_FTOERIR_EN | + IFC_NAND_EVTER_INTR_WPERIR_EN, + &ifc->ifc_nand.nand_evter_intr_en); priv->mtd.name = kasprintf(GFP_KERNEL, "%x.flash", (unsigned)res.start); if (!priv->mtd.name) { ret = -ENOMEM; diff --git a/drivers/mtd/nand/gpmi-nand/bch-regs.h b/drivers/mtd/nand/gpmi-nand/bch-regs.h index a0924515c396..588f5374047c 100644 --- a/drivers/mtd/nand/gpmi-nand/bch-regs.h +++ b/drivers/mtd/nand/gpmi-nand/bch-regs.h @@ -61,6 +61,16 @@ & BM_BCH_FLASH0LAYOUT0_ECC0) \ ) +#define MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14 10 +#define MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14 \ + (0x1 << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14) +#define BF_BCH_FLASH0LAYOUT0_GF(v, x) \ + ((GPMI_IS_MX6Q(x) && ((v) == 14)) \ + ? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14) \ + & MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14) \ + : 0 \ + ) + #define BP_BCH_FLASH0LAYOUT0_DATA0_SIZE 0 #define BM_BCH_FLASH0LAYOUT0_DATA0_SIZE \ (0xfff << BP_BCH_FLASH0LAYOUT0_DATA0_SIZE) @@ -93,6 +103,16 @@ & BM_BCH_FLASH0LAYOUT1_ECCN) \ ) +#define MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14 10 +#define MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14 \ + (0x1 << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14) +#define BF_BCH_FLASH0LAYOUT1_GF(v, x) \ + ((GPMI_IS_MX6Q(x) && ((v) == 14)) \ + ? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14) \ + & MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14) \ + : 0 \ + ) + #define BP_BCH_FLASH0LAYOUT1_DATAN_SIZE 0 #define BM_BCH_FLASH0LAYOUT1_DATAN_SIZE \ (0xfff << BP_BCH_FLASH0LAYOUT1_DATAN_SIZE) @@ -103,4 +123,6 @@ ? (((v) >> 2) & MX6Q_BM_BCH_FLASH0LAYOUT1_DATAN_SIZE) \ : ((v) & BM_BCH_FLASH0LAYOUT1_DATAN_SIZE) \ ) + +#define HW_BCH_VERSION 0x00000160 #endif diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index d84699c7968e..4f8857fa48a7 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -208,6 +208,11 @@ void gpmi_dump_info(struct gpmi_nand_data *this) } /* start to print out the BCH info */ + pr_err("Show BCH registers :\n"); + for (i = 0; i <= HW_BCH_VERSION / 0x10 + 1; i++) { + reg = readl(r->bch_regs + i * 0x10); + pr_err("offset 0x%.3x : 0x%.8x\n", i * 0x10, reg); + } pr_err("BCH Geometry :\n"); pr_err("GF length : %u\n", geo->gf_len); pr_err("ECC Strength : %u\n", geo->ecc_strength); @@ -232,6 +237,7 @@ int bch_set_geometry(struct gpmi_nand_data *this) unsigned int metadata_size; unsigned int ecc_strength; unsigned int page_size; + unsigned int gf_len; int ret; if (common_nfc_set_geometry(this)) @@ -242,6 +248,7 @@ int bch_set_geometry(struct gpmi_nand_data *this) metadata_size = bch_geo->metadata_size; ecc_strength = bch_geo->ecc_strength >> 1; page_size = bch_geo->page_size; + gf_len = bch_geo->gf_len; ret = gpmi_enable_clk(this); if (ret) @@ -263,11 +270,13 @@ int bch_set_geometry(struct gpmi_nand_data *this) writel(BF_BCH_FLASH0LAYOUT0_NBLOCKS(block_count) | BF_BCH_FLASH0LAYOUT0_META_SIZE(metadata_size) | BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this) + | BF_BCH_FLASH0LAYOUT0_GF(gf_len, this) | BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block_size, this), r->bch_regs + HW_BCH_FLASH0LAYOUT0); writel(BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(page_size) | BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this) + | BF_BCH_FLASH0LAYOUT1_GF(gf_len, this) | BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this), r->bch_regs + HW_BCH_FLASH0LAYOUT1); diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index e9b1c47e3cf9..717881a3d1b8 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -94,6 +94,25 @@ static inline int get_ecc_strength(struct gpmi_nand_data *this) return round_down(ecc_strength, 2); } +static inline bool gpmi_check_ecc(struct gpmi_nand_data *this) +{ + struct bch_geometry *geo = &this->bch_geometry; + + /* Do the sanity check. */ + if (GPMI_IS_MX23(this) || GPMI_IS_MX28(this)) { + /* The mx23/mx28 only support the GF13. */ + if (geo->gf_len == 14) + return false; + + if (geo->ecc_strength > MXS_ECC_STRENGTH_MAX) + return false; + } else if (GPMI_IS_MX6Q(this)) { + if (geo->ecc_strength > MX6_ECC_STRENGTH_MAX) + return false; + } + return true; +} + int common_nfc_set_geometry(struct gpmi_nand_data *this) { struct bch_geometry *geo = &this->bch_geometry; @@ -112,17 +131,24 @@ int common_nfc_set_geometry(struct gpmi_nand_data *this) /* The default for the length of Galois Field. */ geo->gf_len = 13; - /* The default for chunk size. There is no oobsize greater then 512. */ + /* The default for chunk size. */ geo->ecc_chunk_size = 512; - while (geo->ecc_chunk_size < mtd->oobsize) + while (geo->ecc_chunk_size < mtd->oobsize) { geo->ecc_chunk_size *= 2; /* keep C >= O */ + geo->gf_len = 14; + } geo->ecc_chunk_count = mtd->writesize / geo->ecc_chunk_size; /* We use the same ECC strength for all chunks. */ geo->ecc_strength = get_ecc_strength(this); - if (!geo->ecc_strength) { - pr_err("wrong ECC strength.\n"); + if (!gpmi_check_ecc(this)) { + dev_err(this->dev, + "We can not support this nand chip." + " Its required ecc strength(%d) is beyond our" + " capability(%d).\n", geo->ecc_strength, + (GPMI_IS_MX6Q(this) ? MX6_ECC_STRENGTH_MAX + : MXS_ECC_STRENGTH_MAX)); return -EINVAL; } @@ -920,8 +946,7 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, dma_addr_t auxiliary_phys; unsigned int i; unsigned char *status; - unsigned int failed; - unsigned int corrected; + unsigned int max_bitflips = 0; int ret; pr_debug("page number is : %d\n", page); @@ -945,35 +970,25 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, payload_virt, payload_phys); if (ret) { pr_err("Error in ECC-based read: %d\n", ret); - goto exit_nfc; + return ret; } /* handle the block mark swapping */ block_mark_swapping(this, payload_virt, auxiliary_virt); /* Loop over status bytes, accumulating ECC status. */ - failed = 0; - corrected = 0; - status = auxiliary_virt + nfc_geo->auxiliary_status_offset; + status = auxiliary_virt + nfc_geo->auxiliary_status_offset; for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) { if ((*status == STATUS_GOOD) || (*status == STATUS_ERASED)) continue; if (*status == STATUS_UNCORRECTABLE) { - failed++; + mtd->ecc_stats.failed++; continue; } - corrected += *status; - } - - /* - * Propagate ECC status to the owning MTD only when failed or - * corrected times nearly reaches our ECC correction threshold. - */ - if (failed || corrected >= (nfc_geo->ecc_strength - 1)) { - mtd->ecc_stats.failed += failed; - mtd->ecc_stats.corrected += corrected; + mtd->ecc_stats.corrected += *status; + max_bitflips = max_t(unsigned int, max_bitflips, *status); } if (oob_required) { @@ -995,8 +1010,8 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, this->payload_virt, this->payload_phys, nfc_geo->payload_size, payload_virt, payload_phys); -exit_nfc: - return ret; + + return max_bitflips; } static int gpmi_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip, @@ -1668,8 +1683,8 @@ exit_nfc_init: release_resources(this); exit_acquire_resources: platform_set_drvdata(pdev, NULL); - kfree(this); dev_err(this->dev, "driver registration failed: %d\n", ret); + kfree(this); return ret; } diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h index 3d93a5e39090..072947731277 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h @@ -284,6 +284,10 @@ extern int gpmi_read_page(struct gpmi_nand_data *, #define STATUS_ERASED 0xff #define STATUS_UNCORRECTABLE 0xfe +/* BCH's bit correction capability. */ +#define MXS_ECC_STRENGTH_MAX 20 /* mx23 and mx28 */ +#define MX6_ECC_STRENGTH_MAX 40 + /* Use the platform_id to distinguish different Archs. */ #define IS_MX23 0x0 #define IS_MX28 0x1 diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 60ac5b98b718..07e5784e5cd3 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -530,12 +530,23 @@ static void send_page_v1(struct mtd_info *mtd, unsigned int ops) static void send_read_id_v3(struct mxc_nand_host *host) { + struct nand_chip *this = &host->nand; + /* Read ID into main buffer */ writel(NFC_ID, NFC_V3_LAUNCH); wait_op_done(host, true); memcpy32_fromio(host->data_buf, host->main_area0, 16); + + if (this->options & NAND_BUSWIDTH_16) { + /* compress the ID info */ + host->data_buf[1] = host->data_buf[2]; + host->data_buf[2] = host->data_buf[4]; + host->data_buf[3] = host->data_buf[6]; + host->data_buf[4] = host->data_buf[8]; + host->data_buf[5] = host->data_buf[10]; + } } /* Request the NANDFC to perform a read of the NAND device ID. */ diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 3766682a0289..43214151b882 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -825,13 +825,8 @@ static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip, static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) { - unsigned long timeo = jiffies; int status, state = chip->state; - - if (state == FL_ERASING) - timeo += (HZ * 400) / 1000; - else - timeo += (HZ * 20) / 1000; + unsigned long timeo = (state == FL_ERASING ? 400 : 20); led_trigger_event(nand_led_trigger, LED_FULL); @@ -849,6 +844,7 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) if (in_interrupt() || oops_in_progress) panic_nand_wait(mtd, chip, timeo); else { + timeo = jiffies + msecs_to_jiffies(timeo); while (time_before(jiffies, timeo)) { if (chip->dev_ready) { if (chip->dev_ready(mtd)) diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c index b7cfe0d37121..053c9a2d47c3 100644 --- a/drivers/mtd/nand/nand_ecc.c +++ b/drivers/mtd/nand/nand_ecc.c @@ -55,8 +55,7 @@ struct mtd_info; #define MODULE_AUTHOR(x) /* x */ #define MODULE_DESCRIPTION(x) /* x */ -#define printk printf -#define KERN_ERR "" +#define pr_err printf #endif /* @@ -507,7 +506,7 @@ int __nand_correct_data(unsigned char *buf, if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1) return 1; /* error in ECC data; no action needed */ - printk(KERN_ERR "uncorrectable error : "); + pr_err("%s: uncorrectable ECC error", __func__); return -1; } EXPORT_SYMBOL(__nand_correct_data); diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 8f30d385bfa3..891c52a30e6a 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -1468,12 +1468,12 @@ int do_read_error(struct nandsim *ns, int num) void do_bit_flips(struct nandsim *ns, int num) { - if (bitflips && random32() < (1 << 22)) { + if (bitflips && prandom_u32() < (1 << 22)) { int flips = 1; if (bitflips > 1) - flips = (random32() % (int) bitflips) + 1; + flips = (prandom_u32() % (int) bitflips) + 1; while (flips--) { - int pos = random32() % (num * 8); + int pos = prandom_u32() % (num * 8); ns->buf.byte[pos / 8] ^= (1 << (pos % 8)); NS_WARN("read_page: flipping bit %d in page %d " "reading from %d ecc: corrected=%u failed=%u\n", diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 1d333497cfcb..8e820ddf4e08 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -22,9 +22,12 @@ #include <linux/omap-dma.h> #include <linux/io.h> #include <linux/slab.h> +#include <linux/of.h> +#include <linux/of_device.h> #ifdef CONFIG_MTD_NAND_OMAP_BCH #include <linux/bch.h> +#include <linux/platform_data/elm.h> #endif #include <linux/platform_data/mtd-nand-omap2.h> @@ -117,6 +120,33 @@ #define OMAP24XX_DMA_GPMC 4 +#define BCH8_MAX_ERROR 8 /* upto 8 bit correctable */ +#define BCH4_MAX_ERROR 4 /* upto 4 bit correctable */ + +#define SECTOR_BYTES 512 +/* 4 bit padding to make byte aligned, 56 = 52 + 4 */ +#define BCH4_BIT_PAD 4 +#define BCH8_ECC_MAX ((SECTOR_BYTES + BCH8_ECC_OOB_BYTES) * 8) +#define BCH4_ECC_MAX ((SECTOR_BYTES + BCH4_ECC_OOB_BYTES) * 8) + +/* GPMC ecc engine settings for read */ +#define BCH_WRAPMODE_1 1 /* BCH wrap mode 1 */ +#define BCH8R_ECC_SIZE0 0x1a /* ecc_size0 = 26 */ +#define BCH8R_ECC_SIZE1 0x2 /* ecc_size1 = 2 */ +#define BCH4R_ECC_SIZE0 0xd /* ecc_size0 = 13 */ +#define BCH4R_ECC_SIZE1 0x3 /* ecc_size1 = 3 */ + +/* GPMC ecc engine settings for write */ +#define BCH_WRAPMODE_6 6 /* BCH wrap mode 6 */ +#define BCH_ECC_SIZE0 0x0 /* ecc_size0 = 0, no oob protection */ +#define BCH_ECC_SIZE1 0x20 /* ecc_size1 = 32 */ + +#ifdef CONFIG_MTD_NAND_OMAP_BCH +static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc, + 0xac, 0x6b, 0xff, 0x99, 0x7b}; +static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10}; +#endif + /* oob info generated runtime depending on ecc algorithm and layout selected */ static struct nand_ecclayout omap_oobinfo; /* Define some generic bad / good block scan pattern which are used @@ -156,6 +186,9 @@ struct omap_nand_info { #ifdef CONFIG_MTD_NAND_OMAP_BCH struct bch_control *bch; struct nand_ecclayout ecclayout; + bool is_elm_used; + struct device *elm_dev; + struct device_node *of_node; #endif }; @@ -1031,6 +1064,13 @@ static int omap_dev_ready(struct mtd_info *mtd) * omap3_enable_hwecc_bch - Program OMAP3 GPMC to perform BCH ECC correction * @mtd: MTD device structure * @mode: Read/Write mode + * + * When using BCH, sector size is hardcoded to 512 bytes. + * Using wrapping mode 6 both for reading and writing if ELM module not uses + * for error correction. + * On writing, + * eccsize0 = 0 (no additional protected byte in spare area) + * eccsize1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area) */ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode) { @@ -1039,32 +1079,57 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode) struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, mtd); struct nand_chip *chip = mtd->priv; - u32 val; + u32 val, wr_mode; + unsigned int ecc_size1, ecc_size0; + + /* Using wrapping mode 6 for writing */ + wr_mode = BCH_WRAPMODE_6; - nerrors = (info->nand.ecc.bytes == 13) ? 8 : 4; - dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0; - nsectors = 1; /* - * Program GPMC to perform correction on one 512-byte sector at a time. - * Using 4 sectors at a time (i.e. ecc.size = 2048) is also possible and - * gives a slight (5%) performance gain (but requires additional code). + * ECC engine enabled for valid ecc_size0 nibbles + * and disabled for ecc_size1 nibbles. */ + ecc_size0 = BCH_ECC_SIZE0; + ecc_size1 = BCH_ECC_SIZE1; + + /* Perform ecc calculation on 512-byte sector */ + nsectors = 1; + + /* Update number of error correction */ + nerrors = info->nand.ecc.strength; + + /* Multi sector reading/writing for NAND flash with page size < 4096 */ + if (info->is_elm_used && (mtd->writesize <= 4096)) { + if (mode == NAND_ECC_READ) { + /* Using wrapping mode 1 for reading */ + wr_mode = BCH_WRAPMODE_1; + + /* + * ECC engine enabled for ecc_size0 nibbles + * and disabled for ecc_size1 nibbles. + */ + ecc_size0 = (nerrors == 8) ? + BCH8R_ECC_SIZE0 : BCH4R_ECC_SIZE0; + ecc_size1 = (nerrors == 8) ? + BCH8R_ECC_SIZE1 : BCH4R_ECC_SIZE1; + } + + /* Perform ecc calculation for one page (< 4096) */ + nsectors = info->nand.ecc.steps; + } writel(ECC1, info->reg.gpmc_ecc_control); - /* - * When using BCH, sector size is hardcoded to 512 bytes. - * Here we are using wrapping mode 6 both for reading and writing, with: - * size0 = 0 (no additional protected byte in spare area) - * size1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area) - */ - val = (32 << ECCSIZE1_SHIFT) | (0 << ECCSIZE0_SHIFT); + /* Configure ecc size for BCH */ + val = (ecc_size1 << ECCSIZE1_SHIFT) | (ecc_size0 << ECCSIZE0_SHIFT); writel(val, info->reg.gpmc_ecc_size_config); + dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0; + /* BCH configuration */ val = ((1 << 16) | /* enable BCH */ (((nerrors == 8) ? 1 : 0) << 12) | /* 8 or 4 bits */ - (0x06 << 8) | /* wrap mode = 6 */ + (wr_mode << 8) | /* wrap mode */ (dev_width << 7) | /* bus width */ (((nsectors-1) & 0x7) << 4) | /* number of sectors */ (info->gpmc_cs << 1) | /* ECC CS */ @@ -1072,7 +1137,7 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode) writel(val, info->reg.gpmc_ecc_config); - /* clear ecc and enable bits */ + /* Clear ecc and enable bits */ writel(ECCCLEAR | ECC1, info->reg.gpmc_ecc_control); } @@ -1162,6 +1227,298 @@ static int omap3_calculate_ecc_bch8(struct mtd_info *mtd, const u_char *dat, } /** + * omap3_calculate_ecc_bch - Generate bytes of ECC bytes + * @mtd: MTD device structure + * @dat: The pointer to data on which ecc is computed + * @ecc_code: The ecc_code buffer + * + * Support calculating of BCH4/8 ecc vectors for the page + */ +static int omap3_calculate_ecc_bch(struct mtd_info *mtd, const u_char *dat, + u_char *ecc_code) +{ + struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, + mtd); + unsigned long nsectors, bch_val1, bch_val2, bch_val3, bch_val4; + int i, eccbchtsel; + + nsectors = ((readl(info->reg.gpmc_ecc_config) >> 4) & 0x7) + 1; + /* + * find BCH scheme used + * 0 -> BCH4 + * 1 -> BCH8 + */ + eccbchtsel = ((readl(info->reg.gpmc_ecc_config) >> 12) & 0x3); + + for (i = 0; i < nsectors; i++) { + + /* Read hw-computed remainder */ + bch_val1 = readl(info->reg.gpmc_bch_result0[i]); + bch_val2 = readl(info->reg.gpmc_bch_result1[i]); + if (eccbchtsel) { + bch_val3 = readl(info->reg.gpmc_bch_result2[i]); + bch_val4 = readl(info->reg.gpmc_bch_result3[i]); + } + + if (eccbchtsel) { + /* BCH8 ecc scheme */ + *ecc_code++ = (bch_val4 & 0xFF); + *ecc_code++ = ((bch_val3 >> 24) & 0xFF); + *ecc_code++ = ((bch_val3 >> 16) & 0xFF); + *ecc_code++ = ((bch_val3 >> 8) & 0xFF); + *ecc_code++ = (bch_val3 & 0xFF); + *ecc_code++ = ((bch_val2 >> 24) & 0xFF); + *ecc_code++ = ((bch_val2 >> 16) & 0xFF); + *ecc_code++ = ((bch_val2 >> 8) & 0xFF); + *ecc_code++ = (bch_val2 & 0xFF); + *ecc_code++ = ((bch_val1 >> 24) & 0xFF); + *ecc_code++ = ((bch_val1 >> 16) & 0xFF); + *ecc_code++ = ((bch_val1 >> 8) & 0xFF); + *ecc_code++ = (bch_val1 & 0xFF); + /* + * Setting 14th byte to zero to handle + * erased page & maintain compatibility + * with RBL + */ + *ecc_code++ = 0x0; + } else { + /* BCH4 ecc scheme */ + *ecc_code++ = ((bch_val2 >> 12) & 0xFF); + *ecc_code++ = ((bch_val2 >> 4) & 0xFF); + *ecc_code++ = ((bch_val2 & 0xF) << 4) | + ((bch_val1 >> 28) & 0xF); + *ecc_code++ = ((bch_val1 >> 20) & 0xFF); + *ecc_code++ = ((bch_val1 >> 12) & 0xFF); + *ecc_code++ = ((bch_val1 >> 4) & 0xFF); + *ecc_code++ = ((bch_val1 & 0xF) << 4); + /* + * Setting 8th byte to zero to handle + * erased page + */ + *ecc_code++ = 0x0; + } + } + + return 0; +} + +/** + * erased_sector_bitflips - count bit flips + * @data: data sector buffer + * @oob: oob buffer + * @info: omap_nand_info + * + * Check the bit flips in erased page falls below correctable level. + * If falls below, report the page as erased with correctable bit + * flip, else report as uncorrectable page. + */ +static int erased_sector_bitflips(u_char *data, u_char *oob, + struct omap_nand_info *info) +{ + int flip_bits = 0, i; + + for (i = 0; i < info->nand.ecc.size; i++) { + flip_bits += hweight8(~data[i]); + if (flip_bits > info->nand.ecc.strength) + return 0; + } + + for (i = 0; i < info->nand.ecc.bytes - 1; i++) { + flip_bits += hweight8(~oob[i]); + if (flip_bits > info->nand.ecc.strength) + return 0; + } + + /* + * Bit flips falls in correctable level. + * Fill data area with 0xFF + */ + if (flip_bits) { + memset(data, 0xFF, info->nand.ecc.size); + memset(oob, 0xFF, info->nand.ecc.bytes); + } + + return flip_bits; +} + +/** + * omap_elm_correct_data - corrects page data area in case error reported + * @mtd: MTD device structure + * @data: page data + * @read_ecc: ecc read from nand flash + * @calc_ecc: ecc read from HW ECC registers + * + * Calculated ecc vector reported as zero in case of non-error pages. + * In case of error/erased pages non-zero error vector is reported. + * In case of non-zero ecc vector, check read_ecc at fixed offset + * (x = 13/7 in case of BCH8/4 == 0) to find page programmed or not. + * To handle bit flips in this data, count the number of 0's in + * read_ecc[x] and check if it greater than 4. If it is less, it is + * programmed page, else erased page. + * + * 1. If page is erased, check with standard ecc vector (ecc vector + * for erased page to find any bit flip). If check fails, bit flip + * is present in erased page. Count the bit flips in erased page and + * if it falls under correctable level, report page with 0xFF and + * update the correctable bit information. + * 2. If error is reported on programmed page, update elm error + * vector and correct the page with ELM error correction routine. + * + */ +static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data, + u_char *read_ecc, u_char *calc_ecc) +{ + struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, + mtd); + int eccsteps = info->nand.ecc.steps; + int i , j, stat = 0; + int eccsize, eccflag, ecc_vector_size; + struct elm_errorvec err_vec[ERROR_VECTOR_MAX]; + u_char *ecc_vec = calc_ecc; + u_char *spare_ecc = read_ecc; + u_char *erased_ecc_vec; + enum bch_ecc type; + bool is_error_reported = false; + + /* Initialize elm error vector to zero */ + memset(err_vec, 0, sizeof(err_vec)); + + if (info->nand.ecc.strength == BCH8_MAX_ERROR) { + type = BCH8_ECC; + erased_ecc_vec = bch8_vector; + } else { + type = BCH4_ECC; + erased_ecc_vec = bch4_vector; + } + + ecc_vector_size = info->nand.ecc.bytes; + + /* + * Remove extra byte padding for BCH8 RBL + * compatibility and erased page handling + */ + eccsize = ecc_vector_size - 1; + + for (i = 0; i < eccsteps ; i++) { + eccflag = 0; /* initialize eccflag */ + + /* + * Check any error reported, + * In case of error, non zero ecc reported. + */ + + for (j = 0; (j < eccsize); j++) { + if (calc_ecc[j] != 0) { + eccflag = 1; /* non zero ecc, error present */ + break; + } + } + + if (eccflag == 1) { + /* + * Set threshold to minimum of 4, half of ecc.strength/2 + * to allow max bit flip in byte to 4 + */ + unsigned int threshold = min_t(unsigned int, 4, + info->nand.ecc.strength / 2); + + /* + * Check data area is programmed by counting + * number of 0's at fixed offset in spare area. + * Checking count of 0's against threshold. + * In case programmed page expects at least threshold + * zeros in byte. + * If zeros are less than threshold for programmed page/ + * zeros are more than threshold erased page, either + * case page reported as uncorrectable. + */ + if (hweight8(~read_ecc[eccsize]) >= threshold) { + /* + * Update elm error vector as + * data area is programmed + */ + err_vec[i].error_reported = true; + is_error_reported = true; + } else { + /* Error reported in erased page */ + int bitflip_count; + u_char *buf = &data[info->nand.ecc.size * i]; + + if (memcmp(calc_ecc, erased_ecc_vec, eccsize)) { + bitflip_count = erased_sector_bitflips( + buf, read_ecc, info); + + if (bitflip_count) + stat += bitflip_count; + else + return -EINVAL; + } + } + } + + /* Update the ecc vector */ + calc_ecc += ecc_vector_size; + read_ecc += ecc_vector_size; + } + + /* Check if any error reported */ + if (!is_error_reported) + return 0; + + /* Decode BCH error using ELM module */ + elm_decode_bch_error_page(info->elm_dev, ecc_vec, err_vec); + + for (i = 0; i < eccsteps; i++) { + if (err_vec[i].error_reported) { + for (j = 0; j < err_vec[i].error_count; j++) { + u32 bit_pos, byte_pos, error_max, pos; + + if (type == BCH8_ECC) + error_max = BCH8_ECC_MAX; + else + error_max = BCH4_ECC_MAX; + + if (info->nand.ecc.strength == BCH8_MAX_ERROR) + pos = err_vec[i].error_loc[j]; + else + /* Add 4 to take care 4 bit padding */ + pos = err_vec[i].error_loc[j] + + BCH4_BIT_PAD; + + /* Calculate bit position of error */ + bit_pos = pos % 8; + + /* Calculate byte position of error */ + byte_pos = (error_max - pos - 1) / 8; + + if (pos < error_max) { + if (byte_pos < 512) + data[byte_pos] ^= 1 << bit_pos; + else + spare_ecc[byte_pos - 512] ^= + 1 << bit_pos; + } + /* else, not interested to correct ecc */ + } + } + + /* Update number of correctable errors */ + stat += err_vec[i].error_count; + + /* Update page data with sector size */ + data += info->nand.ecc.size; + spare_ecc += ecc_vector_size; + } + + for (i = 0; i < eccsteps; i++) + /* Return error if uncorrectable error present */ + if (err_vec[i].error_uncorrectable) + return -EINVAL; + + return stat; +} + +/** * omap3_correct_data_bch - Decode received data and correct errors * @mtd: MTD device structure * @data: page data @@ -1194,6 +1551,92 @@ static int omap3_correct_data_bch(struct mtd_info *mtd, u_char *data, } /** + * omap_write_page_bch - BCH ecc based write page function for entire page + * @mtd: mtd info structure + * @chip: nand chip info structure + * @buf: data buffer + * @oob_required: must write chip->oob_poi to OOB + * + * Custom write page method evolved to support multi sector writing in one shot + */ +static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip, + const uint8_t *buf, int oob_required) +{ + int i; + uint8_t *ecc_calc = chip->buffers->ecccalc; + uint32_t *eccpos = chip->ecc.layout->eccpos; + + /* Enable GPMC ecc engine */ + chip->ecc.hwctl(mtd, NAND_ECC_WRITE); + + /* Write data */ + chip->write_buf(mtd, buf, mtd->writesize); + + /* Update ecc vector from GPMC result registers */ + chip->ecc.calculate(mtd, buf, &ecc_calc[0]); + + for (i = 0; i < chip->ecc.total; i++) + chip->oob_poi[eccpos[i]] = ecc_calc[i]; + + /* Write ecc vector to OOB area */ + chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); + return 0; +} + +/** + * omap_read_page_bch - BCH ecc based page read function for entire page + * @mtd: mtd info structure + * @chip: nand chip info structure + * @buf: buffer to store read data + * @oob_required: caller requires OOB data read to chip->oob_poi + * @page: page number to read + * + * For BCH ecc scheme, GPMC used for syndrome calculation and ELM module + * used for error correction. + * Custom method evolved to support ELM error correction & multi sector + * reading. On reading page data area is read along with OOB data with + * ecc engine enabled. ecc vector updated after read of OOB data. + * For non error pages ecc vector reported as zero. + */ +static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page) +{ + uint8_t *ecc_calc = chip->buffers->ecccalc; + uint8_t *ecc_code = chip->buffers->ecccode; + uint32_t *eccpos = chip->ecc.layout->eccpos; + uint8_t *oob = &chip->oob_poi[eccpos[0]]; + uint32_t oob_pos = mtd->writesize + chip->ecc.layout->eccpos[0]; + int stat; + unsigned int max_bitflips = 0; + + /* Enable GPMC ecc engine */ + chip->ecc.hwctl(mtd, NAND_ECC_READ); + + /* Read data */ + chip->read_buf(mtd, buf, mtd->writesize); + + /* Read oob bytes */ + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_pos, -1); + chip->read_buf(mtd, oob, chip->ecc.total); + + /* Calculate ecc bytes */ + chip->ecc.calculate(mtd, buf, ecc_calc); + + memcpy(ecc_code, &chip->oob_poi[eccpos[0]], chip->ecc.total); + + stat = chip->ecc.correct(mtd, buf, ecc_code, ecc_calc); + + if (stat < 0) { + mtd->ecc_stats.failed++; + } else { + mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } + + return max_bitflips; +} + +/** * omap3_free_bch - Release BCH ecc resources * @mtd: MTD device structure */ @@ -1218,43 +1661,86 @@ static int omap3_init_bch(struct mtd_info *mtd, int ecc_opt) struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, mtd); #ifdef CONFIG_MTD_NAND_OMAP_BCH8 - const int hw_errors = 8; + const int hw_errors = BCH8_MAX_ERROR; #else - const int hw_errors = 4; + const int hw_errors = BCH4_MAX_ERROR; #endif + enum bch_ecc bch_type; + const __be32 *parp; + int lenp; + struct device_node *elm_node; + info->bch = NULL; - max_errors = (ecc_opt == OMAP_ECC_BCH8_CODE_HW) ? 8 : 4; + max_errors = (ecc_opt == OMAP_ECC_BCH8_CODE_HW) ? + BCH8_MAX_ERROR : BCH4_MAX_ERROR; if (max_errors != hw_errors) { pr_err("cannot configure %d-bit BCH ecc, only %d-bit supported", max_errors, hw_errors); goto fail; } - /* software bch library is only used to detect and locate errors */ - info->bch = init_bch(13, max_errors, 0x201b /* hw polynomial */); - if (!info->bch) - goto fail; + info->nand.ecc.size = 512; + info->nand.ecc.hwctl = omap3_enable_hwecc_bch; + info->nand.ecc.mode = NAND_ECC_HW; + info->nand.ecc.strength = max_errors; - info->nand.ecc.size = 512; - info->nand.ecc.hwctl = omap3_enable_hwecc_bch; - info->nand.ecc.correct = omap3_correct_data_bch; - info->nand.ecc.mode = NAND_ECC_HW; + if (hw_errors == BCH8_MAX_ERROR) + bch_type = BCH8_ECC; + else + bch_type = BCH4_ECC; - /* - * The number of corrected errors in an ecc block that will trigger - * block scrubbing defaults to the ecc strength (4 or 8). - * Set mtd->bitflip_threshold here to define a custom threshold. - */ + /* Detect availability of ELM module */ + parp = of_get_property(info->of_node, "elm_id", &lenp); + if ((parp == NULL) && (lenp != (sizeof(void *) * 2))) { + pr_err("Missing elm_id property, fall back to Software BCH\n"); + info->is_elm_used = false; + } else { + struct platform_device *pdev; - if (max_errors == 8) { - info->nand.ecc.strength = 8; - info->nand.ecc.bytes = 13; - info->nand.ecc.calculate = omap3_calculate_ecc_bch8; + elm_node = of_find_node_by_phandle(be32_to_cpup(parp)); + pdev = of_find_device_by_node(elm_node); + info->elm_dev = &pdev->dev; + elm_config(info->elm_dev, bch_type); + info->is_elm_used = true; + } + + if (info->is_elm_used && (mtd->writesize <= 4096)) { + + if (hw_errors == BCH8_MAX_ERROR) + info->nand.ecc.bytes = BCH8_SIZE; + else + info->nand.ecc.bytes = BCH4_SIZE; + + info->nand.ecc.correct = omap_elm_correct_data; + info->nand.ecc.calculate = omap3_calculate_ecc_bch; + info->nand.ecc.read_page = omap_read_page_bch; + info->nand.ecc.write_page = omap_write_page_bch; } else { - info->nand.ecc.strength = 4; - info->nand.ecc.bytes = 7; - info->nand.ecc.calculate = omap3_calculate_ecc_bch4; + /* + * software bch library is only used to detect and + * locate errors + */ + info->bch = init_bch(13, max_errors, + 0x201b /* hw polynomial */); + if (!info->bch) + goto fail; + + info->nand.ecc.correct = omap3_correct_data_bch; + + /* + * The number of corrected errors in an ecc block that will + * trigger block scrubbing defaults to the ecc strength (4 or 8) + * Set mtd->bitflip_threshold here to define a custom threshold. + */ + + if (max_errors == 8) { + info->nand.ecc.bytes = 13; + info->nand.ecc.calculate = omap3_calculate_ecc_bch8; + } else { + info->nand.ecc.bytes = 7; + info->nand.ecc.calculate = omap3_calculate_ecc_bch4; + } } pr_info("enabling NAND BCH ecc with %d-bit correction\n", max_errors); @@ -1270,7 +1756,7 @@ fail: */ static int omap3_init_bch_tail(struct mtd_info *mtd) { - int i, steps; + int i, steps, offset; struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, mtd); struct nand_ecclayout *layout = &info->ecclayout; @@ -1292,11 +1778,21 @@ static int omap3_init_bch_tail(struct mtd_info *mtd) goto fail; } + /* ECC layout compatible with RBL for BCH8 */ + if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE)) + offset = 2; + else + offset = mtd->oobsize - layout->eccbytes; + /* put ecc bytes at oob tail */ for (i = 0; i < layout->eccbytes; i++) - layout->eccpos[i] = mtd->oobsize-layout->eccbytes+i; + layout->eccpos[i] = offset + i; + + if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE)) + layout->oobfree[0].offset = 2 + layout->eccbytes * steps; + else + layout->oobfree[0].offset = 2; - layout->oobfree[0].offset = 2; layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes; info->nand.ecc.layout = layout; @@ -1360,6 +1856,9 @@ static int omap_nand_probe(struct platform_device *pdev) info->nand.options = pdata->devsize; info->nand.options |= NAND_SKIP_BBTSCAN; +#ifdef CONFIG_MTD_NAND_OMAP_BCH + info->of_node = pdata->of_node; +#endif res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c index dbd3aa574eaf..30bd907a260a 100644 --- a/drivers/mtd/ofpart.c +++ b/drivers/mtd/ofpart.c @@ -174,7 +174,14 @@ out: return rc; } +static void __exit ofpart_parser_exit(void) +{ + deregister_mtd_parser(&ofpart_parser); + deregister_mtd_parser(&ofoldpart_parser); +} + module_init(ofpart_parser_init); +module_exit(ofpart_parser_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree"); diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c index db2f22e7966a..70106607c247 100644 --- a/drivers/mtd/tests/mtd_nandecctest.c +++ b/drivers/mtd/tests/mtd_nandecctest.c @@ -44,7 +44,7 @@ struct nand_ecc_test { static void single_bit_error_data(void *error_data, void *correct_data, size_t size) { - unsigned int offset = random32() % (size * BITS_PER_BYTE); + unsigned int offset = prandom_u32() % (size * BITS_PER_BYTE); memcpy(error_data, correct_data, size); __change_bit_le(offset, error_data); @@ -55,9 +55,9 @@ static void double_bit_error_data(void *error_data, void *correct_data, { unsigned int offset[2]; - offset[0] = random32() % (size * BITS_PER_BYTE); + offset[0] = prandom_u32() % (size * BITS_PER_BYTE); do { - offset[1] = random32() % (size * BITS_PER_BYTE); + offset[1] = prandom_u32() % (size * BITS_PER_BYTE); } while (offset[0] == offset[1]); memcpy(error_data, correct_data, size); @@ -68,7 +68,7 @@ static void double_bit_error_data(void *error_data, void *correct_data, static unsigned int random_ecc_bit(size_t size) { - unsigned int offset = random32() % (3 * BITS_PER_BYTE); + unsigned int offset = prandom_u32() % (3 * BITS_PER_BYTE); if (size == 256) { /* @@ -76,7 +76,7 @@ static unsigned int random_ecc_bit(size_t size) * and 17th bit) in ECC code for 256 byte data block */ while (offset == 16 || offset == 17) - offset = random32() % (3 * BITS_PER_BYTE); + offset = prandom_u32() % (3 * BITS_PER_BYTE); } return offset; diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index 2d7e6cffd6d4..787f539d16ca 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -55,7 +55,7 @@ static int rand_eb(void) unsigned int eb; again: - eb = random32(); + eb = prandom_u32(); /* Read or write up 2 eraseblocks at a time - hence 'ebcnt - 1' */ eb %= (ebcnt - 1); if (bbt[eb]) @@ -67,7 +67,7 @@ static int rand_offs(void) { unsigned int offs; - offs = random32(); + offs = prandom_u32(); offs %= bufsize; return offs; } @@ -76,7 +76,7 @@ static int rand_len(int offs) { unsigned int len; - len = random32(); + len = prandom_u32(); len %= (bufsize - offs); return len; } @@ -191,7 +191,7 @@ static int do_write(void) static int do_operation(void) { - if (random32() & 1) + if (prandom_u32() & 1) return do_read(); else return do_write(); diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c index c4cde1e9eddb..3a9f6a6a79f9 100644 --- a/drivers/mtd/tests/mtd_torturetest.c +++ b/drivers/mtd/tests/mtd_torturetest.c @@ -208,7 +208,7 @@ static inline int write_pattern(int ebnum, void *buf) static int __init tort_init(void) { int err = 0, i, infinite = !cycles_count; - int bad_ebs[ebcnt]; + int *bad_ebs; printk(KERN_INFO "\n"); printk(KERN_INFO "=================================================\n"); @@ -250,28 +250,24 @@ static int __init tort_init(void) err = -ENOMEM; patt_5A5 = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!patt_5A5) { - pr_err("error: cannot allocate memory\n"); + if (!patt_5A5) goto out_mtd; - } patt_A5A = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!patt_A5A) { - pr_err("error: cannot allocate memory\n"); + if (!patt_A5A) goto out_patt_5A5; - } patt_FF = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!patt_FF) { - pr_err("error: cannot allocate memory\n"); + if (!patt_FF) goto out_patt_A5A; - } check_buf = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!check_buf) { - pr_err("error: cannot allocate memory\n"); + if (!check_buf) goto out_patt_FF; - } + + bad_ebs = kcalloc(ebcnt, sizeof(*bad_ebs), GFP_KERNEL); + if (!bad_ebs) + goto out_check_buf; err = 0; @@ -290,7 +286,6 @@ static int __init tort_init(void) /* * Check if there is a bad eraseblock among those we are going to test. */ - memset(&bad_ebs[0], 0, sizeof(int) * ebcnt); if (mtd_can_have_bb(mtd)) { for (i = eb; i < eb + ebcnt; i++) { err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize); @@ -394,6 +389,8 @@ out: pr_info("finished after %u erase cycles\n", erase_cycles); + kfree(bad_ebs); +out_check_buf: kfree(check_buf); out_patt_FF: kfree(patt_FF); diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 33f8f3b2c9b2..cba89fcd1587 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -86,7 +86,7 @@ static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { if (ubi->dbg.emulate_bitflips) - return !(random32() % 200); + return !(prandom_u32() % 200); return 0; } @@ -100,7 +100,7 @@ static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) { if (ubi->dbg.emulate_io_failures) - return !(random32() % 500); + return !(prandom_u32() % 500); return 0; } @@ -114,7 +114,7 @@ static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { if (ubi->dbg.emulate_io_failures) - return !(random32() % 400); + return !(prandom_u32() % 400); return 0; } diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a7efec293037..9b017d9c58e9 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -381,7 +381,7 @@ static void b44_set_flow_ctrl(struct b44 *bp, u32 local, u32 remote) } #ifdef CONFIG_BCM47XX -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> static void b44_wap54g10_workaround(struct b44 *bp) { char buf[20]; @@ -393,7 +393,7 @@ static void b44_wap54g10_workaround(struct b44 *bp) * see https://dev.openwrt.org/ticket/146 * check and reset bit "isolate" */ - if (nvram_getenv("boardnum", buf, sizeof(buf)) < 0) + if (bcm47xx_nvram_getenv("boardnum", buf, sizeof(buf)) < 0) return; if (simple_strtoul(buf, NULL, 0) == 2) { err = __b44_readphy(bp, 0, MII_BMCR, &val); diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index bf985c04524e..639049d7e92d 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -15,7 +15,7 @@ #include <linux/mii.h> #include <linux/interrupt.h> #include <linux/dma-mapping.h> -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> static const struct bcma_device_id bgmac_bcma_tbl[] = { BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS), @@ -908,7 +908,7 @@ static void bgmac_chip_reset(struct bgmac *bgmac) BGMAC_CHIPCTL_1_IF_TYPE_RMII; char buf[2]; - if (nvram_getenv("et_swtype", buf, 1) > 0) { + if (bcm47xx_nvram_getenv("et_swtype", buf, 1) > 0) { if (kstrtou8(buf, 0, &et_swtype)) bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n", buf); @@ -1386,7 +1386,7 @@ static int bgmac_probe(struct bcma_device *core) } bgmac->int_mask = BGMAC_IS_ERRMASK | BGMAC_IS_RX | BGMAC_IS_TX_MASK; - if (nvram_getenv("et0_no_txint", NULL, 0) == 0) + if (bcm47xx_nvram_getenv("et0_no_txint", NULL, 0) == 0) bgmac->int_mask &= ~BGMAC_IS_TX_MASK; /* TODO: reset the external phy. Specs are needed */ diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 7ab0b2fba503..3338437b559b 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -79,6 +79,17 @@ config ASUS_LAPTOP If you have an ACPI-compatible ASUS laptop, say Y or M here. +config CHROMEOS_LAPTOP + tristate "Chrome OS Laptop" + depends on I2C + depends on DMI + ---help--- + This driver instantiates i2c and smbus devices such as + light sensors and touchpads. + + If you have a supported Chromebook, choose Y or M here. + The module will be called chromeos_laptop. + config DELL_LAPTOP tristate "Dell Laptop Extras" depends on X86 @@ -288,9 +299,11 @@ config IDEAPAD_LAPTOP depends on ACPI depends on RFKILL && INPUT depends on SERIO_I8042 + depends on BACKLIGHT_CLASS_DEVICE select INPUT_SPARSEKMAP help - This is a driver for the rfkill switches on Lenovo IdeaPad netbooks. + This is a driver for Lenovo IdeaPad netbooks contains drivers for + rfkill switch, hotkey, fan control and backlight control. config THINKPAD_ACPI tristate "ThinkPad ACPI Laptop Extras" diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index bf7e4f935b17..ace2b38942fe 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -50,3 +50,4 @@ obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o obj-$(CONFIG_SAMSUNG_Q10) += samsung-q10.o obj-$(CONFIG_APPLE_GMUX) += apple-gmux.o +obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index afed7018a2b5..c9076bdaf2c1 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -511,6 +511,24 @@ static struct dmi_system_id acer_quirks[] = { }, .driver_data = &quirk_fujitsu_amilo_li_1718, }, + { + .callback = dmi_matched, + .ident = "Lenovo Ideapad S205-10382JG", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "10382JG"), + }, + .driver_data = &quirk_lenovo_ideapad_s205, + }, + { + .callback = dmi_matched, + .ident = "Lenovo Ideapad S205-1038DPG", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "1038DPG"), + }, + .driver_data = &quirk_lenovo_ideapad_s205, + }, {} }; @@ -1204,6 +1222,9 @@ static acpi_status WMID_set_capabilities(void) devices = *((u32 *) obj->buffer.pointer); } else if (obj->type == ACPI_TYPE_INTEGER) { devices = (u32) obj->integer.value; + } else { + kfree(out.pointer); + return AE_ERROR; } } else { kfree(out.pointer); diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index d9f9a0dbc6f3..0eea09c1c134 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -128,10 +128,12 @@ MODULE_PARM_DESC(als_status, "Set the ALS status on boot " /* * Some events we use, same for all Asus */ -#define ATKD_BR_UP 0x10 /* (event & ~ATKD_BR_UP) = brightness level */ -#define ATKD_BR_DOWN 0x20 /* (event & ~ATKD_BR_DOWN) = britghness level */ -#define ATKD_BR_MIN ATKD_BR_UP -#define ATKD_BR_MAX (ATKD_BR_DOWN | 0xF) /* 0x2f */ +#define ATKD_BRNUP_MIN 0x10 +#define ATKD_BRNUP_MAX 0x1f +#define ATKD_BRNDOWN_MIN 0x20 +#define ATKD_BRNDOWN_MAX 0x2f +#define ATKD_BRNDOWN 0x20 +#define ATKD_BRNUP 0x2f #define ATKD_LCD_ON 0x33 #define ATKD_LCD_OFF 0x34 @@ -301,40 +303,65 @@ static const struct key_entry asus_keymap[] = { {KE_KEY, 0x17, { KEY_ZOOM } }, {KE_KEY, 0x1f, { KEY_BATTERY } }, /* End of Lenovo SL Specific keycodes */ + {KE_KEY, ATKD_BRNDOWN, { KEY_BRIGHTNESSDOWN } }, + {KE_KEY, ATKD_BRNUP, { KEY_BRIGHTNESSUP } }, {KE_KEY, 0x30, { KEY_VOLUMEUP } }, {KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, {KE_KEY, 0x32, { KEY_MUTE } }, - {KE_KEY, 0x33, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x34, { KEY_SWITCHVIDEOMODE } }, + {KE_KEY, 0x33, { KEY_DISPLAYTOGGLE } }, /* LCD on */ + {KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */ {KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, {KE_KEY, 0x41, { KEY_NEXTSONG } }, - {KE_KEY, 0x43, { KEY_STOPCD } }, + {KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */ {KE_KEY, 0x45, { KEY_PLAYPAUSE } }, - {KE_KEY, 0x4c, { KEY_MEDIA } }, + {KE_KEY, 0x4c, { KEY_MEDIA } }, /* WMP Key */ {KE_KEY, 0x50, { KEY_EMAIL } }, {KE_KEY, 0x51, { KEY_WWW } }, {KE_KEY, 0x55, { KEY_CALC } }, + {KE_IGNORE, 0x57, }, /* Battery mode */ + {KE_IGNORE, 0x58, }, /* AC mode */ {KE_KEY, 0x5C, { KEY_SCREENLOCK } }, /* Screenlock */ - {KE_KEY, 0x5D, { KEY_WLAN } }, - {KE_KEY, 0x5E, { KEY_WLAN } }, - {KE_KEY, 0x5F, { KEY_WLAN } }, - {KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x6B, { KEY_F13 } }, /* Lock Touchpad */ + {KE_KEY, 0x5D, { KEY_WLAN } }, /* WLAN Toggle */ + {KE_KEY, 0x5E, { KEY_WLAN } }, /* WLAN Enable */ + {KE_KEY, 0x5F, { KEY_WLAN } }, /* WLAN Disable */ + {KE_KEY, 0x60, { KEY_TOUCHPAD_ON } }, + {KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD only */ + {KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT only */ + {KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT */ + {KE_KEY, 0x64, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV */ + {KE_KEY, 0x65, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV */ + {KE_KEY, 0x66, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV */ + {KE_KEY, 0x67, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV */ + {KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } }, /* Lock Touchpad */ {KE_KEY, 0x6C, { KEY_SLEEP } }, /* Suspend */ {KE_KEY, 0x6D, { KEY_SLEEP } }, /* Hibernate */ - {KE_KEY, 0x7E, { KEY_BLUETOOTH } }, - {KE_KEY, 0x7D, { KEY_BLUETOOTH } }, + {KE_IGNORE, 0x6E, }, /* Low Battery notification */ + {KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */ + {KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */ {KE_KEY, 0x82, { KEY_CAMERA } }, - {KE_KEY, 0x88, { KEY_WLAN } }, - {KE_KEY, 0x8A, { KEY_PROG1 } }, + {KE_KEY, 0x88, { KEY_RFKILL } }, /* Radio Toggle Key */ + {KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */ + {KE_KEY, 0x8C, { KEY_SWITCHVIDEOMODE } }, /* SDSP DVI only */ + {KE_KEY, 0x8D, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + DVI */ + {KE_KEY, 0x8E, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + DVI */ + {KE_KEY, 0x8F, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + DVI */ + {KE_KEY, 0x90, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + DVI */ + {KE_KEY, 0x91, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + DVI */ + {KE_KEY, 0x92, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + DVI */ + {KE_KEY, 0x93, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + DVI */ {KE_KEY, 0x95, { KEY_MEDIA } }, {KE_KEY, 0x99, { KEY_PHONE } }, - {KE_KEY, 0xc4, { KEY_KBDILLUMUP } }, - {KE_KEY, 0xc5, { KEY_KBDILLUMDOWN } }, - {KE_KEY, 0xb5, { KEY_CALC } }, + {KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */ + {KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */ + {KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */ + {KE_KEY, 0xA3, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + HDMI */ + {KE_KEY, 0xA4, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + HDMI */ + {KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */ + {KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */ + {KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */ + {KE_KEY, 0xB5, { KEY_CALC } }, + {KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, + {KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, {KE_END, 0}, }; @@ -1521,15 +1548,19 @@ static void asus_acpi_notify(struct acpi_device *device, u32 event) dev_name(&asus->device->dev), event, count); - /* Brightness events are special */ - if (event >= ATKD_BR_MIN && event <= ATKD_BR_MAX) { + if (event >= ATKD_BRNUP_MIN && event <= ATKD_BRNUP_MAX) + event = ATKD_BRNUP; + else if (event >= ATKD_BRNDOWN_MIN && + event <= ATKD_BRNDOWN_MAX) + event = ATKD_BRNDOWN; - /* Ignore them completely if the acpi video driver is used */ + /* Brightness events are special */ + if (event == ATKD_BRNDOWN || event == ATKD_BRNUP) { if (asus->backlight_device != NULL) { /* Update the backlight device. */ asus_backlight_notify(asus); + return ; } - return ; } /* Accelerometer "coarse orientation change" event */ diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index be790402e0f1..210b5b872125 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -59,6 +59,17 @@ static struct quirk_entry quirk_asus_unknown = { .wapf = 0, }; +/* + * For those machines that need software to control bt/wifi status + * and can't adjust brightness through ACPI interface + * and have duplicate events(ACPI and WMI) for display toggle + */ +static struct quirk_entry quirk_asus_x55u = { + .wapf = 4, + .wmi_backlight_power = true, + .no_display_toggle = true, +}; + static struct quirk_entry quirk_asus_x401u = { .wapf = 4, }; @@ -77,6 +88,15 @@ static struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X401U"), }, + .driver_data = &quirk_asus_x55u, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X401A", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X401A"), + }, .driver_data = &quirk_asus_x401u, }, { @@ -95,6 +115,15 @@ static struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X501U"), }, + .driver_data = &quirk_asus_x55u, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X501A", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X501A"), + }, .driver_data = &quirk_asus_x401u, }, { @@ -131,7 +160,7 @@ static struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X55U"), }, - .driver_data = &quirk_asus_x401u, + .driver_data = &quirk_asus_x55u, }, { .callback = dmi_matched, @@ -161,6 +190,8 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) } static const struct key_entry asus_nb_wmi_keymap[] = { + { KE_KEY, ASUS_WMI_BRN_DOWN, { KEY_BRIGHTNESSDOWN } }, + { KE_KEY, ASUS_WMI_BRN_UP, { KEY_BRIGHTNESSUP } }, { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, @@ -168,9 +199,9 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */ { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, { KE_KEY, 0x41, { KEY_NEXTSONG } }, - { KE_KEY, 0x43, { KEY_STOPCD } }, + { KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */ { KE_KEY, 0x45, { KEY_PLAYPAUSE } }, - { KE_KEY, 0x4c, { KEY_MEDIA } }, + { KE_KEY, 0x4c, { KEY_MEDIA } }, /* WMP Key */ { KE_KEY, 0x50, { KEY_EMAIL } }, { KE_KEY, 0x51, { KEY_WWW } }, { KE_KEY, 0x55, { KEY_CALC } }, @@ -180,25 +211,42 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x5D, { KEY_WLAN } }, /* Wireless console Toggle */ { KE_KEY, 0x5E, { KEY_WLAN } }, /* Wireless console Enable */ { KE_KEY, 0x5F, { KEY_WLAN } }, /* Wireless console Disable */ - { KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } }, - { KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, - { KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, - { KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, + { KE_KEY, 0x60, { KEY_TOUCHPAD_ON } }, + { KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD only */ + { KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT only */ + { KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT */ + { KE_KEY, 0x64, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV */ + { KE_KEY, 0x65, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV */ + { KE_KEY, 0x66, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV */ + { KE_KEY, 0x67, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV */ { KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } }, - { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, - { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, + { KE_IGNORE, 0x6E, }, /* Low Battery notification */ + { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */ + { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */ { KE_KEY, 0x82, { KEY_CAMERA } }, - { KE_KEY, 0x88, { KEY_RFKILL } }, - { KE_KEY, 0x8A, { KEY_PROG1 } }, + { KE_KEY, 0x88, { KEY_RFKILL } }, /* Radio Toggle Key */ + { KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */ + { KE_KEY, 0x8C, { KEY_SWITCHVIDEOMODE } }, /* SDSP DVI only */ + { KE_KEY, 0x8D, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + DVI */ + { KE_KEY, 0x8E, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + DVI */ + { KE_KEY, 0x8F, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + DVI */ + { KE_KEY, 0x90, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + DVI */ + { KE_KEY, 0x91, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + DVI */ + { KE_KEY, 0x92, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + DVI */ + { KE_KEY, 0x93, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + DVI */ { KE_KEY, 0x95, { KEY_MEDIA } }, { KE_KEY, 0x99, { KEY_PHONE } }, { KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */ { KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */ { KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */ { KE_KEY, 0xA3, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + HDMI */ - { KE_KEY, 0xb5, { KEY_CALC } }, - { KE_KEY, 0xc4, { KEY_KBDILLUMUP } }, - { KE_KEY, 0xc5, { KEY_KBDILLUMDOWN } }, + { KE_KEY, 0xA4, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + HDMI */ + { KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */ + { KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */ + { KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */ + { KE_KEY, 0xB5, { KEY_CALC } }, + { KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, + { KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, { KE_END, 0}, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index f80ae4d10f68..c11b2426dac1 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -187,6 +187,8 @@ struct asus_wmi { struct device *hwmon_device; struct platform_device *platform_device; + struct led_classdev wlan_led; + int wlan_led_wk; struct led_classdev tpd_led; int tpd_led_wk; struct led_classdev kbd_led; @@ -194,6 +196,7 @@ struct asus_wmi { struct workqueue_struct *led_workqueue; struct work_struct tpd_led_work; struct work_struct kbd_led_work; + struct work_struct wlan_led_work; struct asus_rfkill wlan; struct asus_rfkill bluetooth; @@ -456,12 +459,65 @@ static enum led_brightness kbd_led_get(struct led_classdev *led_cdev) return value; } +static int wlan_led_unknown_state(struct asus_wmi *asus) +{ + u32 result; + + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result); + + return result & ASUS_WMI_DSTS_UNKNOWN_BIT; +} + +static int wlan_led_presence(struct asus_wmi *asus) +{ + u32 result; + + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result); + + return result & ASUS_WMI_DSTS_PRESENCE_BIT; +} + +static void wlan_led_update(struct work_struct *work) +{ + int ctrl_param; + struct asus_wmi *asus; + + asus = container_of(work, struct asus_wmi, wlan_led_work); + + ctrl_param = asus->wlan_led_wk; + asus_wmi_set_devstate(ASUS_WMI_DEVID_WIRELESS_LED, ctrl_param, NULL); +} + +static void wlan_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct asus_wmi *asus; + + asus = container_of(led_cdev, struct asus_wmi, wlan_led); + + asus->wlan_led_wk = !!value; + queue_work(asus->led_workqueue, &asus->wlan_led_work); +} + +static enum led_brightness wlan_led_get(struct led_classdev *led_cdev) +{ + struct asus_wmi *asus; + u32 result; + + asus = container_of(led_cdev, struct asus_wmi, wlan_led); + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result); + + return result & ASUS_WMI_DSTS_BRIGHTNESS_MASK; +} + static void asus_wmi_led_exit(struct asus_wmi *asus) { if (!IS_ERR_OR_NULL(asus->kbd_led.dev)) led_classdev_unregister(&asus->kbd_led); if (!IS_ERR_OR_NULL(asus->tpd_led.dev)) led_classdev_unregister(&asus->tpd_led); + if (!IS_ERR_OR_NULL(asus->wlan_led.dev)) + led_classdev_unregister(&asus->wlan_led); if (asus->led_workqueue) destroy_workqueue(asus->led_workqueue); } @@ -498,6 +554,23 @@ static int asus_wmi_led_init(struct asus_wmi *asus) rv = led_classdev_register(&asus->platform_device->dev, &asus->kbd_led); + if (rv) + goto error; + } + + if (wlan_led_presence(asus)) { + INIT_WORK(&asus->wlan_led_work, wlan_led_update); + + asus->wlan_led.name = "asus::wlan"; + asus->wlan_led.brightness_set = wlan_led_set; + if (!wlan_led_unknown_state(asus)) + asus->wlan_led.brightness_get = wlan_led_get; + asus->wlan_led.flags = LED_CORE_SUSPENDRESUME; + asus->wlan_led.max_brightness = 1; + asus->wlan_led.default_trigger = "asus-wlan"; + + rv = led_classdev_register(&asus->platform_device->dev, + &asus->wlan_led); } error: @@ -813,6 +886,9 @@ static int asus_new_rfkill(struct asus_wmi *asus, if (!*rfkill) return -EINVAL; + if (dev_id == ASUS_WMI_DEVID_WLAN) + rfkill_set_led_trigger_name(*rfkill, "asus-wlan"); + rfkill_init_sw_state(*rfkill, !result); result = rfkill_register(*rfkill); if (result) { @@ -1265,6 +1341,18 @@ static void asus_wmi_backlight_exit(struct asus_wmi *asus) asus->backlight_device = NULL; } +static int is_display_toggle(int code) +{ + /* display toggle keys */ + if ((code >= 0x61 && code <= 0x67) || + (code >= 0x8c && code <= 0x93) || + (code >= 0xa0 && code <= 0xa7) || + (code >= 0xd0 && code <= 0xd5)) + return 1; + + return 0; +} + static void asus_wmi_notify(u32 value, void *context) { struct asus_wmi *asus = context; @@ -1298,16 +1386,24 @@ static void asus_wmi_notify(u32 value, void *context) } if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX) - code = NOTIFY_BRNUP_MIN; + code = ASUS_WMI_BRN_UP; else if (code >= NOTIFY_BRNDOWN_MIN && code <= NOTIFY_BRNDOWN_MAX) - code = NOTIFY_BRNDOWN_MIN; + code = ASUS_WMI_BRN_DOWN; - if (code == NOTIFY_BRNUP_MIN || code == NOTIFY_BRNDOWN_MIN) { - if (!acpi_video_backlight_support()) + if (code == ASUS_WMI_BRN_DOWN || code == ASUS_WMI_BRN_UP) { + if (!acpi_video_backlight_support()) { asus_wmi_backlight_notify(asus, orig_code); - } else if (!sparse_keymap_report_event(asus->inputdev, code, - key_value, autorelease)) + goto exit; + } + } + + if (is_display_toggle(code) && + asus->driver->quirks->no_display_toggle) + goto exit; + + if (!sparse_keymap_report_event(asus->inputdev, code, + key_value, autorelease)) pr_info("Unknown key %x pressed\n", code); exit: diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 4c9bd38bb0a2..4da4c8bafe70 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -30,6 +30,8 @@ #include <linux/platform_device.h> #define ASUS_WMI_KEY_IGNORE (-1) +#define ASUS_WMI_BRN_DOWN 0x20 +#define ASUS_WMI_BRN_UP 0x2f struct module; struct key_entry; @@ -41,6 +43,13 @@ struct quirk_entry { bool store_backlight_power; bool wmi_backlight_power; int wapf; + /* + * For machines with AMD graphic chips, it will send out WMI event + * and ACPI interrupt at the same time while hitting the hotkey. + * To simplify the problem, we just have to ignore the WMI event, + * and let the ACPI interrupt to send out the key event. + */ + int no_display_toggle; }; struct asus_wmi_driver { diff --git a/drivers/platform/x86/chromeos_laptop.c b/drivers/platform/x86/chromeos_laptop.c new file mode 100644 index 000000000000..93d66809355a --- /dev/null +++ b/drivers/platform/x86/chromeos_laptop.c @@ -0,0 +1,371 @@ +/* + * chromeos_laptop.c - Driver to instantiate Chromebook i2c/smbus devices. + * + * Author : Benson Leung <bleung@chromium.org> + * + * Copyright (C) 2012 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/dmi.h> +#include <linux/i2c.h> +#include <linux/module.h> + +#define ATMEL_TP_I2C_ADDR 0x4b +#define ATMEL_TP_I2C_BL_ADDR 0x25 +#define ATMEL_TS_I2C_ADDR 0x4a +#define ATMEL_TS_I2C_BL_ADDR 0x26 +#define CYAPA_TP_I2C_ADDR 0x67 +#define ISL_ALS_I2C_ADDR 0x44 +#define TAOS_ALS_I2C_ADDR 0x29 + +static struct i2c_client *als; +static struct i2c_client *tp; +static struct i2c_client *ts; + +const char *i2c_adapter_names[] = { + "SMBus I801 adapter", + "i915 gmbus vga", + "i915 gmbus panel", +}; + +/* Keep this enum consistent with i2c_adapter_names */ +enum i2c_adapter_type { + I2C_ADAPTER_SMBUS = 0, + I2C_ADAPTER_VGADDC, + I2C_ADAPTER_PANEL, +}; + +static struct i2c_board_info __initdata cyapa_device = { + I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR), + .flags = I2C_CLIENT_WAKE, +}; + +static struct i2c_board_info __initdata isl_als_device = { + I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR), +}; + +static struct i2c_board_info __initdata tsl2583_als_device = { + I2C_BOARD_INFO("tsl2583", TAOS_ALS_I2C_ADDR), +}; + +static struct i2c_board_info __initdata tsl2563_als_device = { + I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR), +}; + +static struct i2c_board_info __initdata atmel_224s_tp_device = { + I2C_BOARD_INFO("atmel_mxt_tp", ATMEL_TP_I2C_ADDR), + .platform_data = NULL, + .flags = I2C_CLIENT_WAKE, +}; + +static struct i2c_board_info __initdata atmel_1664s_device = { + I2C_BOARD_INFO("atmel_mxt_ts", ATMEL_TS_I2C_ADDR), + .platform_data = NULL, + .flags = I2C_CLIENT_WAKE, +}; + +static struct i2c_client __init *__add_probed_i2c_device( + const char *name, + int bus, + struct i2c_board_info *info, + const unsigned short *addrs) +{ + const struct dmi_device *dmi_dev; + const struct dmi_dev_onboard *dev_data; + struct i2c_adapter *adapter; + struct i2c_client *client; + + if (bus < 0) + return NULL; + /* + * If a name is specified, look for irq platform information stashed + * in DMI_DEV_TYPE_DEV_ONBOARD by the Chrome OS custom system firmware. + */ + if (name) { + dmi_dev = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD, name, NULL); + if (!dmi_dev) { + pr_err("%s failed to dmi find device %s.\n", + __func__, + name); + return NULL; + } + dev_data = (struct dmi_dev_onboard *)dmi_dev->device_data; + if (!dev_data) { + pr_err("%s failed to get data from dmi for %s.\n", + __func__, name); + return NULL; + } + info->irq = dev_data->instance; + } + + adapter = i2c_get_adapter(bus); + if (!adapter) { + pr_err("%s failed to get i2c adapter %d.\n", __func__, bus); + return NULL; + } + + /* add the i2c device */ + client = i2c_new_probed_device(adapter, info, addrs, NULL); + if (!client) + pr_err("%s failed to register device %d-%02x\n", + __func__, bus, info->addr); + else + pr_debug("%s added i2c device %d-%02x\n", + __func__, bus, info->addr); + + i2c_put_adapter(adapter); + return client; +} + +static int __init __find_i2c_adap(struct device *dev, void *data) +{ + const char *name = data; + static const char *prefix = "i2c-"; + struct i2c_adapter *adapter; + if (strncmp(dev_name(dev), prefix, strlen(prefix)) != 0) + return 0; + adapter = to_i2c_adapter(dev); + return (strncmp(adapter->name, name, strlen(name)) == 0); +} + +static int __init find_i2c_adapter_num(enum i2c_adapter_type type) +{ + struct device *dev = NULL; + struct i2c_adapter *adapter; + const char *name = i2c_adapter_names[type]; + /* find the adapter by name */ + dev = bus_find_device(&i2c_bus_type, NULL, (void *)name, + __find_i2c_adap); + if (!dev) { + pr_err("%s: i2c adapter %s not found on system.\n", __func__, + name); + return -ENODEV; + } + adapter = to_i2c_adapter(dev); + return adapter->nr; +} + +/* + * Takes a list of addresses in addrs as such : + * { addr1, ... , addrn, I2C_CLIENT_END }; + * add_probed_i2c_device will use i2c_new_probed_device + * and probe for devices at all of the addresses listed. + * Returns NULL if no devices found. + * See Documentation/i2c/instantiating-devices for more information. + */ +static __init struct i2c_client *add_probed_i2c_device( + const char *name, + enum i2c_adapter_type type, + struct i2c_board_info *info, + const unsigned short *addrs) +{ + return __add_probed_i2c_device(name, + find_i2c_adapter_num(type), + info, + addrs); +} + +/* + * Probes for a device at a single address, the one provided by + * info->addr. + * Returns NULL if no device found. + */ +static __init struct i2c_client *add_i2c_device(const char *name, + enum i2c_adapter_type type, + struct i2c_board_info *info) +{ + const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END }; + return __add_probed_i2c_device(name, + find_i2c_adapter_num(type), + info, + addr_list); +} + + +static struct i2c_client __init *add_smbus_device(const char *name, + struct i2c_board_info *info) +{ + return add_i2c_device(name, I2C_ADAPTER_SMBUS, info); +} + +static int __init setup_cyapa_smbus_tp(const struct dmi_system_id *id) +{ + /* add cyapa touchpad on smbus */ + tp = add_smbus_device("trackpad", &cyapa_device); + return 0; +} + +static int __init setup_atmel_224s_tp(const struct dmi_system_id *id) +{ + const unsigned short addr_list[] = { ATMEL_TP_I2C_BL_ADDR, + ATMEL_TP_I2C_ADDR, + I2C_CLIENT_END }; + + /* add atmel mxt touchpad on VGA DDC GMBus */ + tp = add_probed_i2c_device("trackpad", I2C_ADAPTER_VGADDC, + &atmel_224s_tp_device, addr_list); + return 0; +} + +static int __init setup_atmel_1664s_ts(const struct dmi_system_id *id) +{ + const unsigned short addr_list[] = { ATMEL_TS_I2C_BL_ADDR, + ATMEL_TS_I2C_ADDR, + I2C_CLIENT_END }; + + /* add atmel mxt touch device on PANEL GMBus */ + ts = add_probed_i2c_device("touchscreen", I2C_ADAPTER_PANEL, + &atmel_1664s_device, addr_list); + return 0; +} + + +static int __init setup_isl29018_als(const struct dmi_system_id *id) +{ + /* add isl29018 light sensor */ + als = add_smbus_device("lightsensor", &isl_als_device); + return 0; +} + +static int __init setup_isl29023_als(const struct dmi_system_id *id) +{ + /* add isl29023 light sensor on Panel GMBus */ + als = add_i2c_device("lightsensor", I2C_ADAPTER_PANEL, + &isl_als_device); + return 0; +} + +static int __init setup_tsl2583_als(const struct dmi_system_id *id) +{ + /* add tsl2583 light sensor on smbus */ + als = add_smbus_device(NULL, &tsl2583_als_device); + return 0; +} + +static int __init setup_tsl2563_als(const struct dmi_system_id *id) +{ + /* add tsl2563 light sensor on smbus */ + als = add_smbus_device(NULL, &tsl2563_als_device); + return 0; +} + +static struct dmi_system_id __initdata chromeos_laptop_dmi_table[] = { + { + .ident = "Samsung Series 5 550 - Touchpad", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG"), + DMI_MATCH(DMI_PRODUCT_NAME, "Lumpy"), + }, + .callback = setup_cyapa_smbus_tp, + }, + { + .ident = "Chromebook Pixel - Touchscreen", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Link"), + }, + .callback = setup_atmel_1664s_ts, + }, + { + .ident = "Chromebook Pixel - Touchpad", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Link"), + }, + .callback = setup_atmel_224s_tp, + }, + { + .ident = "Samsung Series 5 550 - Light Sensor", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG"), + DMI_MATCH(DMI_PRODUCT_NAME, "Lumpy"), + }, + .callback = setup_isl29018_als, + }, + { + .ident = "Chromebook Pixel - Light Sensor", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Link"), + }, + .callback = setup_isl29023_als, + }, + { + .ident = "Acer C7 Chromebook - Touchpad", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Parrot"), + }, + .callback = setup_cyapa_smbus_tp, + }, + { + .ident = "HP Pavilion 14 Chromebook - Touchpad", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Butterfly"), + }, + .callback = setup_cyapa_smbus_tp, + }, + { + .ident = "Samsung Series 5 - Light Sensor", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Alex"), + }, + .callback = setup_tsl2583_als, + }, + { + .ident = "Cr-48 - Light Sensor", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Mario"), + }, + .callback = setup_tsl2563_als, + }, + { + .ident = "Acer AC700 - Light Sensor", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"), + }, + .callback = setup_tsl2563_als, + }, + { } +}; +MODULE_DEVICE_TABLE(dmi, chromeos_laptop_dmi_table); + +static int __init chromeos_laptop_init(void) +{ + if (!dmi_check_system(chromeos_laptop_dmi_table)) { + pr_debug("%s unsupported system.\n", __func__); + return -ENODEV; + } + return 0; +} + +static void __exit chromeos_laptop_exit(void) +{ + if (als) + i2c_unregister_device(als); + if (tp) + i2c_unregister_device(tp); + if (ts) + i2c_unregister_device(ts); +} + +module_init(chromeos_laptop_init); +module_exit(chromeos_laptop_exit); + +MODULE_DESCRIPTION("Chrome OS Laptop driver"); +MODULE_AUTHOR("Benson Leung <bleung@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c index 60cb76a5b513..af67e6e56ebb 100644 --- a/drivers/platform/x86/eeepc-wmi.c +++ b/drivers/platform/x86/eeepc-wmi.c @@ -63,6 +63,8 @@ MODULE_PARM_DESC(hotplug_wireless, #define HOME_RELEASE 0xe5 static const struct key_entry eeepc_wmi_keymap[] = { + { KE_KEY, ASUS_WMI_BRN_DOWN, { KEY_BRIGHTNESSDOWN } }, + { KE_KEY, ASUS_WMI_BRN_UP, { KEY_BRIGHTNESSUP } }, /* Sleep already handled via generic ACPI code */ { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 1dde7accf27c..45cacf79f3a7 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -60,6 +60,7 @@ enum hp_wmi_radio { HPWMI_WIFI = 0, HPWMI_BLUETOOTH = 1, HPWMI_WWAN = 2, + HPWMI_GPS = 3, }; enum hp_wmi_event_ids { @@ -72,10 +73,6 @@ enum hp_wmi_event_ids { HPWMI_LOCK_SWITCH = 7, }; -static int hp_wmi_bios_setup(struct platform_device *device); -static int __exit hp_wmi_bios_remove(struct platform_device *device); -static int hp_wmi_resume_handler(struct device *device); - struct bios_args { u32 signature; u32 command; @@ -137,6 +134,7 @@ static const struct key_entry hp_wmi_keymap[] = { { KE_KEY, 0x2142, { KEY_MEDIA } }, { KE_KEY, 0x213b, { KEY_INFO } }, { KE_KEY, 0x2169, { KEY_DIRECTION } }, + { KE_KEY, 0x216a, { KEY_SETUP } }, { KE_KEY, 0x231b, { KEY_HELP } }, { KE_END, 0 } }; @@ -147,6 +145,7 @@ static struct platform_device *hp_wmi_platform_dev; static struct rfkill *wifi_rfkill; static struct rfkill *bluetooth_rfkill; static struct rfkill *wwan_rfkill; +static struct rfkill *gps_rfkill; struct rfkill2_device { u8 id; @@ -157,21 +156,6 @@ struct rfkill2_device { static int rfkill2_count; static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES]; -static const struct dev_pm_ops hp_wmi_pm_ops = { - .resume = hp_wmi_resume_handler, - .restore = hp_wmi_resume_handler, -}; - -static struct platform_driver hp_wmi_driver = { - .driver = { - .name = "hp-wmi", - .owner = THIS_MODULE, - .pm = &hp_wmi_pm_ops, - }, - .probe = hp_wmi_bios_setup, - .remove = hp_wmi_bios_remove, -}; - /* * hp_wmi_perform_query * @@ -543,6 +527,10 @@ static void hp_wmi_notify(u32 value, void *context) rfkill_set_states(wwan_rfkill, hp_wmi_get_sw_state(HPWMI_WWAN), hp_wmi_get_hw_state(HPWMI_WWAN)); + if (gps_rfkill) + rfkill_set_states(gps_rfkill, + hp_wmi_get_sw_state(HPWMI_GPS), + hp_wmi_get_hw_state(HPWMI_GPS)); break; case HPWMI_CPU_BATTERY_THROTTLE: pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n"); @@ -670,7 +658,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) (void *) HPWMI_WWAN); if (!wwan_rfkill) { err = -ENOMEM; - goto register_bluetooth_error; + goto register_gps_error; } rfkill_init_sw_state(wwan_rfkill, hp_wmi_get_sw_state(HPWMI_WWAN)); @@ -681,10 +669,33 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) goto register_wwan_err; } + if (wireless & 0x8) { + gps_rfkill = rfkill_alloc("hp-gps", &device->dev, + RFKILL_TYPE_GPS, + &hp_wmi_rfkill_ops, + (void *) HPWMI_GPS); + if (!gps_rfkill) { + err = -ENOMEM; + goto register_bluetooth_error; + } + rfkill_init_sw_state(gps_rfkill, + hp_wmi_get_sw_state(HPWMI_GPS)); + rfkill_set_hw_state(bluetooth_rfkill, + hp_wmi_get_hw_state(HPWMI_GPS)); + err = rfkill_register(gps_rfkill); + if (err) + goto register_gps_error; + } + return 0; register_wwan_err: rfkill_destroy(wwan_rfkill); wwan_rfkill = NULL; + if (gps_rfkill) + rfkill_unregister(gps_rfkill); +register_gps_error: + rfkill_destroy(gps_rfkill); + gps_rfkill = NULL; if (bluetooth_rfkill) rfkill_unregister(bluetooth_rfkill); register_bluetooth_error: @@ -729,6 +740,10 @@ static int hp_wmi_rfkill2_setup(struct platform_device *device) type = RFKILL_TYPE_WWAN; name = "hp-wwan"; break; + case HPWMI_GPS: + type = RFKILL_TYPE_GPS; + name = "hp-gps"; + break; default: pr_warn("unknown device type 0x%x\n", state.device[i].radio_type); @@ -778,7 +793,7 @@ fail: return err; } -static int hp_wmi_bios_setup(struct platform_device *device) +static int __init hp_wmi_bios_setup(struct platform_device *device) { int err; @@ -786,6 +801,7 @@ static int hp_wmi_bios_setup(struct platform_device *device) wifi_rfkill = NULL; bluetooth_rfkill = NULL; wwan_rfkill = NULL; + gps_rfkill = NULL; rfkill2_count = 0; if (hp_wmi_rfkill_setup(device)) @@ -835,6 +851,10 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device) rfkill_unregister(wwan_rfkill); rfkill_destroy(wwan_rfkill); } + if (gps_rfkill) { + rfkill_unregister(gps_rfkill); + rfkill_destroy(gps_rfkill); + } return 0; } @@ -870,51 +890,70 @@ static int hp_wmi_resume_handler(struct device *device) rfkill_set_states(wwan_rfkill, hp_wmi_get_sw_state(HPWMI_WWAN), hp_wmi_get_hw_state(HPWMI_WWAN)); + if (gps_rfkill) + rfkill_set_states(gps_rfkill, + hp_wmi_get_sw_state(HPWMI_GPS), + hp_wmi_get_hw_state(HPWMI_GPS)); return 0; } +static const struct dev_pm_ops hp_wmi_pm_ops = { + .resume = hp_wmi_resume_handler, + .restore = hp_wmi_resume_handler, +}; + +static struct platform_driver hp_wmi_driver = { + .driver = { + .name = "hp-wmi", + .owner = THIS_MODULE, + .pm = &hp_wmi_pm_ops, + }, + .remove = __exit_p(hp_wmi_bios_remove), +}; + static int __init hp_wmi_init(void) { int err; int event_capable = wmi_has_guid(HPWMI_EVENT_GUID); int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID); + if (!bios_capable && !event_capable) + return -ENODEV; + if (event_capable) { err = hp_wmi_input_setup(); if (err) return err; + + //Enable magic for hotkeys that run on the SMBus + ec_write(0xe6,0x6e); } if (bios_capable) { - err = platform_driver_register(&hp_wmi_driver); - if (err) - goto err_driver_reg; - hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1); - if (!hp_wmi_platform_dev) { - err = -ENOMEM; - goto err_device_alloc; + hp_wmi_platform_dev = + platform_device_register_simple("hp-wmi", -1, NULL, 0); + if (IS_ERR(hp_wmi_platform_dev)) { + err = PTR_ERR(hp_wmi_platform_dev); + goto err_destroy_input; } - err = platform_device_add(hp_wmi_platform_dev); + + err = platform_driver_probe(&hp_wmi_driver, hp_wmi_bios_setup); if (err) - goto err_device_add; + goto err_unregister_device; } - if (!bios_capable && !event_capable) - return -ENODEV; - return 0; -err_device_add: - platform_device_put(hp_wmi_platform_dev); -err_device_alloc: - platform_driver_unregister(&hp_wmi_driver); -err_driver_reg: +err_unregister_device: + platform_device_unregister(hp_wmi_platform_dev); +err_destroy_input: if (event_capable) hp_wmi_input_destroy(); return err; } +module_init(hp_wmi_init); static void __exit hp_wmi_exit(void) { @@ -926,6 +965,4 @@ static void __exit hp_wmi_exit(void) platform_driver_unregister(&hp_wmi_driver); } } - -module_init(hp_wmi_init); module_exit(hp_wmi_exit); diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 2111dbb7e1e3..6b2293875672 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -82,8 +82,19 @@ #define MSI_STANDARD_EC_SCM_LOAD_ADDRESS 0x2d #define MSI_STANDARD_EC_SCM_LOAD_MASK (1 << 0) -#define MSI_STANDARD_EC_TOUCHPAD_ADDRESS 0xe4 +#define MSI_STANDARD_EC_FUNCTIONS_ADDRESS 0xe4 +/* Power LED is orange - Turbo mode */ +#define MSI_STANDARD_EC_TURBO_MASK (1 << 1) +/* Power LED is green - ECO mode */ +#define MSI_STANDARD_EC_ECO_MASK (1 << 3) +/* Touchpad is turned on */ #define MSI_STANDARD_EC_TOUCHPAD_MASK (1 << 4) +/* If this bit != bit 1, turbo mode can't be toggled */ +#define MSI_STANDARD_EC_TURBO_COOLDOWN_MASK (1 << 7) + +#define MSI_STANDARD_EC_FAN_ADDRESS 0x33 +/* If zero, fan rotates at maximal speed */ +#define MSI_STANDARD_EC_AUTOFAN_MASK (1 << 0) #ifdef CONFIG_PM_SLEEP static int msi_laptop_resume(struct device *device); @@ -108,23 +119,38 @@ static const struct key_entry msi_laptop_keymap[] = { static struct input_dev *msi_laptop_input_dev; -static bool old_ec_model; static int wlan_s, bluetooth_s, threeg_s; static int threeg_exists; - -/* Some MSI 3G netbook only have one fn key to control Wlan/Bluetooth/3G, - * those netbook will load the SCM (windows app) to disable the original - * Wlan/Bluetooth control by BIOS when user press fn key, then control - * Wlan/Bluetooth/3G by SCM (software control by OS). Without SCM, user - * cann't on/off 3G module on those 3G netbook. - * On Linux, msi-laptop driver will do the same thing to disable the - * original BIOS control, then might need use HAL or other userland - * application to do the software control that simulate with SCM. - * e.g. MSI N034 netbook - */ -static bool load_scm_model; static struct rfkill *rfk_wlan, *rfk_bluetooth, *rfk_threeg; +/* MSI laptop quirks */ +struct quirk_entry { + bool old_ec_model; + + /* Some MSI 3G netbook only have one fn key to control + * Wlan/Bluetooth/3G, those netbook will load the SCM (windows app) to + * disable the original Wlan/Bluetooth control by BIOS when user press + * fn key, then control Wlan/Bluetooth/3G by SCM (software control by + * OS). Without SCM, user cann't on/off 3G module on those 3G netbook. + * On Linux, msi-laptop driver will do the same thing to disable the + * original BIOS control, then might need use HAL or other userland + * application to do the software control that simulate with SCM. + * e.g. MSI N034 netbook + */ + bool load_scm_model; + + /* Some MSI laptops need delay before reading from EC */ + bool ec_delay; + + /* Some MSI Wind netbooks (e.g. MSI Wind U100) need loading SCM to get + * some features working (e.g. ECO mode), but we cannot change + * Wlan/Bluetooth state in software and we can only read its state. + */ + bool ec_read_only; +}; + +static struct quirk_entry *quirks; + /* Hardware access */ static int set_lcd_level(int level) @@ -195,10 +221,13 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask) if (sscanf(buf, "%i", &status) != 1 || (status < 0 || status > 1)) return -EINVAL; + if (quirks->ec_read_only) + return -EOPNOTSUPP; + /* read current device state */ result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata); if (result < 0) - return -EINVAL; + return result; if (!!(rdata & mask) != status) { /* reverse device bit */ @@ -209,7 +238,7 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask) result = ec_write(MSI_STANDARD_EC_COMMAND_ADDRESS, wdata); if (result < 0) - return -EINVAL; + return result; } return count; @@ -222,7 +251,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) - return -1; + return result; if (wlan) *wlan = !!(rdata & 8); @@ -240,7 +269,7 @@ static int get_wireless_state_ec_standard(void) result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata); if (result < 0) - return -1; + return result; wlan_s = !!(rdata & MSI_STANDARD_EC_WLAN_MASK); @@ -258,7 +287,7 @@ static int get_threeg_exists(void) result = ec_read(MSI_STANDARD_EC_DEVICES_EXISTS_ADDRESS, &rdata); if (result < 0) - return -1; + return result; threeg_exists = !!(rdata & MSI_STANDARD_EC_3G_MASK); @@ -291,9 +320,9 @@ static ssize_t show_wlan(struct device *dev, struct device_attribute *attr, char *buf) { - int ret, enabled; + int ret, enabled = 0; - if (old_ec_model) { + if (quirks->old_ec_model) { ret = get_wireless_state(&enabled, NULL); } else { ret = get_wireless_state_ec_standard(); @@ -315,9 +344,9 @@ static ssize_t show_bluetooth(struct device *dev, struct device_attribute *attr, char *buf) { - int ret, enabled; + int ret, enabled = 0; - if (old_ec_model) { + if (quirks->old_ec_model) { ret = get_wireless_state(NULL, &enabled); } else { ret = get_wireless_state_ec_standard(); @@ -342,8 +371,8 @@ static ssize_t show_threeg(struct device *dev, int ret; /* old msi ec not support 3G */ - if (old_ec_model) - return -1; + if (quirks->old_ec_model) + return -ENODEV; ret = get_wireless_state_ec_standard(); if (ret < 0) @@ -417,18 +446,119 @@ static ssize_t store_auto_brightness(struct device *dev, return count; } +static ssize_t show_touchpad(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_TOUCHPAD_MASK)); +} + +static ssize_t show_turbo(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_TURBO_MASK)); +} + +static ssize_t show_eco(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_ECO_MASK)); +} + +static ssize_t show_turbo_cooldown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", (!!(rdata & MSI_STANDARD_EC_TURBO_MASK)) | + (!!(rdata & MSI_STANDARD_EC_TURBO_COOLDOWN_MASK) << 1)); +} + +static ssize_t show_auto_fan(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FAN_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_AUTOFAN_MASK)); +} + +static ssize_t store_auto_fan(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + + int enable, result; + + if (sscanf(buf, "%i", &enable) != 1 || (enable != (enable & 1))) + return -EINVAL; + + result = ec_write(MSI_STANDARD_EC_FAN_ADDRESS, enable); + if (result < 0) + return result; + + return count; +} + static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); static DEVICE_ATTR(auto_brightness, 0644, show_auto_brightness, store_auto_brightness); static DEVICE_ATTR(bluetooth, 0444, show_bluetooth, NULL); static DEVICE_ATTR(wlan, 0444, show_wlan, NULL); static DEVICE_ATTR(threeg, 0444, show_threeg, NULL); +static DEVICE_ATTR(touchpad, 0444, show_touchpad, NULL); +static DEVICE_ATTR(turbo_mode, 0444, show_turbo, NULL); +static DEVICE_ATTR(eco_mode, 0444, show_eco, NULL); +static DEVICE_ATTR(turbo_cooldown, 0444, show_turbo_cooldown, NULL); +static DEVICE_ATTR(auto_fan, 0644, show_auto_fan, store_auto_fan); static struct attribute *msipf_attributes[] = { - &dev_attr_lcd_level.attr, - &dev_attr_auto_brightness.attr, &dev_attr_bluetooth.attr, &dev_attr_wlan.attr, + &dev_attr_touchpad.attr, + &dev_attr_turbo_mode.attr, + &dev_attr_eco_mode.attr, + &dev_attr_turbo_cooldown.attr, + &dev_attr_auto_fan.attr, + NULL +}; + +static struct attribute *msipf_old_attributes[] = { + &dev_attr_lcd_level.attr, + &dev_attr_auto_brightness.attr, NULL }; @@ -436,6 +566,10 @@ static struct attribute_group msipf_attribute_group = { .attrs = msipf_attributes }; +static struct attribute_group msipf_old_attribute_group = { + .attrs = msipf_old_attributes +}; + static struct platform_driver msipf_driver = { .driver = { .name = "msi-laptop-pf", @@ -448,9 +582,26 @@ static struct platform_device *msipf_device; /* Initialization */ -static int dmi_check_cb(const struct dmi_system_id *id) +static struct quirk_entry quirk_old_ec_model = { + .old_ec_model = true, +}; + +static struct quirk_entry quirk_load_scm_model = { + .load_scm_model = true, + .ec_delay = true, +}; + +static struct quirk_entry quirk_load_scm_ro_model = { + .load_scm_model = true, + .ec_read_only = true, +}; + +static int dmi_check_cb(const struct dmi_system_id *dmi) { - pr_info("Identified laptop model '%s'\n", id->ident); + pr_info("Identified laptop model '%s'\n", dmi->ident); + + quirks = dmi->driver_data; + return 1; } @@ -464,6 +615,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT'L CO.,LTD") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, { @@ -474,6 +626,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "0581"), DMI_MATCH(DMI_BOARD_NAME, "MS-1058") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, { @@ -484,6 +637,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_BOARD_VENDOR, "MSI"), DMI_MATCH(DMI_BOARD_NAME, "MS-1412") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, { @@ -495,12 +649,9 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT'L CO.,LTD") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, - { } -}; - -static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { { .ident = "MSI N034", .matches = { @@ -510,6 +661,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD") }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -521,6 +673,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD") }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -530,6 +683,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { "MICRO-STAR INTERNATIONAL CO., LTD"), DMI_MATCH(DMI_PRODUCT_NAME, "MS-N014"), }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -539,6 +693,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { "Micro-Star International"), DMI_MATCH(DMI_PRODUCT_NAME, "CR620"), }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -548,6 +703,17 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { "Micro-Star International Co., Ltd."), DMI_MATCH(DMI_PRODUCT_NAME, "U270 series"), }, + .driver_data = &quirk_load_scm_model, + .callback = dmi_check_cb + }, + { + .ident = "MSI U90/U100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "U90/U100"), + }, + .driver_data = &quirk_load_scm_ro_model, .callback = dmi_check_cb }, { } @@ -560,32 +726,26 @@ static int rfkill_bluetooth_set(void *data, bool blocked) * blocked == false is on * blocked == true is off */ - if (blocked) - set_device_state("0", 0, MSI_STANDARD_EC_BLUETOOTH_MASK); - else - set_device_state("1", 0, MSI_STANDARD_EC_BLUETOOTH_MASK); + int result = set_device_state(blocked ? "0" : "1", 0, + MSI_STANDARD_EC_BLUETOOTH_MASK); - return 0; + return min(result, 0); } static int rfkill_wlan_set(void *data, bool blocked) { - if (blocked) - set_device_state("0", 0, MSI_STANDARD_EC_WLAN_MASK); - else - set_device_state("1", 0, MSI_STANDARD_EC_WLAN_MASK); + int result = set_device_state(blocked ? "0" : "1", 0, + MSI_STANDARD_EC_WLAN_MASK); - return 0; + return min(result, 0); } static int rfkill_threeg_set(void *data, bool blocked) { - if (blocked) - set_device_state("0", 0, MSI_STANDARD_EC_3G_MASK); - else - set_device_state("1", 0, MSI_STANDARD_EC_3G_MASK); + int result = set_device_state(blocked ? "0" : "1", 0, + MSI_STANDARD_EC_3G_MASK); - return 0; + return min(result, 0); } static const struct rfkill_ops rfkill_bluetooth_ops = { @@ -618,25 +778,34 @@ static void rfkill_cleanup(void) } } +static bool msi_rfkill_set_state(struct rfkill *rfkill, bool blocked) +{ + if (quirks->ec_read_only) + return rfkill_set_hw_state(rfkill, blocked); + else + return rfkill_set_sw_state(rfkill, blocked); +} + static void msi_update_rfkill(struct work_struct *ignored) { get_wireless_state_ec_standard(); if (rfk_wlan) - rfkill_set_sw_state(rfk_wlan, !wlan_s); + msi_rfkill_set_state(rfk_wlan, !wlan_s); if (rfk_bluetooth) - rfkill_set_sw_state(rfk_bluetooth, !bluetooth_s); + msi_rfkill_set_state(rfk_bluetooth, !bluetooth_s); if (rfk_threeg) - rfkill_set_sw_state(rfk_threeg, !threeg_s); + msi_rfkill_set_state(rfk_threeg, !threeg_s); } -static DECLARE_DELAYED_WORK(msi_rfkill_work, msi_update_rfkill); +static DECLARE_DELAYED_WORK(msi_rfkill_dwork, msi_update_rfkill); +static DECLARE_WORK(msi_rfkill_work, msi_update_rfkill); static void msi_send_touchpad_key(struct work_struct *ignored) { u8 rdata; int result; - result = ec_read(MSI_STANDARD_EC_TOUCHPAD_ADDRESS, &rdata); + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); if (result < 0) return; @@ -644,7 +813,8 @@ static void msi_send_touchpad_key(struct work_struct *ignored) (rdata & MSI_STANDARD_EC_TOUCHPAD_MASK) ? KEY_TOUCHPAD_ON : KEY_TOUCHPAD_OFF, 1, true); } -static DECLARE_DELAYED_WORK(msi_touchpad_work, msi_send_touchpad_key); +static DECLARE_DELAYED_WORK(msi_touchpad_dwork, msi_send_touchpad_key); +static DECLARE_WORK(msi_touchpad_work, msi_send_touchpad_key); static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str, struct serio *port) @@ -662,14 +832,20 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str, extended = false; switch (data) { case 0xE4: - schedule_delayed_work(&msi_touchpad_work, - round_jiffies_relative(0.5 * HZ)); + if (quirks->ec_delay) { + schedule_delayed_work(&msi_touchpad_dwork, + round_jiffies_relative(0.5 * HZ)); + } else + schedule_work(&msi_touchpad_work); break; case 0x54: case 0x62: case 0x76: - schedule_delayed_work(&msi_rfkill_work, - round_jiffies_relative(0.5 * HZ)); + if (quirks->ec_delay) { + schedule_delayed_work(&msi_rfkill_dwork, + round_jiffies_relative(0.5 * HZ)); + } else + schedule_work(&msi_rfkill_work); break; } } @@ -736,8 +912,11 @@ static int rfkill_init(struct platform_device *sdev) } /* schedule to run rfkill state initial */ - schedule_delayed_work(&msi_rfkill_init, - round_jiffies_relative(1 * HZ)); + if (quirks->ec_delay) { + schedule_delayed_work(&msi_rfkill_init, + round_jiffies_relative(1 * HZ)); + } else + schedule_work(&msi_rfkill_work); return 0; @@ -761,7 +940,7 @@ static int msi_laptop_resume(struct device *device) u8 data; int result; - if (!load_scm_model) + if (!quirks->load_scm_model) return 0; /* set load SCM to disable hardware control by fn key */ @@ -819,13 +998,15 @@ static int __init load_scm_model_init(struct platform_device *sdev) u8 data; int result; - /* allow userland write sysfs file */ - dev_attr_bluetooth.store = store_bluetooth; - dev_attr_wlan.store = store_wlan; - dev_attr_threeg.store = store_threeg; - dev_attr_bluetooth.attr.mode |= S_IWUSR; - dev_attr_wlan.attr.mode |= S_IWUSR; - dev_attr_threeg.attr.mode |= S_IWUSR; + if (!quirks->ec_read_only) { + /* allow userland write sysfs file */ + dev_attr_bluetooth.store = store_bluetooth; + dev_attr_wlan.store = store_wlan; + dev_attr_threeg.store = store_threeg; + dev_attr_bluetooth.attr.mode |= S_IWUSR; + dev_attr_wlan.attr.mode |= S_IWUSR; + dev_attr_threeg.attr.mode |= S_IWUSR; + } /* disable hardware control by fn key */ result = ec_read(MSI_STANDARD_EC_SCM_LOAD_ADDRESS, &data); @@ -874,21 +1055,22 @@ static int __init msi_init(void) if (acpi_disabled) return -ENODEV; - if (force || dmi_check_system(msi_dmi_table)) - old_ec_model = 1; + dmi_check_system(msi_dmi_table); + if (!quirks) + /* quirks may be NULL if no match in DMI table */ + quirks = &quirk_load_scm_model; + if (force) + quirks = &quirk_old_ec_model; - if (!old_ec_model) + if (!quirks->old_ec_model) get_threeg_exists(); - if (!old_ec_model && dmi_check_system(msi_load_scm_models_dmi_table)) - load_scm_model = 1; - if (auto_brightness < 0 || auto_brightness > 2) return -EINVAL; /* Register backlight stuff */ - if (acpi_video_backlight_support()) { + if (!quirks->old_ec_model || acpi_video_backlight_support()) { pr_info("Brightness ignored, must be controlled by ACPI video driver\n"); } else { struct backlight_properties props; @@ -918,7 +1100,7 @@ static int __init msi_init(void) if (ret) goto fail_platform_device1; - if (load_scm_model && (load_scm_model_init(msipf_device) < 0)) { + if (quirks->load_scm_model && (load_scm_model_init(msipf_device) < 0)) { ret = -EINVAL; goto fail_platform_device1; } @@ -928,20 +1110,25 @@ static int __init msi_init(void) if (ret) goto fail_platform_device2; - if (!old_ec_model) { + if (!quirks->old_ec_model) { if (threeg_exists) ret = device_create_file(&msipf_device->dev, &dev_attr_threeg); if (ret) goto fail_platform_device2; - } + } else { + ret = sysfs_create_group(&msipf_device->dev.kobj, + &msipf_old_attribute_group); + if (ret) + goto fail_platform_device2; - /* Disable automatic brightness control by default because - * this module was probably loaded to do brightness control in - * software. */ + /* Disable automatic brightness control by default because + * this module was probably loaded to do brightness control in + * software. */ - if (auto_brightness != 2) - set_auto_brightness(auto_brightness); + if (auto_brightness != 2) + set_auto_brightness(auto_brightness); + } pr_info("driver " MSI_DRIVER_VERSION " successfully loaded\n"); @@ -949,9 +1136,10 @@ static int __init msi_init(void) fail_platform_device2: - if (load_scm_model) { + if (quirks->load_scm_model) { i8042_remove_filter(msi_laptop_i8042_filter); - cancel_delayed_work_sync(&msi_rfkill_work); + cancel_delayed_work_sync(&msi_rfkill_dwork); + cancel_work_sync(&msi_rfkill_work); rfkill_cleanup(); } platform_device_del(msipf_device); @@ -973,23 +1161,26 @@ fail_backlight: static void __exit msi_cleanup(void) { - if (load_scm_model) { + if (quirks->load_scm_model) { i8042_remove_filter(msi_laptop_i8042_filter); msi_laptop_input_destroy(); - cancel_delayed_work_sync(&msi_rfkill_work); + cancel_delayed_work_sync(&msi_rfkill_dwork); + cancel_work_sync(&msi_rfkill_work); rfkill_cleanup(); } sysfs_remove_group(&msipf_device->dev.kobj, &msipf_attribute_group); - if (!old_ec_model && threeg_exists) + if (!quirks->old_ec_model && threeg_exists) device_remove_file(&msipf_device->dev, &dev_attr_threeg); platform_device_unregister(msipf_device); platform_driver_unregister(&msipf_driver); backlight_device_unregister(msibl_device); - /* Enable automatic brightness control again */ - if (auto_brightness != 2) - set_auto_brightness(1); + if (quirks->old_ec_model) { + /* Enable automatic brightness control again */ + if (auto_brightness != 2) + set_auto_brightness(1); + } pr_info("driver unloaded\n"); } @@ -1011,3 +1202,4 @@ MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N051:*"); MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N014:*"); MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnCR620:*"); MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnU270series:*"); +MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnU90/U100:*"); diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c index 2264331bd48e..70222f265f68 100644 --- a/drivers/platform/x86/msi-wmi.c +++ b/drivers/platform/x86/msi-wmi.c @@ -34,29 +34,65 @@ MODULE_AUTHOR("Thomas Renninger <trenn@suse.de>"); MODULE_DESCRIPTION("MSI laptop WMI hotkeys driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("wmi:551A1F84-FBDD-4125-91DB-3EA8F44F1D45"); -MODULE_ALIAS("wmi:B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2"); - #define DRV_NAME "msi-wmi" #define MSIWMI_BIOS_GUID "551A1F84-FBDD-4125-91DB-3EA8F44F1D45" -#define MSIWMI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2" - -#define SCANCODE_BASE 0xD0 -#define MSI_WMI_BRIGHTNESSUP SCANCODE_BASE -#define MSI_WMI_BRIGHTNESSDOWN (SCANCODE_BASE + 1) -#define MSI_WMI_VOLUMEUP (SCANCODE_BASE + 2) -#define MSI_WMI_VOLUMEDOWN (SCANCODE_BASE + 3) -#define MSI_WMI_MUTE (SCANCODE_BASE + 4) +#define MSIWMI_MSI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2" +#define MSIWMI_WIND_EVENT_GUID "5B3CC38A-40D9-7245-8AE6-1145B751BE3F" + +MODULE_ALIAS("wmi:" MSIWMI_BIOS_GUID); +MODULE_ALIAS("wmi:" MSIWMI_MSI_EVENT_GUID); +MODULE_ALIAS("wmi:" MSIWMI_WIND_EVENT_GUID); + +enum msi_scancodes { + /* Generic MSI keys (not present on MSI Wind) */ + MSI_KEY_BRIGHTNESSUP = 0xD0, + MSI_KEY_BRIGHTNESSDOWN, + MSI_KEY_VOLUMEUP, + MSI_KEY_VOLUMEDOWN, + MSI_KEY_MUTE, + /* MSI Wind keys */ + WIND_KEY_TOUCHPAD = 0x08, /* Fn+F3 touchpad toggle */ + WIND_KEY_BLUETOOTH = 0x56, /* Fn+F11 Bluetooth toggle */ + WIND_KEY_CAMERA, /* Fn+F6 webcam toggle */ + WIND_KEY_WLAN = 0x5f, /* Fn+F11 Wi-Fi toggle */ + WIND_KEY_TURBO, /* Fn+F10 turbo mode toggle */ + WIND_KEY_ECO = 0x69, /* Fn+F10 ECO mode toggle */ +}; static struct key_entry msi_wmi_keymap[] = { - { KE_KEY, MSI_WMI_BRIGHTNESSUP, {KEY_BRIGHTNESSUP} }, - { KE_KEY, MSI_WMI_BRIGHTNESSDOWN, {KEY_BRIGHTNESSDOWN} }, - { KE_KEY, MSI_WMI_VOLUMEUP, {KEY_VOLUMEUP} }, - { KE_KEY, MSI_WMI_VOLUMEDOWN, {KEY_VOLUMEDOWN} }, - { KE_KEY, MSI_WMI_MUTE, {KEY_MUTE} }, - { KE_END, 0} + { KE_KEY, MSI_KEY_BRIGHTNESSUP, {KEY_BRIGHTNESSUP} }, + { KE_KEY, MSI_KEY_BRIGHTNESSDOWN, {KEY_BRIGHTNESSDOWN} }, + { KE_KEY, MSI_KEY_VOLUMEUP, {KEY_VOLUMEUP} }, + { KE_KEY, MSI_KEY_VOLUMEDOWN, {KEY_VOLUMEDOWN} }, + { KE_KEY, MSI_KEY_MUTE, {KEY_MUTE} }, + + /* These keys work without WMI. Ignore them to avoid double keycodes */ + { KE_IGNORE, WIND_KEY_TOUCHPAD, {KEY_TOUCHPAD_TOGGLE} }, + { KE_IGNORE, WIND_KEY_BLUETOOTH, {KEY_BLUETOOTH} }, + { KE_IGNORE, WIND_KEY_CAMERA, {KEY_CAMERA} }, + { KE_IGNORE, WIND_KEY_WLAN, {KEY_WLAN} }, + + /* These are unknown WMI events found on MSI Wind */ + { KE_IGNORE, 0x00 }, + { KE_IGNORE, 0x62 }, + { KE_IGNORE, 0x63 }, + + /* These are MSI Wind keys that should be handled via WMI */ + { KE_KEY, WIND_KEY_TURBO, {KEY_PROG1} }, + { KE_KEY, WIND_KEY_ECO, {KEY_PROG2} }, + + { KE_END, 0 } +}; + +static ktime_t last_pressed; + +static const struct { + const char *guid; + bool quirk_last_pressed; +} *event_wmi, event_wmis[] = { + { MSIWMI_MSI_EVENT_GUID, true }, + { MSIWMI_WIND_EVENT_GUID, false }, }; -static ktime_t last_pressed[ARRAY_SIZE(msi_wmi_keymap) - 1]; static struct backlight_device *backlight; @@ -149,7 +185,6 @@ static void msi_wmi_notify(u32 value, void *context) struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL }; static struct key_entry *key; union acpi_object *obj; - ktime_t cur; acpi_status status; status = wmi_get_event_data(value, &response); @@ -165,39 +200,67 @@ static void msi_wmi_notify(u32 value, void *context) pr_debug("Eventcode: 0x%x\n", eventcode); key = sparse_keymap_entry_from_scancode(msi_wmi_input_dev, eventcode); - if (key) { - ktime_t diff; - cur = ktime_get_real(); - diff = ktime_sub(cur, last_pressed[key->code - - SCANCODE_BASE]); - /* Ignore event if the same event happened in a 50 ms + if (!key) { + pr_info("Unknown key pressed - %x\n", eventcode); + goto msi_wmi_notify_exit; + } + + if (event_wmi->quirk_last_pressed) { + ktime_t cur = ktime_get_real(); + ktime_t diff = ktime_sub(cur, last_pressed); + /* Ignore event if any event happened in a 50 ms timeframe -> Key press may result in 10-20 GPEs */ if (ktime_to_us(diff) < 1000 * 50) { pr_debug("Suppressed key event 0x%X - " "Last press was %lld us ago\n", key->code, ktime_to_us(diff)); - return; - } - last_pressed[key->code - SCANCODE_BASE] = cur; - - if (key->type == KE_KEY && - /* Brightness is served via acpi video driver */ - (!acpi_video_backlight_support() || - (key->code != MSI_WMI_BRIGHTNESSUP && - key->code != MSI_WMI_BRIGHTNESSDOWN))) { - pr_debug("Send key: 0x%X - " - "Input layer keycode: %d\n", - key->code, key->keycode); - sparse_keymap_report_entry(msi_wmi_input_dev, - key, 1, true); + goto msi_wmi_notify_exit; } - } else - pr_info("Unknown key pressed - %x\n", eventcode); + last_pressed = cur; + } + + if (key->type == KE_KEY && + /* Brightness is served via acpi video driver */ + (backlight || + (key->code != MSI_KEY_BRIGHTNESSUP && + key->code != MSI_KEY_BRIGHTNESSDOWN))) { + pr_debug("Send key: 0x%X - Input layer keycode: %d\n", + key->code, key->keycode); + sparse_keymap_report_entry(msi_wmi_input_dev, key, 1, + true); + } } else pr_info("Unknown event received\n"); + +msi_wmi_notify_exit: kfree(response.pointer); } +static int __init msi_wmi_backlight_setup(void) +{ + int err; + struct backlight_properties props; + + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_PLATFORM; + props.max_brightness = ARRAY_SIZE(backlight_map) - 1; + backlight = backlight_device_register(DRV_NAME, NULL, NULL, + &msi_backlight_ops, + &props); + if (IS_ERR(backlight)) + return PTR_ERR(backlight); + + err = bl_get(NULL); + if (err < 0) { + backlight_device_unregister(backlight); + return err; + } + + backlight->props.brightness = err; + + return 0; +} + static int __init msi_wmi_input_setup(void) { int err; @@ -219,7 +282,7 @@ static int __init msi_wmi_input_setup(void) if (err) goto err_free_keymap; - memset(last_pressed, 0, sizeof(last_pressed)); + last_pressed = ktime_set(0, 0); return 0; @@ -233,61 +296,66 @@ err_free_dev: static int __init msi_wmi_init(void) { int err; + int i; - if (!wmi_has_guid(MSIWMI_EVENT_GUID)) { - pr_err("This machine doesn't have MSI-hotkeys through WMI\n"); - return -ENODEV; - } - err = wmi_install_notify_handler(MSIWMI_EVENT_GUID, - msi_wmi_notify, NULL); - if (ACPI_FAILURE(err)) - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(event_wmis); i++) { + if (!wmi_has_guid(event_wmis[i].guid)) + continue; - err = msi_wmi_input_setup(); - if (err) - goto err_uninstall_notifier; - - if (!acpi_video_backlight_support()) { - struct backlight_properties props; - memset(&props, 0, sizeof(struct backlight_properties)); - props.type = BACKLIGHT_PLATFORM; - props.max_brightness = ARRAY_SIZE(backlight_map) - 1; - backlight = backlight_device_register(DRV_NAME, NULL, NULL, - &msi_backlight_ops, - &props); - if (IS_ERR(backlight)) { - err = PTR_ERR(backlight); + err = msi_wmi_input_setup(); + if (err) { + pr_err("Unable to setup input device\n"); + return err; + } + + err = wmi_install_notify_handler(event_wmis[i].guid, + msi_wmi_notify, NULL); + if (ACPI_FAILURE(err)) { + pr_err("Unable to setup WMI notify handler\n"); goto err_free_input; } - err = bl_get(NULL); - if (err < 0) - goto err_free_backlight; + pr_debug("Event handler installed\n"); + event_wmi = &event_wmis[i]; + break; + } - backlight->props.brightness = err; + if (wmi_has_guid(MSIWMI_BIOS_GUID) && !acpi_video_backlight_support()) { + err = msi_wmi_backlight_setup(); + if (err) { + pr_err("Unable to setup backlight device\n"); + goto err_uninstall_handler; + } + pr_debug("Backlight device created\n"); + } + + if (!event_wmi && !backlight) { + pr_err("This machine doesn't have neither MSI-hotkeys nor backlight through WMI\n"); + return -ENODEV; } - pr_debug("Event handler installed\n"); return 0; -err_free_backlight: - backlight_device_unregister(backlight); +err_uninstall_handler: + if (event_wmi) + wmi_remove_notify_handler(event_wmi->guid); err_free_input: - sparse_keymap_free(msi_wmi_input_dev); - input_unregister_device(msi_wmi_input_dev); -err_uninstall_notifier: - wmi_remove_notify_handler(MSIWMI_EVENT_GUID); + if (event_wmi) { + sparse_keymap_free(msi_wmi_input_dev); + input_unregister_device(msi_wmi_input_dev); + } return err; } static void __exit msi_wmi_exit(void) { - if (wmi_has_guid(MSIWMI_EVENT_GUID)) { - wmi_remove_notify_handler(MSIWMI_EVENT_GUID); + if (event_wmi) { + wmi_remove_notify_handler(event_wmi->guid); sparse_keymap_free(msi_wmi_input_dev); input_unregister_device(msi_wmi_input_dev); - backlight_device_unregister(backlight); } + if (backlight) + backlight_device_unregister(backlight); } module_init(msi_wmi_init); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 8da21876a794..14d4dced1def 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -158,6 +158,11 @@ static void sony_nc_thermal_cleanup(struct platform_device *pd); static int sony_nc_lid_resume_setup(struct platform_device *pd); static void sony_nc_lid_resume_cleanup(struct platform_device *pd); +static int sony_nc_gfx_switch_setup(struct platform_device *pd, + unsigned int handle); +static void sony_nc_gfx_switch_cleanup(struct platform_device *pd); +static int __sony_nc_gfx_switch_status_get(void); + static int sony_nc_highspeed_charging_setup(struct platform_device *pd); static void sony_nc_highspeed_charging_cleanup(struct platform_device *pd); @@ -1241,17 +1246,13 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) /* Hybrid GFX switching */ sony_call_snc_handle(handle, 0x0000, &result); dprintk("GFX switch event received (reason: %s)\n", - (result & 0x01) ? - "switch change" : "unknown"); - - /* verify the switch state - * 1: discrete GFX - * 0: integrated GFX - */ - sony_call_snc_handle(handle, 0x0100, &result); + (result == 0x1) ? "switch change" : + (result == 0x2) ? "output switch" : + (result == 0x3) ? "output switch" : + ""); ev_type = GFX_SWITCH; - real_ev = result & 0xff; + real_ev = __sony_nc_gfx_switch_status_get(); break; default: @@ -1350,6 +1351,13 @@ static void sony_nc_function_setup(struct acpi_device *device, pr_err("couldn't set up thermal profile function (%d)\n", result); break; + case 0x0128: + case 0x0146: + result = sony_nc_gfx_switch_setup(pf_device, handle); + if (result) + pr_err("couldn't set up GFX Switch status (%d)\n", + result); + break; case 0x0131: result = sony_nc_highspeed_charging_setup(pf_device); if (result) @@ -1365,6 +1373,8 @@ static void sony_nc_function_setup(struct acpi_device *device, break; case 0x0137: case 0x0143: + case 0x014b: + case 0x014c: result = sony_nc_kbd_backlight_setup(pf_device, handle); if (result) pr_err("couldn't set up keyboard backlight function (%d)\n", @@ -1414,6 +1424,10 @@ static void sony_nc_function_cleanup(struct platform_device *pd) case 0x0122: sony_nc_thermal_cleanup(pd); break; + case 0x0128: + case 0x0146: + sony_nc_gfx_switch_cleanup(pd); + break; case 0x0131: sony_nc_highspeed_charging_cleanup(pd); break; @@ -1423,6 +1437,8 @@ static void sony_nc_function_cleanup(struct platform_device *pd) break; case 0x0137: case 0x0143: + case 0x014b: + case 0x014c: sony_nc_kbd_backlight_cleanup(pd); break; default: @@ -1467,6 +1483,8 @@ static void sony_nc_function_resume(void) break; case 0x0137: case 0x0143: + case 0x014b: + case 0x014c: sony_nc_kbd_backlight_resume(); break; default: @@ -1534,7 +1552,7 @@ static int sony_nc_rfkill_set(void *data, bool blocked) int argument = sony_rfkill_address[(long) data] + 0x100; if (!blocked) - argument |= 0x030000; + argument |= 0x070000; return sony_call_snc_handle(sony_rfkill_handle, argument, &result); } @@ -2333,7 +2351,7 @@ static int sony_nc_lid_resume_setup(struct platform_device *pd) return 0; liderror: - for (; i > 0; i--) + for (i--; i >= 0; i--) device_remove_file(&pd->dev, &lid_ctl->attrs[i]); kfree(lid_ctl); @@ -2355,6 +2373,97 @@ static void sony_nc_lid_resume_cleanup(struct platform_device *pd) } } +/* GFX Switch position */ +enum gfx_switch { + SPEED, + STAMINA, + AUTO +}; +struct snc_gfx_switch_control { + struct device_attribute attr; + unsigned int handle; +}; +static struct snc_gfx_switch_control *gfxs_ctl; + +/* returns 0 for speed, 1 for stamina */ +static int __sony_nc_gfx_switch_status_get(void) +{ + unsigned int result; + + if (sony_call_snc_handle(gfxs_ctl->handle, 0x0100, &result)) + return -EIO; + + switch (gfxs_ctl->handle) { + case 0x0146: + /* 1: discrete GFX (speed) + * 0: integrated GFX (stamina) + */ + return result & 0x1 ? SPEED : STAMINA; + break; + case 0x0128: + /* it's a more elaborated bitmask, for now: + * 2: integrated GFX (stamina) + * 0: discrete GFX (speed) + */ + dprintk("GFX Status: 0x%x\n", result); + return result & 0x80 ? AUTO : + result & 0x02 ? STAMINA : SPEED; + break; + } + return -EINVAL; +} + +static ssize_t sony_nc_gfx_switch_status_show(struct device *dev, + struct device_attribute *attr, + char *buffer) +{ + int pos = __sony_nc_gfx_switch_status_get(); + + if (pos < 0) + return pos; + + return snprintf(buffer, PAGE_SIZE, "%s\n", pos ? "speed" : "stamina"); +} + +static int sony_nc_gfx_switch_setup(struct platform_device *pd, + unsigned int handle) +{ + unsigned int result; + + gfxs_ctl = kzalloc(sizeof(struct snc_gfx_switch_control), GFP_KERNEL); + if (!gfxs_ctl) + return -ENOMEM; + + gfxs_ctl->handle = handle; + + sysfs_attr_init(&gfxs_ctl->attr.attr); + gfxs_ctl->attr.attr.name = "gfx_switch_status"; + gfxs_ctl->attr.attr.mode = S_IRUGO; + gfxs_ctl->attr.show = sony_nc_gfx_switch_status_show; + + result = device_create_file(&pd->dev, &gfxs_ctl->attr); + if (result) + goto gfxerror; + + return 0; + +gfxerror: + kfree(gfxs_ctl); + gfxs_ctl = NULL; + + return result; +} + +static void sony_nc_gfx_switch_cleanup(struct platform_device *pd) +{ + if (gfxs_ctl) { + device_remove_file(&pd->dev, &gfxs_ctl->attr); + + kfree(gfxs_ctl); + gfxs_ctl = NULL; + } +} + /* High speed charging function */ static struct device_attribute *hsc_handle; @@ -2533,6 +2642,8 @@ static void sony_nc_backlight_ng_read_limits(int handle, lvl_table_len = 9; break; case 0x143: + case 0x14b: + case 0x14c: lvl_table_len = 16; break; } @@ -2584,6 +2695,18 @@ static void sony_nc_backlight_setup(void) sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props); max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + } else if (sony_find_snc_handle(0x14b) >= 0) { + ops = &sony_backlight_ng_ops; + sony_bl_props.cmd_base = 0x3000; + sony_nc_backlight_ng_read_limits(0x14b, &sony_bl_props); + max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + + } else if (sony_find_snc_handle(0x14c) >= 0) { + ops = &sony_backlight_ng_ops; + sony_bl_props.cmd_base = 0x3000; + sony_nc_backlight_ng_read_limits(0x14c, &sony_bl_props); + max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + } else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT", &unused))) { ops = &sony_backlight_ops; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index f4f8408f3b5b..9a907567f41e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -209,9 +209,8 @@ enum tpacpi_hkey_event_t { TP_HKEY_EV_ALARM_SENSOR_XHOT = 0x6022, /* sensor critically hot */ TP_HKEY_EV_THM_TABLE_CHANGED = 0x6030, /* thermal table changed */ - TP_HKEY_EV_UNK_6040 = 0x6040, /* Related to AC change? - some sort of APM hint, - W520 */ + /* AC-related events */ + TP_HKEY_EV_AC_CHANGED = 0x6040, /* AC status changed */ /* Misc */ TP_HKEY_EV_RFKILL_CHANGED = 0x7000, /* rfkill switch changed */ @@ -3629,6 +3628,12 @@ static bool hotkey_notify_6xxx(const u32 hkey, "a sensor reports something is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; + case TP_HKEY_EV_AC_CHANGED: + /* X120e, X121e, X220, X220i, X220t, X230, T420, T420s, W520: + * AC status changed; can be triggered by plugging or + * unplugging AC adapter, docking or undocking. */ + + /* fallthrough */ case TP_HKEY_EV_KEY_NUMLOCK: case TP_HKEY_EV_KEY_FN: @@ -8574,7 +8579,8 @@ static bool __pure __init tpacpi_is_valid_fw_id(const char* const s, return s && strlen(s) >= 8 && tpacpi_is_fw_digit(s[0]) && tpacpi_is_fw_digit(s[1]) && - s[2] == t && s[3] == 'T' && + s[2] == t && + (s[3] == 'T' || s[3] == 'N') && tpacpi_is_fw_digit(s[4]) && tpacpi_is_fw_digit(s[5]); } @@ -8607,7 +8613,8 @@ static int __must_check __init get_thinkpad_model_data( return -ENOMEM; /* Really ancient ThinkPad 240X will fail this, which is fine */ - if (!tpacpi_is_valid_fw_id(tp->bios_version_str, 'E')) + if (!(tpacpi_is_valid_fw_id(tp->bios_version_str, 'E') || + tpacpi_is_valid_fw_id(tp->bios_version_str, 'C'))) return 0; tp->bios_model = tp->bios_version_str[0] diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index b2a8ed99b2bf..98f0d3c30738 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c @@ -27,6 +27,8 @@ #include <linux/slab.h> #include <linux/of_device.h> #include <linux/of.h> +#include <linux/stmp_device.h> +#include <linux/stmp3xxx_rtc_wdt.h> #include <mach/common.h> @@ -36,6 +38,7 @@ #define STMP3XXX_RTC_CTRL_ALARM_IRQ_EN 0x00000001 #define STMP3XXX_RTC_CTRL_ONEMSEC_IRQ_EN 0x00000002 #define STMP3XXX_RTC_CTRL_ALARM_IRQ 0x00000004 +#define STMP3XXX_RTC_CTRL_WATCHDOGEN 0x00000010 #define STMP3XXX_RTC_STAT 0x10 #define STMP3XXX_RTC_STAT_STALE_SHIFT 16 @@ -45,6 +48,8 @@ #define STMP3XXX_RTC_ALARM 0x40 +#define STMP3XXX_RTC_WATCHDOG 0x50 + #define STMP3XXX_RTC_PERSISTENT0 0x60 #define STMP3XXX_RTC_PERSISTENT0_SET 0x64 #define STMP3XXX_RTC_PERSISTENT0_CLR 0x68 @@ -52,12 +57,70 @@ #define STMP3XXX_RTC_PERSISTENT0_ALARM_EN 0x00000004 #define STMP3XXX_RTC_PERSISTENT0_ALARM_WAKE 0x00000080 +#define STMP3XXX_RTC_PERSISTENT1 0x70 +/* missing bitmask in headers */ +#define STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER 0x80000000 + struct stmp3xxx_rtc_data { struct rtc_device *rtc; void __iomem *io; int irq_alarm; }; +#if IS_ENABLED(CONFIG_STMP3XXX_RTC_WATCHDOG) +/** + * stmp3xxx_wdt_set_timeout - configure the watchdog inside the STMP3xxx RTC + * @dev: the parent device of the watchdog (= the RTC) + * @timeout: the desired value for the timeout register of the watchdog. + * 0 disables the watchdog + * + * The watchdog needs one register and two bits which are in the RTC domain. + * To handle the resource conflict, the RTC driver will create another + * platform_device for the watchdog driver as a child of the RTC device. + * The watchdog driver is passed the below accessor function via platform_data + * to configure the watchdog. Locking is not needed because accessing SET/CLR + * registers is atomic. + */ + +static void stmp3xxx_wdt_set_timeout(struct device *dev, u32 timeout) +{ + struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (timeout) { + writel(timeout, rtc_data->io + STMP3XXX_RTC_WATCHDOG); + writel(STMP3XXX_RTC_CTRL_WATCHDOGEN, + rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_SET); + writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER, + rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_SET); + } else { + writel(STMP3XXX_RTC_CTRL_WATCHDOGEN, + rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_CLR); + writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER, + rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_CLR); + } +} + +static struct stmp3xxx_wdt_pdata wdt_pdata = { + .wdt_set_timeout = stmp3xxx_wdt_set_timeout, +}; + +static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev) +{ + struct platform_device *wdt_pdev = + platform_device_alloc("stmp3xxx_rtc_wdt", rtc_pdev->id); + + if (wdt_pdev) { + wdt_pdev->dev.parent = &rtc_pdev->dev; + wdt_pdev->dev.platform_data = &wdt_pdata; + platform_device_add(wdt_pdev); + } +} +#else +static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev) +{ +} +#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */ + static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) { /* @@ -233,6 +296,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) goto out_irq_alarm; } + stmp3xxx_wdt_register(pdev); return 0; out_irq_alarm: diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 3b021ec63255..e2e349204e7d 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -407,7 +407,7 @@ static int aac_src_deliver_message(struct fib *fib) fib->hw_fib_va->header.StructType = FIB_MAGIC2; fib->hw_fib_va->header.SenderFibAddress = (u32)address; fib->hw_fib_va->header.u.TimeStamp = 0; - BUG_ON((u32)(address >> 32) != 0L); + BUG_ON(upper_32_bits(address) != 0L); address |= fibsize; } else { /* Calculate the amount to the fibsize bits */ @@ -431,7 +431,7 @@ static int aac_src_deliver_message(struct fib *fib) address |= fibsize; } - src_writel(dev, MUnit.IQ_H, (address >> 32) & 0xffffffff); + src_writel(dev, MUnit.IQ_H, upper_32_bits(address) & 0xffffffff); src_writel(dev, MUnit.IQ_L, address & 0xffffffff); return 0; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 6401db494ef5..2daf4b0da434 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -62,6 +62,10 @@ static int bnx2fc_destroy(struct net_device *net_device); static int bnx2fc_enable(struct net_device *netdev); static int bnx2fc_disable(struct net_device *netdev); +/* fcoe_syfs control interface handlers */ +static int bnx2fc_ctlr_alloc(struct net_device *netdev); +static int bnx2fc_ctlr_enabled(struct fcoe_ctlr_device *cdev); + static void bnx2fc_recv_frame(struct sk_buff *skb); static void bnx2fc_start_disc(struct bnx2fc_interface *interface); @@ -89,7 +93,6 @@ static void bnx2fc_port_shutdown(struct fc_lport *lport); static void bnx2fc_stop(struct bnx2fc_interface *interface); static int __init bnx2fc_mod_init(void); static void __exit bnx2fc_mod_exit(void); -static void bnx2fc_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev); unsigned int bnx2fc_debug_level; module_param_named(debug_logging, bnx2fc_debug_level, int, S_IRUGO|S_IWUSR); @@ -107,44 +110,6 @@ static inline struct net_device *bnx2fc_netdev(const struct fc_lport *lport) ((struct fcoe_port *)lport_priv(lport))->priv)->netdev; } -/** - * bnx2fc_get_lesb() - Fill the FCoE Link Error Status Block - * @lport: the local port - * @fc_lesb: the link error status block - */ -static void bnx2fc_get_lesb(struct fc_lport *lport, - struct fc_els_lesb *fc_lesb) -{ - struct net_device *netdev = bnx2fc_netdev(lport); - - __fcoe_get_lesb(lport, fc_lesb, netdev); -} - -static void bnx2fc_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev) -{ - struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev); - struct net_device *netdev = bnx2fc_netdev(fip->lp); - struct fcoe_fc_els_lesb *fcoe_lesb; - struct fc_els_lesb fc_lesb; - - __fcoe_get_lesb(fip->lp, &fc_lesb, netdev); - fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb); - - ctlr_dev->lesb.lesb_link_fail = - ntohl(fcoe_lesb->lesb_link_fail); - ctlr_dev->lesb.lesb_vlink_fail = - ntohl(fcoe_lesb->lesb_vlink_fail); - ctlr_dev->lesb.lesb_miss_fka = - ntohl(fcoe_lesb->lesb_miss_fka); - ctlr_dev->lesb.lesb_symb_err = - ntohl(fcoe_lesb->lesb_symb_err); - ctlr_dev->lesb.lesb_err_block = - ntohl(fcoe_lesb->lesb_err_block); - ctlr_dev->lesb.lesb_fcs_error = - ntohl(fcoe_lesb->lesb_fcs_error); -} -EXPORT_SYMBOL(bnx2fc_ctlr_get_lesb); - static void bnx2fc_fcf_get_vlan_id(struct fcoe_fcf_device *fcf_dev) { struct fcoe_ctlr_device *ctlr_dev = @@ -741,35 +706,6 @@ static int bnx2fc_shost_config(struct fc_lport *lport, struct device *dev) return 0; } -static void bnx2fc_link_speed_update(struct fc_lport *lport) -{ - struct fcoe_port *port = lport_priv(lport); - struct bnx2fc_interface *interface = port->priv; - struct net_device *netdev = interface->netdev; - struct ethtool_cmd ecmd; - - if (!__ethtool_get_settings(netdev, &ecmd)) { - lport->link_supported_speeds &= - ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); - if (ecmd.supported & (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full)) - lport->link_supported_speeds |= FC_PORTSPEED_1GBIT; - if (ecmd.supported & SUPPORTED_10000baseT_Full) - lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; - - switch (ethtool_cmd_speed(&ecmd)) { - case SPEED_1000: - lport->link_speed = FC_PORTSPEED_1GBIT; - break; - case SPEED_2500: - lport->link_speed = FC_PORTSPEED_2GBIT; - break; - case SPEED_10000: - lport->link_speed = FC_PORTSPEED_10GBIT; - break; - } - } -} static int bnx2fc_link_ok(struct fc_lport *lport) { struct fcoe_port *port = lport_priv(lport); @@ -827,7 +763,7 @@ static int bnx2fc_net_config(struct fc_lport *lport, struct net_device *netdev) port->fcoe_pending_queue_active = 0; setup_timer(&port->timer, fcoe_queue_timer, (unsigned long) lport); - bnx2fc_link_speed_update(lport); + fcoe_link_speed_update(lport); if (!lport->vport) { if (fcoe_get_wwn(netdev, &wwnn, NETDEV_FCOE_WWNN)) @@ -871,6 +807,7 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, u16 vlan_id) { struct bnx2fc_hba *hba = (struct bnx2fc_hba *)context; + struct fcoe_ctlr_device *cdev; struct fc_lport *lport; struct fc_lport *vport; struct bnx2fc_interface *interface, *tmp; @@ -930,30 +867,47 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, BNX2FC_HBA_DBG(lport, "netevent handler - event=%s %ld\n", interface->netdev->name, event); - bnx2fc_link_speed_update(lport); + fcoe_link_speed_update(lport); + + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); if (link_possible && !bnx2fc_link_ok(lport)) { - /* Reset max recv frame size to default */ - fc_set_mfs(lport, BNX2FC_MFS); - /* - * ctlr link up will only be handled during - * enable to avoid sending discovery solicitation - * on a stale vlan - */ - if (interface->enabled) - fcoe_ctlr_link_up(ctlr); + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link up while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + /* Reset max recv frame size to default */ + fc_set_mfs(lport, BNX2FC_MFS); + /* + * ctlr link up will only be handled during + * enable to avoid sending discovery + * solicitation on a stale vlan + */ + if (interface->enabled) + fcoe_ctlr_link_up(ctlr); + }; } else if (fcoe_ctlr_link_down(ctlr)) { - mutex_lock(&lport->lp_mutex); - list_for_each_entry(vport, &lport->vports, list) - fc_host_port_type(vport->host) = - FC_PORTTYPE_UNKNOWN; - mutex_unlock(&lport->lp_mutex); - fc_host_port_type(lport->host) = FC_PORTTYPE_UNKNOWN; - per_cpu_ptr(lport->stats, - get_cpu())->LinkFailureCount++; - put_cpu(); - fcoe_clean_pending_queue(lport); - wait_for_upload = 1; + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link down while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + mutex_lock(&lport->lp_mutex); + list_for_each_entry(vport, &lport->vports, list) + fc_host_port_type(vport->host) = + FC_PORTTYPE_UNKNOWN; + mutex_unlock(&lport->lp_mutex); + fc_host_port_type(lport->host) = + FC_PORTTYPE_UNKNOWN; + per_cpu_ptr(lport->stats, + get_cpu())->LinkFailureCount++; + put_cpu(); + fcoe_clean_pending_queue(lport); + wait_for_upload = 1; + }; } } mutex_unlock(&bnx2fc_dev_lock); @@ -1484,6 +1438,7 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port = lport_priv(lport); port->lport = lport; port->priv = interface; + port->get_netdev = bnx2fc_netdev; INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ @@ -2003,7 +1958,9 @@ static void bnx2fc_ulp_init(struct cnic_dev *dev) set_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic); } - +/** + * Deperecated: Use bnx2fc_enabled() + */ static int bnx2fc_disable(struct net_device *netdev) { struct bnx2fc_interface *interface; @@ -2029,7 +1986,9 @@ static int bnx2fc_disable(struct net_device *netdev) return rc; } - +/** + * Deprecated: Use bnx2fc_enabled() + */ static int bnx2fc_enable(struct net_device *netdev) { struct bnx2fc_interface *interface; @@ -2055,17 +2014,57 @@ static int bnx2fc_enable(struct net_device *netdev) } /** - * bnx2fc_create - Create bnx2fc FCoE interface + * bnx2fc_ctlr_enabled() - Enable or disable an FCoE Controller + * @cdev: The FCoE Controller that is being enabled or disabled + * + * fcoe_sysfs will ensure that the state of 'enabled' has + * changed, so no checking is necessary here. This routine simply + * calls fcoe_enable or fcoe_disable, both of which are deprecated. + * When those routines are removed the functionality can be merged + * here. + */ +static int bnx2fc_ctlr_enabled(struct fcoe_ctlr_device *cdev) +{ + struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev); + struct fc_lport *lport = ctlr->lp; + struct net_device *netdev = bnx2fc_netdev(lport); + + switch (cdev->enabled) { + case FCOE_CTLR_ENABLED: + return bnx2fc_enable(netdev); + case FCOE_CTLR_DISABLED: + return bnx2fc_disable(netdev); + case FCOE_CTLR_UNUSED: + default: + return -ENOTSUPP; + }; +} + +enum bnx2fc_create_link_state { + BNX2FC_CREATE_LINK_DOWN, + BNX2FC_CREATE_LINK_UP, +}; + +/** + * _bnx2fc_create() - Create bnx2fc FCoE interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * @link_state: The ctlr link state on creation * - * @buffer: The name of Ethernet interface to create on - * @kp: The associated kernel param + * Called from either the libfcoe 'create' module parameter + * via fcoe_create or from fcoe_syfs's ctlr_create file. * - * Called from sysfs. + * libfcoe's 'create' module parameter is deprecated so some + * consolidation of code can be done when that interface is + * removed. * * Returns: 0 for success */ -static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) +static int _bnx2fc_create(struct net_device *netdev, + enum fip_state fip_mode, + enum bnx2fc_create_link_state link_state) { + struct fcoe_ctlr_device *cdev; struct fcoe_ctlr *ctlr; struct bnx2fc_interface *interface; struct bnx2fc_hba *hba; @@ -2160,7 +2159,15 @@ static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) /* Make this master N_port */ ctlr->lp = lport; - if (!bnx2fc_link_ok(lport)) { + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); + + if (link_state == BNX2FC_CREATE_LINK_UP) + cdev->enabled = FCOE_CTLR_ENABLED; + else + cdev->enabled = FCOE_CTLR_DISABLED; + + if (link_state == BNX2FC_CREATE_LINK_UP && + !bnx2fc_link_ok(lport)) { fcoe_ctlr_link_up(ctlr); fc_host_port_type(lport->host) = FC_PORTTYPE_NPORT; set_bit(ADAPTER_STATE_READY, &interface->hba->adapter_state); @@ -2168,7 +2175,10 @@ static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) BNX2FC_HBA_DBG(lport, "create: START DISC\n"); bnx2fc_start_disc(interface); - interface->enabled = true; + + if (link_state == BNX2FC_CREATE_LINK_UP) + interface->enabled = true; + /* * Release from kref_init in bnx2fc_interface_setup, on success * lport should be holding a reference taken in bnx2fc_if_create @@ -2194,6 +2204,37 @@ mod_err: } /** + * bnx2fc_create() - Create a bnx2fc interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * + * Called from fcoe transport + * + * Returns: 0 for success + */ +static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) +{ + return _bnx2fc_create(netdev, fip_mode, BNX2FC_CREATE_LINK_UP); +} + +/** + * bnx2fc_ctlr_alloc() - Allocate a bnx2fc interface from fcoe_sysfs + * @netdev: The net_device to be used by the allocated FCoE Controller + * + * This routine is called from fcoe_sysfs. It will start the fcoe_ctlr + * in a link_down state. The allows the user an opportunity to configure + * the FCoE Controller from sysfs before enabling the FCoE Controller. + * + * Creating in with this routine starts the FCoE Controller in Fabric + * mode. The user can change to VN2VN or another mode before enabling. + */ +static int bnx2fc_ctlr_alloc(struct net_device *netdev) +{ + return _bnx2fc_create(netdev, FIP_MODE_FABRIC, + BNX2FC_CREATE_LINK_DOWN); +} + +/** * bnx2fc_find_hba_for_cnic - maps cnic instance to bnx2fc hba instance * * @cnic: Pointer to cnic device instance @@ -2318,6 +2359,7 @@ static struct fcoe_transport bnx2fc_transport = { .name = {"bnx2fc"}, .attached = false, .list = LIST_HEAD_INIT(bnx2fc_transport.list), + .alloc = bnx2fc_ctlr_alloc, .match = bnx2fc_match, .create = bnx2fc_create, .destroy = bnx2fc_destroy, @@ -2562,13 +2604,13 @@ module_init(bnx2fc_mod_init); module_exit(bnx2fc_mod_exit); static struct fcoe_sysfs_function_template bnx2fc_fcoe_sysfs_templ = { - .get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode, - .get_fcoe_ctlr_link_fail = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_vlink_fail = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_miss_fka = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_symb_err = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_err_block = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_fcs_error = bnx2fc_ctlr_get_lesb, + .set_fcoe_ctlr_enabled = bnx2fc_ctlr_enabled, + .get_fcoe_ctlr_link_fail = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_vlink_fail = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_miss_fka = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_symb_err = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_err_block = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_fcs_error = fcoe_ctlr_get_lesb, .get_fcoe_fcf_selected = fcoe_fcf_get_selected, .get_fcoe_fcf_vlan_id = bnx2fc_fcf_get_vlan_id, @@ -2675,7 +2717,7 @@ static struct libfc_function_template bnx2fc_libfc_fcn_templ = { .elsct_send = bnx2fc_elsct_send, .fcp_abort_io = bnx2fc_abort_io, .fcp_cleanup = bnx2fc_cleanup, - .get_lesb = bnx2fc_get_lesb, + .get_lesb = fcoe_get_lesb, .rport_event_callback = bnx2fc_rport_event_handler, }; diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 865c64fa923c..fed486bfd3f4 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -3747,13 +3747,13 @@ static struct DeviceCtlBlk *device_alloc(struct AdapterCtlBlk *acb, dcb->max_command = 1; dcb->target_id = target; dcb->target_lun = lun; + dcb->dev_mode = eeprom->target[target].cfg0; #ifndef DC395x_NO_DISCONNECT dcb->identify_msg = IDENTIFY(dcb->dev_mode & NTC_DO_DISCONNECT, lun); #else dcb->identify_msg = IDENTIFY(0, lun); #endif - dcb->dev_mode = eeprom->target[target].cfg0; dcb->inquiry7 = 0; dcb->sync_mode = 0; dcb->min_nego_period = clock_period[period_index]; diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 666b7ac4475f..b5d92fc93c70 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -82,11 +82,11 @@ static int fcoe_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int fcoe_percpu_receive_thread(void *); static void fcoe_percpu_clean(struct fc_lport *); -static int fcoe_link_speed_update(struct fc_lport *); static int fcoe_link_ok(struct fc_lport *); static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *); static int fcoe_hostlist_add(const struct fc_lport *); +static void fcoe_hostlist_del(const struct fc_lport *); static int fcoe_device_notification(struct notifier_block *, ulong, void *); static void fcoe_dev_setup(void); @@ -117,6 +117,11 @@ static int fcoe_destroy(struct net_device *netdev); static int fcoe_enable(struct net_device *netdev); static int fcoe_disable(struct net_device *netdev); +/* fcoe_syfs control interface handlers */ +static int fcoe_ctlr_alloc(struct net_device *netdev); +static int fcoe_ctlr_enabled(struct fcoe_ctlr_device *cdev); + + static struct fc_seq *fcoe_elsct_send(struct fc_lport *, u32 did, struct fc_frame *, unsigned int op, @@ -126,8 +131,6 @@ static struct fc_seq *fcoe_elsct_send(struct fc_lport *, void *, u32 timeout); static void fcoe_recv_frame(struct sk_buff *skb); -static void fcoe_get_lesb(struct fc_lport *, struct fc_els_lesb *); - /* notification function for packets from net device */ static struct notifier_block fcoe_notifier = { .notifier_call = fcoe_device_notification, @@ -151,11 +154,11 @@ static int fcoe_vport_create(struct fc_vport *, bool disabled); static int fcoe_vport_disable(struct fc_vport *, bool disable); static void fcoe_set_vport_symbolic_name(struct fc_vport *); static void fcoe_set_port_id(struct fc_lport *, u32, struct fc_frame *); -static void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *); static void fcoe_fcf_get_vlan_id(struct fcoe_fcf_device *); static struct fcoe_sysfs_function_template fcoe_sysfs_templ = { - .get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode, + .set_fcoe_ctlr_mode = fcoe_ctlr_set_fip_mode, + .set_fcoe_ctlr_enabled = fcoe_ctlr_enabled, .get_fcoe_ctlr_link_fail = fcoe_ctlr_get_lesb, .get_fcoe_ctlr_vlink_fail = fcoe_ctlr_get_lesb, .get_fcoe_ctlr_miss_fka = fcoe_ctlr_get_lesb, @@ -1112,10 +1115,17 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe, port = lport_priv(lport); port->lport = lport; port->priv = fcoe; + port->get_netdev = fcoe_netdev; port->max_queue_depth = FCOE_MAX_QUEUE_DEPTH; port->min_queue_depth = FCOE_MIN_QUEUE_DEPTH; INIT_WORK(&port->destroy_work, fcoe_destroy_work); + /* + * Need to add the lport to the hostlist + * so we catch NETDEV_CHANGE events. + */ + fcoe_hostlist_add(lport); + /* configure a fc_lport including the exchange manager */ rc = fcoe_lport_config(lport); if (rc) { @@ -1187,6 +1197,7 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe, out_lp_destroy: fc_exch_mgr_free(lport); out_host_put: + fcoe_hostlist_del(lport); scsi_host_put(lport->host); out: return ERR_PTR(rc); @@ -1964,6 +1975,7 @@ static int fcoe_dcb_app_notification(struct notifier_block *notifier, static int fcoe_device_notification(struct notifier_block *notifier, ulong event, void *ptr) { + struct fcoe_ctlr_device *cdev; struct fc_lport *lport = NULL; struct net_device *netdev = ptr; struct fcoe_ctlr *ctlr; @@ -2020,13 +2032,29 @@ static int fcoe_device_notification(struct notifier_block *notifier, fcoe_link_speed_update(lport); - if (link_possible && !fcoe_link_ok(lport)) - fcoe_ctlr_link_up(ctlr); - else if (fcoe_ctlr_link_down(ctlr)) { - stats = per_cpu_ptr(lport->stats, get_cpu()); - stats->LinkFailureCount++; - put_cpu(); - fcoe_clean_pending_queue(lport); + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); + + if (link_possible && !fcoe_link_ok(lport)) { + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link up while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + fcoe_ctlr_link_up(ctlr); + }; + } else if (fcoe_ctlr_link_down(ctlr)) { + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link down while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + stats = per_cpu_ptr(lport->stats, get_cpu()); + stats->LinkFailureCount++; + put_cpu(); + fcoe_clean_pending_queue(lport); + }; } out: return rc; @@ -2039,6 +2067,8 @@ out: * Called from fcoe transport. * * Returns: 0 for success + * + * Deprecated: use fcoe_ctlr_enabled() */ static int fcoe_disable(struct net_device *netdev) { @@ -2098,6 +2128,33 @@ out: } /** + * fcoe_ctlr_enabled() - Enable or disable an FCoE Controller + * @cdev: The FCoE Controller that is being enabled or disabled + * + * fcoe_sysfs will ensure that the state of 'enabled' has + * changed, so no checking is necessary here. This routine simply + * calls fcoe_enable or fcoe_disable, both of which are deprecated. + * When those routines are removed the functionality can be merged + * here. + */ +static int fcoe_ctlr_enabled(struct fcoe_ctlr_device *cdev) +{ + struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev); + struct fc_lport *lport = ctlr->lp; + struct net_device *netdev = fcoe_netdev(lport); + + switch (cdev->enabled) { + case FCOE_CTLR_ENABLED: + return fcoe_enable(netdev); + case FCOE_CTLR_DISABLED: + return fcoe_disable(netdev); + case FCOE_CTLR_UNUSED: + default: + return -ENOTSUPP; + }; +} + +/** * fcoe_destroy() - Destroy a FCoE interface * @netdev : The net_device object the Ethernet interface to create on * @@ -2139,8 +2196,31 @@ static void fcoe_destroy_work(struct work_struct *work) { struct fcoe_port *port; struct fcoe_interface *fcoe; + struct Scsi_Host *shost; + struct fc_host_attrs *fc_host; + unsigned long flags; + struct fc_vport *vport; + struct fc_vport *next_vport; port = container_of(work, struct fcoe_port, destroy_work); + shost = port->lport->host; + fc_host = shost_to_fc_host(shost); + + /* Loop through all the vports and mark them for deletion */ + spin_lock_irqsave(shost->host_lock, flags); + list_for_each_entry_safe(vport, next_vport, &fc_host->vports, peers) { + if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) { + continue; + } else { + vport->flags |= FC_VPORT_DELETING; + queue_work(fc_host_work_q(shost), + &vport->vport_delete_work); + } + } + spin_unlock_irqrestore(shost->host_lock, flags); + + flush_workqueue(fc_host_work_q(shost)); + mutex_lock(&fcoe_config_mutex); fcoe = port->priv; @@ -2204,16 +2284,26 @@ static void fcoe_dcb_create(struct fcoe_interface *fcoe) #endif } +enum fcoe_create_link_state { + FCOE_CREATE_LINK_DOWN, + FCOE_CREATE_LINK_UP, +}; + /** - * fcoe_create() - Create a fcoe interface - * @netdev : The net_device object the Ethernet interface to create on - * @fip_mode: The FIP mode for this creation + * _fcoe_create() - (internal) Create a fcoe interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * @link_state: The ctlr link state on creation * - * Called from fcoe transport + * Called from either the libfcoe 'create' module parameter + * via fcoe_create or from fcoe_syfs's ctlr_create file. * - * Returns: 0 for success + * libfcoe's 'create' module parameter is deprecated so some + * consolidation of code can be done when that interface is + * removed. */ -static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) +static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, + enum fcoe_create_link_state link_state) { int rc = 0; struct fcoe_ctlr_device *ctlr_dev; @@ -2254,13 +2344,29 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) /* setup DCB priority attributes. */ fcoe_dcb_create(fcoe); - /* add to lports list */ - fcoe_hostlist_add(lport); - /* start FIP Discovery and FLOGI */ lport->boot_time = jiffies; fc_fabric_login(lport); - if (!fcoe_link_ok(lport)) { + + /* + * If the fcoe_ctlr_device is to be set to DISABLED + * it must be done after the lport is added to the + * hostlist, but before the rtnl_lock is released. + * This is because the rtnl_lock protects the + * hostlist that fcoe_device_notification uses. If + * the FCoE Controller is intended to be created + * DISABLED then 'enabled' needs to be considered + * handling link events. 'enabled' must be set + * before the lport can be found in the hostlist + * when a link up event is received. + */ + if (link_state == FCOE_CREATE_LINK_UP) + ctlr_dev->enabled = FCOE_CTLR_ENABLED; + else + ctlr_dev->enabled = FCOE_CTLR_DISABLED; + + if (link_state == FCOE_CREATE_LINK_UP && + !fcoe_link_ok(lport)) { rtnl_unlock(); fcoe_ctlr_link_up(ctlr); mutex_unlock(&fcoe_config_mutex); @@ -2275,37 +2381,34 @@ out_nortnl: } /** - * fcoe_link_speed_update() - Update the supported and actual link speeds - * @lport: The local port to update speeds for + * fcoe_create() - Create a fcoe interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * + * Called from fcoe transport + * + * Returns: 0 for success + */ +static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) +{ + return _fcoe_create(netdev, fip_mode, FCOE_CREATE_LINK_UP); +} + +/** + * fcoe_ctlr_alloc() - Allocate a fcoe interface from fcoe_sysfs + * @netdev: The net_device to be used by the allocated FCoE Controller * - * Returns: 0 if the ethtool query was successful - * -1 if the ethtool query failed + * This routine is called from fcoe_sysfs. It will start the fcoe_ctlr + * in a link_down state. The allows the user an opportunity to configure + * the FCoE Controller from sysfs before enabling the FCoE Controller. + * + * Creating in with this routine starts the FCoE Controller in Fabric + * mode. The user can change to VN2VN or another mode before enabling. */ -static int fcoe_link_speed_update(struct fc_lport *lport) +static int fcoe_ctlr_alloc(struct net_device *netdev) { - struct net_device *netdev = fcoe_netdev(lport); - struct ethtool_cmd ecmd; - - if (!__ethtool_get_settings(netdev, &ecmd)) { - lport->link_supported_speeds &= - ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); - if (ecmd.supported & (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full)) - lport->link_supported_speeds |= FC_PORTSPEED_1GBIT; - if (ecmd.supported & SUPPORTED_10000baseT_Full) - lport->link_supported_speeds |= - FC_PORTSPEED_10GBIT; - switch (ethtool_cmd_speed(&ecmd)) { - case SPEED_1000: - lport->link_speed = FC_PORTSPEED_1GBIT; - break; - case SPEED_10000: - lport->link_speed = FC_PORTSPEED_10GBIT; - break; - } - return 0; - } - return -1; + return _fcoe_create(netdev, FIP_MODE_FABRIC, + FCOE_CREATE_LINK_DOWN); } /** @@ -2375,10 +2478,13 @@ static int fcoe_reset(struct Scsi_Host *shost) struct fcoe_port *port = lport_priv(lport); struct fcoe_interface *fcoe = port->priv; struct fcoe_ctlr *ctlr = fcoe_to_ctlr(fcoe); + struct fcoe_ctlr_device *cdev = fcoe_ctlr_to_ctlr_dev(ctlr); fcoe_ctlr_link_down(ctlr); fcoe_clean_pending_queue(ctlr->lp); - if (!fcoe_link_ok(ctlr->lp)) + + if (cdev->enabled != FCOE_CTLR_DISABLED && + !fcoe_link_ok(ctlr->lp)) fcoe_ctlr_link_up(ctlr); return 0; } @@ -2445,12 +2551,31 @@ static int fcoe_hostlist_add(const struct fc_lport *lport) return 0; } +/** + * fcoe_hostlist_del() - Remove the FCoE interface identified by a local + * port to the hostlist + * @lport: The local port that identifies the FCoE interface to be added + * + * Locking: must be called with the RTNL mutex held + * + */ +static void fcoe_hostlist_del(const struct fc_lport *lport) +{ + struct fcoe_interface *fcoe; + struct fcoe_port *port; + + port = lport_priv(lport); + fcoe = port->priv; + list_del(&fcoe->list); + return; +} static struct fcoe_transport fcoe_sw_transport = { .name = {FCOE_TRANSPORT_DEFAULT}, .attached = false, .list = LIST_HEAD_INIT(fcoe_sw_transport.list), .match = fcoe_match, + .alloc = fcoe_ctlr_alloc, .create = fcoe_create, .destroy = fcoe_destroy, .enable = fcoe_enable, @@ -2534,9 +2659,9 @@ static void __exit fcoe_exit(void) /* releases the associated fcoe hosts */ rtnl_lock(); list_for_each_entry_safe(fcoe, tmp, &fcoe_hostlist, list) { - list_del(&fcoe->list); ctlr = fcoe_to_ctlr(fcoe); port = lport_priv(ctlr->lp); + fcoe_hostlist_del(port->lport); queue_work(fcoe_wq, &port->destroy_work); } rtnl_unlock(); @@ -2776,43 +2901,6 @@ static void fcoe_set_vport_symbolic_name(struct fc_vport *vport) NULL, NULL, 3 * lport->r_a_tov); } -/** - * fcoe_get_lesb() - Fill the FCoE Link Error Status Block - * @lport: the local port - * @fc_lesb: the link error status block - */ -static void fcoe_get_lesb(struct fc_lport *lport, - struct fc_els_lesb *fc_lesb) -{ - struct net_device *netdev = fcoe_netdev(lport); - - __fcoe_get_lesb(lport, fc_lesb, netdev); -} - -static void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev) -{ - struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev); - struct net_device *netdev = fcoe_netdev(fip->lp); - struct fcoe_fc_els_lesb *fcoe_lesb; - struct fc_els_lesb fc_lesb; - - __fcoe_get_lesb(fip->lp, &fc_lesb, netdev); - fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb); - - ctlr_dev->lesb.lesb_link_fail = - ntohl(fcoe_lesb->lesb_link_fail); - ctlr_dev->lesb.lesb_vlink_fail = - ntohl(fcoe_lesb->lesb_vlink_fail); - ctlr_dev->lesb.lesb_miss_fka = - ntohl(fcoe_lesb->lesb_miss_fka); - ctlr_dev->lesb.lesb_symb_err = - ntohl(fcoe_lesb->lesb_symb_err); - ctlr_dev->lesb.lesb_err_block = - ntohl(fcoe_lesb->lesb_err_block); - ctlr_dev->lesb.lesb_fcs_error = - ntohl(fcoe_lesb->lesb_fcs_error); -} - static void fcoe_fcf_get_vlan_id(struct fcoe_fcf_device *fcf_dev) { struct fcoe_ctlr_device *ctlr_dev = diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h index b42dc32cb5eb..2b53672bf932 100644 --- a/drivers/scsi/fcoe/fcoe.h +++ b/drivers/scsi/fcoe/fcoe.h @@ -55,12 +55,12 @@ do { \ #define FCOE_DBG(fmt, args...) \ FCOE_CHECK_LOGGING(FCOE_LOGGING, \ - printk(KERN_INFO "fcoe: " fmt, ##args);) + pr_info("fcoe: " fmt, ##args);) #define FCOE_NETDEV_DBG(netdev, fmt, args...) \ FCOE_CHECK_LOGGING(FCOE_NETDEV_LOGGING, \ - printk(KERN_INFO "fcoe: %s: " fmt, \ - netdev->name, ##args);) + pr_info("fcoe: %s: " fmt, \ + netdev->name, ##args);) /** * struct fcoe_interface - A FCoE interface diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 4a909d7cfde1..08c3bc398da2 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -1291,8 +1291,16 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, LIBFCOE_FIP_DBG(fip, "Clear Virtual Link received\n"); - if (!fcf || !lport->port_id) + if (!fcf || !lport->port_id) { + /* + * We are yet to select best FCF, but we got CVL in the + * meantime. reset the ctlr and let it rediscover the FCF + */ + mutex_lock(&fip->ctlr_mutex); + fcoe_ctlr_reset(fip); + mutex_unlock(&fip->ctlr_mutex); return; + } /* * mask of required descriptors. Validating each one clears its bit. @@ -1551,15 +1559,6 @@ static struct fcoe_fcf *fcoe_ctlr_select(struct fcoe_ctlr *fip) fcf->fabric_name, fcf->vfid, fcf->fcf_mac, fcf->fc_map, fcoe_ctlr_mtu_valid(fcf), fcf->flogi_sent, fcf->pri); - if (fcf->fabric_name != first->fabric_name || - fcf->vfid != first->vfid || - fcf->fc_map != first->fc_map) { - LIBFCOE_FIP_DBG(fip, "Conflicting fabric, VFID, " - "or FC-MAP\n"); - return NULL; - } - if (fcf->flogi_sent) - continue; if (!fcoe_ctlr_fcf_usable(fcf)) { LIBFCOE_FIP_DBG(fip, "FCF for fab %16.16llx " "map %x %svalid %savailable\n", @@ -1569,6 +1568,15 @@ static struct fcoe_fcf *fcoe_ctlr_select(struct fcoe_ctlr *fip) "" : "un"); continue; } + if (fcf->fabric_name != first->fabric_name || + fcf->vfid != first->vfid || + fcf->fc_map != first->fc_map) { + LIBFCOE_FIP_DBG(fip, "Conflicting fabric, VFID, " + "or FC-MAP\n"); + return NULL; + } + if (fcf->flogi_sent) + continue; if (!best || fcf->pri < best->pri || best->flogi_sent) best = fcf; } @@ -2864,22 +2872,21 @@ void fcoe_fcf_get_selected(struct fcoe_fcf_device *fcf_dev) } EXPORT_SYMBOL(fcoe_fcf_get_selected); -void fcoe_ctlr_get_fip_mode(struct fcoe_ctlr_device *ctlr_dev) +void fcoe_ctlr_set_fip_mode(struct fcoe_ctlr_device *ctlr_dev) { struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(ctlr_dev); mutex_lock(&ctlr->ctlr_mutex); - switch (ctlr->mode) { - case FIP_MODE_FABRIC: - ctlr_dev->mode = FIP_CONN_TYPE_FABRIC; - break; - case FIP_MODE_VN2VN: - ctlr_dev->mode = FIP_CONN_TYPE_VN2VN; + switch (ctlr_dev->mode) { + case FIP_CONN_TYPE_VN2VN: + ctlr->mode = FIP_MODE_VN2VN; break; + case FIP_CONN_TYPE_FABRIC: default: - ctlr_dev->mode = FIP_CONN_TYPE_UNKNOWN; + ctlr->mode = FIP_MODE_FABRIC; break; } + mutex_unlock(&ctlr->ctlr_mutex); } -EXPORT_SYMBOL(fcoe_ctlr_get_fip_mode); +EXPORT_SYMBOL(fcoe_ctlr_set_fip_mode); diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c index 5e751689a089..8c05ae017f5b 100644 --- a/drivers/scsi/fcoe/fcoe_sysfs.c +++ b/drivers/scsi/fcoe/fcoe_sysfs.c @@ -21,8 +21,17 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/etherdevice.h> +#include <linux/ctype.h> #include <scsi/fcoe_sysfs.h> +#include <scsi/libfcoe.h> + +/* + * OK to include local libfcoe.h for debug_logging, but cannot include + * <scsi/libfcoe.h> otherwise non-netdev based fcoe solutions would have + * have to include more than fcoe_sysfs.h. + */ +#include "libfcoe.h" static atomic_t ctlr_num; static atomic_t fcf_num; @@ -71,6 +80,8 @@ MODULE_PARM_DESC(fcf_dev_loss_tmo, ((x)->lesb.lesb_err_block) #define fcoe_ctlr_fcs_error(x) \ ((x)->lesb.lesb_fcs_error) +#define fcoe_ctlr_enabled(x) \ + ((x)->enabled) #define fcoe_fcf_state(x) \ ((x)->state) #define fcoe_fcf_fabric_name(x) \ @@ -210,25 +221,34 @@ static ssize_t show_fcoe_fcf_device_##field(struct device *dev, \ #define fcoe_enum_name_search(title, table_type, table) \ static const char *get_fcoe_##title##_name(enum table_type table_key) \ { \ - int i; \ - char *name = NULL; \ - \ - for (i = 0; i < ARRAY_SIZE(table); i++) { \ - if (table[i].value == table_key) { \ - name = table[i].name; \ - break; \ - } \ - } \ - return name; \ + if (table_key < 0 || table_key >= ARRAY_SIZE(table)) \ + return NULL; \ + return table[table_key]; \ +} + +static char *fip_conn_type_names[] = { + [ FIP_CONN_TYPE_UNKNOWN ] = "Unknown", + [ FIP_CONN_TYPE_FABRIC ] = "Fabric", + [ FIP_CONN_TYPE_VN2VN ] = "VN2VN", +}; +fcoe_enum_name_search(ctlr_mode, fip_conn_type, fip_conn_type_names) + +static enum fip_conn_type fcoe_parse_mode(const char *buf) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(fip_conn_type_names); i++) { + if (strcasecmp(buf, fip_conn_type_names[i]) == 0) + return i; + } + + return FIP_CONN_TYPE_UNKNOWN; } -static struct { - enum fcf_state value; - char *name; -} fcf_state_names[] = { - { FCOE_FCF_STATE_UNKNOWN, "Unknown" }, - { FCOE_FCF_STATE_DISCONNECTED, "Disconnected" }, - { FCOE_FCF_STATE_CONNECTED, "Connected" }, +static char *fcf_state_names[] = { + [ FCOE_FCF_STATE_UNKNOWN ] = "Unknown", + [ FCOE_FCF_STATE_DISCONNECTED ] = "Disconnected", + [ FCOE_FCF_STATE_CONNECTED ] = "Connected", }; fcoe_enum_name_search(fcf_state, fcf_state, fcf_state_names) #define FCOE_FCF_STATE_MAX_NAMELEN 50 @@ -246,17 +266,7 @@ static ssize_t show_fcf_state(struct device *dev, } static FCOE_DEVICE_ATTR(fcf, state, S_IRUGO, show_fcf_state, NULL); -static struct { - enum fip_conn_type value; - char *name; -} fip_conn_type_names[] = { - { FIP_CONN_TYPE_UNKNOWN, "Unknown" }, - { FIP_CONN_TYPE_FABRIC, "Fabric" }, - { FIP_CONN_TYPE_VN2VN, "VN2VN" }, -}; -fcoe_enum_name_search(ctlr_mode, fip_conn_type, fip_conn_type_names) -#define FCOE_CTLR_MODE_MAX_NAMELEN 50 - +#define FCOE_MAX_MODENAME_LEN 20 static ssize_t show_ctlr_mode(struct device *dev, struct device_attribute *attr, char *buf) @@ -264,17 +274,116 @@ static ssize_t show_ctlr_mode(struct device *dev, struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); const char *name; - if (ctlr->f->get_fcoe_ctlr_mode) - ctlr->f->get_fcoe_ctlr_mode(ctlr); - name = get_fcoe_ctlr_mode_name(ctlr->mode); if (!name) return -EINVAL; - return snprintf(buf, FCOE_CTLR_MODE_MAX_NAMELEN, + return snprintf(buf, FCOE_MAX_MODENAME_LEN, + "%s\n", name); +} + +static ssize_t store_ctlr_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + char mode[FCOE_MAX_MODENAME_LEN + 1]; + + if (count > FCOE_MAX_MODENAME_LEN) + return -EINVAL; + + strncpy(mode, buf, count); + + if (mode[count - 1] == '\n') + mode[count - 1] = '\0'; + else + mode[count] = '\0'; + + switch (ctlr->enabled) { + case FCOE_CTLR_ENABLED: + LIBFCOE_SYSFS_DBG(ctlr, "Cannot change mode when enabled."); + return -EBUSY; + case FCOE_CTLR_DISABLED: + if (!ctlr->f->set_fcoe_ctlr_mode) { + LIBFCOE_SYSFS_DBG(ctlr, + "Mode change not supported by LLD."); + return -ENOTSUPP; + } + + ctlr->mode = fcoe_parse_mode(mode); + if (ctlr->mode == FIP_CONN_TYPE_UNKNOWN) { + LIBFCOE_SYSFS_DBG(ctlr, + "Unknown mode %s provided.", buf); + return -EINVAL; + } + + ctlr->f->set_fcoe_ctlr_mode(ctlr); + LIBFCOE_SYSFS_DBG(ctlr, "Mode changed to %s.", buf); + + return count; + case FCOE_CTLR_UNUSED: + default: + LIBFCOE_SYSFS_DBG(ctlr, "Mode change not supported."); + return -ENOTSUPP; + }; +} + +static FCOE_DEVICE_ATTR(ctlr, mode, S_IRUGO | S_IWUSR, + show_ctlr_mode, store_ctlr_mode); + +static ssize_t store_ctlr_enabled(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + int rc; + + switch (ctlr->enabled) { + case FCOE_CTLR_ENABLED: + if (*buf == '1') + return count; + ctlr->enabled = FCOE_CTLR_DISABLED; + break; + case FCOE_CTLR_DISABLED: + if (*buf == '0') + return count; + ctlr->enabled = FCOE_CTLR_ENABLED; + break; + case FCOE_CTLR_UNUSED: + return -ENOTSUPP; + }; + + rc = ctlr->f->set_fcoe_ctlr_enabled(ctlr); + if (rc) + return rc; + + return count; +} + +static char *ctlr_enabled_state_names[] = { + [ FCOE_CTLR_ENABLED ] = "1", + [ FCOE_CTLR_DISABLED ] = "0", +}; +fcoe_enum_name_search(ctlr_enabled_state, ctlr_enabled_state, + ctlr_enabled_state_names) +#define FCOE_CTLR_ENABLED_MAX_NAMELEN 50 + +static ssize_t show_ctlr_enabled_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + const char *name; + + name = get_fcoe_ctlr_enabled_state_name(ctlr->enabled); + if (!name) + return -EINVAL; + return snprintf(buf, FCOE_CTLR_ENABLED_MAX_NAMELEN, "%s\n", name); } -static FCOE_DEVICE_ATTR(ctlr, mode, S_IRUGO, - show_ctlr_mode, NULL); + +static FCOE_DEVICE_ATTR(ctlr, enabled, S_IRUGO | S_IWUSR, + show_ctlr_enabled_state, + store_ctlr_enabled); static ssize_t store_private_fcoe_ctlr_fcf_dev_loss_tmo(struct device *dev, @@ -359,6 +468,7 @@ static struct attribute_group fcoe_ctlr_lesb_attr_group = { static struct attribute *fcoe_ctlr_attrs[] = { &device_attr_fcoe_ctlr_fcf_dev_loss_tmo.attr, + &device_attr_fcoe_ctlr_enabled.attr, &device_attr_fcoe_ctlr_mode.attr, NULL, }; @@ -443,9 +553,16 @@ struct device_type fcoe_fcf_device_type = { .release = fcoe_fcf_device_release, }; +struct bus_attribute fcoe_bus_attr_group[] = { + __ATTR(ctlr_create, S_IWUSR, NULL, fcoe_ctlr_create_store), + __ATTR(ctlr_destroy, S_IWUSR, NULL, fcoe_ctlr_destroy_store), + __ATTR_NULL +}; + struct bus_type fcoe_bus_type = { .name = "fcoe", .match = &fcoe_bus_match, + .bus_attrs = fcoe_bus_attr_group, }; /** @@ -566,6 +683,7 @@ struct fcoe_ctlr_device *fcoe_ctlr_device_add(struct device *parent, ctlr->id = atomic_inc_return(&ctlr_num) - 1; ctlr->f = f; + ctlr->mode = FIP_CONN_TYPE_FABRIC; INIT_LIST_HEAD(&ctlr->fcfs); mutex_init(&ctlr->lock); ctlr->dev.parent = parent; diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c index ac76d8a042d7..f3a5a53e8631 100644 --- a/drivers/scsi/fcoe/fcoe_transport.c +++ b/drivers/scsi/fcoe/fcoe_transport.c @@ -83,6 +83,50 @@ static struct notifier_block libfcoe_notifier = { .notifier_call = libfcoe_device_notification, }; +/** + * fcoe_link_speed_update() - Update the supported and actual link speeds + * @lport: The local port to update speeds for + * + * Returns: 0 if the ethtool query was successful + * -1 if the ethtool query failed + */ +int fcoe_link_speed_update(struct fc_lport *lport) +{ + struct net_device *netdev = fcoe_get_netdev(lport); + struct ethtool_cmd ecmd; + + if (!__ethtool_get_settings(netdev, &ecmd)) { + lport->link_supported_speeds &= + ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); + if (ecmd.supported & (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full)) + lport->link_supported_speeds |= FC_PORTSPEED_1GBIT; + if (ecmd.supported & SUPPORTED_10000baseT_Full) + lport->link_supported_speeds |= + FC_PORTSPEED_10GBIT; + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_1000: + lport->link_speed = FC_PORTSPEED_1GBIT; + break; + case SPEED_10000: + lport->link_speed = FC_PORTSPEED_10GBIT; + break; + } + return 0; + } + return -1; +} +EXPORT_SYMBOL_GPL(fcoe_link_speed_update); + +/** + * __fcoe_get_lesb() - Get the Link Error Status Block (LESB) for a given lport + * @lport: The local port to update speeds for + * @fc_lesb: Pointer to the LESB to be filled up + * @netdev: Pointer to the netdev that is associated with the lport + * + * Note, the Link Error Status Block (LESB) for FCoE is defined in FC-BB-6 + * Clause 7.11 in v1.04. + */ void __fcoe_get_lesb(struct fc_lport *lport, struct fc_els_lesb *fc_lesb, struct net_device *netdev) @@ -112,6 +156,51 @@ void __fcoe_get_lesb(struct fc_lport *lport, } EXPORT_SYMBOL_GPL(__fcoe_get_lesb); +/** + * fcoe_get_lesb() - Fill the FCoE Link Error Status Block + * @lport: the local port + * @fc_lesb: the link error status block + */ +void fcoe_get_lesb(struct fc_lport *lport, + struct fc_els_lesb *fc_lesb) +{ + struct net_device *netdev = fcoe_get_netdev(lport); + + __fcoe_get_lesb(lport, fc_lesb, netdev); +} +EXPORT_SYMBOL_GPL(fcoe_get_lesb); + +/** + * fcoe_ctlr_get_lesb() - Get the Link Error Status Block (LESB) for a given + * fcoe controller device + * @ctlr_dev: The given fcoe controller device + * + */ +void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev) +{ + struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev); + struct net_device *netdev = fcoe_get_netdev(fip->lp); + struct fcoe_fc_els_lesb *fcoe_lesb; + struct fc_els_lesb fc_lesb; + + __fcoe_get_lesb(fip->lp, &fc_lesb, netdev); + fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb); + + ctlr_dev->lesb.lesb_link_fail = + ntohl(fcoe_lesb->lesb_link_fail); + ctlr_dev->lesb.lesb_vlink_fail = + ntohl(fcoe_lesb->lesb_vlink_fail); + ctlr_dev->lesb.lesb_miss_fka = + ntohl(fcoe_lesb->lesb_miss_fka); + ctlr_dev->lesb.lesb_symb_err = + ntohl(fcoe_lesb->lesb_symb_err); + ctlr_dev->lesb.lesb_err_block = + ntohl(fcoe_lesb->lesb_err_block); + ctlr_dev->lesb.lesb_fcs_error = + ntohl(fcoe_lesb->lesb_fcs_error); +} +EXPORT_SYMBOL_GPL(fcoe_ctlr_get_lesb); + void fcoe_wwn_to_str(u64 wwn, char *buf, int len) { u8 wwpn[8]; @@ -627,6 +716,110 @@ static int libfcoe_device_notification(struct notifier_block *notifier, return NOTIFY_OK; } +ssize_t fcoe_ctlr_create_store(struct bus_type *bus, + const char *buf, size_t count) +{ + struct net_device *netdev = NULL; + struct fcoe_transport *ft = NULL; + struct fcoe_ctlr_device *ctlr_dev = NULL; + int rc = 0; + int err; + + mutex_lock(&ft_mutex); + + netdev = fcoe_if_to_netdev(buf); + if (!netdev) { + LIBFCOE_TRANSPORT_DBG("Invalid device %s.\n", buf); + rc = -ENODEV; + goto out_nodev; + } + + ft = fcoe_netdev_map_lookup(netdev); + if (ft) { + LIBFCOE_TRANSPORT_DBG("transport %s already has existing " + "FCoE instance on %s.\n", + ft->name, netdev->name); + rc = -EEXIST; + goto out_putdev; + } + + ft = fcoe_transport_lookup(netdev); + if (!ft) { + LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n", + netdev->name); + rc = -ENODEV; + goto out_putdev; + } + + /* pass to transport create */ + err = ft->alloc ? ft->alloc(netdev) : -ENODEV; + if (err) { + fcoe_del_netdev_mapping(netdev); + rc = -ENOMEM; + goto out_putdev; + } + + err = fcoe_add_netdev_mapping(netdev, ft); + if (err) { + LIBFCOE_TRANSPORT_DBG("failed to add new netdev mapping " + "for FCoE transport %s for %s.\n", + ft->name, netdev->name); + rc = -ENODEV; + goto out_putdev; + } + + LIBFCOE_TRANSPORT_DBG("transport %s %s to create fcoe on %s.\n", + ft->name, (ctlr_dev) ? "succeeded" : "failed", + netdev->name); + +out_putdev: + dev_put(netdev); +out_nodev: + mutex_unlock(&ft_mutex); + if (rc) + return rc; + return count; +} + +ssize_t fcoe_ctlr_destroy_store(struct bus_type *bus, + const char *buf, size_t count) +{ + int rc = -ENODEV; + struct net_device *netdev = NULL; + struct fcoe_transport *ft = NULL; + + mutex_lock(&ft_mutex); + + netdev = fcoe_if_to_netdev(buf); + if (!netdev) { + LIBFCOE_TRANSPORT_DBG("invalid device %s.\n", buf); + goto out_nodev; + } + + ft = fcoe_netdev_map_lookup(netdev); + if (!ft) { + LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n", + netdev->name); + goto out_putdev; + } + + /* pass to transport destroy */ + rc = ft->destroy(netdev); + if (rc) + goto out_putdev; + + fcoe_del_netdev_mapping(netdev); + LIBFCOE_TRANSPORT_DBG("transport %s %s to destroy fcoe on %s.\n", + ft->name, (rc) ? "failed" : "succeeded", + netdev->name); + rc = count; /* required for successful return */ +out_putdev: + dev_put(netdev); +out_nodev: + mutex_unlock(&ft_mutex); + return rc; +} +EXPORT_SYMBOL(fcoe_ctlr_destroy_store); /** * fcoe_transport_create() - Create a fcoe interface @@ -769,11 +962,7 @@ out_putdev: dev_put(netdev); out_nodev: mutex_unlock(&ft_mutex); - - if (rc == -ERESTARTSYS) - return restart_syscall(); - else - return rc; + return rc; } /** diff --git a/drivers/scsi/fcoe/libfcoe.h b/drivers/scsi/fcoe/libfcoe.h index 6af5fc3a17d8..d3bb16d11401 100644 --- a/drivers/scsi/fcoe/libfcoe.h +++ b/drivers/scsi/fcoe/libfcoe.h @@ -2,9 +2,10 @@ #define _FCOE_LIBFCOE_H_ extern unsigned int libfcoe_debug_logging; -#define LIBFCOE_LOGGING 0x01 /* General logging, not categorized */ -#define LIBFCOE_FIP_LOGGING 0x02 /* FIP logging */ -#define LIBFCOE_TRANSPORT_LOGGING 0x04 /* FCoE transport logging */ +#define LIBFCOE_LOGGING 0x01 /* General logging, not categorized */ +#define LIBFCOE_FIP_LOGGING 0x02 /* FIP logging */ +#define LIBFCOE_TRANSPORT_LOGGING 0x04 /* FCoE transport logging */ +#define LIBFCOE_SYSFS_LOGGING 0x08 /* fcoe_sysfs logging */ #define LIBFCOE_CHECK_LOGGING(LEVEL, CMD) \ do { \ @@ -16,16 +17,19 @@ do { \ #define LIBFCOE_DBG(fmt, args...) \ LIBFCOE_CHECK_LOGGING(LIBFCOE_LOGGING, \ - printk(KERN_INFO "libfcoe: " fmt, ##args);) + pr_info("libfcoe: " fmt, ##args);) #define LIBFCOE_FIP_DBG(fip, fmt, args...) \ LIBFCOE_CHECK_LOGGING(LIBFCOE_FIP_LOGGING, \ - printk(KERN_INFO "host%d: fip: " fmt, \ - (fip)->lp->host->host_no, ##args);) + pr_info("host%d: fip: " fmt, \ + (fip)->lp->host->host_no, ##args);) #define LIBFCOE_TRANSPORT_DBG(fmt, args...) \ LIBFCOE_CHECK_LOGGING(LIBFCOE_TRANSPORT_LOGGING, \ - printk(KERN_INFO "%s: " fmt, \ - __func__, ##args);) + pr_info("%s: " fmt, __func__, ##args);) + +#define LIBFCOE_SYSFS_DBG(cdev, fmt, args...) \ + LIBFCOE_CHECK_LOGGING(LIBFCOE_SYSFS_LOGGING, \ + pr_info("ctlr_%d: " fmt, cdev->id, ##args);) #endif /* _FCOE_LIBFCOE_H_ */ diff --git a/drivers/scsi/fnic/Makefile b/drivers/scsi/fnic/Makefile index 37c3440bc17c..383598fadf04 100644 --- a/drivers/scsi/fnic/Makefile +++ b/drivers/scsi/fnic/Makefile @@ -7,6 +7,8 @@ fnic-y := \ fnic_res.o \ fnic_fcs.o \ fnic_scsi.o \ + fnic_trace.o \ + fnic_debugfs.o \ vnic_cq.o \ vnic_dev.o \ vnic_intr.o \ diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 95a5ba29320d..98436c363035 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -26,6 +26,7 @@ #include <scsi/libfcoe.h> #include "fnic_io.h" #include "fnic_res.h" +#include "fnic_trace.h" #include "vnic_dev.h" #include "vnic_wq.h" #include "vnic_rq.h" @@ -56,6 +57,34 @@ #define FNIC_NO_TAG -1 /* + * Command flags to identify the type of command and for other future + * use. + */ +#define FNIC_NO_FLAGS 0 +#define FNIC_IO_INITIALIZED BIT(0) +#define FNIC_IO_ISSUED BIT(1) +#define FNIC_IO_DONE BIT(2) +#define FNIC_IO_REQ_NULL BIT(3) +#define FNIC_IO_ABTS_PENDING BIT(4) +#define FNIC_IO_ABORTED BIT(5) +#define FNIC_IO_ABTS_ISSUED BIT(6) +#define FNIC_IO_TERM_ISSUED BIT(7) +#define FNIC_IO_INTERNAL_TERM_ISSUED BIT(8) +#define FNIC_IO_ABT_TERM_DONE BIT(9) +#define FNIC_IO_ABT_TERM_REQ_NULL BIT(10) +#define FNIC_IO_ABT_TERM_TIMED_OUT BIT(11) +#define FNIC_DEVICE_RESET BIT(12) /* Device reset request */ +#define FNIC_DEV_RST_ISSUED BIT(13) +#define FNIC_DEV_RST_TIMED_OUT BIT(14) +#define FNIC_DEV_RST_ABTS_ISSUED BIT(15) +#define FNIC_DEV_RST_TERM_ISSUED BIT(16) +#define FNIC_DEV_RST_DONE BIT(17) +#define FNIC_DEV_RST_REQ_NULL BIT(18) +#define FNIC_DEV_RST_ABTS_DONE BIT(19) +#define FNIC_DEV_RST_TERM_DONE BIT(20) +#define FNIC_DEV_RST_ABTS_PENDING BIT(21) + +/* * Usage of the scsi_cmnd scratchpad. * These fields are locked by the hashed io_req_lock. */ @@ -64,6 +93,7 @@ #define CMD_ABTS_STATUS(Cmnd) ((Cmnd)->SCp.Message) #define CMD_LR_STATUS(Cmnd) ((Cmnd)->SCp.have_data_in) #define CMD_TAG(Cmnd) ((Cmnd)->SCp.sent_command) +#define CMD_FLAGS(Cmnd) ((Cmnd)->SCp.Status) #define FCPIO_INVALID_CODE 0x100 /* hdr_status value unused by firmware */ @@ -71,9 +101,28 @@ #define FNIC_HOST_RESET_TIMEOUT 10000 /* mSec */ #define FNIC_RMDEVICE_TIMEOUT 1000 /* mSec */ #define FNIC_HOST_RESET_SETTLE_TIME 30 /* Sec */ +#define FNIC_ABT_TERM_DELAY_TIMEOUT 500 /* mSec */ #define FNIC_MAX_FCP_TARGET 256 +/** + * state_flags to identify host state along along with fnic's state + **/ +#define __FNIC_FLAGS_FWRESET BIT(0) /* fwreset in progress */ +#define __FNIC_FLAGS_BLOCK_IO BIT(1) /* IOs are blocked */ + +#define FNIC_FLAGS_NONE (0) +#define FNIC_FLAGS_FWRESET (__FNIC_FLAGS_FWRESET | \ + __FNIC_FLAGS_BLOCK_IO) + +#define FNIC_FLAGS_IO_BLOCKED (__FNIC_FLAGS_BLOCK_IO) + +#define fnic_set_state_flags(fnicp, st_flags) \ + __fnic_set_state_flags(fnicp, st_flags, 0) + +#define fnic_clear_state_flags(fnicp, st_flags) \ + __fnic_set_state_flags(fnicp, st_flags, 1) + extern unsigned int fnic_log_level; #define FNIC_MAIN_LOGGING 0x01 @@ -170,6 +219,9 @@ struct fnic { struct completion *remove_wait; /* device remove thread blocks */ + atomic_t in_flight; /* io counter */ + u32 _reserved; /* fill hole */ + unsigned long state_flags; /* protected by host lock */ enum fnic_state state; spinlock_t fnic_lock; @@ -267,4 +319,12 @@ const char *fnic_state_to_str(unsigned int state); void fnic_log_q_error(struct fnic *fnic); void fnic_handle_link_event(struct fnic *fnic); +int fnic_is_abts_pending(struct fnic *, struct scsi_cmnd *); + +static inline int +fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags) +{ + return ((fnic->state_flags & st_flags) == st_flags); +} +void __fnic_set_state_flags(struct fnic *, unsigned long, unsigned long); #endif /* _FNIC_H_ */ diff --git a/drivers/scsi/fnic/fnic_debugfs.c b/drivers/scsi/fnic/fnic_debugfs.c new file mode 100644 index 000000000000..adc1f7f471f5 --- /dev/null +++ b/drivers/scsi/fnic/fnic_debugfs.c @@ -0,0 +1,314 @@ +/* + * Copyright 2012 Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/debugfs.h> +#include "fnic.h" + +static struct dentry *fnic_trace_debugfs_root; +static struct dentry *fnic_trace_debugfs_file; +static struct dentry *fnic_trace_enable; + +/* + * fnic_trace_ctrl_open - Open the trace_enable file + * @inode: The inode pointer. + * @file: The file pointer to attach the trace enable/disable flag. + * + * Description: + * This routine opens a debugsfs file trace_enable. + * + * Returns: + * This function returns zero if successful. + */ +static int fnic_trace_ctrl_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/* + * fnic_trace_ctrl_read - Read a trace_enable debugfs file + * @filp: The file pointer to read from. + * @ubuf: The buffer to copy the data to. + * @cnt: The number of bytes to read. + * @ppos: The position in the file to start reading from. + * + * Description: + * This routine reads value of variable fnic_tracing_enabled + * and stores into local @buf. It will start reading file at @ppos and + * copy up to @cnt of data to @ubuf from @buf. + * + * Returns: + * This function returns the amount of data that was read. + */ +static ssize_t fnic_trace_ctrl_read(struct file *filp, + char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int len; + len = sprintf(buf, "%u\n", fnic_tracing_enabled); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); +} + +/* + * fnic_trace_ctrl_write - Write to trace_enable debugfs file + * @filp: The file pointer to write from. + * @ubuf: The buffer to copy the data from. + * @cnt: The number of bytes to write. + * @ppos: The position in the file to start writing to. + * + * Description: + * This routine writes data from user buffer @ubuf to buffer @buf and + * sets fnic_tracing_enabled value as per user input. + * + * Returns: + * This function returns the amount of data that was written. + */ +static ssize_t fnic_trace_ctrl_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + unsigned long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = kstrtoul(buf, 10, &val); + if (ret < 0) + return ret; + + fnic_tracing_enabled = val; + (*ppos)++; + + return cnt; +} + +/* + * fnic_trace_debugfs_open - Open the fnic trace log + * @inode: The inode pointer + * @file: The file pointer to attach the log output + * + * Description: + * This routine is the entry point for the debugfs open file operation. + * It allocates the necessary buffer for the log, fills the buffer from + * the in-memory log and then returns a pointer to that log in + * the private_data field in @file. + * + * Returns: + * This function returns zero if successful. On error it will return + * a negative error value. + */ +static int fnic_trace_debugfs_open(struct inode *inode, + struct file *file) +{ + fnic_dbgfs_t *fnic_dbg_prt; + fnic_dbg_prt = kzalloc(sizeof(fnic_dbgfs_t), GFP_KERNEL); + if (!fnic_dbg_prt) + return -ENOMEM; + + fnic_dbg_prt->buffer = vmalloc((3*(trace_max_pages * PAGE_SIZE))); + if (!fnic_dbg_prt->buffer) { + kfree(fnic_dbg_prt); + return -ENOMEM; + } + memset((void *)fnic_dbg_prt->buffer, 0, + (3*(trace_max_pages * PAGE_SIZE))); + fnic_dbg_prt->buffer_len = fnic_get_trace_data(fnic_dbg_prt); + file->private_data = fnic_dbg_prt; + return 0; +} + +/* + * fnic_trace_debugfs_lseek - Seek through a debugfs file + * @file: The file pointer to seek through. + * @offset: The offset to seek to or the amount to seek by. + * @howto: Indicates how to seek. + * + * Description: + * This routine is the entry point for the debugfs lseek file operation. + * The @howto parameter indicates whether @offset is the offset to directly + * seek to, or if it is a value to seek forward or reverse by. This function + * figures out what the new offset of the debugfs file will be and assigns + * that value to the f_pos field of @file. + * + * Returns: + * This function returns the new offset if successful and returns a negative + * error if unable to process the seek. + */ +static loff_t fnic_trace_debugfs_lseek(struct file *file, + loff_t offset, + int howto) +{ + fnic_dbgfs_t *fnic_dbg_prt = file->private_data; + loff_t pos = -1; + + switch (howto) { + case 0: + pos = offset; + break; + case 1: + pos = file->f_pos + offset; + break; + case 2: + pos = fnic_dbg_prt->buffer_len - offset; + } + return (pos < 0 || pos > fnic_dbg_prt->buffer_len) ? + -EINVAL : (file->f_pos = pos); +} + +/* + * fnic_trace_debugfs_read - Read a debugfs file + * @file: The file pointer to read from. + * @ubuf: The buffer to copy the data to. + * @nbytes: The number of bytes to read. + * @pos: The position in the file to start reading from. + * + * Description: + * This routine reads data from the buffer indicated in the private_data + * field of @file. It will start reading at @pos and copy up to @nbytes of + * data to @ubuf. + * + * Returns: + * This function returns the amount of data that was read (this could be + * less than @nbytes if the end of the file was reached). + */ +static ssize_t fnic_trace_debugfs_read(struct file *file, + char __user *ubuf, + size_t nbytes, + loff_t *pos) +{ + fnic_dbgfs_t *fnic_dbg_prt = file->private_data; + int rc = 0; + rc = simple_read_from_buffer(ubuf, nbytes, pos, + fnic_dbg_prt->buffer, + fnic_dbg_prt->buffer_len); + return rc; +} + +/* + * fnic_trace_debugfs_release - Release the buffer used to store + * debugfs file data + * @inode: The inode pointer + * @file: The file pointer that contains the buffer to release + * + * Description: + * This routine frees the buffer that was allocated when the debugfs + * file was opened. + * + * Returns: + * This function returns zero. + */ +static int fnic_trace_debugfs_release(struct inode *inode, + struct file *file) +{ + fnic_dbgfs_t *fnic_dbg_prt = file->private_data; + + vfree(fnic_dbg_prt->buffer); + kfree(fnic_dbg_prt); + return 0; +} + +static const struct file_operations fnic_trace_ctrl_fops = { + .owner = THIS_MODULE, + .open = fnic_trace_ctrl_open, + .read = fnic_trace_ctrl_read, + .write = fnic_trace_ctrl_write, +}; + +static const struct file_operations fnic_trace_debugfs_fops = { + .owner = THIS_MODULE, + .open = fnic_trace_debugfs_open, + .llseek = fnic_trace_debugfs_lseek, + .read = fnic_trace_debugfs_read, + .release = fnic_trace_debugfs_release, +}; + +/* + * fnic_trace_debugfs_init - Initialize debugfs for fnic trace logging + * + * Description: + * When Debugfs is configured this routine sets up the fnic debugfs + * file system. If not already created, this routine will create the + * fnic directory. It will create file trace to log fnic trace buffer + * output into debugfs and it will also create file trace_enable to + * control enable/disable of trace logging into trace buffer. + */ +int fnic_trace_debugfs_init(void) +{ + int rc = -1; + fnic_trace_debugfs_root = debugfs_create_dir("fnic", NULL); + if (!fnic_trace_debugfs_root) { + printk(KERN_DEBUG "Cannot create debugfs root\n"); + return rc; + } + fnic_trace_enable = debugfs_create_file("tracing_enable", + S_IFREG|S_IRUGO|S_IWUSR, + fnic_trace_debugfs_root, + NULL, &fnic_trace_ctrl_fops); + + if (!fnic_trace_enable) { + printk(KERN_DEBUG "Cannot create trace_enable file" + " under debugfs"); + return rc; + } + + fnic_trace_debugfs_file = debugfs_create_file("trace", + S_IFREG|S_IRUGO|S_IWUSR, + fnic_trace_debugfs_root, + NULL, + &fnic_trace_debugfs_fops); + + if (!fnic_trace_debugfs_file) { + printk(KERN_DEBUG "Cannot create trace file under debugfs"); + return rc; + } + rc = 0; + return rc; +} + +/* + * fnic_trace_debugfs_terminate - Tear down debugfs infrastructure + * + * Description: + * When Debugfs is configured this routine removes debugfs file system + * elements that are specific to fnic trace logging. + */ +void fnic_trace_debugfs_terminate(void) +{ + if (fnic_trace_debugfs_file) { + debugfs_remove(fnic_trace_debugfs_file); + fnic_trace_debugfs_file = NULL; + } + if (fnic_trace_enable) { + debugfs_remove(fnic_trace_enable); + fnic_trace_enable = NULL; + } + if (fnic_trace_debugfs_root) { + debugfs_remove(fnic_trace_debugfs_root); + fnic_trace_debugfs_root = NULL; + } +} diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h index f0b896988cd5..c35b8f1889ea 100644 --- a/drivers/scsi/fnic/fnic_io.h +++ b/drivers/scsi/fnic/fnic_io.h @@ -21,7 +21,7 @@ #include <scsi/fc/fc_fcp.h> #define FNIC_DFLT_SG_DESC_CNT 32 -#define FNIC_MAX_SG_DESC_CNT 1024 /* Maximum descriptors per sgl */ +#define FNIC_MAX_SG_DESC_CNT 256 /* Maximum descriptors per sgl */ #define FNIC_SG_DESC_ALIGN 16 /* Descriptor address alignment */ struct host_sg_desc { @@ -45,7 +45,8 @@ enum fnic_sgl_list_type { }; enum fnic_ioreq_state { - FNIC_IOREQ_CMD_PENDING = 0, + FNIC_IOREQ_NOT_INITED = 0, + FNIC_IOREQ_CMD_PENDING, FNIC_IOREQ_ABTS_PENDING, FNIC_IOREQ_ABTS_COMPLETE, FNIC_IOREQ_CMD_COMPLETE, @@ -60,6 +61,7 @@ struct fnic_io_req { u8 sgl_type; /* device DMA descriptor list type */ u8 io_completed:1; /* set to 1 when fw completes IO */ u32 port_id; /* remote port DID */ + unsigned long start_time; /* in jiffies */ struct completion *abts_done; /* completion for abts */ struct completion *dr_done; /* completion for device reset */ }; diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index fbf3ac6e0c55..d601ac543c52 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -68,6 +68,10 @@ unsigned int fnic_log_level; module_param(fnic_log_level, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(fnic_log_level, "bit mask of fnic logging levels"); +unsigned int fnic_trace_max_pages = 16; +module_param(fnic_trace_max_pages, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(fnic_trace_max_pages, "Total allocated memory pages " + "for fnic trace buffer"); static struct libfc_function_template fnic_transport_template = { .frame_send = fnic_send, @@ -624,6 +628,9 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } fnic->state = FNIC_IN_FC_MODE; + atomic_set(&fnic->in_flight, 0); + fnic->state_flags = FNIC_FLAGS_NONE; + /* Enable hardware stripping of vlan header on ingress */ fnic_set_nic_config(fnic, 0, 0, 0, 0, 0, 0, 1); @@ -858,6 +865,14 @@ static int __init fnic_init_module(void) printk(KERN_INFO PFX "%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION); + /* Allocate memory for trace buffer */ + err = fnic_trace_buf_init(); + if (err < 0) { + printk(KERN_ERR PFX "Trace buffer initialization Failed " + "Fnic Tracing utility is disabled\n"); + fnic_trace_free(); + } + /* Create a cache for allocation of default size sgls */ len = sizeof(struct fnic_dflt_sgl_list); fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create @@ -928,6 +943,7 @@ err_create_fnic_ioreq_slab: err_create_fnic_sgl_slab_max: kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); err_create_fnic_sgl_slab_dflt: + fnic_trace_free(); return err; } @@ -939,6 +955,7 @@ static void __exit fnic_cleanup_module(void) kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); kmem_cache_destroy(fnic_io_req_cache); fc_release_transport(fnic_fc_transport); + fnic_trace_free(); } module_init(fnic_init_module); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index c40ce52ed7c6..be99e7549d89 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -47,6 +47,7 @@ const char *fnic_state_str[] = { }; static const char *fnic_ioreq_state_str[] = { + [FNIC_IOREQ_NOT_INITED] = "FNIC_IOREQ_NOT_INITED", [FNIC_IOREQ_CMD_PENDING] = "FNIC_IOREQ_CMD_PENDING", [FNIC_IOREQ_ABTS_PENDING] = "FNIC_IOREQ_ABTS_PENDING", [FNIC_IOREQ_ABTS_COMPLETE] = "FNIC_IOREQ_ABTS_COMPLETE", @@ -165,6 +166,33 @@ static int free_wq_copy_descs(struct fnic *fnic, struct vnic_wq_copy *wq) } +/** + * __fnic_set_state_flags + * Sets/Clears bits in fnic's state_flags + **/ +void +__fnic_set_state_flags(struct fnic *fnic, unsigned long st_flags, + unsigned long clearbits) +{ + struct Scsi_Host *host = fnic->lport->host; + int sh_locked = spin_is_locked(host->host_lock); + unsigned long flags = 0; + + if (!sh_locked) + spin_lock_irqsave(host->host_lock, flags); + + if (clearbits) + fnic->state_flags &= ~st_flags; + else + fnic->state_flags |= st_flags; + + if (!sh_locked) + spin_unlock_irqrestore(host->host_lock, flags); + + return; +} + + /* * fnic_fw_reset_handler * Routine to send reset msg to fw @@ -175,9 +203,16 @@ int fnic_fw_reset_handler(struct fnic *fnic) int ret = 0; unsigned long flags; + /* indicate fwreset to io path */ + fnic_set_state_flags(fnic, FNIC_FLAGS_FWRESET); + skb_queue_purge(&fnic->frame_queue); skb_queue_purge(&fnic->tx_queue); + /* wait for io cmpl */ + while (atomic_read(&fnic->in_flight)) + schedule_timeout(msecs_to_jiffies(1)); + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) @@ -193,9 +228,12 @@ int fnic_fw_reset_handler(struct fnic *fnic) if (!ret) FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Issued fw reset\n"); - else + else { + fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET); FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Failed to issue fw reset\n"); + } + return ret; } @@ -312,6 +350,8 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, if (unlikely(!vnic_wq_copy_desc_avail(wq))) { spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "fnic_queue_wq_copy_desc failure - no descriptors\n"); return SCSI_MLQUEUE_HOST_BUSY; } @@ -351,16 +391,20 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, */ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) { - struct fc_lport *lp; + struct fc_lport *lp = shost_priv(sc->device->host); struct fc_rport *rport; - struct fnic_io_req *io_req; - struct fnic *fnic; + struct fnic_io_req *io_req = NULL; + struct fnic *fnic = lport_priv(lp); struct vnic_wq_copy *wq; int ret; - int sg_count; + u64 cmd_trace; + int sg_count = 0; unsigned long flags; unsigned long ptr; + if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_IO_BLOCKED))) + return SCSI_MLQUEUE_HOST_BUSY; + rport = starget_to_rport(scsi_target(sc->device)); ret = fc_remote_port_chkready(rport); if (ret) { @@ -369,20 +413,21 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ return 0; } - lp = shost_priv(sc->device->host); if (lp->state != LPORT_ST_READY || !(lp->link_up)) return SCSI_MLQUEUE_HOST_BUSY; + atomic_inc(&fnic->in_flight); + /* * Release host lock, use driver resource specific locks from here. * Don't re-enable interrupts in case they were disabled prior to the * caller disabling them. */ spin_unlock(lp->host->host_lock); + CMD_STATE(sc) = FNIC_IOREQ_NOT_INITED; + CMD_FLAGS(sc) = FNIC_NO_FLAGS; /* Get a new io_req for this SCSI IO */ - fnic = lport_priv(lp); - io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC); if (!io_req) { ret = SCSI_MLQUEUE_HOST_BUSY; @@ -393,6 +438,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ /* Map the data buffer */ sg_count = scsi_dma_map(sc); if (sg_count < 0) { + FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no, + sc->request->tag, sc, 0, sc->cmnd[0], + sg_count, CMD_STATE(sc)); mempool_free(io_req, fnic->io_req_pool); goto out; } @@ -427,8 +475,10 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ /* initialize rest of io_req */ io_req->port_id = rport->port_id; + io_req->start_time = jiffies; CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; CMD_SP(sc) = (char *)io_req; + CMD_FLAGS(sc) |= FNIC_IO_INITIALIZED; sc->scsi_done = done; /* create copy wq desc and enqueue it */ @@ -440,7 +490,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ * refetch the pointer under the lock. */ spinlock_t *io_lock = fnic_io_lock_hash(fnic, sc); - + FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no, + sc->request->tag, sc, 0, 0, 0, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); CMD_SP(sc) = NULL; @@ -450,8 +502,21 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); } + } else { + /* REVISIT: Use per IO lock in the final code */ + CMD_FLAGS(sc) |= FNIC_IO_ISSUED; } out: + cmd_trace = ((u64)sc->cmnd[0] << 56 | (u64)sc->cmnd[7] << 40 | + (u64)sc->cmnd[8] << 32 | (u64)sc->cmnd[2] << 24 | + (u64)sc->cmnd[3] << 16 | (u64)sc->cmnd[4] << 8 | + sc->cmnd[5]); + + FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no, + sc->request->tag, sc, io_req, + sg_count, cmd_trace, + (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc))); + atomic_dec(&fnic->in_flight); /* acquire host lock before returning to SCSI */ spin_lock(lp->host->host_lock); return ret; @@ -529,6 +594,8 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic, fnic_flush_tx(fnic); reset_cmpl_handler_end: + fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET); + return ret; } @@ -622,6 +689,7 @@ static inline void fnic_fcpio_ack_handler(struct fnic *fnic, struct vnic_wq_copy *wq; u16 request_out = desc->u.ack.request_out; unsigned long flags; + u64 *ox_id_tag = (u64 *)(void *)desc; /* mark the ack state */ wq = &fnic->wq_copy[cq_index - fnic->raw_wq_count - fnic->rq_count]; @@ -632,6 +700,9 @@ static inline void fnic_fcpio_ack_handler(struct fnic *fnic, fnic->fw_ack_recd[0] = 1; } spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + FNIC_TRACE(fnic_fcpio_ack_handler, + fnic->lport->host->host_no, 0, 0, ox_id_tag[2], ox_id_tag[3], + ox_id_tag[4], ox_id_tag[5]); } /* @@ -651,27 +722,53 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, struct scsi_cmnd *sc; unsigned long flags; spinlock_t *io_lock; + u64 cmd_trace; + unsigned long start_time; /* Decode the cmpl description to get the io_req id */ fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); fcpio_tag_id_dec(&tag, &id); + icmnd_cmpl = &desc->u.icmnd_cmpl; - if (id >= FNIC_MAX_IO_REQ) + if (id >= FNIC_MAX_IO_REQ) { + shost_printk(KERN_ERR, fnic->lport->host, + "Tag out of range tag %x hdr status = %s\n", + id, fnic_fcpio_status_to_str(hdr_status)); return; + } sc = scsi_host_find_tag(fnic->lport->host, id); WARN_ON_ONCE(!sc); - if (!sc) + if (!sc) { + shost_printk(KERN_ERR, fnic->lport->host, + "icmnd_cmpl sc is null - " + "hdr status = %s tag = 0x%x desc = 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), id, desc); + FNIC_TRACE(fnic_fcpio_icmnd_cmpl_handler, + fnic->lport->host->host_no, id, + ((u64)icmnd_cmpl->_resvd0[1] << 16 | + (u64)icmnd_cmpl->_resvd0[0]), + ((u64)hdr_status << 16 | + (u64)icmnd_cmpl->scsi_status << 8 | + (u64)icmnd_cmpl->flags), desc, + (u64)icmnd_cmpl->residual, 0); return; + } io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); WARN_ON_ONCE(!io_req); if (!io_req) { + CMD_FLAGS(sc) |= FNIC_IO_REQ_NULL; spin_unlock_irqrestore(io_lock, flags); + shost_printk(KERN_ERR, fnic->lport->host, + "icmnd_cmpl io_req is null - " + "hdr status = %s tag = 0x%x sc 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), id, sc); return; } + start_time = io_req->start_time; /* firmware completed the io */ io_req->io_completed = 1; @@ -682,6 +779,28 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, */ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABTS_PENDING; + switch (hdr_status) { + case FCPIO_SUCCESS: + CMD_FLAGS(sc) |= FNIC_IO_DONE; + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "icmnd_cmpl ABTS pending hdr status = %s " + "sc 0x%p scsi_status %x residual %d\n", + fnic_fcpio_status_to_str(hdr_status), sc, + icmnd_cmpl->scsi_status, + icmnd_cmpl->residual); + break; + case FCPIO_ABORTED: + CMD_FLAGS(sc) |= FNIC_IO_ABORTED; + break; + default: + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "icmnd_cmpl abts pending " + "hdr status = %s tag = 0x%x sc = 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), + id, sc); + break; + } return; } @@ -765,6 +884,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, /* Break link with the SCSI command */ CMD_SP(sc) = NULL; + CMD_FLAGS(sc) |= FNIC_IO_DONE; spin_unlock_irqrestore(io_lock, flags); @@ -772,6 +892,20 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, mempool_free(io_req, fnic->io_req_pool); + cmd_trace = ((u64)hdr_status << 56) | + (u64)icmnd_cmpl->scsi_status << 48 | + (u64)icmnd_cmpl->flags << 40 | (u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]; + + FNIC_TRACE(fnic_fcpio_icmnd_cmpl_handler, + sc->device->host->host_no, id, sc, + ((u64)icmnd_cmpl->_resvd0[1] << 56 | + (u64)icmnd_cmpl->_resvd0[0] << 48 | + jiffies_to_msecs(jiffies - start_time)), + desc, cmd_trace, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + if (sc->sc_data_direction == DMA_FROM_DEVICE) { fnic->lport->host_stats.fcp_input_requests++; fnic->fcp_input_bytes += xfer_len; @@ -784,7 +918,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, /* Call SCSI completion function to complete the IO */ if (sc->scsi_done) sc->scsi_done(sc); - } /* fnic_fcpio_itmf_cmpl_handler @@ -801,28 +934,54 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, struct fnic_io_req *io_req; unsigned long flags; spinlock_t *io_lock; + unsigned long start_time; fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); fcpio_tag_id_dec(&tag, &id); - if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) + if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) { + shost_printk(KERN_ERR, fnic->lport->host, + "Tag out of range tag %x hdr status = %s\n", + id, fnic_fcpio_status_to_str(hdr_status)); return; + } sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK); WARN_ON_ONCE(!sc); - if (!sc) + if (!sc) { + shost_printk(KERN_ERR, fnic->lport->host, + "itmf_cmpl sc is null - hdr status = %s tag = 0x%x\n", + fnic_fcpio_status_to_str(hdr_status), id); return; - + } io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); WARN_ON_ONCE(!io_req); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; + shost_printk(KERN_ERR, fnic->lport->host, + "itmf_cmpl io_req is null - " + "hdr status = %s tag = 0x%x sc 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), id, sc); return; } + start_time = io_req->start_time; - if (id & FNIC_TAG_ABORT) { + if ((id & FNIC_TAG_ABORT) && (id & FNIC_TAG_DEV_RST)) { + /* Abort and terminate completion of device reset req */ + /* REVISIT : Add asserts about various flags */ + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "dev reset abts cmpl recd. id %x status %s\n", + id, fnic_fcpio_status_to_str(hdr_status)); + CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; + CMD_ABTS_STATUS(sc) = hdr_status; + CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; + if (io_req->abts_done) + complete(io_req->abts_done); + spin_unlock_irqrestore(io_lock, flags); + } else if (id & FNIC_TAG_ABORT) { /* Completion of abort cmd */ if (CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING) { /* This is a late completion. Ignore it */ @@ -832,6 +991,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; CMD_ABTS_STATUS(sc) = hdr_status; + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "abts cmpl recd. id %d status %s\n", (int)(id & FNIC_TAG_MASK), @@ -855,14 +1015,58 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); - if (sc->scsi_done) + if (sc->scsi_done) { + FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler, + sc->device->host->host_no, id, + sc, + jiffies_to_msecs(jiffies - start_time), + desc, + (((u64)hdr_status << 40) | + (u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | + (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | + CMD_STATE(sc))); sc->scsi_done(sc); + } } } else if (id & FNIC_TAG_DEV_RST) { /* Completion of device reset */ CMD_LR_STATUS(sc) = hdr_status; + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_DEV_RST_ABTS_PENDING; + FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler, + sc->device->host->host_no, id, sc, + jiffies_to_msecs(jiffies - start_time), + desc, 0, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Terminate pending " + "dev reset cmpl recd. id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + return; + } + if (CMD_FLAGS(sc) & FNIC_DEV_RST_TIMED_OUT) { + /* Need to wait for terminate completion */ + spin_unlock_irqrestore(io_lock, flags); + FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler, + sc->device->host->host_no, id, sc, + jiffies_to_msecs(jiffies - start_time), + desc, 0, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "dev reset cmpl recd after time out. " + "id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + return; + } CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "dev reset cmpl recd. id %d status %s\n", (int)(id & FNIC_TAG_MASK), @@ -889,7 +1093,6 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, struct fcpio_fw_req *desc) { struct fnic *fnic = vnic_dev_priv(vdev); - int ret = 0; switch (desc->hdr.type) { case FCPIO_ACK: /* fw copied copy wq desc to its queue */ @@ -906,11 +1109,11 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, case FCPIO_FLOGI_REG_CMPL: /* fw completed flogi_reg */ case FCPIO_FLOGI_FIP_REG_CMPL: /* fw completed flogi_fip_reg */ - ret = fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc); + fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc); break; case FCPIO_RESET_CMPL: /* fw completed reset */ - ret = fnic_fcpio_fw_reset_cmpl_handler(fnic, desc); + fnic_fcpio_fw_reset_cmpl_handler(fnic, desc); break; default: @@ -920,7 +1123,7 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, break; } - return ret; + return 0; } /* @@ -950,6 +1153,7 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) unsigned long flags = 0; struct scsi_cmnd *sc; spinlock_t *io_lock; + unsigned long start_time = 0; for (i = 0; i < FNIC_MAX_IO_REQ; i++) { if (i == exclude_id) @@ -962,6 +1166,23 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { + /* + * We will be here only when FW completes reset + * without sending completions for outstanding ios. + */ + CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; + if (io_req && io_req->dr_done) + complete(io_req->dr_done); + else if (io_req && io_req->abts_done) + complete(io_req->abts_done); + spin_unlock_irqrestore(io_lock, flags); + continue; + } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } if (!io_req) { spin_unlock_irqrestore(io_lock, flags); goto cleanup_scsi_cmd; @@ -975,6 +1196,7 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) * If there is a scsi_cmnd associated with this io_req, then * free the corresponding state */ + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); @@ -984,8 +1206,18 @@ cleanup_scsi_cmd: " DID_TRANSPORT_DISRUPTED\n"); /* Complete the command to SCSI */ - if (sc->scsi_done) + if (sc->scsi_done) { + FNIC_TRACE(fnic_cleanup_io, + sc->device->host->host_no, i, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | + (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + sc->scsi_done(sc); + } } } @@ -998,6 +1230,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, struct scsi_cmnd *sc; unsigned long flags; spinlock_t *io_lock; + unsigned long start_time = 0; /* get the tag reference */ fcpio_tag_id_dec(&desc->hdr.tag, &id); @@ -1027,6 +1260,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, spin_unlock_irqrestore(io_lock, flags); + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); @@ -1035,8 +1269,17 @@ wq_copy_cleanup_scsi_cmd: FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "wq_copy_cleanup_handler:" " DID_NO_CONNECT\n"); - if (sc->scsi_done) + if (sc->scsi_done) { + FNIC_TRACE(fnic_wq_copy_cleanup_handler, + sc->device->host->host_no, id, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + sc->scsi_done(sc); + } } static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, @@ -1044,8 +1287,18 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, struct fnic_io_req *io_req) { struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + struct Scsi_Host *host = fnic->lport->host; unsigned long flags; + spin_lock_irqsave(host->host_lock, flags); + if (unlikely(fnic_chk_state_flags_locked(fnic, + FNIC_FLAGS_IO_BLOCKED))) { + spin_unlock_irqrestore(host->host_lock, flags); + return 1; + } else + atomic_inc(&fnic->in_flight); + spin_unlock_irqrestore(host->host_lock, flags); + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) @@ -1053,6 +1306,9 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, if (!vnic_wq_copy_desc_avail(wq)) { spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + atomic_dec(&fnic->in_flight); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_queue_abort_io_req: failure: no descriptors\n"); return 1; } fnic_queue_wq_copy_desc_itmf(wq, tag | FNIC_TAG_ABORT, @@ -1060,12 +1316,15 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, fnic->config.ra_tov, fnic->config.ed_tov); spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + atomic_dec(&fnic->in_flight); + return 0; } -void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) +static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) { int tag; + int abt_tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; @@ -1075,13 +1334,14 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "fnic_rport_reset_exch called portid 0x%06x\n", + "fnic_rport_exch_reset called portid 0x%06x\n", port_id); if (fnic->in_remove) return; for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + abt_tag = tag; sc = scsi_host_find_tag(fnic->lport->host, tag); if (!sc) continue; @@ -1096,6 +1356,15 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) continue; } + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_exch_reset dev rst not pending sc 0x%p\n", + sc); + spin_unlock_irqrestore(io_lock, flags); + continue; + } + /* * Found IO that is still pending with firmware and * belongs to rport that went away @@ -1104,9 +1373,29 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) spin_unlock_irqrestore(io_lock, flags); continue; } + if (io_req->abts_done) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_rport_exch_reset: io_req->abts_done is set " + "state is %s\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + } + + if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) { + shost_printk(KERN_ERR, fnic->lport->host, + "rport_exch_reset " + "IO not yet issued %p tag 0x%x flags " + "%x state %d\n", + sc, tag, CMD_FLAGS(sc), CMD_STATE(sc)); + } old_ioreq_state = CMD_STATE(sc); CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + abt_tag = (tag | FNIC_TAG_DEV_RST); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_exch_reset dev rst sc 0x%p\n", + sc); + } BUG_ON(io_req->abts_done); @@ -1118,7 +1407,7 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) /* Now queue the abort command to firmware */ int_to_scsilun(sc->device->lun, &fc_lun); - if (fnic_queue_abort_io_req(fnic, tag, + if (fnic_queue_abort_io_req(fnic, abt_tag, FCPIO_ITMF_ABT_TASK_TERM, fc_lun.scsi_lun, io_req)) { /* @@ -1127,12 +1416,17 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) * aborted later by scsi_eh, or cleaned up during * lun reset */ - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) CMD_STATE(sc) = old_ioreq_state; spin_unlock_irqrestore(io_lock, flags); + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + else + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); } } @@ -1141,6 +1435,7 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) void fnic_terminate_rport_io(struct fc_rport *rport) { int tag; + int abt_tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; @@ -1154,14 +1449,15 @@ void fnic_terminate_rport_io(struct fc_rport *rport) FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_terminate_rport_io called" - " wwpn 0x%llx, wwnn0x%llx, portid 0x%06x\n", - rport->port_name, rport->node_name, + " wwpn 0x%llx, wwnn0x%llx, rport 0x%p, portid 0x%06x\n", + rport->port_name, rport->node_name, rport, rport->port_id); if (fnic->in_remove) return; for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + abt_tag = tag; sc = scsi_host_find_tag(fnic->lport->host, tag); if (!sc) continue; @@ -1180,6 +1476,14 @@ void fnic_terminate_rport_io(struct fc_rport *rport) continue; } + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_terminate_rport_io dev rst not pending sc 0x%p\n", + sc); + spin_unlock_irqrestore(io_lock, flags); + continue; + } /* * Found IO that is still pending with firmware and * belongs to rport that went away @@ -1188,9 +1492,27 @@ void fnic_terminate_rport_io(struct fc_rport *rport) spin_unlock_irqrestore(io_lock, flags); continue; } + if (io_req->abts_done) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_terminate_rport_io: io_req->abts_done is set " + "state is %s\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + } + if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) { + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "fnic_terminate_rport_io " + "IO not yet issued %p tag 0x%x flags " + "%x state %d\n", + sc, tag, CMD_FLAGS(sc), CMD_STATE(sc)); + } old_ioreq_state = CMD_STATE(sc); CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + abt_tag = (tag | FNIC_TAG_DEV_RST); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_terminate_rport_io dev rst sc 0x%p\n", sc); + } BUG_ON(io_req->abts_done); @@ -1203,7 +1525,7 @@ void fnic_terminate_rport_io(struct fc_rport *rport) /* Now queue the abort command to firmware */ int_to_scsilun(sc->device->lun, &fc_lun); - if (fnic_queue_abort_io_req(fnic, tag, + if (fnic_queue_abort_io_req(fnic, abt_tag, FCPIO_ITMF_ABT_TASK_TERM, fc_lun.scsi_lun, io_req)) { /* @@ -1212,12 +1534,17 @@ void fnic_terminate_rport_io(struct fc_rport *rport) * aborted later by scsi_eh, or cleaned up during * lun reset */ - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) CMD_STATE(sc) = old_ioreq_state; spin_unlock_irqrestore(io_lock, flags); + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + else + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); } } @@ -1232,13 +1559,15 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) { struct fc_lport *lp; struct fnic *fnic; - struct fnic_io_req *io_req; + struct fnic_io_req *io_req = NULL; struct fc_rport *rport; spinlock_t *io_lock; unsigned long flags; + unsigned long start_time = 0; int ret = SUCCESS; - u32 task_req; + u32 task_req = 0; struct scsi_lun fc_lun; + int tag; DECLARE_COMPLETION_ONSTACK(tm_done); /* Wait for rport to unblock */ @@ -1249,9 +1578,13 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) fnic = lport_priv(lp); rport = starget_to_rport(scsi_target(sc->device)); - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %d\n", - rport->port_id, sc->device->lun, sc->request->tag); + tag = sc->request->tag; + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %x flags %x\n", + rport->port_id, sc->device->lun, tag, CMD_FLAGS(sc)); + + CMD_FLAGS(sc) = FNIC_NO_FLAGS; if (lp->state != LPORT_ST_READY || !(lp->link_up)) { ret = FAILED; @@ -1318,6 +1651,10 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) ret = FAILED; goto fnic_abort_cmd_end; } + if (task_req == FCPIO_ITMF_ABT_TASK) + CMD_FLAGS(sc) |= FNIC_IO_ABTS_ISSUED; + else + CMD_FLAGS(sc) |= FNIC_IO_TERM_ISSUED; /* * We queued an abort IO, wait for its completion. @@ -1336,6 +1673,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) io_req = (struct fnic_io_req *)CMD_SP(sc); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; ret = FAILED; goto fnic_abort_cmd_end; } @@ -1344,6 +1682,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) /* fw did not complete abort, timed out */ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT; ret = FAILED; goto fnic_abort_cmd_end; } @@ -1359,12 +1698,21 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) spin_unlock_irqrestore(io_lock, flags); + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); fnic_abort_cmd_end: + FNIC_TRACE(fnic_abort_cmd, sc->device->host->host_no, + sc->request->tag, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "Returning from abort cmd %s\n", + "Returning from abort cmd type %x %s\n", task_req, (ret == SUCCESS) ? "SUCCESS" : "FAILED"); return ret; @@ -1375,16 +1723,28 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic, struct fnic_io_req *io_req) { struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + struct Scsi_Host *host = fnic->lport->host; struct scsi_lun fc_lun; int ret = 0; unsigned long intr_flags; + spin_lock_irqsave(host->host_lock, intr_flags); + if (unlikely(fnic_chk_state_flags_locked(fnic, + FNIC_FLAGS_IO_BLOCKED))) { + spin_unlock_irqrestore(host->host_lock, intr_flags); + return FAILED; + } else + atomic_inc(&fnic->in_flight); + spin_unlock_irqrestore(host->host_lock, intr_flags); + spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags); if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) free_wq_copy_descs(fnic, wq); if (!vnic_wq_copy_desc_avail(wq)) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "queue_dr_io_req failure - no descriptors\n"); ret = -EAGAIN; goto lr_io_req_end; } @@ -1399,6 +1759,7 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic, lr_io_req_end: spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + atomic_dec(&fnic->in_flight); return ret; } @@ -1412,7 +1773,7 @@ lr_io_req_end: static int fnic_clean_pending_aborts(struct fnic *fnic, struct scsi_cmnd *lr_sc) { - int tag; + int tag, abt_tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; @@ -1421,6 +1782,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, struct scsi_lun fc_lun; struct scsi_device *lun_dev = lr_sc->device; DECLARE_COMPLETION_ONSTACK(tm_done); + enum fnic_ioreq_state old_ioreq_state; for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { sc = scsi_host_find_tag(fnic->lport->host, tag); @@ -1449,7 +1811,41 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, "Found IO in %s on lun\n", fnic_ioreq_state_to_str(CMD_STATE(sc))); - BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "%s dev rst not pending sc 0x%p\n", __func__, + sc); + spin_unlock_irqrestore(io_lock, flags); + continue; + } + old_ioreq_state = CMD_STATE(sc); + /* + * Any pending IO issued prior to reset is expected to be + * in abts pending state, if not we need to set + * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending. + * When IO is completed, the IO will be handed over and + * handled in this function. + */ + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + + if (io_req->abts_done) + shost_printk(KERN_ERR, fnic->lport->host, + "%s: io_req->abts_done is set state is %s\n", + __func__, fnic_ioreq_state_to_str(CMD_STATE(sc))); + + BUG_ON(io_req->abts_done); + + abt_tag = tag; + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + abt_tag |= FNIC_TAG_DEV_RST; + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "%s: dev rst sc 0x%p\n", __func__, sc); + } CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; io_req->abts_done = &tm_done; @@ -1458,17 +1854,25 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, /* Now queue the abort command to firmware */ int_to_scsilun(sc->device->lun, &fc_lun); - if (fnic_queue_abort_io_req(fnic, tag, + if (fnic_queue_abort_io_req(fnic, abt_tag, FCPIO_ITMF_ABT_TASK_TERM, fc_lun.scsi_lun, io_req)) { spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); if (io_req) io_req->abts_done = NULL; + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; spin_unlock_irqrestore(io_lock, flags); ret = 1; goto clean_pending_aborts_end; + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); } + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; wait_for_completion_timeout(&tm_done, msecs_to_jiffies @@ -1479,8 +1883,8 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, io_req = (struct fnic_io_req *)CMD_SP(sc); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); - ret = 1; - goto clean_pending_aborts_end; + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; + continue; } io_req->abts_done = NULL; @@ -1488,6 +1892,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, /* if abort is still pending with fw, fail */ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; ret = 1; goto clean_pending_aborts_end; } @@ -1498,10 +1903,75 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, mempool_free(io_req, fnic->io_req_pool); } + schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov)); + + /* walk again to check, if IOs are still pending in fw */ + if (fnic_is_abts_pending(fnic, lr_sc)) + ret = FAILED; + clean_pending_aborts_end: return ret; } +/** + * fnic_scsi_host_start_tag + * Allocates tagid from host's tag list + **/ +static inline int +fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc) +{ + struct blk_queue_tag *bqt = fnic->lport->host->bqt; + int tag, ret = SCSI_NO_TAG; + + BUG_ON(!bqt); + if (!bqt) { + pr_err("Tags are not supported\n"); + goto end; + } + + do { + tag = find_next_zero_bit(bqt->tag_map, bqt->max_depth, 1); + if (tag >= bqt->max_depth) { + pr_err("Tag allocation failure\n"); + goto end; + } + } while (test_and_set_bit(tag, bqt->tag_map)); + + bqt->tag_index[tag] = sc->request; + sc->request->tag = tag; + sc->tag = tag; + if (!sc->request->special) + sc->request->special = sc; + + ret = tag; + +end: + return ret; +} + +/** + * fnic_scsi_host_end_tag + * frees tag allocated by fnic_scsi_host_start_tag. + **/ +static inline void +fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc) +{ + struct blk_queue_tag *bqt = fnic->lport->host->bqt; + int tag = sc->request->tag; + + if (tag == SCSI_NO_TAG) + return; + + BUG_ON(!bqt || !bqt->tag_index[tag]); + if (!bqt) + return; + + bqt->tag_index[tag] = NULL; + clear_bit(tag, bqt->tag_map); + + return; +} + /* * SCSI Eh thread issues a Lun Reset when one or more commands on a LUN * fail to get aborted. It calls driver's eh_device_reset with a SCSI command @@ -1511,13 +1981,17 @@ int fnic_device_reset(struct scsi_cmnd *sc) { struct fc_lport *lp; struct fnic *fnic; - struct fnic_io_req *io_req; + struct fnic_io_req *io_req = NULL; struct fc_rport *rport; int status; int ret = FAILED; spinlock_t *io_lock; unsigned long flags; + unsigned long start_time = 0; + struct scsi_lun fc_lun; + int tag = 0; DECLARE_COMPLETION_ONSTACK(tm_done); + int tag_gen_flag = 0; /*to track tags allocated by fnic driver*/ /* Wait for rport to unblock */ fc_block_scsi_eh(sc); @@ -1529,8 +2003,8 @@ int fnic_device_reset(struct scsi_cmnd *sc) rport = starget_to_rport(scsi_target(sc->device)); FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "Device reset called FCID 0x%x, LUN 0x%x\n", - rport->port_id, sc->device->lun); + "Device reset called FCID 0x%x, LUN 0x%x sc 0x%p\n", + rport->port_id, sc->device->lun, sc); if (lp->state != LPORT_ST_READY || !(lp->link_up)) goto fnic_device_reset_end; @@ -1539,6 +2013,16 @@ int fnic_device_reset(struct scsi_cmnd *sc) if (fc_remote_port_chkready(rport)) goto fnic_device_reset_end; + CMD_FLAGS(sc) = FNIC_DEVICE_RESET; + /* Allocate tag if not present */ + + tag = sc->request->tag; + if (unlikely(tag < 0)) { + tag = fnic_scsi_host_start_tag(fnic, sc); + if (unlikely(tag == SCSI_NO_TAG)) + goto fnic_device_reset_end; + tag_gen_flag = 1; + } io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); @@ -1562,8 +2046,7 @@ int fnic_device_reset(struct scsi_cmnd *sc) CMD_LR_STATUS(sc) = FCPIO_INVALID_CODE; spin_unlock_irqrestore(io_lock, flags); - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %d\n", - sc->request->tag); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %x\n", tag); /* * issue the device reset, if enqueue failed, clean up the ioreq @@ -1576,6 +2059,9 @@ int fnic_device_reset(struct scsi_cmnd *sc) io_req->dr_done = NULL; goto fnic_device_reset_clean; } + spin_lock_irqsave(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_DEV_RST_ISSUED; + spin_unlock_irqrestore(io_lock, flags); /* * Wait on the local completion for LUN reset. The io_req may be @@ -1588,12 +2074,13 @@ int fnic_device_reset(struct scsi_cmnd *sc) io_req = (struct fnic_io_req *)CMD_SP(sc); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "io_req is null tag 0x%x sc 0x%p\n", tag, sc); goto fnic_device_reset_end; } io_req->dr_done = NULL; status = CMD_LR_STATUS(sc); - spin_unlock_irqrestore(io_lock, flags); /* * If lun reset not completed, bail out with failed. io_req @@ -1602,7 +2089,53 @@ int fnic_device_reset(struct scsi_cmnd *sc) if (status == FCPIO_INVALID_CODE) { FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Device reset timed out\n"); - goto fnic_device_reset_end; + CMD_FLAGS(sc) |= FNIC_DEV_RST_TIMED_OUT; + spin_unlock_irqrestore(io_lock, flags); + int_to_scsilun(sc->device->lun, &fc_lun); + /* + * Issue abort and terminate on the device reset request. + * If q'ing of the abort fails, retry issue it after a delay. + */ + while (1) { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEV_RST_TERM_ISSUED) { + spin_unlock_irqrestore(io_lock, flags); + break; + } + spin_unlock_irqrestore(io_lock, flags); + if (fnic_queue_abort_io_req(fnic, + tag | FNIC_TAG_DEV_RST, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies(FNIC_ABT_TERM_DELAY_TIMEOUT)); + } else { + spin_lock_irqsave(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + io_req->abts_done = &tm_done; + spin_unlock_irqrestore(io_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Abort and terminate issued on Device reset " + "tag 0x%x sc 0x%p\n", tag, sc); + break; + } + } + while (1) { + spin_lock_irqsave(io_lock, flags); + if (!(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { + spin_unlock_irqrestore(io_lock, flags); + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT)); + break; + } else { + io_req = (struct fnic_io_req *)CMD_SP(sc); + io_req->abts_done = NULL; + goto fnic_device_reset_clean; + } + } + } else { + spin_unlock_irqrestore(io_lock, flags); } /* Completed, but not successful, clean up the io_req, return fail */ @@ -1645,11 +2178,24 @@ fnic_device_reset_clean: spin_unlock_irqrestore(io_lock, flags); if (io_req) { + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); } fnic_device_reset_end: + FNIC_TRACE(fnic_device_reset, sc->device->host->host_no, + sc->request->tag, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + + /* free tag if it is allocated */ + if (unlikely(tag_gen_flag)) + fnic_scsi_host_end_tag(fnic, sc); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Returning from device reset %s\n", (ret == SUCCESS) ? @@ -1735,7 +2281,15 @@ void fnic_scsi_abort_io(struct fc_lport *lp) DECLARE_COMPLETION_ONSTACK(remove_wait); /* Issue firmware reset for fnic, wait for reset to complete */ +retry_fw_reset: spin_lock_irqsave(&fnic->fnic_lock, flags); + if (unlikely(fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)) { + /* fw reset is in progress, poll for its completion */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + schedule_timeout(msecs_to_jiffies(100)); + goto retry_fw_reset; + } + fnic->remove_wait = &remove_wait; old_state = fnic->state; fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; @@ -1776,7 +2330,14 @@ void fnic_scsi_cleanup(struct fc_lport *lp) struct fnic *fnic = lport_priv(lp); /* issue fw reset */ +retry_fw_reset: spin_lock_irqsave(&fnic->fnic_lock, flags); + if (unlikely(fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)) { + /* fw reset is in progress, poll for its completion */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + schedule_timeout(msecs_to_jiffies(100)); + goto retry_fw_reset; + } old_state = fnic->state; fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; fnic_update_mac_locked(fnic, fnic->ctlr.ctl_src_addr); @@ -1822,3 +2383,61 @@ call_fc_exch_mgr_reset: fc_exch_mgr_reset(lp, sid, did); } + +/* + * fnic_is_abts_pending() is a helper function that + * walks through tag map to check if there is any IOs pending,if there is one, + * then it returns 1 (true), otherwise 0 (false) + * if @lr_sc is non NULL, then it checks IOs specific to particular LUN, + * otherwise, it checks for all IOs. + */ +int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + int ret = 0; + struct scsi_cmnd *sc; + struct scsi_device *lun_dev = NULL; + + if (lr_sc) + lun_dev = lr_sc->device; + + /* walk again to check, if IOs are still pending in fw */ + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + /* + * ignore this lun reset cmd or cmds that do not belong to + * this lun + */ + if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc))) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || sc->device != lun_dev) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to the LUN that we are resetting + */ + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "Found IO in %s on lun\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + continue; + } + } + + return ret; +} diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c new file mode 100644 index 000000000000..23a60e3d8527 --- /dev/null +++ b/drivers/scsi/fnic/fnic_trace.c @@ -0,0 +1,273 @@ +/* + * Copyright 2012 Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/mempool.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/kallsyms.h> +#include "fnic_io.h" +#include "fnic.h" + +unsigned int trace_max_pages; +static int fnic_max_trace_entries; + +static unsigned long fnic_trace_buf_p; +static DEFINE_SPINLOCK(fnic_trace_lock); + +static fnic_trace_dbg_t fnic_trace_entries; +int fnic_tracing_enabled = 1; + +/* + * fnic_trace_get_buf - Give buffer pointer to user to fill up trace information + * + * Description: + * This routine gets next available trace buffer entry location @wr_idx + * from allocated trace buffer pages and give that memory location + * to user to store the trace information. + * + * Return Value: + * This routine returns pointer to next available trace entry + * @fnic_buf_head for user to fill trace information. + */ +fnic_trace_data_t *fnic_trace_get_buf(void) +{ + unsigned long fnic_buf_head; + unsigned long flags; + + spin_lock_irqsave(&fnic_trace_lock, flags); + + /* + * Get next available memory location for writing trace information + * at @wr_idx and increment @wr_idx + */ + fnic_buf_head = + fnic_trace_entries.page_offset[fnic_trace_entries.wr_idx]; + fnic_trace_entries.wr_idx++; + + /* + * Verify if trace buffer is full then change wd_idx to + * start from zero + */ + if (fnic_trace_entries.wr_idx >= fnic_max_trace_entries) + fnic_trace_entries.wr_idx = 0; + + /* + * Verify if write index @wr_idx and read index @rd_idx are same then + * increment @rd_idx to move to next entry in trace buffer + */ + if (fnic_trace_entries.wr_idx == fnic_trace_entries.rd_idx) { + fnic_trace_entries.rd_idx++; + if (fnic_trace_entries.rd_idx >= fnic_max_trace_entries) + fnic_trace_entries.rd_idx = 0; + } + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return (fnic_trace_data_t *)fnic_buf_head; +} + +/* + * fnic_get_trace_data - Copy trace buffer to a memory file + * @fnic_dbgfs_t: pointer to debugfs trace buffer + * + * Description: + * This routine gathers the fnic trace debugfs data from the fnic_trace_data_t + * buffer and dumps it to fnic_dbgfs_t. It will start at the rd_idx entry in + * the log and process the log until the end of the buffer. Then it will gather + * from the beginning of the log and process until the current entry @wr_idx. + * + * Return Value: + * This routine returns the amount of bytes that were dumped into fnic_dbgfs_t + */ +int fnic_get_trace_data(fnic_dbgfs_t *fnic_dbgfs_prt) +{ + int rd_idx; + int wr_idx; + int len = 0; + unsigned long flags; + char str[KSYM_SYMBOL_LEN]; + struct timespec val; + fnic_trace_data_t *tbp; + + spin_lock_irqsave(&fnic_trace_lock, flags); + rd_idx = fnic_trace_entries.rd_idx; + wr_idx = fnic_trace_entries.wr_idx; + if (wr_idx < rd_idx) { + while (1) { + /* Start from read index @rd_idx */ + tbp = (fnic_trace_data_t *) + fnic_trace_entries.page_offset[rd_idx]; + if (!tbp) { + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return 0; + } + /* Convert function pointer to function name */ + if (sizeof(unsigned long) < 8) { + sprint_symbol(str, tbp->fnaddr.low); + jiffies_to_timespec(tbp->timestamp.low, &val); + } else { + sprint_symbol(str, tbp->fnaddr.val); + jiffies_to_timespec(tbp->timestamp.val, &val); + } + /* + * Dump trace buffer entry to memory file + * and increment read index @rd_idx + */ + len += snprintf(fnic_dbgfs_prt->buffer + len, + (trace_max_pages * PAGE_SIZE * 3) - len, + "%16lu.%16lu %-50s %8x %8x %16llx %16llx " + "%16llx %16llx %16llx\n", val.tv_sec, + val.tv_nsec, str, tbp->host_no, tbp->tag, + tbp->data[0], tbp->data[1], tbp->data[2], + tbp->data[3], tbp->data[4]); + rd_idx++; + /* + * If rd_idx is reached to maximum trace entries + * then move rd_idx to zero + */ + if (rd_idx > (fnic_max_trace_entries-1)) + rd_idx = 0; + /* + * Continure dumpping trace buffer entries into + * memory file till rd_idx reaches write index + */ + if (rd_idx == wr_idx) + break; + } + } else if (wr_idx > rd_idx) { + while (1) { + /* Start from read index @rd_idx */ + tbp = (fnic_trace_data_t *) + fnic_trace_entries.page_offset[rd_idx]; + if (!tbp) { + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return 0; + } + /* Convert function pointer to function name */ + if (sizeof(unsigned long) < 8) { + sprint_symbol(str, tbp->fnaddr.low); + jiffies_to_timespec(tbp->timestamp.low, &val); + } else { + sprint_symbol(str, tbp->fnaddr.val); + jiffies_to_timespec(tbp->timestamp.val, &val); + } + /* + * Dump trace buffer entry to memory file + * and increment read index @rd_idx + */ + len += snprintf(fnic_dbgfs_prt->buffer + len, + (trace_max_pages * PAGE_SIZE * 3) - len, + "%16lu.%16lu %-50s %8x %8x %16llx %16llx " + "%16llx %16llx %16llx\n", val.tv_sec, + val.tv_nsec, str, tbp->host_no, tbp->tag, + tbp->data[0], tbp->data[1], tbp->data[2], + tbp->data[3], tbp->data[4]); + rd_idx++; + /* + * Continue dumpping trace buffer entries into + * memory file till rd_idx reaches write index + */ + if (rd_idx == wr_idx) + break; + } + } + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return len; +} + +/* + * fnic_trace_buf_init - Initialize fnic trace buffer logging facility + * + * Description: + * Initialize trace buffer data structure by allocating required memory and + * setting page_offset information for every trace entry by adding trace entry + * length to previous page_offset value. + */ +int fnic_trace_buf_init(void) +{ + unsigned long fnic_buf_head; + int i; + int err = 0; + + trace_max_pages = fnic_trace_max_pages; + fnic_max_trace_entries = (trace_max_pages * PAGE_SIZE)/ + FNIC_ENTRY_SIZE_BYTES; + + fnic_trace_buf_p = (unsigned long)vmalloc((trace_max_pages * PAGE_SIZE)); + if (!fnic_trace_buf_p) { + printk(KERN_ERR PFX "Failed to allocate memory " + "for fnic_trace_buf_p\n"); + err = -ENOMEM; + goto err_fnic_trace_buf_init; + } + memset((void *)fnic_trace_buf_p, 0, (trace_max_pages * PAGE_SIZE)); + + fnic_trace_entries.page_offset = vmalloc(fnic_max_trace_entries * + sizeof(unsigned long)); + if (!fnic_trace_entries.page_offset) { + printk(KERN_ERR PFX "Failed to allocate memory for" + " page_offset\n"); + if (fnic_trace_buf_p) { + vfree((void *)fnic_trace_buf_p); + fnic_trace_buf_p = 0; + } + err = -ENOMEM; + goto err_fnic_trace_buf_init; + } + memset((void *)fnic_trace_entries.page_offset, 0, + (fnic_max_trace_entries * sizeof(unsigned long))); + fnic_trace_entries.wr_idx = fnic_trace_entries.rd_idx = 0; + fnic_buf_head = fnic_trace_buf_p; + + /* + * Set page_offset field of fnic_trace_entries struct by + * calculating memory location for every trace entry using + * length of each trace entry + */ + for (i = 0; i < fnic_max_trace_entries; i++) { + fnic_trace_entries.page_offset[i] = fnic_buf_head; + fnic_buf_head += FNIC_ENTRY_SIZE_BYTES; + } + err = fnic_trace_debugfs_init(); + if (err < 0) { + printk(KERN_ERR PFX "Failed to initialize debugfs for tracing\n"); + goto err_fnic_trace_debugfs_init; + } + printk(KERN_INFO PFX "Successfully Initialized Trace Buffer\n"); + return err; +err_fnic_trace_debugfs_init: + fnic_trace_free(); +err_fnic_trace_buf_init: + return err; +} + +/* + * fnic_trace_free - Free memory of fnic trace data structures. + */ +void fnic_trace_free(void) +{ + fnic_tracing_enabled = 0; + fnic_trace_debugfs_terminate(); + if (fnic_trace_entries.page_offset) { + vfree((void *)fnic_trace_entries.page_offset); + fnic_trace_entries.page_offset = NULL; + } + if (fnic_trace_buf_p) { + vfree((void *)fnic_trace_buf_p); + fnic_trace_buf_p = 0; + } + printk(KERN_INFO PFX "Successfully Freed Trace Buffer\n"); +} diff --git a/drivers/scsi/fnic/fnic_trace.h b/drivers/scsi/fnic/fnic_trace.h new file mode 100644 index 000000000000..cef42b4c4d6c --- /dev/null +++ b/drivers/scsi/fnic/fnic_trace.h @@ -0,0 +1,90 @@ +/* + * Copyright 2012 Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __FNIC_TRACE_H__ +#define __FNIC_TRACE_H__ + +#define FNIC_ENTRY_SIZE_BYTES 64 + +extern ssize_t simple_read_from_buffer(void __user *to, + size_t count, + loff_t *ppos, + const void *from, + size_t available); + +extern unsigned int fnic_trace_max_pages; +extern int fnic_tracing_enabled; +extern unsigned int trace_max_pages; + +typedef struct fnic_trace_dbg { + int wr_idx; + int rd_idx; + unsigned long *page_offset; +} fnic_trace_dbg_t; + +typedef struct fnic_dbgfs { + int buffer_len; + char *buffer; +} fnic_dbgfs_t; + +struct fnic_trace_data { + union { + struct { + u32 low; + u32 high; + }; + u64 val; + } timestamp, fnaddr; + u32 host_no; + u32 tag; + u64 data[5]; +} __attribute__((__packed__)); + +typedef struct fnic_trace_data fnic_trace_data_t; + +#define FNIC_TRACE_ENTRY_SIZE \ + (FNIC_ENTRY_SIZE_BYTES - sizeof(fnic_trace_data_t)) + +#define FNIC_TRACE(_fn, _hn, _t, _a, _b, _c, _d, _e) \ + if (unlikely(fnic_tracing_enabled)) { \ + fnic_trace_data_t *trace_buf = fnic_trace_get_buf(); \ + if (trace_buf) { \ + if (sizeof(unsigned long) < 8) { \ + trace_buf->timestamp.low = jiffies; \ + trace_buf->fnaddr.low = (u32)(unsigned long)_fn; \ + } else { \ + trace_buf->timestamp.val = jiffies; \ + trace_buf->fnaddr.val = (u64)(unsigned long)_fn; \ + } \ + trace_buf->host_no = _hn; \ + trace_buf->tag = _t; \ + trace_buf->data[0] = (u64)(unsigned long)_a; \ + trace_buf->data[1] = (u64)(unsigned long)_b; \ + trace_buf->data[2] = (u64)(unsigned long)_c; \ + trace_buf->data[3] = (u64)(unsigned long)_d; \ + trace_buf->data[4] = (u64)(unsigned long)_e; \ + } \ + } + +fnic_trace_data_t *fnic_trace_get_buf(void); +int fnic_get_trace_data(fnic_dbgfs_t *); +int fnic_trace_buf_init(void); +void fnic_trace_free(void); +int fnic_trace_debugfs_init(void); +void fnic_trace_debugfs_terminate(void); + +#endif diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 4f338061b5c3..7f4f790a3d71 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -165,7 +165,7 @@ static void cmd_free(struct ctlr_info *h, struct CommandList *c); static void cmd_special_free(struct ctlr_info *h, struct CommandList *c); static struct CommandList *cmd_alloc(struct ctlr_info *h); static struct CommandList *cmd_special_alloc(struct ctlr_info *h); -static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, +static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, void *buff, size_t size, u8 page_code, unsigned char *scsi3addr, int cmd_type); @@ -1131,7 +1131,7 @@ clean: return -ENOMEM; } -static void hpsa_map_sg_chain_block(struct ctlr_info *h, +static int hpsa_map_sg_chain_block(struct ctlr_info *h, struct CommandList *c) { struct SGDescriptor *chain_sg, *chain_block; @@ -1144,8 +1144,15 @@ static void hpsa_map_sg_chain_block(struct ctlr_info *h, (c->Header.SGTotal - h->max_cmd_sg_entries); temp64 = pci_map_single(h->pdev, chain_block, chain_sg->Len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&h->pdev->dev, temp64)) { + /* prevent subsequent unmapping */ + chain_sg->Addr.lower = 0; + chain_sg->Addr.upper = 0; + return -1; + } chain_sg->Addr.lower = (u32) (temp64 & 0x0FFFFFFFFULL); chain_sg->Addr.upper = (u32) ((temp64 >> 32) & 0x0FFFFFFFFULL); + return 0; } static void hpsa_unmap_sg_chain_block(struct ctlr_info *h, @@ -1390,7 +1397,7 @@ static void hpsa_pci_unmap(struct pci_dev *pdev, } } -static void hpsa_map_one(struct pci_dev *pdev, +static int hpsa_map_one(struct pci_dev *pdev, struct CommandList *cp, unsigned char *buf, size_t buflen, @@ -1401,10 +1408,16 @@ static void hpsa_map_one(struct pci_dev *pdev, if (buflen == 0 || data_direction == PCI_DMA_NONE) { cp->Header.SGList = 0; cp->Header.SGTotal = 0; - return; + return 0; } addr64 = (u64) pci_map_single(pdev, buf, buflen, data_direction); + if (dma_mapping_error(&pdev->dev, addr64)) { + /* Prevent subsequent unmap of something never mapped */ + cp->Header.SGList = 0; + cp->Header.SGTotal = 0; + return -1; + } cp->SG[0].Addr.lower = (u32) (addr64 & (u64) 0x00000000FFFFFFFF); cp->SG[0].Addr.upper = @@ -1412,6 +1425,7 @@ static void hpsa_map_one(struct pci_dev *pdev, cp->SG[0].Len = buflen; cp->Header.SGList = (u8) 1; /* no. SGs contig in this cmd */ cp->Header.SGTotal = (u16) 1; /* total sgs in this cmd list */ + return 0; } static inline void hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h, @@ -1540,13 +1554,18 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr, return -ENOMEM; } - fill_cmd(c, HPSA_INQUIRY, h, buf, bufsize, page, scsi3addr, TYPE_CMD); + if (fill_cmd(c, HPSA_INQUIRY, h, buf, bufsize, + page, scsi3addr, TYPE_CMD)) { + rc = -1; + goto out; + } hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE); ei = c->err_info; if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) { hpsa_scsi_interpret_error(c); rc = -1; } +out: cmd_special_free(h, c); return rc; } @@ -1564,7 +1583,9 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr) return -ENOMEM; } - fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, scsi3addr, TYPE_MSG); + /* fill_cmd can't fail here, no data buffer to map. */ + (void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, + NULL, 0, 0, scsi3addr, TYPE_MSG); hpsa_scsi_do_simple_cmd_core(h, c); /* no unmap needed here because no data xfer. */ @@ -1631,8 +1652,11 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical, } /* address the controller */ memset(scsi3addr, 0, sizeof(scsi3addr)); - fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h, - buf, bufsize, 0, scsi3addr, TYPE_CMD); + if (fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h, + buf, bufsize, 0, scsi3addr, TYPE_CMD)) { + rc = -1; + goto out; + } if (extended_response) c->Request.CDB[1] = extended_response; hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE); @@ -1642,6 +1666,7 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical, hpsa_scsi_interpret_error(c); rc = -1; } +out: cmd_special_free(h, c); return rc; } @@ -2105,7 +2130,10 @@ static int hpsa_scatter_gather(struct ctlr_info *h, if (chained) { cp->Header.SGList = h->max_cmd_sg_entries; cp->Header.SGTotal = (u16) (use_sg + 1); - hpsa_map_sg_chain_block(h, cp); + if (hpsa_map_sg_chain_block(h, cp)) { + scsi_dma_unmap(cmd); + return -1; + } return 0; } @@ -2353,8 +2381,9 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h, if (waittime < HPSA_MAX_WAIT_INTERVAL_SECS) waittime = waittime * 2; - /* Send the Test Unit Ready */ - fill_cmd(c, TEST_UNIT_READY, h, NULL, 0, 0, lunaddr, TYPE_CMD); + /* Send the Test Unit Ready, fill_cmd can't fail, no mapping */ + (void) fill_cmd(c, TEST_UNIT_READY, h, + NULL, 0, 0, lunaddr, TYPE_CMD); hpsa_scsi_do_simple_cmd_core(h, c); /* no unmap needed here because no data xfer. */ @@ -2439,7 +2468,9 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr, return -ENOMEM; } - fill_cmd(c, HPSA_ABORT_MSG, h, abort, 0, 0, scsi3addr, TYPE_MSG); + /* fill_cmd can't fail here, no buffer to map */ + (void) fill_cmd(c, HPSA_ABORT_MSG, h, abort, + 0, 0, scsi3addr, TYPE_MSG); if (swizzle) swizzle_abort_tag(&c->Request.CDB[4]); hpsa_scsi_do_simple_cmd_core(h, c); @@ -2928,6 +2959,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) struct CommandList *c; char *buff = NULL; union u64bit temp64; + int rc = 0; if (!argp) return -EINVAL; @@ -2947,8 +2979,8 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) /* Copy the data into the buffer we created */ if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) { - kfree(buff); - return -EFAULT; + rc = -EFAULT; + goto out_kfree; } } else { memset(buff, 0, iocommand.buf_size); @@ -2956,8 +2988,8 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) } c = cmd_special_alloc(h); if (c == NULL) { - kfree(buff); - return -ENOMEM; + rc = -ENOMEM; + goto out_kfree; } /* Fill in the command type */ c->cmd_type = CMD_IOCTL_PEND; @@ -2982,6 +3014,13 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) if (iocommand.buf_size > 0) { temp64.val = pci_map_single(h->pdev, buff, iocommand.buf_size, PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + c->SG[0].Addr.lower = 0; + c->SG[0].Addr.upper = 0; + c->SG[0].Len = 0; + rc = -ENOMEM; + goto out; + } c->SG[0].Addr.lower = temp64.val32.lower; c->SG[0].Addr.upper = temp64.val32.upper; c->SG[0].Len = iocommand.buf_size; @@ -2996,22 +3035,22 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) memcpy(&iocommand.error_info, c->err_info, sizeof(iocommand.error_info)); if (copy_to_user(argp, &iocommand, sizeof(iocommand))) { - kfree(buff); - cmd_special_free(h, c); - return -EFAULT; + rc = -EFAULT; + goto out; } if (iocommand.Request.Type.Direction == XFER_READ && iocommand.buf_size > 0) { /* Copy the data out of the buffer we created */ if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) { - kfree(buff); - cmd_special_free(h, c); - return -EFAULT; + rc = -EFAULT; + goto out; } } - kfree(buff); +out: cmd_special_free(h, c); - return 0; +out_kfree: + kfree(buff); + return rc; } static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp) @@ -3103,6 +3142,15 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp) for (i = 0; i < sg_used; i++) { temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i], PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + c->SG[i].Addr.lower = 0; + c->SG[i].Addr.upper = 0; + c->SG[i].Len = 0; + hpsa_pci_unmap(h->pdev, c, i, + PCI_DMA_BIDIRECTIONAL); + status = -ENOMEM; + goto cleanup1; + } c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Len = buff_size[i]; @@ -3190,7 +3238,8 @@ static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr, c = cmd_alloc(h); if (!c) return -ENOMEM; - fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, + /* fill_cmd can't fail here, no data buffer to map */ + (void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, RAID_CTLR_LUNID, TYPE_MSG); c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ c->waiting = NULL; @@ -3202,7 +3251,7 @@ static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr, return 0; } -static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, +static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, void *buff, size_t size, u8 page_code, unsigned char *scsi3addr, int cmd_type) { @@ -3271,7 +3320,7 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, default: dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd); BUG(); - return; + return -1; } } else if (cmd_type == TYPE_MSG) { switch (cmd) { @@ -3343,10 +3392,9 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, default: pci_dir = PCI_DMA_BIDIRECTIONAL; } - - hpsa_map_one(h->pdev, c, buff, size, pci_dir); - - return; + if (hpsa_map_one(h->pdev, c, buff, size, pci_dir)) + return -1; + return 0; } /* @@ -4882,10 +4930,13 @@ static void hpsa_flush_cache(struct ctlr_info *h) dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n"); goto out_of_memory; } - fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0, - RAID_CTLR_LUNID, TYPE_CMD); + if (fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0, + RAID_CTLR_LUNID, TYPE_CMD)) { + goto out; + } hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_TODEVICE); if (c->err_info->CommandStatus != 0) +out: dev_warn(&h->pdev->dev, "error flushing cache on controller\n"); cmd_special_free(h, c); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 8fa79b83f2d3..f328089a1060 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6112,7 +6112,7 @@ static int ipr_queuecommand(struct Scsi_Host *shost, * We have told the host to stop giving us new requests, but * ERP ops don't count. FIXME */ - if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead)) { + if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead && !hrrq->removing_ioa)) { spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return SCSI_MLQUEUE_HOST_BUSY; } @@ -6121,7 +6121,7 @@ static int ipr_queuecommand(struct Scsi_Host *shost, * FIXME - Create scsi_set_host_offline interface * and the ioa_is_dead check can be removed */ - if (unlikely(hrrq->ioa_is_dead || !res)) { + if (unlikely(hrrq->ioa_is_dead || hrrq->removing_ioa || !res)) { spin_unlock_irqrestore(hrrq->lock, hrrq_flags); goto err_nodev; } @@ -6741,14 +6741,17 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; ENTER; + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { + ipr_trace; + spin_unlock_irq(ioa_cfg->host->host_lock); + scsi_unblock_requests(ioa_cfg->host); + spin_lock_irq(ioa_cfg->host->host_lock); + } + ioa_cfg->in_reset_reload = 0; ioa_cfg->reset_retries = 0; list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); - - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); LEAVE; return IPR_RC_JOB_RETURN; @@ -8494,7 +8497,8 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, spin_unlock(&ioa_cfg->hrrq[i]._lock); } wmb(); - scsi_block_requests(ioa_cfg->host); + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) + scsi_block_requests(ioa_cfg->host); ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); ioa_cfg->reset_cmd = ipr_cmd; @@ -8549,9 +8553,11 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, ipr_fail_all_ops(ioa_cfg); wake_up_all(&ioa_cfg->reset_wait_q); - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { + spin_unlock_irq(ioa_cfg->host->host_lock); + scsi_unblock_requests(ioa_cfg->host); + spin_lock_irq(ioa_cfg->host->host_lock); + } return; } else { ioa_cfg->in_ioa_bringdown = 1; @@ -9695,6 +9701,7 @@ static void __ipr_remove(struct pci_dev *pdev) { unsigned long host_lock_flags = 0; struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + int i; ENTER; spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); @@ -9704,6 +9711,12 @@ static void __ipr_remove(struct pci_dev *pdev) spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); } + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].removing_ioa = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ipr_initiate_ioa_bringdown(ioa_cfg, IPR_SHUTDOWN_NORMAL); spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 1a9a246932ae..21a6ff1ed5c6 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -493,6 +493,7 @@ struct ipr_hrr_queue { u8 allow_interrupts:1; u8 ioa_is_dead:1; u8 allow_cmds:1; + u8 removing_ioa:1; struct blk_iopoll iopoll; }; diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index fcb9d0b20ee4..09c81b2f2169 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -1381,10 +1381,10 @@ static void fc_fcp_timeout(unsigned long data) fsp->state |= FC_SRB_FCP_PROCESSING_TMO; - if (fsp->state & FC_SRB_RCV_STATUS) - fc_fcp_complete_locked(fsp); - else if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED) + if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED) fc_fcp_rec(fsp); + else if (fsp->state & FC_SRB_RCV_STATUS) + fc_fcp_complete_locked(fsp); else fc_fcp_recovery(fsp, FC_TIMED_OUT); fsp->state &= ~FC_SRB_FCP_PROCESSING_TMO; diff --git a/drivers/scsi/libfc/fc_libfc.h b/drivers/scsi/libfc/fc_libfc.h index c2830cc66d6a..b74189d89322 100644 --- a/drivers/scsi/libfc/fc_libfc.h +++ b/drivers/scsi/libfc/fc_libfc.h @@ -41,25 +41,25 @@ extern unsigned int fc_debug_logging; #define FC_LIBFC_DBG(fmt, args...) \ FC_CHECK_LOGGING(FC_LIBFC_LOGGING, \ - printk(KERN_INFO "libfc: " fmt, ##args)) + pr_info("libfc: " fmt, ##args)) #define FC_LPORT_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_LPORT_LOGGING, \ - printk(KERN_INFO "host%u: lport %6.6x: " fmt, \ - (lport)->host->host_no, \ - (lport)->port_id, ##args)) + pr_info("host%u: lport %6.6x: " fmt, \ + (lport)->host->host_no, \ + (lport)->port_id, ##args)) -#define FC_DISC_DBG(disc, fmt, args...) \ - FC_CHECK_LOGGING(FC_DISC_LOGGING, \ - printk(KERN_INFO "host%u: disc: " fmt, \ - fc_disc_lport(disc)->host->host_no, \ - ##args)) +#define FC_DISC_DBG(disc, fmt, args...) \ + FC_CHECK_LOGGING(FC_DISC_LOGGING, \ + pr_info("host%u: disc: " fmt, \ + fc_disc_lport(disc)->host->host_no, \ + ##args)) #define FC_RPORT_ID_DBG(lport, port_id, fmt, args...) \ FC_CHECK_LOGGING(FC_RPORT_LOGGING, \ - printk(KERN_INFO "host%u: rport %6.6x: " fmt, \ - (lport)->host->host_no, \ - (port_id), ##args)) + pr_info("host%u: rport %6.6x: " fmt, \ + (lport)->host->host_no, \ + (port_id), ##args)) #define FC_RPORT_DBG(rdata, fmt, args...) \ FC_RPORT_ID_DBG((rdata)->local_port, (rdata)->ids.port_id, fmt, ##args) @@ -70,13 +70,13 @@ extern unsigned int fc_debug_logging; if ((pkt)->seq_ptr) { \ struct fc_exch *_ep = NULL; \ _ep = fc_seq_exch((pkt)->seq_ptr); \ - printk(KERN_INFO "host%u: fcp: %6.6x: " \ + pr_info("host%u: fcp: %6.6x: " \ "xid %04x-%04x: " fmt, \ (pkt)->lp->host->host_no, \ (pkt)->rport->port_id, \ (_ep)->oxid, (_ep)->rxid, ##args); \ } else { \ - printk(KERN_INFO "host%u: fcp: %6.6x: " fmt, \ + pr_info("host%u: fcp: %6.6x: " fmt, \ (pkt)->lp->host->host_no, \ (pkt)->rport->port_id, ##args); \ } \ @@ -84,14 +84,14 @@ extern unsigned int fc_debug_logging; #define FC_EXCH_DBG(exch, fmt, args...) \ FC_CHECK_LOGGING(FC_EXCH_LOGGING, \ - printk(KERN_INFO "host%u: xid %4x: " fmt, \ - (exch)->lp->host->host_no, \ - exch->xid, ##args)) + pr_info("host%u: xid %4x: " fmt, \ + (exch)->lp->host->host_no, \ + exch->xid, ##args)) #define FC_SCSI_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ - printk(KERN_INFO "host%u: scsi: " fmt, \ - (lport)->host->host_no, ##args)) + pr_info("host%u: scsi: " fmt, \ + (lport)->host->host_no, ##args)) /* * FC-4 Providers. diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 83aa1efec875..d518d17e940f 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -582,7 +582,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp) static void fc_rport_error_retry(struct fc_rport_priv *rdata, struct fc_frame *fp) { - unsigned long delay = FC_DEF_E_D_TOV; + unsigned long delay = msecs_to_jiffies(FC_DEF_E_D_TOV); /* make sure this isn't an FC_EX_CLOSED error, never retry those */ if (PTR_ERR(fp) == -FC_EX_CLOSED) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 55b6fc83ad71..74b67d98e952 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6644,7 +6644,7 @@ static int lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag) { - MAILBOX_t *mb; + MAILBOX_t *mbx; struct lpfc_sli *psli = &phba->sli; uint32_t status, evtctr; uint32_t ha_copy, hc_copy; @@ -6698,7 +6698,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, psli = &phba->sli; - mb = &pmbox->u.mb; + mbx = &pmbox->u.mb; status = MBX_SUCCESS; if (phba->link_state == LPFC_HBA_ERROR) { @@ -6713,7 +6713,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, goto out_not_finished; } - if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT) { + if (mbx->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT) { if (lpfc_readl(phba->HCregaddr, &hc_copy) || !(hc_copy & HC_MBINT_ENA)) { spin_unlock_irqrestore(&phba->hbalock, drvr_flag); @@ -6767,7 +6767,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, "(%d):0308 Mbox cmd issue - BUSY Data: " "x%x x%x x%x x%x\n", pmbox->vport ? pmbox->vport->vpi : 0xffffff, - mb->mbxCommand, phba->pport->port_state, + mbx->mbxCommand, phba->pport->port_state, psli->sli_flag, flag); psli->slistat.mbox_busy++; @@ -6777,15 +6777,15 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_debugfs_disc_trc(pmbox->vport, LPFC_DISC_TRC_MBOX_VPORT, "MBOX Bsy vport: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } else { lpfc_debugfs_disc_trc(phba->pport, LPFC_DISC_TRC_MBOX, "MBOX Bsy: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } return MBX_BUSY; @@ -6796,7 +6796,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, /* If we are not polling, we MUST be in SLI2 mode */ if (flag != MBX_POLL) { if (!(psli->sli_flag & LPFC_SLI_ACTIVE) && - (mb->mbxCommand != MBX_KILL_BOARD)) { + (mbx->mbxCommand != MBX_KILL_BOARD)) { psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; spin_unlock_irqrestore(&phba->hbalock, drvr_flag); /* Mbox command <mbxCommand> cannot issue */ @@ -6818,23 +6818,23 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, "(%d):0309 Mailbox cmd x%x issue Data: x%x x%x " "x%x\n", pmbox->vport ? pmbox->vport->vpi : 0, - mb->mbxCommand, phba->pport->port_state, + mbx->mbxCommand, phba->pport->port_state, psli->sli_flag, flag); - if (mb->mbxCommand != MBX_HEARTBEAT) { + if (mbx->mbxCommand != MBX_HEARTBEAT) { if (pmbox->vport) { lpfc_debugfs_disc_trc(pmbox->vport, LPFC_DISC_TRC_MBOX_VPORT, "MBOX Send vport: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } else { lpfc_debugfs_disc_trc(phba->pport, LPFC_DISC_TRC_MBOX, "MBOX Send: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } } @@ -6842,12 +6842,12 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, evtctr = psli->slistat.mbox_event; /* next set own bit for the adapter and copy over command word */ - mb->mbxOwner = OWN_CHIP; + mbx->mbxOwner = OWN_CHIP; if (psli->sli_flag & LPFC_SLI_ACTIVE) { /* Populate mbox extension offset word. */ if (pmbox->in_ext_byte_len || pmbox->out_ext_byte_len) { - *(((uint32_t *)mb) + pmbox->mbox_offset_word) + *(((uint32_t *)mbx) + pmbox->mbox_offset_word) = (uint8_t *)phba->mbox_ext - (uint8_t *)phba->mbox; } @@ -6859,11 +6859,11 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, pmbox->in_ext_byte_len); } /* Copy command data to host SLIM area */ - lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE); } else { /* Populate mbox extension offset word. */ if (pmbox->in_ext_byte_len || pmbox->out_ext_byte_len) - *(((uint32_t *)mb) + pmbox->mbox_offset_word) + *(((uint32_t *)mbx) + pmbox->mbox_offset_word) = MAILBOX_HBA_EXT_OFFSET; /* Copy the mailbox extension data */ @@ -6873,24 +6873,24 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, pmbox->context2, pmbox->in_ext_byte_len); } - if (mb->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) { /* copy command data into host mbox for cmpl */ - lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE); } /* First copy mbox command data to HBA SLIM, skip past first word */ to_slim = phba->MBslimaddr + sizeof (uint32_t); - lpfc_memcpy_to_slim(to_slim, &mb->un.varWords[0], + lpfc_memcpy_to_slim(to_slim, &mbx->un.varWords[0], MAILBOX_CMD_SIZE - sizeof (uint32_t)); /* Next copy over first word, with mbxOwner set */ - ldata = *((uint32_t *)mb); + ldata = *((uint32_t *)mbx); to_slim = phba->MBslimaddr; writel(ldata, to_slim); readl(to_slim); /* flush */ - if (mb->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) { /* switch over to host mailbox */ psli->sli_flag |= LPFC_SLI_ACTIVE; } @@ -6965,7 +6965,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, /* First copy command data */ word0 = *((uint32_t *)phba->mbox); word0 = le32_to_cpu(word0); - if (mb->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) { MAILBOX_t *slimmb; uint32_t slimword0; /* Check real SLIM for any errors */ @@ -6992,7 +6992,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, if (psli->sli_flag & LPFC_SLI_ACTIVE) { /* copy results back to user */ - lpfc_sli_pcimem_bcopy(phba->mbox, mb, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { lpfc_sli_pcimem_bcopy(phba->mbox_ext, @@ -7001,7 +7001,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, } } else { /* First copy command data */ - lpfc_memcpy_from_slim(mb, phba->MBslimaddr, + lpfc_memcpy_from_slim(mbx, phba->MBslimaddr, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { @@ -7016,7 +7016,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, readl(phba->HAregaddr); /* flush */ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; - status = mb->mbxStatus; + status = mbx->mbxStatus; } spin_unlock_irqrestore(&phba->hbalock, drvr_flag); diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 3b2365c8eab2..408d2548a748 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -33,9 +33,9 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "06.504.01.00-rc1" -#define MEGASAS_RELDATE "Oct. 1, 2012" -#define MEGASAS_EXT_VERSION "Mon. Oct. 1 17:00:00 PDT 2012" +#define MEGASAS_VERSION "06.506.00.00-rc1" +#define MEGASAS_RELDATE "Feb. 9, 2013" +#define MEGASAS_EXT_VERSION "Sat. Feb. 9 17:00:00 PDT 2013" /* * Device IDs diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 66a0fec0437b..9d53540207ec 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -18,7 +18,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * FILE: megaraid_sas_base.c - * Version : v06.504.01.00-rc1 + * Version : v06.506.00.00-rc1 * * Authors: LSI Corporation * Sreenivas Bagalkote diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 74030aff69ad..a7d56687bfca 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1206,7 +1206,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len, MPI2_SCSIIO_EEDPFLAGS_INSERT_OP; } io_request->Control |= (0x4 << 26); - io_request->EEDPBlockSize = MEGASAS_EEDPBLOCKSIZE; + io_request->EEDPBlockSize = scp->device->sector_size; } else { /* Some drives don't support 16/12 byte CDB's, convert to 10 */ if (((cdb_len == 12) || (cdb_len == 16)) && @@ -1511,7 +1511,8 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance, if (scmd->device->channel < MEGASAS_MAX_PD_CHANNELS && instance->pd_list[pd_index].driveState == MR_PD_STATE_SYSTEM) { io_request->Function = 0; - io_request->DevHandle = + if (fusion->fast_path_io) + io_request->DevHandle = local_map_ptr->raidMap.devHndlInfo[device_id].curDevHdl; io_request->RaidContext.timeoutValue = local_map_ptr->raidMap.fpPdIoTimeoutSec; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index a7c64f051996..f68a3cd11d5d 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -61,7 +61,6 @@ #define MEGASAS_SCSI_ADDL_CDB_LEN 0x18 #define MEGASAS_RD_WR_PROTECT_CHECK_ALL 0x20 #define MEGASAS_RD_WR_PROTECT_CHECK_NONE 0x60 -#define MEGASAS_EEDPBLOCKSIZE 512 /* * Raid context flags diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 5e24e7e73714..bcb23d28b3e8 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -2023,6 +2023,14 @@ _base_display_intel_branding(struct MPT2SAS_ADAPTER *ioc) printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, MPT2SAS_INTEL_RMS25KB040_BRANDING); break; + case MPT2SAS_INTEL_RMS25LB040_SSDID: + printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, + MPT2SAS_INTEL_RMS25LB040_BRANDING); + break; + case MPT2SAS_INTEL_RMS25LB080_SSDID: + printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, + MPT2SAS_INTEL_RMS25LB080_BRANDING); + break; default: break; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index c6ee7aad7501..4caaac13682f 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -165,6 +165,10 @@ "Intel(R) Integrated RAID Module RMS25KB080" #define MPT2SAS_INTEL_RMS25KB040_BRANDING \ "Intel(R) Integrated RAID Module RMS25KB040" +#define MPT2SAS_INTEL_RMS25LB040_BRANDING \ + "Intel(R) Integrated RAID Module RMS25LB040" +#define MPT2SAS_INTEL_RMS25LB080_BRANDING \ + "Intel(R) Integrated RAID Module RMS25LB080" #define MPT2SAS_INTEL_RMS2LL080_BRANDING \ "Intel Integrated RAID Module RMS2LL080" #define MPT2SAS_INTEL_RMS2LL040_BRANDING \ @@ -180,6 +184,8 @@ #define MPT2SAS_INTEL_RMS25JB040_SSDID 0x3517 #define MPT2SAS_INTEL_RMS25KB080_SSDID 0x3518 #define MPT2SAS_INTEL_RMS25KB040_SSDID 0x3519 +#define MPT2SAS_INTEL_RMS25LB040_SSDID 0x351A +#define MPT2SAS_INTEL_RMS25LB080_SSDID 0x351B #define MPT2SAS_INTEL_RMS2LL080_SSDID 0x350E #define MPT2SAS_INTEL_RMS2LL040_SSDID 0x350F #define MPT2SAS_INTEL_RS25GB008_SSDID 0x3000 diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 078c63913b55..532110f4562a 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -316,10 +316,13 @@ static int mvs_task_prep_smp(struct mvs_info *mvi, struct mvs_task_exec_info *tei) { int elem, rc, i; + struct sas_ha_struct *sha = mvi->sas; struct sas_task *task = tei->task; struct mvs_cmd_hdr *hdr = tei->hdr; struct domain_device *dev = task->dev; struct asd_sas_port *sas_port = dev->port; + struct sas_phy *sphy = dev->phy; + struct asd_sas_phy *sas_phy = sha->sas_phy[sphy->number]; struct scatterlist *sg_req, *sg_resp; u32 req_len, resp_len, tag = tei->tag; void *buf_tmp; @@ -392,7 +395,7 @@ static int mvs_task_prep_smp(struct mvs_info *mvi, slot->tx = mvi->tx_prod; mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) | TXQ_MODE_I | tag | - (sas_port->phy_mask << TXQ_PHY_SHIFT)); + (MVS_PHY_ID << TXQ_PHY_SHIFT)); hdr->flags |= flags; hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4)); @@ -438,11 +441,14 @@ static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag) static int mvs_task_prep_ata(struct mvs_info *mvi, struct mvs_task_exec_info *tei) { + struct sas_ha_struct *sha = mvi->sas; struct sas_task *task = tei->task; struct domain_device *dev = task->dev; struct mvs_device *mvi_dev = dev->lldd_dev; struct mvs_cmd_hdr *hdr = tei->hdr; struct asd_sas_port *sas_port = dev->port; + struct sas_phy *sphy = dev->phy; + struct asd_sas_phy *sas_phy = sha->sas_phy[sphy->number]; struct mvs_slot_info *slot; void *buf_prd; u32 tag = tei->tag, hdr_tag; @@ -462,7 +468,7 @@ static int mvs_task_prep_ata(struct mvs_info *mvi, slot->tx = mvi->tx_prod; del_q = TXQ_MODE_I | tag | (TXQ_CMD_STP << TXQ_CMD_SHIFT) | - (sas_port->phy_mask << TXQ_PHY_SHIFT) | + (MVS_PHY_ID << TXQ_PHY_SHIFT) | (mvi_dev->taskfileset << TXQ_SRS_SHIFT); mvi->tx[mvi->tx_prod] = cpu_to_le32(del_q); diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index 2ae77a0394b2..9f3cc13a5ce7 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -76,6 +76,7 @@ extern struct kmem_cache *mvs_task_list_cache; (__mc) != 0 ; \ (++__lseq), (__mc) >>= 1) +#define MVS_PHY_ID (1U << sas_phy->id) #define MV_INIT_DELAYED_WORK(w, f, d) INIT_DELAYED_WORK(w, f) #define UNASSOC_D2H_FIS(id) \ ((void *) mvi->rx_fis + 0x100 * id) diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index c06b8e5aa2cf..d8293f25ca33 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -144,6 +144,10 @@ static int _osd_get_print_system_info(struct osd_dev *od, odi->osdname_len = get_attrs[a].len; /* Avoid NULL for memcmp optimization 0-length is good enough */ odi->osdname = kzalloc(odi->osdname_len + 1, GFP_KERNEL); + if (!odi->osdname) { + ret = -ENOMEM; + goto out; + } if (odi->osdname_len) memcpy(odi->osdname, get_attrs[a].val_ptr, odi->osdname_len); OSD_INFO("OSD_NAME [%s]\n", odi->osdname); diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 4c9fe733fe88..3d5e522e00fc 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -140,7 +140,8 @@ static void pm8001_free(struct pm8001_hba_info *pm8001_ha) for (i = 0; i < USI_MAX_MEMCNT; i++) { if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) { pci_free_consistent(pm8001_ha->pdev, - pm8001_ha->memoryMap.region[i].element_size, + (pm8001_ha->memoryMap.region[i].total_len + + pm8001_ha->memoryMap.region[i].alignment), pm8001_ha->memoryMap.region[i].virt_ptr, pm8001_ha->memoryMap.region[i].phys_addr); } diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 83d798428c10..1d82eef4e1eb 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -1272,22 +1272,29 @@ qla2x00_thermal_temp_show(struct device *dev, struct device_attribute *attr, char *buf) { scsi_qla_host_t *vha = shost_priv(class_to_shost(dev)); - int rval = QLA_FUNCTION_FAILED; - uint16_t temp, frac; + uint16_t temp = 0; - if (!vha->hw->flags.thermal_supported) - return snprintf(buf, PAGE_SIZE, "\n"); + if (!vha->hw->thermal_support) { + ql_log(ql_log_warn, vha, 0x70db, + "Thermal not supported by this card.\n"); + goto done; + } - temp = frac = 0; - if (qla2x00_reset_active(vha)) - ql_log(ql_log_warn, vha, 0x707b, - "ISP reset active.\n"); - else if (!vha->hw->flags.eeh_busy) - rval = qla2x00_get_thermal_temp(vha, &temp, &frac); - if (rval != QLA_SUCCESS) - return snprintf(buf, PAGE_SIZE, "\n"); + if (qla2x00_reset_active(vha)) { + ql_log(ql_log_warn, vha, 0x70dc, "ISP reset active.\n"); + goto done; + } + + if (vha->hw->flags.eeh_busy) { + ql_log(ql_log_warn, vha, 0x70dd, "PCI EEH busy.\n"); + goto done; + } + + if (qla2x00_get_thermal_temp(vha, &temp) == QLA_SUCCESS) + return snprintf(buf, PAGE_SIZE, "%d\n", temp); - return snprintf(buf, PAGE_SIZE, "%d.%02d\n", temp, frac); +done: + return snprintf(buf, PAGE_SIZE, "\n"); } static ssize_t diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 9f34dedcdad7..ad54099cb805 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -27,7 +27,7 @@ void qla2x00_bsg_sp_free(void *data, void *ptr) { srb_t *sp = (srb_t *)ptr; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + struct scsi_qla_host *vha = sp->fcport->vha; struct fc_bsg_job *bsg_job = sp->u.bsg_job; struct qla_hw_data *ha = vha->hw; @@ -40,7 +40,7 @@ qla2x00_bsg_sp_free(void *data, void *ptr) if (sp->type == SRB_CT_CMD || sp->type == SRB_ELS_CMD_HST) kfree(sp->fcport); - mempool_free(sp, vha->hw->srb_mempool); + qla2x00_rel_sp(vha, sp); } int @@ -368,7 +368,7 @@ qla2x00_process_els(struct fc_bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, "qla2x00_start_sp failed = %d\n", rval); - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(vha, sp); rval = -EIO; goto done_unmap_sg; } @@ -515,7 +515,7 @@ qla2x00_process_ct(struct fc_bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x7017, "qla2x00_start_sp failed=%d.\n", rval); - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(vha, sp); rval = -EIO; goto done_free_fcport; } @@ -531,6 +531,75 @@ done_unmap_sg: done: return rval; } + +/* Disable loopback mode */ +static inline int +qla81xx_reset_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, + int wait, int wait2) +{ + int ret = 0; + int rval = 0; + uint16_t new_config[4]; + struct qla_hw_data *ha = vha->hw; + + if (!IS_QLA81XX(ha) && !IS_QLA8031(ha)) + goto done_reset_internal; + + memset(new_config, 0 , sizeof(new_config)); + if ((config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == + ENABLE_INTERNAL_LOOPBACK || + (config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == + ENABLE_EXTERNAL_LOOPBACK) { + new_config[0] = config[0] & ~INTERNAL_LOOPBACK_MASK; + ql_dbg(ql_dbg_user, vha, 0x70bf, "new_config[0]=%02x\n", + (new_config[0] & INTERNAL_LOOPBACK_MASK)); + memcpy(&new_config[1], &config[1], sizeof(uint16_t) * 3) ; + + ha->notify_dcbx_comp = wait; + ha->notify_lb_portup_comp = wait2; + + ret = qla81xx_set_port_config(vha, new_config); + if (ret != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x7025, + "Set port config failed.\n"); + ha->notify_dcbx_comp = 0; + ha->notify_lb_portup_comp = 0; + rval = -EINVAL; + goto done_reset_internal; + } + + /* Wait for DCBX complete event */ + if (wait && !wait_for_completion_timeout(&ha->dcbx_comp, + (DCBX_COMP_TIMEOUT * HZ))) { + ql_dbg(ql_dbg_user, vha, 0x7026, + "DCBX completion not received.\n"); + ha->notify_dcbx_comp = 0; + ha->notify_lb_portup_comp = 0; + rval = -EINVAL; + goto done_reset_internal; + } else + ql_dbg(ql_dbg_user, vha, 0x7027, + "DCBX completion received.\n"); + + if (wait2 && + !wait_for_completion_timeout(&ha->lb_portup_comp, + (LB_PORTUP_COMP_TIMEOUT * HZ))) { + ql_dbg(ql_dbg_user, vha, 0x70c5, + "Port up completion not received.\n"); + ha->notify_lb_portup_comp = 0; + rval = -EINVAL; + goto done_reset_internal; + } else + ql_dbg(ql_dbg_user, vha, 0x70c6, + "Port up completion received.\n"); + + ha->notify_dcbx_comp = 0; + ha->notify_lb_portup_comp = 0; + } +done_reset_internal: + return rval; +} + /* * Set the port configuration to enable the internal or external loopback * depending on the loopback mode. @@ -566,9 +635,19 @@ qla81xx_set_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, } /* Wait for DCBX complete event */ - if (!wait_for_completion_timeout(&ha->dcbx_comp, (20 * HZ))) { + if (!wait_for_completion_timeout(&ha->dcbx_comp, + (DCBX_COMP_TIMEOUT * HZ))) { ql_dbg(ql_dbg_user, vha, 0x7022, - "State change notification not received.\n"); + "DCBX completion not received.\n"); + ret = qla81xx_reset_loopback_mode(vha, new_config, 0, 0); + /* + * If the reset of the loopback mode doesn't work take a FCoE + * dump and reset the chip. + */ + if (ret) { + ha->isp_ops->fw_dump(vha, 0); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + } rval = -EINVAL; } else { if (ha->flags.idc_compl_status) { @@ -578,7 +657,7 @@ qla81xx_set_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, ha->flags.idc_compl_status = 0; } else ql_dbg(ql_dbg_user, vha, 0x7023, - "State change received.\n"); + "DCBX completion received.\n"); } ha->notify_dcbx_comp = 0; @@ -587,57 +666,6 @@ done_set_internal: return rval; } -/* Disable loopback mode */ -static inline int -qla81xx_reset_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, - int wait) -{ - int ret = 0; - int rval = 0; - uint16_t new_config[4]; - struct qla_hw_data *ha = vha->hw; - - if (!IS_QLA81XX(ha) && !IS_QLA8031(ha)) - goto done_reset_internal; - - memset(new_config, 0 , sizeof(new_config)); - if ((config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == - ENABLE_INTERNAL_LOOPBACK || - (config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == - ENABLE_EXTERNAL_LOOPBACK) { - new_config[0] = config[0] & ~INTERNAL_LOOPBACK_MASK; - ql_dbg(ql_dbg_user, vha, 0x70bf, "new_config[0]=%02x\n", - (new_config[0] & INTERNAL_LOOPBACK_MASK)); - memcpy(&new_config[1], &config[1], sizeof(uint16_t) * 3) ; - - ha->notify_dcbx_comp = wait; - ret = qla81xx_set_port_config(vha, new_config); - if (ret != QLA_SUCCESS) { - ql_log(ql_log_warn, vha, 0x7025, - "Set port config failed.\n"); - ha->notify_dcbx_comp = 0; - rval = -EINVAL; - goto done_reset_internal; - } - - /* Wait for DCBX complete event */ - if (wait && !wait_for_completion_timeout(&ha->dcbx_comp, - (20 * HZ))) { - ql_dbg(ql_dbg_user, vha, 0x7026, - "State change notification not received.\n"); - ha->notify_dcbx_comp = 0; - rval = -EINVAL; - goto done_reset_internal; - } else - ql_dbg(ql_dbg_user, vha, 0x7027, - "State change received.\n"); - - ha->notify_dcbx_comp = 0; - } -done_reset_internal: - return rval; -} - static int qla2x00_process_loopback(struct fc_bsg_job *bsg_job) { @@ -739,6 +767,7 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) { memset(config, 0, sizeof(config)); memset(new_config, 0, sizeof(new_config)); + if (qla81xx_get_port_config(vha, config)) { ql_log(ql_log_warn, vha, 0x701f, "Get port config failed.\n"); @@ -746,6 +775,14 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) goto done_free_dma_rsp; } + if ((config[0] & INTERNAL_LOOPBACK_MASK) != 0) { + ql_dbg(ql_dbg_user, vha, 0x70c4, + "Loopback operation already in " + "progress.\n"); + rval = -EAGAIN; + goto done_free_dma_rsp; + } + ql_dbg(ql_dbg_user, vha, 0x70c0, "elreq.options=%04x\n", elreq.options); @@ -755,7 +792,7 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) config, new_config, elreq.options); else rval = qla81xx_reset_loopback_mode(vha, - config, 1); + config, 1, 0); else rval = qla81xx_set_loopback_mode(vha, config, new_config, elreq.options); @@ -772,14 +809,6 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) command_sent = INT_DEF_LB_LOOPBACK_CMD; rval = qla2x00_loopback_test(vha, &elreq, response); - if (new_config[0]) { - /* Revert back to original port config - * Also clear internal loopback - */ - qla81xx_reset_loopback_mode(vha, - new_config, 0); - } - if (response[0] == MBS_COMMAND_ERROR && response[1] == MBS_LB_RESET) { ql_log(ql_log_warn, vha, 0x7029, @@ -788,15 +817,39 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) qla2xxx_wake_dpc(vha); qla2x00_wait_for_chip_reset(vha); /* Also reset the MPI */ - if (qla81xx_restart_mpi_firmware(vha) != - QLA_SUCCESS) { - ql_log(ql_log_warn, vha, 0x702a, - "MPI reset failed.\n"); + if (IS_QLA81XX(ha)) { + if (qla81xx_restart_mpi_firmware(vha) != + QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x702a, + "MPI reset failed.\n"); + } } rval = -EIO; goto done_free_dma_rsp; } + + if (new_config[0]) { + int ret; + + /* Revert back to original port config + * Also clear internal loopback + */ + ret = qla81xx_reset_loopback_mode(vha, + new_config, 0, 1); + if (ret) { + /* + * If the reset of the loopback mode + * doesn't work take FCoE dump and then + * reset the chip. + */ + ha->isp_ops->fw_dump(vha, 0); + set_bit(ISP_ABORT_NEEDED, + &vha->dpc_flags); + } + + } + } else { type = "FC_BSG_HST_VENDOR_LOOPBACK"; ql_dbg(ql_dbg_user, vha, 0x702b, @@ -1950,7 +2003,7 @@ qla24xx_bsg_timeout(struct fc_bsg_job *bsg_job) if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { if (((sp->type == SRB_CT_CMD) || @@ -1985,6 +2038,6 @@ done: spin_unlock_irqrestore(&ha->hardware_lock, flags); if (bsg_job->request->msgcode == FC_BSG_HST_CT) kfree(sp->fcport); - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(vha, sp); return 0; } diff --git a/drivers/scsi/qla2xxx/qla_bsg.h b/drivers/scsi/qla2xxx/qla_bsg.h index 37b8b7ba7421..e9f6b9bbf29a 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.h +++ b/drivers/scsi/qla2xxx/qla_bsg.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 53f9e492f9dc..1626de52e32a 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -11,20 +11,21 @@ * ---------------------------------------------------------------------- * | Level | Last Value Used | Holes | * ---------------------------------------------------------------------- - * | Module Init and Probe | 0x0125 | 0x4b,0xba,0xfa | - * | Mailbox commands | 0x114f | 0x111a-0x111b | + * | Module Init and Probe | 0x0126 | 0x4b,0xba,0xfa | + * | Mailbox commands | 0x115b | 0x111a-0x111b | * | | | 0x112c-0x112e | * | | | 0x113a | * | Device Discovery | 0x2087 | 0x2020-0x2022, | * | | | 0x2016 | - * | Queue Command and IO tracing | 0x3030 | 0x3006-0x300b | + * | Queue Command and IO tracing | 0x3031 | 0x3006-0x300b | * | | | 0x3027-0x3028 | * | | | 0x302d-0x302e | * | DPC Thread | 0x401d | 0x4002,0x4013 | * | Async Events | 0x5071 | 0x502b-0x502f | * | | | 0x5047,0x5052 | * | Timer Routines | 0x6011 | | - * | User Space Interactions | 0x70c3 | 0x7018,0x702e, | + * | User Space Interactions | 0x70c4 | 0x7018,0x702e, | + * | | | 0x7020,0x7024, | * | | | 0x7039,0x7045, | * | | | 0x7073-0x7075, | * | | | 0x708c, | @@ -35,11 +36,11 @@ * | | | 0x800b,0x8039 | * | AER/EEH | 0x9011 | | * | Virtual Port | 0xa007 | | - * | ISP82XX Specific | 0xb084 | 0xb002,0xb024 | + * | ISP82XX Specific | 0xb086 | 0xb002,0xb024 | * | MultiQ | 0xc00c | | * | Misc | 0xd010 | | - * | Target Mode | 0xe06f | | - * | Target Mode Management | 0xf071 | | + * | Target Mode | 0xe070 | | + * | Target Mode Management | 0xf072 | | * | Target Mode Task Management | 0x1000b | | * ---------------------------------------------------------------------- */ diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 8f911c0b1e74..35e20b4f8b6c 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 6e7727f46d43..c6509911772b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -37,6 +37,7 @@ #include "qla_nx.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" +#define QLA2XXX_MANUFACTURER "QLogic Corporation" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, @@ -253,8 +254,8 @@ #define LOOP_DOWN_TIME 255 /* 240 */ #define LOOP_DOWN_RESET (LOOP_DOWN_TIME - 30) -/* Maximum outstanding commands in ISP queues (1-65535) */ -#define MAX_OUTSTANDING_COMMANDS 1024 +#define DEFAULT_OUTSTANDING_COMMANDS 1024 +#define MIN_OUTSTANDING_COMMANDS 128 /* ISP request and response entry counts (37-65535) */ #define REQUEST_ENTRY_CNT_2100 128 /* Number of request entries. */ @@ -537,6 +538,8 @@ struct device_reg_25xxmq { uint32_t req_q_out; uint32_t rsp_q_in; uint32_t rsp_q_out; + uint32_t atio_q_in; + uint32_t atio_q_out; }; typedef union { @@ -563,6 +566,9 @@ typedef union { &(reg)->u.isp2100.mailbox5 : \ &(reg)->u.isp2300.rsp_q_out) +#define ISP_ATIO_Q_IN(vha) (vha->hw->tgt.atio_q_in) +#define ISP_ATIO_Q_OUT(vha) (vha->hw->tgt.atio_q_out) + #define MAILBOX_REG(ha, reg, num) \ (IS_QLA2100(ha) || IS_QLA2200(ha) ? \ (num < 8 ? \ @@ -762,8 +768,8 @@ typedef struct { #define MBC_PORT_LOGOUT 0x56 /* Port Logout request */ #define MBC_SEND_RNID_ELS 0x57 /* Send RNID ELS request */ #define MBC_SET_RNID_PARAMS 0x59 /* Set RNID parameters */ -#define MBC_GET_RNID_PARAMS 0x5a /* Data Rate */ -#define MBC_DATA_RATE 0x5d /* Get RNID parameters */ +#define MBC_GET_RNID_PARAMS 0x5a /* Get RNID parameters */ +#define MBC_DATA_RATE 0x5d /* Data Rate */ #define MBC_INITIALIZE_FIRMWARE 0x60 /* Initialize firmware */ #define MBC_INITIATE_LIP 0x62 /* Initiate Loop */ /* Initialization Procedure */ @@ -809,6 +815,7 @@ typedef struct { #define MBC_HOST_MEMORY_COPY 0x53 /* Host Memory Copy. */ #define MBC_SEND_RNFT_ELS 0x5e /* Send RNFT ELS request */ #define MBC_GET_LINK_PRIV_STATS 0x6d /* Get link & private data. */ +#define MBC_LINK_INITIALIZATION 0x72 /* Do link initialization. */ #define MBC_SET_VENDOR_ID 0x76 /* Set Vendor ID. */ #define MBC_PORT_RESET 0x120 /* Port Reset */ #define MBC_SET_PORT_CONFIG 0x122 /* Set port configuration */ @@ -856,6 +863,9 @@ typedef struct { #define MBX_1 BIT_1 #define MBX_0 BIT_0 +#define RNID_TYPE_SET_VERSION 0x9 +#define RNID_TYPE_ASIC_TEMP 0xC + /* * Firmware state codes from get firmware state mailbox command */ @@ -1841,9 +1851,6 @@ typedef struct fc_port { uint8_t scan_state; } fc_port_t; -#define QLA_FCPORT_SCAN_NONE 0 -#define QLA_FCPORT_SCAN_FOUND 1 - /* * Fibre channel port/lun states. */ @@ -2533,8 +2540,10 @@ struct req_que { uint16_t qos; uint16_t vp_idx; struct rsp_que *rsp; - srb_t *outstanding_cmds[MAX_OUTSTANDING_COMMANDS]; + srb_t **outstanding_cmds; uint32_t current_outstanding_cmd; + uint16_t num_outstanding_cmds; +#define MAX_Q_DEPTH 32 int max_q_depth; }; @@ -2557,11 +2566,13 @@ struct qlt_hw_data { struct atio *atio_ring_ptr; /* Current address. */ uint16_t atio_ring_index; /* Current index. */ uint16_t atio_q_length; + uint32_t __iomem *atio_q_in; + uint32_t __iomem *atio_q_out; void *target_lport_ptr; struct qla_tgt_func_tmpl *tgt_ops; struct qla_tgt *qla_tgt; - struct qla_tgt_cmd *cmds[MAX_OUTSTANDING_COMMANDS]; + struct qla_tgt_cmd *cmds[DEFAULT_OUTSTANDING_COMMANDS]; uint16_t current_handle; struct qla_tgt_vp_map *tgt_vp_map; @@ -2618,7 +2629,6 @@ struct qla_hw_data { uint32_t nic_core_hung:1; uint32_t quiesce_owner:1; - uint32_t thermal_supported:1; uint32_t nic_core_reset_hdlr_active:1; uint32_t nic_core_reset_owner:1; uint32_t isp82xx_no_md_cap:1; @@ -2788,6 +2798,8 @@ struct qla_hw_data { #define IS_PI_SPLIT_DET_CAPABLE_HBA(ha) (IS_QLA83XX(ha)) #define IS_PI_SPLIT_DET_CAPABLE(ha) (IS_PI_SPLIT_DET_CAPABLE_HBA(ha) && \ (((ha)->fw_attributes_h << 16 | (ha)->fw_attributes) & BIT_22)) +#define IS_ATIO_MSIX_CAPABLE(ha) (IS_QLA83XX(ha)) +#define IS_TGT_MODE_CAPABLE(ha) (ha->tgt.atio_q_length) /* HBA serial number */ uint8_t serial0; @@ -2870,7 +2882,13 @@ struct qla_hw_data { struct completion mbx_cmd_comp; /* Serialize mbx access */ struct completion mbx_intr_comp; /* Used for completion notification */ struct completion dcbx_comp; /* For set port config notification */ + struct completion lb_portup_comp; /* Used to wait for link up during + * loopback */ +#define DCBX_COMP_TIMEOUT 20 +#define LB_PORTUP_COMP_TIMEOUT 10 + int notify_dcbx_comp; + int notify_lb_portup_comp; struct mutex selflogin_lock; /* Basic firmware related information. */ @@ -2887,6 +2905,7 @@ struct qla_hw_data { #define RISC_START_ADDRESS_2300 0x800 #define RISC_START_ADDRESS_2400 0x100000 uint16_t fw_xcb_count; + uint16_t fw_iocb_count; uint16_t fw_options[16]; /* slots: 1,2,3,10,11 */ uint8_t fw_seriallink_options[4]; @@ -3056,7 +3075,16 @@ struct qla_hw_data { struct work_struct idc_state_handler; struct work_struct nic_core_unrecoverable; +#define HOST_QUEUE_RAMPDOWN_INTERVAL (60 * HZ) +#define HOST_QUEUE_RAMPUP_INTERVAL (30 * HZ) + unsigned long host_last_rampdown_time; + unsigned long host_last_rampup_time; + int cfg_lun_q_depth; + struct qlt_hw_data tgt; + uint16_t thermal_support; +#define THERMAL_SUPPORT_I2C BIT_0 +#define THERMAL_SUPPORT_ISP BIT_1 }; /* @@ -3115,6 +3143,8 @@ typedef struct scsi_qla_host { #define MPI_RESET_NEEDED 19 /* Initiate MPI FW reset */ #define ISP_QUIESCE_NEEDED 20 /* Driver need some quiescence */ #define SCR_PENDING 21 /* SCR in target mode */ +#define HOST_RAMP_DOWN_QUEUE_DEPTH 22 +#define HOST_RAMP_UP_QUEUE_DEPTH 23 uint32_t device_flags; #define SWITCH_FOUND BIT_0 @@ -3248,8 +3278,6 @@ struct qla_tgt_vp_map { #define NVRAM_DELAY() udelay(10) -#define INVALID_HANDLE (MAX_OUTSTANDING_COMMANDS+1) - /* * Flash support definitions */ diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 706c4f7bc7c9..792a29294b62 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index be6d61a89edc..1ac2b0e3a0e1 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -300,7 +300,8 @@ struct init_cb_24xx { uint32_t prio_request_q_address[2]; uint16_t msix; - uint8_t reserved_2[6]; + uint16_t msix_atio; + uint8_t reserved_2[4]; uint16_t atio_q_inpointer; uint16_t atio_q_length; @@ -1387,9 +1388,7 @@ struct qla_flt_header { #define FLT_REG_FCP_PRIO_0 0x87 #define FLT_REG_FCP_PRIO_1 0x88 #define FLT_REG_FCOE_FW 0xA4 -#define FLT_REG_FCOE_VPD_0 0xA9 #define FLT_REG_FCOE_NVRAM_0 0xAA -#define FLT_REG_FCOE_VPD_1 0xAB #define FLT_REG_FCOE_NVRAM_1 0xAC struct qla_flt_region { diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 2411d1a12b26..eb3ca21a7f17 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -55,7 +55,7 @@ extern void qla2x00_update_fcport(scsi_qla_host_t *, fc_port_t *); extern void qla2x00_alloc_fw_dump(scsi_qla_host_t *); extern void qla2x00_try_to_stop_firmware(scsi_qla_host_t *); -extern int qla2x00_get_thermal_temp(scsi_qla_host_t *, uint16_t *, uint16_t *); +extern int qla2x00_get_thermal_temp(scsi_qla_host_t *, uint16_t *); extern void qla84xx_put_chip(struct scsi_qla_host *); @@ -84,6 +84,9 @@ extern int qla83xx_nic_core_reset(scsi_qla_host_t *); extern void qla83xx_reset_ownership(scsi_qla_host_t *); extern int qla2xxx_mctp_dump(scsi_qla_host_t *); +extern int +qla2x00_alloc_outstanding_cmds(struct qla_hw_data *, struct req_que *); + /* * Global Data in qla_os.c source file. */ @@ -94,6 +97,7 @@ extern int qlport_down_retry; extern int ql2xplogiabsentdevice; extern int ql2xloginretrycount; extern int ql2xfdmienable; +extern int ql2xmaxqdepth; extern int ql2xallocfwdump; extern int ql2xextended_error_logging; extern int ql2xiidmaenable; @@ -278,6 +282,9 @@ extern int qla2x00_get_port_name(scsi_qla_host_t *, uint16_t, uint8_t *, uint8_t); extern int +qla24xx_link_initialize(scsi_qla_host_t *); + +extern int qla2x00_lip_reset(scsi_qla_host_t *); extern int @@ -351,6 +358,9 @@ extern int qla2x00_disable_fce_trace(scsi_qla_host_t *, uint64_t *, uint64_t *); extern int +qla2x00_set_driver_version(scsi_qla_host_t *, char *); + +extern int qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint8_t *, uint16_t, uint16_t, uint16_t, uint16_t); @@ -436,6 +446,7 @@ extern uint8_t *qla25xx_read_nvram_data(scsi_qla_host_t *, uint8_t *, uint32_t, uint32_t); extern int qla25xx_write_nvram_data(scsi_qla_host_t *, uint8_t *, uint32_t, uint32_t); +extern int qla2x00_is_a_vp_did(scsi_qla_host_t *, uint32_t); extern int qla2x00_beacon_on(struct scsi_qla_host *); extern int qla2x00_beacon_off(struct scsi_qla_host *); diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 01efc0e9cc36..9b455250c101 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -1328,8 +1328,8 @@ qla2x00_fdmi_rhba(scsi_qla_host_t *vha) /* Manufacturer. */ eiter = (struct ct_fdmi_hba_attr *) (entries + size); eiter->type = __constant_cpu_to_be16(FDMI_HBA_MANUFACTURER); - strcpy(eiter->a.manufacturer, "QLogic Corporation"); - alen = strlen(eiter->a.manufacturer); + alen = strlen(QLA2XXX_MANUFACTURER); + strncpy(eiter->a.manufacturer, QLA2XXX_MANUFACTURER, alen + 1); alen += (alen & 3) ? (4 - (alen & 3)) : 4; eiter->len = cpu_to_be16(4 + alen); size += 4 + alen; @@ -1649,8 +1649,8 @@ qla2x00_fdmi_rpa(scsi_qla_host_t *vha) /* OS device name. */ eiter = (struct ct_fdmi_port_attr *) (entries + size); eiter->type = __constant_cpu_to_be16(FDMI_PORT_OS_DEVICE_NAME); - strcpy(eiter->a.os_dev_name, QLA2XXX_DRIVER_NAME); - alen = strlen(eiter->a.os_dev_name); + alen = strlen(QLA2XXX_DRIVER_NAME); + strncpy(eiter->a.os_dev_name, QLA2XXX_DRIVER_NAME, alen + 1); alen += (alen & 3) ? (4 - (alen & 3)) : 4; eiter->len = cpu_to_be16(4 + alen); size += 4 + alen; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 563eee3fa924..edf4d14a1335 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -70,9 +70,7 @@ qla2x00_sp_free(void *data, void *ptr) struct scsi_qla_host *vha = (scsi_qla_host_t *)data; del_timer(&iocb->timer); - mempool_free(sp, vha->hw->srb_mempool); - - QLA_VHA_MARK_NOT_BUSY(vha); + qla2x00_rel_sp(vha, sp); } /* Asynchronous Login/Logout Routines -------------------------------------- */ @@ -525,7 +523,7 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) vha->flags.reset_active = 0; ha->flags.pci_channel_io_perm_failure = 0; ha->flags.eeh_busy = 0; - ha->flags.thermal_supported = 1; + ha->thermal_support = THERMAL_SUPPORT_I2C|THERMAL_SUPPORT_ISP; atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); atomic_set(&vha->loop_state, LOOP_DOWN); vha->device_flags = DFLG_NO_CABLE; @@ -621,6 +619,8 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)) qla24xx_read_fcp_prio_cfg(vha); + qla2x00_set_driver_version(vha, QLA2XXX_VERSION); + return (rval); } @@ -1559,6 +1559,47 @@ done: return rval; } +int +qla2x00_alloc_outstanding_cmds(struct qla_hw_data *ha, struct req_que *req) +{ + /* Don't try to reallocate the array */ + if (req->outstanding_cmds) + return QLA_SUCCESS; + + if (!IS_FWI2_CAPABLE(ha) || (ha->mqiobase && + (ql2xmultique_tag || ql2xmaxqueues > 1))) + req->num_outstanding_cmds = DEFAULT_OUTSTANDING_COMMANDS; + else { + if (ha->fw_xcb_count <= ha->fw_iocb_count) + req->num_outstanding_cmds = ha->fw_xcb_count; + else + req->num_outstanding_cmds = ha->fw_iocb_count; + } + + req->outstanding_cmds = kzalloc(sizeof(srb_t *) * + req->num_outstanding_cmds, GFP_KERNEL); + + if (!req->outstanding_cmds) { + /* + * Try to allocate a minimal size just so we can get through + * initialization. + */ + req->num_outstanding_cmds = MIN_OUTSTANDING_COMMANDS; + req->outstanding_cmds = kzalloc(sizeof(srb_t *) * + req->num_outstanding_cmds, GFP_KERNEL); + + if (!req->outstanding_cmds) { + ql_log(ql_log_fatal, NULL, 0x0126, + "Failed to allocate memory for " + "outstanding_cmds for req_que %p.\n", req); + req->num_outstanding_cmds = 0; + return QLA_FUNCTION_FAILED; + } + } + + return QLA_SUCCESS; +} + /** * qla2x00_setup_chip() - Load and start RISC firmware. * @ha: HA context @@ -1628,9 +1669,18 @@ enable_82xx_npiv: MIN_MULTI_ID_FABRIC - 1; } qla2x00_get_resource_cnts(vha, NULL, - &ha->fw_xcb_count, NULL, NULL, + &ha->fw_xcb_count, NULL, &ha->fw_iocb_count, &ha->max_npiv_vports, NULL); + /* + * Allocate the array of outstanding commands + * now that we know the firmware resources. + */ + rval = qla2x00_alloc_outstanding_cmds(ha, + vha->req); + if (rval != QLA_SUCCESS) + goto failed; + if (!fw_major_version && ql2xallocfwdump && !IS_QLA82XX(ha)) qla2x00_alloc_fw_dump(vha); @@ -1914,7 +1964,7 @@ qla24xx_config_rings(struct scsi_qla_host *vha) WRT_REG_DWORD(®->isp24.rsp_q_in, 0); WRT_REG_DWORD(®->isp24.rsp_q_out, 0); } - qlt_24xx_config_rings(vha, reg); + qlt_24xx_config_rings(vha); /* PCI posting */ RD_REG_DWORD(&ioreg->hccr); @@ -1948,7 +1998,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) req = ha->req_q_map[que]; if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) req->outstanding_cmds[cnt] = NULL; req->current_outstanding_cmd = 1; @@ -2157,6 +2207,7 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) char connect_type[22]; struct qla_hw_data *ha = vha->hw; unsigned long flags; + scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); /* Get host addresses. */ rval = qla2x00_get_adapter_id(vha, @@ -2170,6 +2221,13 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) } else { ql_log(ql_log_warn, vha, 0x2009, "Unable to get host loop ID.\n"); + if (IS_FWI2_CAPABLE(ha) && (vha == base_vha) && + (rval == QLA_COMMAND_ERROR && loop_id == 0x1b)) { + ql_log(ql_log_warn, vha, 0x1151, + "Doing link init.\n"); + if (qla24xx_link_initialize(vha) == QLA_SUCCESS) + return rval; + } set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); } return (rval); @@ -2690,7 +2748,6 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->loop_id = FC_NO_LOOP_ID; qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; - fcport->scan_state = QLA_FCPORT_SCAN_NONE; return fcport; } @@ -3103,7 +3160,7 @@ static int qla2x00_configure_fabric(scsi_qla_host_t *vha) { int rval; - fc_port_t *fcport; + fc_port_t *fcport, *fcptemp; uint16_t next_loopid; uint16_t mb[MAILBOX_REGISTER_COUNT]; uint16_t loop_id; @@ -3141,7 +3198,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) 0xfc, mb, BIT_1|BIT_0); if (rval != QLA_SUCCESS) { set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - break; + return rval; } if (mb[0] != MBS_COMMAND_COMPLETE) { ql_dbg(ql_dbg_disc, vha, 0x2042, @@ -3173,16 +3230,21 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) } } +#define QLA_FCPORT_SCAN 1 +#define QLA_FCPORT_FOUND 2 + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + fcport->scan_state = QLA_FCPORT_SCAN; + } + rval = qla2x00_find_all_fabric_devs(vha, &new_fcports); if (rval != QLA_SUCCESS) break; - /* Add new ports to existing port list */ - list_splice_tail_init(&new_fcports, &vha->vp_fcports); - - /* Starting free loop ID. */ - next_loopid = ha->min_external_loopid; - + /* + * Logout all previous fabric devices marked lost, except + * FCP2 devices. + */ list_for_each_entry(fcport, &vha->vp_fcports, list) { if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; @@ -3190,8 +3252,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) continue; - /* Logout lost/gone fabric devices (non-FCP2) */ - if (fcport->scan_state != QLA_FCPORT_SCAN_FOUND && + if (fcport->scan_state == QLA_FCPORT_SCAN && atomic_read(&fcport->state) == FCS_ONLINE) { qla2x00_mark_device_lost(vha, fcport, ql2xplogiabsentdevice, 0); @@ -3204,30 +3265,74 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa); + fcport->loop_id = FC_NO_LOOP_ID; } - continue; } - fcport->scan_state = QLA_FCPORT_SCAN_NONE; - - /* Login fabric devices that need a login */ - if ((fcport->flags & FCF_LOGIN_NEEDED) != 0 && - atomic_read(&vha->loop_down_timer) == 0) { - if (fcport->loop_id == FC_NO_LOOP_ID) { - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id( - base_vha, fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - continue; - } + } + + /* Starting free loop ID. */ + next_loopid = ha->min_external_loopid; + + /* + * Scan through our port list and login entries that need to be + * logged in. + */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (atomic_read(&vha->loop_down_timer) || + test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) + break; + + if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || + (fcport->flags & FCF_LOGIN_NEEDED) == 0) + continue; + + if (fcport->loop_id == FC_NO_LOOP_ID) { + fcport->loop_id = next_loopid; + rval = qla2x00_find_new_loop_id( + base_vha, fcport); + if (rval != QLA_SUCCESS) { + /* Ran out of IDs to use */ + break; } } + /* Login and update database */ + qla2x00_fabric_dev_login(vha, fcport, &next_loopid); + } + + /* Exit if out of loop IDs. */ + if (rval != QLA_SUCCESS) { + break; + } + + /* + * Login and add the new devices to our port list. + */ + list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { + if (atomic_read(&vha->loop_down_timer) || + test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) + break; + + /* Find a new loop ID to use. */ + fcport->loop_id = next_loopid; + rval = qla2x00_find_new_loop_id(base_vha, fcport); + if (rval != QLA_SUCCESS) { + /* Ran out of IDs to use */ + break; + } /* Login and update database */ qla2x00_fabric_dev_login(vha, fcport, &next_loopid); + + list_move_tail(&fcport->list, &vha->vp_fcports); } } while (0); + /* Free all new device structures not processed. */ + list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { + list_del(&fcport->list); + kfree(fcport); + } + if (rval) { ql_dbg(ql_dbg_disc, vha, 0x2068, "Configure fabric error exit rval=%d.\n", rval); @@ -3263,8 +3368,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, int first_dev, last_dev; port_id_t wrap = {}, nxt_d_id; struct qla_hw_data *ha = vha->hw; - struct scsi_qla_host *vp, *base_vha = pci_get_drvdata(ha->pdev); - struct scsi_qla_host *tvp; + struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); rval = QLA_SUCCESS; @@ -3377,22 +3481,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, continue; /* Bypass virtual ports of the same host. */ - found = 0; - if (ha->num_vhosts) { - unsigned long flags; - - spin_lock_irqsave(&ha->vport_slock, flags); - list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) { - if (new_fcport->d_id.b24 == vp->d_id.b24) { - found = 1; - break; - } - } - spin_unlock_irqrestore(&ha->vport_slock, flags); - - if (found) - continue; - } + if (qla2x00_is_a_vp_did(vha, new_fcport->d_id.b24)) + continue; /* Bypass if same domain and area of adapter. */ if (((new_fcport->d_id.b24 & 0xffff00) == @@ -3417,7 +3507,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, WWN_SIZE)) continue; - fcport->scan_state = QLA_FCPORT_SCAN_FOUND; + fcport->scan_state = QLA_FCPORT_FOUND; found++; @@ -5004,7 +5094,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr, return rval; } -#define QLA_FW_URL "ftp://ftp.qlogic.com/outgoing/linux/firmware/" +#define QLA_FW_URL "http://ldriver.qlogic.com/firmware/" int qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr) @@ -5529,6 +5619,8 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) if (IS_T10_PI_CAPABLE(ha)) nv->frame_payload_size &= ~7; + qlt_81xx_config_nvram_stage1(vha, nv); + /* Reset Initialization control block */ memset(icb, 0, ha->init_cb_size); @@ -5569,6 +5661,8 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) qla2x00_set_model_info(vha, nv->model_name, sizeof(nv->model_name), "QLE8XXX"); + qlt_81xx_config_nvram_stage2(vha, icb); + /* Use alternate WWN? */ if (nv->host_p & __constant_cpu_to_le32(BIT_15)) { memcpy(icb->node_name, nv->alternate_node_name, WWN_SIZE); diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index c0462c04c885..68e2c4afc134 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -198,6 +198,13 @@ done: } static inline void +qla2x00_rel_sp(scsi_qla_host_t *vha, srb_t *sp) +{ + mempool_free(sp, vha->hw->srb_mempool); + QLA_VHA_MARK_NOT_BUSY(vha); +} + +static inline void qla2x00_init_timer(srb_t *sp, unsigned long tmo) { init_timer(&sp->u.iocb_cmd.timer); @@ -213,3 +220,22 @@ qla2x00_gid_list_size(struct qla_hw_data *ha) { return sizeof(struct gid_list_info) * ha->max_fibre_devices; } + +static inline void +qla2x00_do_host_ramp_up(scsi_qla_host_t *vha) +{ + if (vha->hw->cfg_lun_q_depth >= ql2xmaxqdepth) + return; + + /* Wait at least HOST_QUEUE_RAMPDOWN_INTERVAL before ramping up */ + if (time_before(jiffies, (vha->hw->host_last_rampdown_time + + HOST_QUEUE_RAMPDOWN_INTERVAL))) + return; + + /* Wait at least HOST_QUEUE_RAMPUP_INTERVAL between each ramp up */ + if (time_before(jiffies, (vha->hw->host_last_rampup_time + + HOST_QUEUE_RAMPUP_INTERVAL))) + return; + + set_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags); +} diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index a481684479c1..d2630317cce8 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -349,14 +349,14 @@ qla2x00_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) + if (index == req->num_outstanding_cmds) goto queuing_error; /* Map the sg table so we have an accurate count of sg entries needed */ @@ -1467,16 +1467,15 @@ qla24xx_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) { + if (index == req->num_outstanding_cmds) goto queuing_error; - } /* Map the sg table so we have an accurate count of sg entries needed */ if (scsi_sg_count(cmd)) { @@ -1641,15 +1640,15 @@ qla24xx_dif_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) + if (index == req->num_outstanding_cmds) goto queuing_error; /* Compute number of required data segments */ @@ -1822,14 +1821,14 @@ qla2x00_alloc_iocbs(scsi_qla_host_t *vha, srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) { + if (index == req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x700b, "No room on outstanding cmd array.\n"); goto queuing_error; @@ -2263,14 +2262,14 @@ qla82xx_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) + if (index == req->num_outstanding_cmds) goto queuing_error; /* Map the sg table so we have an accurate count of sg entries needed */ @@ -2767,15 +2766,15 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) { + if (index == req->num_outstanding_cmds) { rval = EXT_STATUS_BUSY; goto queuing_error; } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 873c82014b16..e9dbd74c20d3 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -13,6 +13,8 @@ #include <scsi/scsi_bsg_fc.h> #include <scsi/scsi_eh.h> +#include "qla_target.h" + static void qla2x00_mbx_completion(scsi_qla_host_t *, uint16_t); static void qla2x00_process_completed_request(struct scsi_qla_host *, struct req_que *, uint32_t); @@ -489,10 +491,37 @@ qla83xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb) if (mb[1] & IDC_DEVICE_STATE_CHANGE) { ql_log(ql_log_info, vha, 0x506a, "IDC Device-State changed = 0x%x.\n", mb[4]); + if (ha->flags.nic_core_reset_owner) + return; qla83xx_schedule_work(vha, MBA_IDC_AEN); } } +int +qla2x00_is_a_vp_did(scsi_qla_host_t *vha, uint32_t rscn_entry) +{ + struct qla_hw_data *ha = vha->hw; + scsi_qla_host_t *vp; + uint32_t vp_did; + unsigned long flags; + int ret = 0; + + if (!ha->num_vhosts) + return ret; + + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + vp_did = vp->d_id.b24; + if (vp_did == rscn_entry) { + ret = 1; + break; + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); + + return ret; +} + /** * qla2x00_async_event() - Process aynchronous events. * @ha: SCSI driver HA context @@ -899,6 +928,10 @@ skip_rio: /* Ignore reserved bits from RSCN-payload. */ rscn_entry = ((mb[1] & 0x3ff) << 16) | mb[2]; + /* Skip RSCNs for virtual ports on the same physical port */ + if (qla2x00_is_a_vp_did(vha, rscn_entry)) + break; + atomic_set(&vha->loop_down_timer, 0); vha->flags.management_server_logged_in = 0; @@ -983,14 +1016,25 @@ skip_rio: mb[1], mb[2], mb[3]); break; case MBA_IDC_NOTIFY: - /* See if we need to quiesce any I/O */ - if (IS_QLA8031(vha->hw)) - if ((mb[2] & 0x7fff) == MBC_PORT_RESET || - (mb[2] & 0x7fff) == MBC_SET_PORT_CONFIG) { + if (IS_QLA8031(vha->hw)) { + mb[4] = RD_REG_WORD(®24->mailbox4); + if (((mb[2] & 0x7fff) == MBC_PORT_RESET || + (mb[2] & 0x7fff) == MBC_SET_PORT_CONFIG) && + (mb[4] & INTERNAL_LOOPBACK_MASK) != 0) { set_bit(ISP_QUIESCE_NEEDED, &vha->dpc_flags); + /* + * Extend loop down timer since port is active. + */ + if (atomic_read(&vha->loop_state) == LOOP_DOWN) + atomic_set(&vha->loop_down_timer, + LOOP_DOWN_TIME); qla2xxx_wake_dpc(vha); } + } case MBA_IDC_COMPLETE: + if (ha->notify_lb_portup_comp) + complete(&ha->lb_portup_comp); + /* Fallthru */ case MBA_IDC_TIME_EXT: if (IS_QLA81XX(vha->hw) || IS_QLA8031(vha->hw)) qla81xx_idc_event(vha, mb[0], mb[1]); @@ -1029,7 +1073,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha, struct qla_hw_data *ha = vha->hw; /* Validate handle. */ - if (index >= MAX_OUTSTANDING_COMMANDS) { + if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x3014, "Invalid SCSI command index (%x).\n", index); @@ -1067,7 +1111,7 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, uint16_t index; index = LSW(pkt->handle); - if (index >= MAX_OUTSTANDING_COMMANDS) { + if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x5031, "Invalid command index (%x).\n", index); if (IS_QLA82XX(ha)) @@ -1740,7 +1784,7 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt, sts24 = (struct sts_entry_24xx *) pkt; /* Validate handle. */ - if (index >= MAX_OUTSTANDING_COMMANDS) { + if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x70af, "Invalid SCSI completion handle 0x%x.\n", index); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); @@ -1910,9 +1954,9 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) req = ha->req_q_map[que]; /* Validate handle. */ - if (handle < MAX_OUTSTANDING_COMMANDS) { + if (handle < req->num_outstanding_cmds) sp = req->outstanding_cmds[handle]; - } else + else sp = NULL; if (sp == NULL) { @@ -1934,6 +1978,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) /* Fast path completion. */ if (comp_status == CS_COMPLETE && scsi_status == 0) { + qla2x00_do_host_ramp_up(vha); qla2x00_process_completed_request(vha, req, handle); return; @@ -2193,6 +2238,9 @@ out: cp->cmnd[8], cp->cmnd[9], scsi_bufflen(cp), rsp_info_len, resid_len, fw_resid_len); + if (!res) + qla2x00_do_host_ramp_up(vha); + if (rsp->status_srb == NULL) sp->done(ha, sp, res); } @@ -2747,6 +2795,12 @@ static struct qla_init_msix_entry qla82xx_msix_entries[2] = { { "qla2xxx (rsp_q)", qla82xx_msix_rsp_q }, }; +static struct qla_init_msix_entry qla83xx_msix_entries[3] = { + { "qla2xxx (default)", qla24xx_msix_default }, + { "qla2xxx (rsp_q)", qla24xx_msix_rsp_q }, + { "qla2xxx (atio_q)", qla83xx_msix_atio_q }, +}; + static void qla24xx_disable_msix(struct qla_hw_data *ha) { @@ -2827,9 +2881,13 @@ msix_failed: } /* Enable MSI-X vectors for the base queue */ - for (i = 0; i < 2; i++) { + for (i = 0; i < ha->msix_count; i++) { qentry = &ha->msix_entries[i]; - if (IS_QLA82XX(ha)) { + if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) { + ret = request_irq(qentry->vector, + qla83xx_msix_entries[i].handler, + 0, qla83xx_msix_entries[i].name, rsp); + } else if (IS_QLA82XX(ha)) { ret = request_irq(qentry->vector, qla82xx_msix_entries[i].handler, 0, qla82xx_msix_entries[i].name, rsp); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 68c55eaa318c..186dd59ce4fa 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -900,13 +900,13 @@ qla2x00_abort_command(srb_t *sp) "Entered %s.\n", __func__); spin_lock_irqsave(&ha->hardware_lock, flags); - for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) { + for (handle = 1; handle < req->num_outstanding_cmds; handle++) { if (req->outstanding_cmds[handle] == sp) break; } spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (handle == MAX_OUTSTANDING_COMMANDS) { + if (handle == req->num_outstanding_cmds) { /* command not found */ return QLA_FUNCTION_FAILED; } @@ -1633,6 +1633,54 @@ qla2x00_get_port_name(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t *name, } /* + * qla24xx_link_initialization + * Issue link initialization mailbox command. + * + * Input: + * ha = adapter block pointer. + * TARGET_QUEUE_LOCK must be released. + * ADAPTER_STATE_LOCK must be released. + * + * Returns: + * qla2x00 local function return status code. + * + * Context: + * Kernel context. + */ +int +qla24xx_link_initialize(scsi_qla_host_t *vha) +{ + int rval; + mbx_cmd_t mc; + mbx_cmd_t *mcp = &mc; + + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1152, + "Entered %s.\n", __func__); + + if (!IS_FWI2_CAPABLE(vha->hw) || IS_CNA_CAPABLE(vha->hw)) + return QLA_FUNCTION_FAILED; + + mcp->mb[0] = MBC_LINK_INITIALIZATION; + mcp->mb[1] = BIT_6|BIT_4; + mcp->mb[2] = 0; + mcp->mb[3] = 0; + mcp->out_mb = MBX_3|MBX_2|MBX_1|MBX_0; + mcp->in_mb = MBX_0; + mcp->tov = MBX_TOV_SECONDS; + mcp->flags = 0; + rval = qla2x00_mailbox_command(vha, mcp); + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0x1153, "Failed=%x.\n", rval); + } else { + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1154, + "Done %s.\n", __func__); + } + + return rval; +} + +/* * qla2x00_lip_reset * Issue LIP reset mailbox command. * @@ -2535,12 +2583,12 @@ qla24xx_abort_command(srb_t *sp) "Entered %s.\n", __func__); spin_lock_irqsave(&ha->hardware_lock, flags); - for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) { + for (handle = 1; handle < req->num_outstanding_cmds; handle++) { if (req->outstanding_cmds[handle] == sp) break; } spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (handle == MAX_OUTSTANDING_COMMANDS) { + if (handle == req->num_outstanding_cmds) { /* Command not found. */ return QLA_FUNCTION_FAILED; } @@ -3093,6 +3141,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, struct qla_hw_data *ha = vha->hw; scsi_qla_host_t *vp; unsigned long flags; + int found; ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6, "Entered %s.\n", __func__); @@ -3128,13 +3177,17 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, return; } + found = 0; spin_lock_irqsave(&ha->vport_slock, flags); - list_for_each_entry(vp, &ha->vp_list, list) - if (vp_idx == vp->vp_idx) + list_for_each_entry(vp, &ha->vp_list, list) { + if (vp_idx == vp->vp_idx) { + found = 1; break; + } + } spin_unlock_irqrestore(&ha->vport_slock, flags); - if (!vp) + if (!found) return; vp->d_id.b.domain = rptid_entry->port_id[2]; @@ -3814,6 +3867,97 @@ qla81xx_restart_mpi_firmware(scsi_qla_host_t *vha) } int +qla2x00_set_driver_version(scsi_qla_host_t *vha, char *version) +{ + int rval; + mbx_cmd_t mc; + mbx_cmd_t *mcp = &mc; + int len; + uint16_t dwlen; + uint8_t *str; + dma_addr_t str_dma; + struct qla_hw_data *ha = vha->hw; + + if (!IS_FWI2_CAPABLE(ha) || IS_QLA82XX(ha)) + return QLA_FUNCTION_FAILED; + + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1155, + "Entered %s.\n", __func__); + + str = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &str_dma); + if (!str) { + ql_log(ql_log_warn, vha, 0x1156, + "Failed to allocate driver version param.\n"); + return QLA_MEMORY_ALLOC_FAILED; + } + + memcpy(str, "\x7\x3\x11\x0", 4); + dwlen = str[0]; + len = dwlen * sizeof(uint32_t) - 4; + memset(str + 4, 0, len); + if (len > strlen(version)) + len = strlen(version); + memcpy(str + 4, version, len); + + mcp->mb[0] = MBC_SET_RNID_PARAMS; + mcp->mb[1] = RNID_TYPE_SET_VERSION << 8 | dwlen; + mcp->mb[2] = MSW(LSD(str_dma)); + mcp->mb[3] = LSW(LSD(str_dma)); + mcp->mb[6] = MSW(MSD(str_dma)); + mcp->mb[7] = LSW(MSD(str_dma)); + mcp->out_mb = MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0; + mcp->in_mb = MBX_0; + mcp->tov = MBX_TOV_SECONDS; + mcp->flags = 0; + rval = qla2x00_mailbox_command(vha, mcp); + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0x1157, + "Failed=%x mb[0]=%x.\n", rval, mcp->mb[0]); + } else { + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1158, + "Done %s.\n", __func__); + } + + dma_pool_free(ha->s_dma_pool, str, str_dma); + + return rval; +} + +static int +qla2x00_read_asic_temperature(scsi_qla_host_t *vha, uint16_t *temp) +{ + int rval; + mbx_cmd_t mc; + mbx_cmd_t *mcp = &mc; + + if (!IS_FWI2_CAPABLE(vha->hw)) + return QLA_FUNCTION_FAILED; + + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1159, + "Entered %s.\n", __func__); + + mcp->mb[0] = MBC_GET_RNID_PARAMS; + mcp->mb[1] = RNID_TYPE_ASIC_TEMP << 8; + mcp->out_mb = MBX_1|MBX_0; + mcp->in_mb = MBX_1|MBX_0; + mcp->tov = MBX_TOV_SECONDS; + mcp->flags = 0; + rval = qla2x00_mailbox_command(vha, mcp); + *temp = mcp->mb[1]; + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0x115a, + "Failed=%x mb[0]=%x,%x.\n", rval, mcp->mb[0], mcp->mb[1]); + } else { + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x115b, + "Done %s.\n", __func__); + } + + return rval; +} + +int qla2x00_read_sfp(scsi_qla_host_t *vha, dma_addr_t sfp_dma, uint8_t *sfp, uint16_t dev, uint16_t off, uint16_t len, uint16_t opt) { @@ -4415,38 +4559,45 @@ qla24xx_set_fcp_prio(scsi_qla_host_t *vha, uint16_t loop_id, uint16_t priority, } int -qla2x00_get_thermal_temp(scsi_qla_host_t *vha, uint16_t *temp, uint16_t *frac) +qla2x00_get_thermal_temp(scsi_qla_host_t *vha, uint16_t *temp) { - int rval; - uint8_t byte; + int rval = QLA_FUNCTION_FAILED; struct qla_hw_data *ha = vha->hw; + uint8_t byte; ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10ca, "Entered %s.\n", __func__); - /* Integer part */ - rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x01, 1, - BIT_13|BIT_12|BIT_0); - if (rval != QLA_SUCCESS) { - ql_dbg(ql_dbg_mbx, vha, 0x10c9, "Failed=%x.\n", rval); - ha->flags.thermal_supported = 0; - goto fail; + if (ha->thermal_support & THERMAL_SUPPORT_I2C) { + rval = qla2x00_read_sfp(vha, 0, &byte, + 0x98, 0x1, 1, BIT_13|BIT_12|BIT_0); + *temp = byte; + if (rval == QLA_SUCCESS) + goto done; + + ql_log(ql_log_warn, vha, 0x10c9, + "Thermal not supported by I2C.\n"); + ha->thermal_support &= ~THERMAL_SUPPORT_I2C; } - *temp = byte; - /* Fraction part */ - rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x10, 1, - BIT_13|BIT_12|BIT_0); - if (rval != QLA_SUCCESS) { - ql_dbg(ql_dbg_mbx, vha, 0x1019, "Failed=%x.\n", rval); - ha->flags.thermal_supported = 0; - goto fail; + if (ha->thermal_support & THERMAL_SUPPORT_ISP) { + rval = qla2x00_read_asic_temperature(vha, temp); + if (rval == QLA_SUCCESS) + goto done; + + ql_log(ql_log_warn, vha, 0x1019, + "Thermal not supported by ISP.\n"); + ha->thermal_support &= ~THERMAL_SUPPORT_ISP; } - *frac = (byte >> 6) * 25; + ql_log(ql_log_warn, vha, 0x1150, + "Thermal not supported by this card " + "(ignoring further requests).\n"); + return rval; + +done: ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1018, "Done %s.\n", __func__); -fail: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 20fd974f903a..f868a9f98afe 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -523,6 +523,7 @@ qla25xx_free_req_que(struct scsi_qla_host *vha, struct req_que *req) clear_bit(que_id, ha->req_qid_map); mutex_unlock(&ha->vport_lock); } + kfree(req->outstanding_cmds); kfree(req); req = NULL; } @@ -649,6 +650,10 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options, goto que_failed; } + ret = qla2x00_alloc_outstanding_cmds(ha, req); + if (ret != QLA_SUCCESS) + goto que_failed; + mutex_lock(&ha->vport_lock); que_id = find_first_zero_bit(ha->req_qid_map, ha->max_req_queues); if (que_id >= ha->max_req_queues) { @@ -685,7 +690,7 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options, "options=0x%x.\n", req->options); ql_dbg(ql_dbg_init, base_vha, 0x00dd, "options=0x%x.\n", req->options); - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) req->outstanding_cmds[cnt] = NULL; req->current_outstanding_cmd = 1; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 3e3f593bada3..10754f518303 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -847,14 +847,21 @@ static int qla82xx_rom_lock(struct qla_hw_data *ha) { int done = 0, timeout = 0; + uint32_t lock_owner = 0; + scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); while (!done) { /* acquire semaphore2 from PCI HW block */ done = qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_LOCK)); if (done == 1) break; - if (timeout >= qla82xx_rom_lock_timeout) + if (timeout >= qla82xx_rom_lock_timeout) { + lock_owner = qla82xx_rd_32(ha, QLA82XX_ROM_LOCK_ID); + ql_dbg(ql_dbg_p3p, vha, 0xb085, + "Failed to acquire rom lock, acquired by %d.\n", + lock_owner); return -1; + } timeout++; } qla82xx_wr_32(ha, QLA82XX_ROM_LOCK_ID, ROM_LOCK_DRIVER); @@ -3629,7 +3636,7 @@ qla82xx_chip_reset_cleanup(scsi_qla_host_t *vha) req = ha->req_q_map[que]; if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { if (!sp->u.scmd.ctx || diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h index 6c953e8c08f0..d268e8406fdb 100644 --- a/drivers/scsi/qla2xxx/qla_nx.h +++ b/drivers/scsi/qla2xxx/qla_nx.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -897,7 +897,7 @@ struct ct6_dsd { #define FLT_REG_BOOT_CODE_82XX 0x78 #define FLT_REG_FW_82XX 0x74 #define FLT_REG_GOLD_FW_82XX 0x75 -#define FLT_REG_VPD_82XX 0x81 +#define FLT_REG_VPD_8XXX 0x81 #define FA_VPD_SIZE_82XX 0x400 diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 10d23f8b7036..2c6dd3dfe0f4 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -111,8 +111,7 @@ MODULE_PARM_DESC(ql2xfdmienable, "Enables FDMI registrations. " "0 - no FDMI. Default is 1 - perform FDMI."); -#define MAX_Q_DEPTH 32 -static int ql2xmaxqdepth = MAX_Q_DEPTH; +int ql2xmaxqdepth = MAX_Q_DEPTH; module_param(ql2xmaxqdepth, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(ql2xmaxqdepth, "Maximum queue depth to set for each LUN. " @@ -360,6 +359,9 @@ static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req) (req->length + 1) * sizeof(request_t), req->ring, req->dma); + if (req) + kfree(req->outstanding_cmds); + kfree(req); req = NULL; } @@ -628,7 +630,7 @@ qla2x00_sp_free_dma(void *vha, void *ptr) } CMD_SP(cmd) = NULL; - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(sp->fcport->vha, sp); } static void @@ -716,9 +718,11 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) goto qc24_target_busy; } - sp = qla2x00_get_sp(base_vha, fcport, GFP_ATOMIC); - if (!sp) + sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); + if (!sp) { + set_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags); goto qc24_host_busy; + } sp->u.scmd.cmd = cmd; sp->type = SRB_SCSI_CMD; @@ -731,6 +735,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) if (rval != QLA_SUCCESS) { ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x3013, "Start scsi failed rval=%d for cmd=%p.\n", rval, cmd); + set_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags); goto qc24_host_busy_free_sp; } @@ -1010,7 +1015,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, spin_lock_irqsave(&ha->hardware_lock, flags); req = vha->req; for (cnt = 1; status == QLA_SUCCESS && - cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (!sp) continue; @@ -1300,14 +1305,14 @@ qla2x00_loop_reset(scsi_qla_host_t *vha) } if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) { + atomic_set(&vha->loop_state, LOOP_DOWN); + atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); + qla2x00_mark_all_devices_lost(vha, 0); ret = qla2x00_full_login_lip(vha); if (ret != QLA_SUCCESS) { ql_dbg(ql_dbg_taskm, vha, 0x802d, "full_login_lip=%d.\n", ret); } - atomic_set(&vha->loop_state, LOOP_DOWN); - atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); - qla2x00_mark_all_devices_lost(vha, 0); } if (ha->flags.enable_lip_reset) { @@ -1337,7 +1342,9 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) req = ha->req_q_map[que]; if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + if (!req->outstanding_cmds) + continue; + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { req->outstanding_cmds[cnt] = NULL; @@ -1453,6 +1460,81 @@ qla2x00_change_queue_type(struct scsi_device *sdev, int tag_type) return tag_type; } +static void +qla2x00_host_ramp_down_queuedepth(scsi_qla_host_t *vha) +{ + scsi_qla_host_t *vp; + struct Scsi_Host *shost; + struct scsi_device *sdev; + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + + ha->host_last_rampdown_time = jiffies; + + if (ha->cfg_lun_q_depth <= vha->host->cmd_per_lun) + return; + + if ((ha->cfg_lun_q_depth / 2) < vha->host->cmd_per_lun) + ha->cfg_lun_q_depth = vha->host->cmd_per_lun; + else + ha->cfg_lun_q_depth = ha->cfg_lun_q_depth / 2; + + /* + * Geometrically ramp down the queue depth for all devices on this + * adapter + */ + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + shost = vp->host; + shost_for_each_device(sdev, shost) { + if (sdev->queue_depth > shost->cmd_per_lun) { + if (sdev->queue_depth < ha->cfg_lun_q_depth) + continue; + ql_log(ql_log_warn, vp, 0x3031, + "%ld:%d:%d: Ramping down queue depth to %d", + vp->host_no, sdev->id, sdev->lun, + ha->cfg_lun_q_depth); + qla2x00_change_queue_depth(sdev, + ha->cfg_lun_q_depth, SCSI_QDEPTH_DEFAULT); + } + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); + + return; +} + +static void +qla2x00_host_ramp_up_queuedepth(scsi_qla_host_t *vha) +{ + scsi_qla_host_t *vp; + struct Scsi_Host *shost; + struct scsi_device *sdev; + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + + ha->host_last_rampup_time = jiffies; + ha->cfg_lun_q_depth++; + + /* + * Linearly ramp up the queue depth for all devices on this + * adapter + */ + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + shost = vp->host; + shost_for_each_device(sdev, shost) { + if (sdev->queue_depth > ha->cfg_lun_q_depth) + continue; + qla2x00_change_queue_depth(sdev, ha->cfg_lun_q_depth, + SCSI_QDEPTH_RAMP_UP); + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); + + return; +} + /** * qla2x00_config_dma_addressing() - Configure OS DMA addressing method. * @ha: HA context @@ -1730,6 +1812,9 @@ qla83xx_iospace_config(struct qla_hw_data *ha) mqiobase_exit: ha->msix_count = ha->max_rsp_queues + 1; + + qlt_83xx_iospace_config(ha); + ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011f, "MSIX Count:%d.\n", ha->msix_count); return 0; @@ -2230,6 +2315,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->init_cb_size = sizeof(init_cb_t); ha->link_data_rate = PORT_SPEED_UNKNOWN; ha->optrom_size = OPTROM_SIZE_2300; + ha->cfg_lun_q_depth = ql2xmaxqdepth; /* Assign ISP specific operations. */ if (IS_QLA2100(ha)) { @@ -2307,6 +2393,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->mbx_count = MAILBOX_REGISTER_COUNT; req_length = REQUEST_ENTRY_CNT_24XX; rsp_length = RESPONSE_ENTRY_CNT_2300; + ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX; ha->max_loop_id = SNS_LAST_LOOP_ID_2300; ha->init_cb_size = sizeof(struct mid_init_cb_81xx); ha->gid_list_info_size = 8; @@ -2338,6 +2425,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->mbx_count = MAILBOX_REGISTER_COUNT; req_length = REQUEST_ENTRY_CNT_24XX; rsp_length = RESPONSE_ENTRY_CNT_2300; + ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX; ha->max_loop_id = SNS_LAST_LOOP_ID_2300; ha->init_cb_size = sizeof(struct mid_init_cb_81xx); ha->gid_list_info_size = 8; @@ -2377,6 +2465,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) complete(&ha->mbx_cmd_comp); init_completion(&ha->mbx_intr_comp); init_completion(&ha->dcbx_comp); + init_completion(&ha->lb_portup_comp); set_bit(0, (unsigned long *) ha->vp_idx_map); @@ -2720,6 +2809,9 @@ qla2x00_shutdown(struct pci_dev *pdev) scsi_qla_host_t *vha; struct qla_hw_data *ha; + if (!atomic_read(&pdev->enable_cnt)) + return; + vha = pci_get_drvdata(pdev); ha = vha->hw; @@ -3974,6 +4066,8 @@ qla83xx_force_lock_recovery(scsi_qla_host_t *base_vha) uint32_t idc_lck_rcvry_stage_mask = 0x3; uint32_t idc_lck_rcvry_owner_mask = 0x3c; struct qla_hw_data *ha = base_vha->hw; + ql_dbg(ql_dbg_p3p, base_vha, 0xb086, + "Trying force recovery of the IDC lock.\n"); rval = qla83xx_rd_reg(base_vha, QLA83XX_IDC_LOCK_RECOVERY, &data); if (rval) @@ -4065,6 +4159,7 @@ qla83xx_idc_lock(scsi_qla_host_t *base_vha, uint16_t requester_id) { uint16_t options = (requester_id << 15) | BIT_6; uint32_t data; + uint32_t lock_owner; struct qla_hw_data *ha = base_vha->hw; /* IDC-lock implementation using driver-lock/lock-id remote registers */ @@ -4076,8 +4171,11 @@ retry_lock: qla83xx_wr_reg(base_vha, QLA83XX_DRIVER_LOCKID, ha->portnum); } else { + qla83xx_rd_reg(base_vha, QLA83XX_DRIVER_LOCKID, + &lock_owner); ql_dbg(ql_dbg_p3p, base_vha, 0xb063, - "Failed to acquire IDC lock. retrying...\n"); + "Failed to acquire IDC lock, acquired by %d, " + "retrying...\n", lock_owner); /* Retry/Perform IDC-Lock recovery */ if (qla83xx_idc_lock_recovery(base_vha) @@ -4605,6 +4703,18 @@ qla2x00_do_dpc(void *data) qla2xxx_flash_npiv_conf(base_vha); } + if (test_and_clear_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, + &base_vha->dpc_flags)) { + /* Prevents simultaneous ramp up and down */ + clear_bit(HOST_RAMP_UP_QUEUE_DEPTH, + &base_vha->dpc_flags); + qla2x00_host_ramp_down_queuedepth(base_vha); + } + + if (test_and_clear_bit(HOST_RAMP_UP_QUEUE_DEPTH, + &base_vha->dpc_flags)) + qla2x00_host_ramp_up_queuedepth(base_vha); + if (!ha->interrupts_on) ha->isp_ops->enable_intrs(ha); @@ -4733,7 +4843,7 @@ qla2x00_timer(scsi_qla_host_t *vha) cpu_flags); req = ha->req_q_map[0]; for (index = 1; - index < MAX_OUTSTANDING_COMMANDS; + index < req->num_outstanding_cmds; index++) { fc_port_t *sfcp; @@ -4802,7 +4912,9 @@ qla2x00_timer(scsi_qla_host_t *vha) test_bit(ISP_UNRECOVERABLE, &vha->dpc_flags) || test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags) || test_bit(VP_DPC_NEEDED, &vha->dpc_flags) || - test_bit(RELOGIN_NEEDED, &vha->dpc_flags))) { + test_bit(RELOGIN_NEEDED, &vha->dpc_flags) || + test_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags) || + test_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags))) { ql_dbg(ql_dbg_timer, vha, 0x600b, "isp_abort_needed=%d loop_resync_needed=%d " "fcport_update_needed=%d start_dpc=%d " @@ -4815,12 +4927,15 @@ qla2x00_timer(scsi_qla_host_t *vha) ql_dbg(ql_dbg_timer, vha, 0x600c, "beacon_blink_needed=%d isp_unrecoverable=%d " "fcoe_ctx_reset_needed=%d vp_dpc_needed=%d " - "relogin_needed=%d.\n", + "relogin_needed=%d, host_ramp_down_needed=%d " + "host_ramp_up_needed=%d.\n", test_bit(BEACON_BLINK_NEEDED, &vha->dpc_flags), test_bit(ISP_UNRECOVERABLE, &vha->dpc_flags), test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags), test_bit(VP_DPC_NEEDED, &vha->dpc_flags), - test_bit(RELOGIN_NEEDED, &vha->dpc_flags)); + test_bit(RELOGIN_NEEDED, &vha->dpc_flags), + test_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags), + test_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags)); qla2xxx_wake_dpc(vha); } diff --git a/drivers/scsi/qla2xxx/qla_settings.h b/drivers/scsi/qla2xxx/qla_settings.h index 892a81e457bc..46ef0ac48f44 100644 --- a/drivers/scsi/qla2xxx/qla_settings.h +++ b/drivers/scsi/qla2xxx/qla_settings.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 32fdc2a66dd1..3bef6736d885 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -798,20 +798,8 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) case FLT_REG_BOOTLOAD_82XX: ha->flt_region_bootload = start; break; - case FLT_REG_VPD_82XX: - ha->flt_region_vpd = start; - break; - case FLT_REG_FCOE_VPD_0: - if (!IS_QLA8031(ha)) - break; - ha->flt_region_vpd_nvram = start; - if (ha->flags.port0) - ha->flt_region_vpd = start; - break; - case FLT_REG_FCOE_VPD_1: - if (!IS_QLA8031(ha)) - break; - if (!ha->flags.port0) + case FLT_REG_VPD_8XXX: + if (IS_CNA_CAPABLE(ha)) ha->flt_region_vpd = start; break; case FLT_REG_FCOE_NVRAM_0: diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 80f4b849e2b0..61b5d8c2b5da 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -52,7 +52,7 @@ MODULE_PARM_DESC(qlini_mode, "\"disabled\" - initiator mode will never be enabled; " "\"enabled\" (default) - initiator mode will always stay enabled."); -static int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE; +int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE; /* * From scsi/fc/fc_fcp.h @@ -1119,6 +1119,7 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; nack->u.isp24.status = ntfy->u.isp24.status; nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode; + nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle; nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address; nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs; nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui; @@ -1570,7 +1571,7 @@ static inline uint32_t qlt_make_handle(struct scsi_qla_host *vha) /* always increment cmd handle */ do { ++h; - if (h > MAX_OUTSTANDING_COMMANDS) + if (h > DEFAULT_OUTSTANDING_COMMANDS) h = 1; /* 0 is QLA_TGT_NULL_HANDLE */ if (h == ha->tgt.current_handle) { ql_dbg(ql_dbg_tgt, vha, 0xe04e, @@ -2441,7 +2442,7 @@ static struct qla_tgt_cmd *qlt_ctio_to_cmd(struct scsi_qla_host *vha, return NULL; } /* handle-1 is actually used */ - if (unlikely(handle > MAX_OUTSTANDING_COMMANDS)) { + if (unlikely(handle > DEFAULT_OUTSTANDING_COMMANDS)) { ql_dbg(ql_dbg_tgt, vha, 0xe052, "qla_target(%d): Wrong handle %x received\n", vha->vp_idx, handle); @@ -4305,6 +4306,12 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) if (!QLA_TGT_MODE_ENABLED()) return 0; + if (!IS_TGT_MODE_CAPABLE(ha)) { + ql_log(ql_log_warn, base_vha, 0xe070, + "This adapter does not support target mode.\n"); + return 0; + } + ql_dbg(ql_dbg_tgt, base_vha, 0xe03b, "Registering target for host %ld(%p)", base_vha->host_no, ha); @@ -4666,7 +4673,6 @@ void qlt_24xx_process_atio_queue(struct scsi_qla_host *vha) { struct qla_hw_data *ha = vha->hw; - struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; struct atio_from_isp *pkt; int cnt, i; @@ -4694,26 +4700,28 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha) } /* Adjust ring index */ - WRT_REG_DWORD(®->atio_q_out, ha->tgt.atio_ring_index); + WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), ha->tgt.atio_ring_index); } void -qlt_24xx_config_rings(struct scsi_qla_host *vha, device_reg_t __iomem *reg) +qlt_24xx_config_rings(struct scsi_qla_host *vha) { struct qla_hw_data *ha = vha->hw; + if (!QLA_TGT_MODE_ENABLED()) + return; -/* FIXME: atio_q in/out for ha->mqenable=1..? */ - if (ha->mqenable) { -#if 0 - WRT_REG_DWORD(®->isp25mq.atio_q_in, 0); - WRT_REG_DWORD(®->isp25mq.atio_q_out, 0); - RD_REG_DWORD(®->isp25mq.atio_q_out); -#endif - } else { - /* Setup APTIO registers for target mode */ - WRT_REG_DWORD(®->isp24.atio_q_in, 0); - WRT_REG_DWORD(®->isp24.atio_q_out, 0); - RD_REG_DWORD(®->isp24.atio_q_out); + WRT_REG_DWORD(ISP_ATIO_Q_IN(vha), 0); + WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), 0); + RD_REG_DWORD(ISP_ATIO_Q_OUT(vha)); + + if (IS_ATIO_MSIX_CAPABLE(ha)) { + struct qla_msix_entry *msix = &ha->msix_entries[2]; + struct init_cb_24xx *icb = (struct init_cb_24xx *)ha->init_cb; + + icb->msix_atio = cpu_to_le16(msix->entry); + ql_dbg(ql_dbg_init, vha, 0xf072, + "Registering ICB vector 0x%x for atio que.\n", + msix->entry); } } @@ -4796,6 +4804,101 @@ qlt_24xx_config_nvram_stage2(struct scsi_qla_host *vha, } } +void +qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) +{ + struct qla_hw_data *ha = vha->hw; + + if (!QLA_TGT_MODE_ENABLED()) + return; + + if (qla_tgt_mode_enabled(vha)) { + if (!ha->tgt.saved_set) { + /* We save only once */ + ha->tgt.saved_exchange_count = nv->exchange_count; + ha->tgt.saved_firmware_options_1 = + nv->firmware_options_1; + ha->tgt.saved_firmware_options_2 = + nv->firmware_options_2; + ha->tgt.saved_firmware_options_3 = + nv->firmware_options_3; + ha->tgt.saved_set = 1; + } + + nv->exchange_count = __constant_cpu_to_le16(0xFFFF); + + /* Enable target mode */ + nv->firmware_options_1 |= __constant_cpu_to_le32(BIT_4); + + /* Disable ini mode, if requested */ + if (!qla_ini_mode_enabled(vha)) + nv->firmware_options_1 |= + __constant_cpu_to_le32(BIT_5); + + /* Disable Full Login after LIP */ + nv->firmware_options_1 &= __constant_cpu_to_le32(~BIT_13); + /* Enable initial LIP */ + nv->firmware_options_1 &= __constant_cpu_to_le32(~BIT_9); + /* Enable FC tapes support */ + nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_12); + /* Disable Full Login after LIP */ + nv->host_p &= __constant_cpu_to_le32(~BIT_10); + /* Enable target PRLI control */ + nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_14); + } else { + if (ha->tgt.saved_set) { + nv->exchange_count = ha->tgt.saved_exchange_count; + nv->firmware_options_1 = + ha->tgt.saved_firmware_options_1; + nv->firmware_options_2 = + ha->tgt.saved_firmware_options_2; + nv->firmware_options_3 = + ha->tgt.saved_firmware_options_3; + } + return; + } + + /* out-of-order frames reassembly */ + nv->firmware_options_3 |= BIT_6|BIT_9; + + if (ha->tgt.enable_class_2) { + if (vha->flags.init_done) + fc_host_supported_classes(vha->host) = + FC_COS_CLASS2 | FC_COS_CLASS3; + + nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_8); + } else { + if (vha->flags.init_done) + fc_host_supported_classes(vha->host) = FC_COS_CLASS3; + + nv->firmware_options_2 &= ~__constant_cpu_to_le32(BIT_8); + } +} + +void +qlt_81xx_config_nvram_stage2(struct scsi_qla_host *vha, + struct init_cb_81xx *icb) +{ + struct qla_hw_data *ha = vha->hw; + + if (!QLA_TGT_MODE_ENABLED()) + return; + + if (ha->tgt.node_name_set) { + memcpy(icb->node_name, ha->tgt.tgt_node_name, WWN_SIZE); + icb->firmware_options_1 |= __constant_cpu_to_le32(BIT_14); + } +} + +void +qlt_83xx_iospace_config(struct qla_hw_data *ha) +{ + if (!QLA_TGT_MODE_ENABLED()) + return; + + ha->msix_count += 1; /* For ATIO Q */ +} + int qlt_24xx_process_response_error(struct scsi_qla_host *vha, struct sts_entry_24xx *pkt) @@ -4828,11 +4931,41 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) if (!QLA_TGT_MODE_ENABLED()) return; + if (ha->mqenable || IS_QLA83XX(ha)) { + ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in; + ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out; + } else { + ISP_ATIO_Q_IN(base_vha) = &ha->iobase->isp24.atio_q_in; + ISP_ATIO_Q_OUT(base_vha) = &ha->iobase->isp24.atio_q_out; + } + mutex_init(&ha->tgt.tgt_mutex); mutex_init(&ha->tgt.tgt_host_action_mutex); qlt_clear_mode(base_vha); } +irqreturn_t +qla83xx_msix_atio_q(int irq, void *dev_id) +{ + struct rsp_que *rsp; + scsi_qla_host_t *vha; + struct qla_hw_data *ha; + unsigned long flags; + + rsp = (struct rsp_que *) dev_id; + ha = rsp->hw; + vha = pci_get_drvdata(ha->pdev); + + spin_lock_irqsave(&ha->hardware_lock, flags); + + qlt_24xx_process_atio_queue(vha); + qla24xx_process_response_queue(vha, rsp); + + spin_unlock_irqrestore(&ha->hardware_lock, flags); + + return IRQ_HANDLED; +} + int qlt_mem_alloc(struct qla_hw_data *ha) { diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index bad749561ec2..ff9ccb9fd036 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -60,8 +60,9 @@ * multi-complete should come to the tgt driver or be handled there by qla2xxx */ #define CTIO_COMPLETION_HANDLE_MARK BIT_29 -#if (CTIO_COMPLETION_HANDLE_MARK <= MAX_OUTSTANDING_COMMANDS) -#error "CTIO_COMPLETION_HANDLE_MARK not larger than MAX_OUTSTANDING_COMMANDS" +#if (CTIO_COMPLETION_HANDLE_MARK <= DEFAULT_OUTSTANDING_COMMANDS) +#error "CTIO_COMPLETION_HANDLE_MARK not larger than " + "DEFAULT_OUTSTANDING_COMMANDS" #endif #define HANDLE_IS_CTIO_COMP(h) (h & CTIO_COMPLETION_HANDLE_MARK) @@ -161,7 +162,7 @@ struct imm_ntfy_from_isp { uint16_t srr_rx_id; uint16_t status; uint8_t status_subcode; - uint8_t reserved_3; + uint8_t fw_handle; uint32_t exchange_address; uint32_t srr_rel_offs; uint16_t srr_ui; @@ -217,7 +218,7 @@ struct nack_to_isp { uint16_t srr_rx_id; uint16_t status; uint8_t status_subcode; - uint8_t reserved_3; + uint8_t fw_handle; uint32_t exchange_address; uint32_t srr_rel_offs; uint16_t srr_ui; @@ -948,6 +949,7 @@ extern void qlt_update_vp_map(struct scsi_qla_host *, int); * is not set. Right now, ha value is ignored. */ #define QLA_TGT_MODE_ENABLED() (ql2x_ini_mode != QLA2XXX_INI_MODE_ENABLED) +extern int ql2x_ini_mode; static inline bool qla_tgt_mode_enabled(struct scsi_qla_host *ha) { @@ -985,12 +987,15 @@ extern void qlt_vport_create(struct scsi_qla_host *, struct qla_hw_data *); extern void qlt_rff_id(struct scsi_qla_host *, struct ct_sns_req *); extern void qlt_init_atio_q_entries(struct scsi_qla_host *); extern void qlt_24xx_process_atio_queue(struct scsi_qla_host *); -extern void qlt_24xx_config_rings(struct scsi_qla_host *, - device_reg_t __iomem *); +extern void qlt_24xx_config_rings(struct scsi_qla_host *); extern void qlt_24xx_config_nvram_stage1(struct scsi_qla_host *, struct nvram_24xx *); extern void qlt_24xx_config_nvram_stage2(struct scsi_qla_host *, struct init_cb_24xx *); +extern void qlt_81xx_config_nvram_stage2(struct scsi_qla_host *, + struct init_cb_81xx *); +extern void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *, + struct nvram_81xx *); extern int qlt_24xx_process_response_error(struct scsi_qla_host *, struct sts_entry_24xx *); extern void qlt_modify_vp_config(struct scsi_qla_host *, @@ -1000,5 +1005,7 @@ extern int qlt_mem_alloc(struct qla_hw_data *); extern void qlt_mem_free(struct qla_hw_data *); extern void qlt_stop_phase1(struct qla_tgt *); extern void qlt_stop_phase2(struct qla_tgt *); +extern irqreturn_t qla83xx_msix_atio_q(int, void *); +extern void qlt_83xx_iospace_config(struct qla_hw_data *); #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 49697ca41e78..2b6e478d9e33 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 81e738d61ec0..160d33697216 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -1420,10 +1420,8 @@ int qla4xxx_get_chap(struct scsi_qla_host *ha, char *username, char *password, dma_addr_t chap_dma; chap_table = dma_pool_alloc(ha->chap_dma_pool, GFP_KERNEL, &chap_dma); - if (chap_table == NULL) { - ret = -ENOMEM; - goto exit_get_chap; - } + if (chap_table == NULL) + return -ENOMEM; chap_size = sizeof(struct ql4_chap_table); memset(chap_table, 0, chap_size); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 270b3cf6f372..16a3a0cc9672 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -201,6 +201,7 @@ enum storvsc_request_type { #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ERROR 0x04 /* @@ -295,6 +296,25 @@ struct storvsc_scan_work { uint lun; }; +static void storvsc_device_scan(struct work_struct *work) +{ + struct storvsc_scan_work *wrk; + uint lun; + struct scsi_device *sdev; + + wrk = container_of(work, struct storvsc_scan_work, work); + lun = wrk->lun; + + sdev = scsi_device_lookup(wrk->host, 0, 0, lun); + if (!sdev) + goto done; + scsi_rescan_device(&sdev->sdev_gendev); + scsi_device_put(sdev); + +done: + kfree(wrk); +} + static void storvsc_bus_scan(struct work_struct *work) { struct storvsc_scan_work *wrk; @@ -467,6 +487,7 @@ static struct scatterlist *create_bounce_buffer(struct scatterlist *sgl, if (!bounce_sgl) return NULL; + sg_init_table(bounce_sgl, num_pages); for (i = 0; i < num_pages; i++) { page_buf = alloc_page(GFP_ATOMIC); if (!page_buf) @@ -760,6 +781,66 @@ cleanup: return ret; } +static void storvsc_handle_error(struct vmscsi_request *vm_srb, + struct scsi_cmnd *scmnd, + struct Scsi_Host *host, + u8 asc, u8 ascq) +{ + struct storvsc_scan_work *wrk; + void (*process_err_fn)(struct work_struct *work); + bool do_work = false; + + switch (vm_srb->srb_status) { + case SRB_STATUS_ERROR: + /* + * If there is an error; offline the device since all + * error recovery strategies would have already been + * deployed on the host side. However, if the command + * were a pass-through command deal with it appropriately. + */ + switch (scmnd->cmnd[0]) { + case ATA_16: + case ATA_12: + set_host_byte(scmnd, DID_PASSTHROUGH); + break; + default: + set_host_byte(scmnd, DID_TARGET_FAILURE); + } + break; + case SRB_STATUS_INVALID_LUN: + do_work = true; + process_err_fn = storvsc_remove_lun; + break; + case (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID): + if ((asc == 0x2a) && (ascq == 0x9)) { + do_work = true; + process_err_fn = storvsc_device_scan; + /* + * Retry the I/O that trigerred this. + */ + set_host_byte(scmnd, DID_REQUEUE); + } + break; + } + + if (!do_work) + return; + + /* + * We need to schedule work to process this error; schedule it. + */ + wrk = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC); + if (!wrk) { + set_host_byte(scmnd, DID_TARGET_FAILURE); + return; + } + + wrk->host = host; + wrk->lun = vm_srb->lun; + INIT_WORK(&wrk->work, process_err_fn); + schedule_work(&wrk->work); +} + static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) { @@ -768,8 +849,13 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) void (*scsi_done_fn)(struct scsi_cmnd *); struct scsi_sense_hdr sense_hdr; struct vmscsi_request *vm_srb; - struct storvsc_scan_work *wrk; struct stor_mem_pools *memp = scmnd->device->hostdata; + struct Scsi_Host *host; + struct storvsc_device *stor_dev; + struct hv_device *dev = host_dev->dev; + + stor_dev = get_in_stor_device(dev); + host = stor_dev->host; vm_srb = &cmd_request->vstor_packet.vm_srb; if (cmd_request->bounce_sgl_count) { @@ -782,55 +868,18 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) cmd_request->bounce_sgl_count); } - /* - * If there is an error; offline the device since all - * error recovery strategies would have already been - * deployed on the host side. However, if the command - * were a pass-through command deal with it appropriately. - */ scmnd->result = vm_srb->scsi_status; - if (vm_srb->srb_status == SRB_STATUS_ERROR) { - switch (scmnd->cmnd[0]) { - case ATA_16: - case ATA_12: - set_host_byte(scmnd, DID_PASSTHROUGH); - break; - default: - set_host_byte(scmnd, DID_TARGET_FAILURE); - } - } - - - /* - * If the LUN is invalid; remove the device. - */ - if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) { - struct storvsc_device *stor_dev; - struct hv_device *dev = host_dev->dev; - struct Scsi_Host *host; - - stor_dev = get_in_stor_device(dev); - host = stor_dev->host; - - wrk = kmalloc(sizeof(struct storvsc_scan_work), - GFP_ATOMIC); - if (!wrk) { - scmnd->result = DID_TARGET_FAILURE << 16; - } else { - wrk->host = host; - wrk->lun = vm_srb->lun; - INIT_WORK(&wrk->work, storvsc_remove_lun); - schedule_work(&wrk->work); - } - } - if (scmnd->result) { if (scsi_normalize_sense(scmnd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &sense_hdr)) scsi_print_sense_hdr("storvsc", &sense_hdr); } + if (vm_srb->srb_status != SRB_STATUS_SUCCESS) + storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc, + sense_hdr.ascq); + scsi_set_resid(scmnd, cmd_request->data_buffer.len - vm_srb->data_transfer_length); @@ -1155,6 +1204,8 @@ static int storvsc_device_configure(struct scsi_device *sdevice) blk_queue_bounce_limit(sdevice->request_queue, BLK_BOUNCE_ANY); + sdevice->no_write_same = 1; + return 0; } @@ -1237,6 +1288,8 @@ static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) u8 scsi_op = scmnd->cmnd[0]; switch (scsi_op) { + /* the host does not handle WRITE_SAME, log accident usage */ + case WRITE_SAME: /* * smartd sends this command and the host does not handle * this. So, don't send it. diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index 8f27f9d6f91d..0371047c5922 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -2,48 +2,58 @@ # Kernel configuration file for the UFS Host Controller # # This code is based on drivers/scsi/ufs/Kconfig -# Copyright (C) 2011 Samsung Samsung India Software Operations +# Copyright (C) 2011-2013 Samsung India Software Operations +# +# Authors: +# Santosh Yaraganavi <santosh.sy@samsung.com> +# Vinayak Holikatti <h.vinayak@samsung.com> # -# Santosh Yaraganavi <santosh.sy@samsung.com> -# Vinayak Holikatti <h.vinayak@samsung.com> - # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. - +# See the COPYING file in the top-level directory or visit +# <http://www.gnu.org/licenses/gpl-2.0.html> +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. +# +# This program is provided "AS IS" and "WITH ALL FAULTS" and +# without warranty of any kind. You are solely responsible for +# determining the appropriateness of using and distributing +# the program and assume all risks associated with your exercise +# of rights with respect to the program, including but not limited +# to infringement of third party rights, the risks and costs of +# program errors, damage to or loss of data, programs or equipment, +# and unavailability or interruption of operations. Under no +# circumstances will the contributor of this Program be liable for +# any damages of any kind arising from your use or distribution of +# this program. -# NO WARRANTY -# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT -# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, -# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is -# solely responsible for determining the appropriateness of using and -# distributing the Program and assumes all risks associated with its -# exercise of rights under this Agreement, including but not limited to -# the risks and costs of program errors, damage to or loss of data, -# programs or equipment, and unavailability or interruption of operations. - -# DISCLAIMER OF LIABILITY -# NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -# USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED -# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES +config SCSI_UFSHCD + tristate "Universal Flash Storage Controller Driver Core" + depends on SCSI + ---help--- + This selects the support for UFS devices in Linux, say Y and make + sure that you know the name of your UFS host adapter (the card + inside your computer that "speaks" the UFS protocol, also + called UFS Host Controller), because you will be asked for it. + The module will be called ufshcd. -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, -# USA. + To compile this driver as a module, choose M here and read + <file:Documentation/scsi/ufs.txt>. + However, do not compile this as a module if your root file system + (the one containing the directory /) is located on a UFS device. -config SCSI_UFSHCD - tristate "Universal Flash Storage host controller driver" - depends on PCI && SCSI +config SCSI_UFSHCD_PCI + tristate "PCI bus based UFS Controller support" + depends on SCSI_UFSHCD && PCI ---help--- - This is a generic driver which supports PCIe UFS Host controllers. + This selects the PCI UFS Host Controller Interface. Select this if + you have UFS Host Controller with PCI Interface. + + If you have a controller with this interface, say Y or M here. + + If unsure, say N. diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile index adf7895a6a91..9eda0dfbd6df 100644 --- a/drivers/scsi/ufs/Makefile +++ b/drivers/scsi/ufs/Makefile @@ -1,2 +1,3 @@ # UFSHCD makefile obj-$(CONFIG_SCSI_UFSHCD) += ufshcd.o +obj-$(CONFIG_SCSI_UFSHCD_PCI) += ufshcd-pci.o diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index b207529f8d54..139bc0647b41 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -2,45 +2,35 @@ * Universal Flash Storage Host controller driver * * This code is based on drivers/scsi/ufs/ufs.h - * Copyright (C) 2011-2012 Samsung India Software Operations + * Copyright (C) 2011-2013 Samsung India Software Operations * - * Santosh Yaraganavi <santosh.sy@samsung.com> - * Vinayak Holikatti <h.vinayak@samsung.com> + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * NO WARRANTY - * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT - * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is - * solely responsible for determining the appropriateness of using and - * distributing the Program and assumes all risks associated with its - * exercise of rights under this Agreement, including but not limited to - * the risks and costs of program errors, damage to or loss of data, - * programs or equipment, and unavailability or interruption of operations. - - * DISCLAIMER OF LIABILITY - * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED - * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - * USA. + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. */ #ifndef _UFS_H diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c new file mode 100644 index 000000000000..5cb1d75f5868 --- /dev/null +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -0,0 +1,211 @@ +/* + * Universal Flash Storage Host controller PCI glue driver + * + * This code is based on drivers/scsi/ufs/ufshcd-pci.c + * Copyright (C) 2011-2013 Samsung India Software Operations + * + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. + */ + +#include "ufshcd.h" +#include <linux/pci.h> + +#ifdef CONFIG_PM +/** + * ufshcd_pci_suspend - suspend power management function + * @pdev: pointer to PCI device handle + * @state: power state + * + * Returns -ENOSYS + */ +static int ufshcd_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + /* + * TODO: + * 1. Call ufshcd_suspend + * 2. Do bus specific power management + */ + + return -ENOSYS; +} + +/** + * ufshcd_pci_resume - resume power management function + * @pdev: pointer to PCI device handle + * + * Returns -ENOSYS + */ +static int ufshcd_pci_resume(struct pci_dev *pdev) +{ + /* + * TODO: + * 1. Call ufshcd_resume. + * 2. Do bus specific wake up + */ + + return -ENOSYS; +} +#endif /* CONFIG_PM */ + +/** + * ufshcd_pci_shutdown - main function to put the controller in reset state + * @pdev: pointer to PCI device handle + */ +static void ufshcd_pci_shutdown(struct pci_dev *pdev) +{ + ufshcd_hba_stop((struct ufs_hba *)pci_get_drvdata(pdev)); +} + +/** + * ufshcd_pci_remove - de-allocate PCI/SCSI host and host memory space + * data structure memory + * @pdev - pointer to PCI handle + */ +static void ufshcd_pci_remove(struct pci_dev *pdev) +{ + struct ufs_hba *hba = pci_get_drvdata(pdev); + + disable_irq(pdev->irq); + free_irq(pdev->irq, hba); + ufshcd_remove(hba); + pci_release_regions(pdev); + pci_set_drvdata(pdev, NULL); + pci_clear_master(pdev); + pci_disable_device(pdev); +} + +/** + * ufshcd_set_dma_mask - Set dma mask based on the controller + * addressing capability + * @pdev: PCI device structure + * + * Returns 0 for success, non-zero for failure + */ +static int ufshcd_set_dma_mask(struct pci_dev *pdev) +{ + int err; + + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) + && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) + return 0; + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + return err; +} + +/** + * ufshcd_pci_probe - probe routine of the driver + * @pdev: pointer to PCI device handle + * @id: PCI device id + * + * Returns 0 on success, non-zero value on failure + */ +static int +ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct ufs_hba *hba; + void __iomem *mmio_base; + int err; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device failed\n"); + goto out_error; + } + + pci_set_master(pdev); + + + err = pci_request_regions(pdev, UFSHCD); + if (err < 0) { + dev_err(&pdev->dev, "request regions failed\n"); + goto out_disable; + } + + mmio_base = pci_ioremap_bar(pdev, 0); + if (!mmio_base) { + dev_err(&pdev->dev, "memory map failed\n"); + err = -ENOMEM; + goto out_release_regions; + } + + err = ufshcd_set_dma_mask(pdev); + if (err) { + dev_err(&pdev->dev, "set dma mask failed\n"); + goto out_iounmap; + } + + err = ufshcd_init(&pdev->dev, &hba, mmio_base, pdev->irq); + if (err) { + dev_err(&pdev->dev, "Initialization failed\n"); + goto out_iounmap; + } + + pci_set_drvdata(pdev, hba); + + return 0; + +out_iounmap: + iounmap(mmio_base); +out_release_regions: + pci_release_regions(pdev); +out_disable: + pci_clear_master(pdev); + pci_disable_device(pdev); +out_error: + return err; +} + +static DEFINE_PCI_DEVICE_TABLE(ufshcd_pci_tbl) = { + { PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { } /* terminate list */ +}; + +MODULE_DEVICE_TABLE(pci, ufshcd_pci_tbl); + +static struct pci_driver ufshcd_pci_driver = { + .name = UFSHCD, + .id_table = ufshcd_pci_tbl, + .probe = ufshcd_pci_probe, + .remove = ufshcd_pci_remove, + .shutdown = ufshcd_pci_shutdown, +#ifdef CONFIG_PM + .suspend = ufshcd_pci_suspend, + .resume = ufshcd_pci_resume, +#endif +}; + +module_pci_driver(ufshcd_pci_driver); + +MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>"); +MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>"); +MODULE_DESCRIPTION("UFS host controller PCI glue driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(UFSHCD_DRIVER_VERSION); diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 91a4046ca9ba..60fd40c4e4c2 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1,77 +1,39 @@ /* - * Universal Flash Storage Host controller driver + * Universal Flash Storage Host controller driver Core * * This code is based on drivers/scsi/ufs/ufshcd.c - * Copyright (C) 2011-2012 Samsung India Software Operations + * Copyright (C) 2011-2013 Samsung India Software Operations * - * Santosh Yaraganavi <santosh.sy@samsung.com> - * Vinayak Holikatti <h.vinayak@samsung.com> + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * NO WARRANTY - * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT - * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is - * solely responsible for determining the appropriateness of using and - * distributing the Program and assumes all risks associated with its - * exercise of rights under this Agreement, including but not limited to - * the risks and costs of program errors, damage to or loss of data, - * programs or equipment, and unavailability or interruption of operations. - - * DISCLAIMER OF LIABILITY - * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED - * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - * USA. + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. */ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/delay.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/workqueue.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/wait.h> -#include <linux/bitops.h> - -#include <asm/irq.h> -#include <asm/byteorder.h> -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_host.h> -#include <scsi/scsi_tcq.h> -#include <scsi/scsi_dbg.h> -#include <scsi/scsi_eh.h> - -#include "ufs.h" -#include "ufshci.h" - -#define UFSHCD "ufshcd" -#define UFSHCD_DRIVER_VERSION "0.1" +#include "ufshcd.h" enum { UFSHCD_MAX_CHANNEL = 0, @@ -102,121 +64,6 @@ enum { }; /** - * struct uic_command - UIC command structure - * @command: UIC command - * @argument1: UIC command argument 1 - * @argument2: UIC command argument 2 - * @argument3: UIC command argument 3 - * @cmd_active: Indicate if UIC command is outstanding - * @result: UIC command result - */ -struct uic_command { - u32 command; - u32 argument1; - u32 argument2; - u32 argument3; - int cmd_active; - int result; -}; - -/** - * struct ufs_hba - per adapter private structure - * @mmio_base: UFSHCI base register address - * @ucdl_base_addr: UFS Command Descriptor base address - * @utrdl_base_addr: UTP Transfer Request Descriptor base address - * @utmrdl_base_addr: UTP Task Management Descriptor base address - * @ucdl_dma_addr: UFS Command Descriptor DMA address - * @utrdl_dma_addr: UTRDL DMA address - * @utmrdl_dma_addr: UTMRDL DMA address - * @host: Scsi_Host instance of the driver - * @pdev: PCI device handle - * @lrb: local reference block - * @outstanding_tasks: Bits representing outstanding task requests - * @outstanding_reqs: Bits representing outstanding transfer requests - * @capabilities: UFS Controller Capabilities - * @nutrs: Transfer Request Queue depth supported by controller - * @nutmrs: Task Management Queue depth supported by controller - * @active_uic_cmd: handle of active UIC command - * @ufshcd_tm_wait_queue: wait queue for task management - * @tm_condition: condition variable for task management - * @ufshcd_state: UFSHCD states - * @int_enable_mask: Interrupt Mask Bits - * @uic_workq: Work queue for UIC completion handling - * @feh_workq: Work queue for fatal controller error handling - * @errors: HBA errors - */ -struct ufs_hba { - void __iomem *mmio_base; - - /* Virtual memory reference */ - struct utp_transfer_cmd_desc *ucdl_base_addr; - struct utp_transfer_req_desc *utrdl_base_addr; - struct utp_task_req_desc *utmrdl_base_addr; - - /* DMA memory reference */ - dma_addr_t ucdl_dma_addr; - dma_addr_t utrdl_dma_addr; - dma_addr_t utmrdl_dma_addr; - - struct Scsi_Host *host; - struct pci_dev *pdev; - - struct ufshcd_lrb *lrb; - - unsigned long outstanding_tasks; - unsigned long outstanding_reqs; - - u32 capabilities; - int nutrs; - int nutmrs; - u32 ufs_version; - - struct uic_command active_uic_cmd; - wait_queue_head_t ufshcd_tm_wait_queue; - unsigned long tm_condition; - - u32 ufshcd_state; - u32 int_enable_mask; - - /* Work Queues */ - struct work_struct uic_workq; - struct work_struct feh_workq; - - /* HBA Errors */ - u32 errors; -}; - -/** - * struct ufshcd_lrb - local reference block - * @utr_descriptor_ptr: UTRD address of the command - * @ucd_cmd_ptr: UCD address of the command - * @ucd_rsp_ptr: Response UPIU address for this command - * @ucd_prdt_ptr: PRDT address of the command - * @cmd: pointer to SCSI command - * @sense_buffer: pointer to sense buffer address of the SCSI command - * @sense_bufflen: Length of the sense buffer - * @scsi_status: SCSI status of the command - * @command_type: SCSI, UFS, Query. - * @task_tag: Task tag of the command - * @lun: LUN of the command - */ -struct ufshcd_lrb { - struct utp_transfer_req_desc *utr_descriptor_ptr; - struct utp_upiu_cmd *ucd_cmd_ptr; - struct utp_upiu_rsp *ucd_rsp_ptr; - struct ufshcd_sg_entry *ucd_prdt_ptr; - - struct scsi_cmnd *cmd; - u8 *sense_buffer; - unsigned int sense_bufflen; - int scsi_status; - - int command_type; - int task_tag; - unsigned int lun; -}; - -/** * ufshcd_get_ufs_version - Get the UFS version supported by the HBA * @hba - Pointer to adapter instance * @@ -335,21 +182,21 @@ static inline void ufshcd_free_hba_memory(struct ufs_hba *hba) if (hba->utmrdl_base_addr) { utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs; - dma_free_coherent(&hba->pdev->dev, utmrdl_size, + dma_free_coherent(hba->dev, utmrdl_size, hba->utmrdl_base_addr, hba->utmrdl_dma_addr); } if (hba->utrdl_base_addr) { utrdl_size = (sizeof(struct utp_transfer_req_desc) * hba->nutrs); - dma_free_coherent(&hba->pdev->dev, utrdl_size, + dma_free_coherent(hba->dev, utrdl_size, hba->utrdl_base_addr, hba->utrdl_dma_addr); } if (hba->ucdl_base_addr) { ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); - dma_free_coherent(&hba->pdev->dev, ucdl_size, + dma_free_coherent(hba->dev, ucdl_size, hba->ucdl_base_addr, hba->ucdl_dma_addr); } } @@ -429,15 +276,6 @@ static void ufshcd_enable_run_stop_reg(struct ufs_hba *hba) } /** - * ufshcd_hba_stop - Send controller to reset state - * @hba: per adapter instance - */ -static inline void ufshcd_hba_stop(struct ufs_hba *hba) -{ - writel(CONTROLLER_DISABLE, (hba->mmio_base + REG_CONTROLLER_ENABLE)); -} - -/** * ufshcd_hba_start - Start controller initialization sequence * @hba: per adapter instance */ @@ -724,7 +562,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) /* Allocate memory for UTP command descriptors */ ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); - hba->ucdl_base_addr = dma_alloc_coherent(&hba->pdev->dev, + hba->ucdl_base_addr = dma_alloc_coherent(hba->dev, ucdl_size, &hba->ucdl_dma_addr, GFP_KERNEL); @@ -737,7 +575,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) */ if (!hba->ucdl_base_addr || WARN_ON(hba->ucdl_dma_addr & (PAGE_SIZE - 1))) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Command Descriptor Memory allocation failed\n"); goto out; } @@ -747,13 +585,13 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) * UFSHCI requires 1024 byte alignment of UTRD */ utrdl_size = (sizeof(struct utp_transfer_req_desc) * hba->nutrs); - hba->utrdl_base_addr = dma_alloc_coherent(&hba->pdev->dev, + hba->utrdl_base_addr = dma_alloc_coherent(hba->dev, utrdl_size, &hba->utrdl_dma_addr, GFP_KERNEL); if (!hba->utrdl_base_addr || WARN_ON(hba->utrdl_dma_addr & (PAGE_SIZE - 1))) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Transfer Descriptor Memory allocation failed\n"); goto out; } @@ -763,13 +601,13 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) * UFSHCI requires 1024 byte alignment of UTMRD */ utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs; - hba->utmrdl_base_addr = dma_alloc_coherent(&hba->pdev->dev, + hba->utmrdl_base_addr = dma_alloc_coherent(hba->dev, utmrdl_size, &hba->utmrdl_dma_addr, GFP_KERNEL); if (!hba->utmrdl_base_addr || WARN_ON(hba->utmrdl_dma_addr & (PAGE_SIZE - 1))) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Task Management Descriptor Memory allocation failed\n"); goto out; } @@ -777,7 +615,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) /* Allocate memory for local reference block */ hba->lrb = kcalloc(hba->nutrs, sizeof(struct ufshcd_lrb), GFP_KERNEL); if (!hba->lrb) { - dev_err(&hba->pdev->dev, "LRB Memory allocation failed\n"); + dev_err(hba->dev, "LRB Memory allocation failed\n"); goto out; } return 0; @@ -867,7 +705,7 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba) /* check if controller is ready to accept UIC commands */ if (((readl(hba->mmio_base + REG_CONTROLLER_STATUS)) & UIC_COMMAND_READY) == 0x0) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Controller not ready" " to accept UIC commands\n"); return -EIO; @@ -912,7 +750,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba) /* check if device present */ reg = readl((hba->mmio_base + REG_CONTROLLER_STATUS)); if (!ufshcd_is_device_present(reg)) { - dev_err(&hba->pdev->dev, "cc: Device not present\n"); + dev_err(hba->dev, "cc: Device not present\n"); err = -ENXIO; goto out; } @@ -924,7 +762,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba) if (!(ufshcd_get_lists_status(reg))) { ufshcd_enable_run_stop_reg(hba); } else { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Host controller not ready to process requests"); err = -EIO; goto out; @@ -1005,7 +843,7 @@ static int ufshcd_hba_enable(struct ufs_hba *hba) if (retry) { retry--; } else { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Controller enable failed\n"); return -EIO; } @@ -1084,7 +922,7 @@ static int ufshcd_do_reset(struct ufs_hba *hba) /* start the initialization process */ if (ufshcd_initialize_hba(hba)) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Reset: Controller initialization failed\n"); return FAILED; } @@ -1167,7 +1005,7 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index) task_result = SUCCESS; } else { task_result = FAILED; - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "trc: Invalid ocs = %x\n", ocs_value); } spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -1281,7 +1119,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) /* check if the returned transfer response is valid */ result = ufshcd_is_valid_req_rsp(lrbp->ucd_rsp_ptr); if (result) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Invalid response = %x\n", result); break; } @@ -1310,7 +1148,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) case OCS_FATAL_ERROR: default: result |= DID_ERROR << 16; - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "OCS error from controller = %x\n", ocs); break; } /* end of switch */ @@ -1374,7 +1212,7 @@ static void ufshcd_uic_cc_handler (struct work_struct *work) !(ufshcd_get_uic_cmd_result(hba))) { if (ufshcd_make_hba_operational(hba)) - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "cc: hba not operational state\n"); return; } @@ -1509,7 +1347,7 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba, free_slot = ufshcd_get_tm_free_slot(hba); if (free_slot >= hba->nutmrs) { spin_unlock_irqrestore(host->host_lock, flags); - dev_err(&hba->pdev->dev, "Task management queue full\n"); + dev_err(hba->dev, "Task management queue full\n"); err = FAILED; goto out; } @@ -1552,7 +1390,7 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba, &hba->tm_condition) != 0), 60 * HZ); if (!err) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Task management command timed-out\n"); err = FAILED; goto out; @@ -1688,23 +1526,13 @@ static struct scsi_host_template ufshcd_driver_template = { }; /** - * ufshcd_shutdown - main function to put the controller in reset state - * @pdev: pointer to PCI device handle - */ -static void ufshcd_shutdown(struct pci_dev *pdev) -{ - ufshcd_hba_stop((struct ufs_hba *)pci_get_drvdata(pdev)); -} - -#ifdef CONFIG_PM -/** * ufshcd_suspend - suspend power management function - * @pdev: pointer to PCI device handle + * @hba: per adapter instance * @state: power state * * Returns -ENOSYS */ -static int ufshcd_suspend(struct pci_dev *pdev, pm_message_t state) +int ufshcd_suspend(struct ufs_hba *hba, pm_message_t state) { /* * TODO: @@ -1717,14 +1545,15 @@ static int ufshcd_suspend(struct pci_dev *pdev, pm_message_t state) return -ENOSYS; } +EXPORT_SYMBOL_GPL(ufshcd_suspend); /** * ufshcd_resume - resume power management function - * @pdev: pointer to PCI device handle + * @hba: per adapter instance * * Returns -ENOSYS */ -static int ufshcd_resume(struct pci_dev *pdev) +int ufshcd_resume(struct ufs_hba *hba) { /* * TODO: @@ -1737,7 +1566,7 @@ static int ufshcd_resume(struct pci_dev *pdev) return -ENOSYS; } -#endif /* CONFIG_PM */ +EXPORT_SYMBOL_GPL(ufshcd_resume); /** * ufshcd_hba_free - free allocated memory for @@ -1748,107 +1577,67 @@ static void ufshcd_hba_free(struct ufs_hba *hba) { iounmap(hba->mmio_base); ufshcd_free_hba_memory(hba); - pci_release_regions(hba->pdev); } /** - * ufshcd_remove - de-allocate PCI/SCSI host and host memory space + * ufshcd_remove - de-allocate SCSI host and host memory space * data structure memory - * @pdev - pointer to PCI handle + * @hba - per adapter instance */ -static void ufshcd_remove(struct pci_dev *pdev) +void ufshcd_remove(struct ufs_hba *hba) { - struct ufs_hba *hba = pci_get_drvdata(pdev); - /* disable interrupts */ ufshcd_int_config(hba, UFSHCD_INT_DISABLE); - free_irq(pdev->irq, hba); ufshcd_hba_stop(hba); ufshcd_hba_free(hba); scsi_remove_host(hba->host); scsi_host_put(hba->host); - pci_set_drvdata(pdev, NULL); - pci_clear_master(pdev); - pci_disable_device(pdev); -} - -/** - * ufshcd_set_dma_mask - Set dma mask based on the controller - * addressing capability - * @pdev: PCI device structure - * - * Returns 0 for success, non-zero for failure - */ -static int ufshcd_set_dma_mask(struct ufs_hba *hba) -{ - int err; - u64 dma_mask; - - /* - * If controller supports 64 bit addressing mode, then set the DMA - * mask to 64-bit, else set the DMA mask to 32-bit - */ - if (hba->capabilities & MASK_64_ADDRESSING_SUPPORT) - dma_mask = DMA_BIT_MASK(64); - else - dma_mask = DMA_BIT_MASK(32); - - err = pci_set_dma_mask(hba->pdev, dma_mask); - if (err) - return err; - - err = pci_set_consistent_dma_mask(hba->pdev, dma_mask); - - return err; } +EXPORT_SYMBOL_GPL(ufshcd_remove); /** - * ufshcd_probe - probe routine of the driver - * @pdev: pointer to PCI device handle - * @id: PCI device id - * + * ufshcd_init - Driver initialization routine + * @dev: pointer to device handle + * @hba_handle: driver private handle + * @mmio_base: base register address + * @irq: Interrupt line of device * Returns 0 on success, non-zero value on failure */ -static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id) +int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle, + void __iomem *mmio_base, unsigned int irq) { struct Scsi_Host *host; struct ufs_hba *hba; int err; - err = pci_enable_device(pdev); - if (err) { - dev_err(&pdev->dev, "pci_enable_device failed\n"); + if (!dev) { + dev_err(dev, + "Invalid memory reference for dev is NULL\n"); + err = -ENODEV; goto out_error; } - pci_set_master(pdev); + if (!mmio_base) { + dev_err(dev, + "Invalid memory reference for mmio_base is NULL\n"); + err = -ENODEV; + goto out_error; + } host = scsi_host_alloc(&ufshcd_driver_template, sizeof(struct ufs_hba)); if (!host) { - dev_err(&pdev->dev, "scsi_host_alloc failed\n"); + dev_err(dev, "scsi_host_alloc failed\n"); err = -ENOMEM; - goto out_disable; + goto out_error; } hba = shost_priv(host); - - err = pci_request_regions(pdev, UFSHCD); - if (err < 0) { - dev_err(&pdev->dev, "request regions failed\n"); - goto out_host_put; - } - - hba->mmio_base = pci_ioremap_bar(pdev, 0); - if (!hba->mmio_base) { - dev_err(&pdev->dev, "memory map failed\n"); - err = -ENOMEM; - goto out_release_regions; - } - hba->host = host; - hba->pdev = pdev; + hba->dev = dev; + hba->mmio_base = mmio_base; + hba->irq = irq; /* Read capabilities registers */ ufshcd_hba_capabilities(hba); @@ -1856,17 +1645,11 @@ static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Get UFS version supported by the controller */ hba->ufs_version = ufshcd_get_ufs_version(hba); - err = ufshcd_set_dma_mask(hba); - if (err) { - dev_err(&pdev->dev, "set dma mask failed\n"); - goto out_iounmap; - } - /* Allocate memory for host memory space */ err = ufshcd_memory_alloc(hba); if (err) { - dev_err(&pdev->dev, "Memory allocation failed\n"); - goto out_iounmap; + dev_err(hba->dev, "Memory allocation failed\n"); + goto out_disable; } /* Configure LRB */ @@ -1888,76 +1671,50 @@ static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler); /* IRQ registration */ - err = request_irq(pdev->irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba); + err = request_irq(irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba); if (err) { - dev_err(&pdev->dev, "request irq failed\n"); + dev_err(hba->dev, "request irq failed\n"); goto out_lrb_free; } /* Enable SCSI tag mapping */ err = scsi_init_shared_tag_map(host, host->can_queue); if (err) { - dev_err(&pdev->dev, "init shared queue failed\n"); + dev_err(hba->dev, "init shared queue failed\n"); goto out_free_irq; } - pci_set_drvdata(pdev, hba); - - err = scsi_add_host(host, &pdev->dev); + err = scsi_add_host(host, hba->dev); if (err) { - dev_err(&pdev->dev, "scsi_add_host failed\n"); + dev_err(hba->dev, "scsi_add_host failed\n"); goto out_free_irq; } /* Initialization routine */ err = ufshcd_initialize_hba(hba); if (err) { - dev_err(&pdev->dev, "Initialization failed\n"); - goto out_free_irq; + dev_err(hba->dev, "Initialization failed\n"); + goto out_remove_scsi_host; } + *hba_handle = hba; return 0; +out_remove_scsi_host: + scsi_remove_host(hba->host); out_free_irq: - free_irq(pdev->irq, hba); + free_irq(irq, hba); out_lrb_free: ufshcd_free_hba_memory(hba); -out_iounmap: - iounmap(hba->mmio_base); -out_release_regions: - pci_release_regions(pdev); -out_host_put: - scsi_host_put(host); out_disable: - pci_clear_master(pdev); - pci_disable_device(pdev); + scsi_host_put(host); out_error: return err; } +EXPORT_SYMBOL_GPL(ufshcd_init); -static DEFINE_PCI_DEVICE_TABLE(ufshcd_pci_tbl) = { - { PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, - { } /* terminate list */ -}; - -MODULE_DEVICE_TABLE(pci, ufshcd_pci_tbl); - -static struct pci_driver ufshcd_pci_driver = { - .name = UFSHCD, - .id_table = ufshcd_pci_tbl, - .probe = ufshcd_probe, - .remove = ufshcd_remove, - .shutdown = ufshcd_shutdown, -#ifdef CONFIG_PM - .suspend = ufshcd_suspend, - .resume = ufshcd_resume, -#endif -}; - -module_pci_driver(ufshcd_pci_driver); - -MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>, " - "Vinayak Holikatti <h.vinayak@samsung.com>"); -MODULE_DESCRIPTION("Generic UFS host controller driver"); +MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>"); +MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>"); +MODULE_DESCRIPTION("Generic UFS host controller driver Core"); MODULE_LICENSE("GPL"); MODULE_VERSION(UFSHCD_DRIVER_VERSION); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h new file mode 100644 index 000000000000..6b99a42f5819 --- /dev/null +++ b/drivers/scsi/ufs/ufshcd.h @@ -0,0 +1,202 @@ +/* + * Universal Flash Storage Host controller driver + * + * This code is based on drivers/scsi/ufs/ufshcd.h + * Copyright (C) 2011-2013 Samsung India Software Operations + * + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. + */ + +#ifndef _UFSHCD_H +#define _UFSHCD_H + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/wait.h> +#include <linux/bitops.h> +#include <linux/pm_runtime.h> +#include <linux/clk.h> + +#include <asm/irq.h> +#include <asm/byteorder.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi_tcq.h> +#include <scsi/scsi_dbg.h> +#include <scsi/scsi_eh.h> + +#include "ufs.h" +#include "ufshci.h" + +#define UFSHCD "ufshcd" +#define UFSHCD_DRIVER_VERSION "0.2" + +/** + * struct uic_command - UIC command structure + * @command: UIC command + * @argument1: UIC command argument 1 + * @argument2: UIC command argument 2 + * @argument3: UIC command argument 3 + * @cmd_active: Indicate if UIC command is outstanding + * @result: UIC command result + */ +struct uic_command { + u32 command; + u32 argument1; + u32 argument2; + u32 argument3; + int cmd_active; + int result; +}; + +/** + * struct ufshcd_lrb - local reference block + * @utr_descriptor_ptr: UTRD address of the command + * @ucd_cmd_ptr: UCD address of the command + * @ucd_rsp_ptr: Response UPIU address for this command + * @ucd_prdt_ptr: PRDT address of the command + * @cmd: pointer to SCSI command + * @sense_buffer: pointer to sense buffer address of the SCSI command + * @sense_bufflen: Length of the sense buffer + * @scsi_status: SCSI status of the command + * @command_type: SCSI, UFS, Query. + * @task_tag: Task tag of the command + * @lun: LUN of the command + */ +struct ufshcd_lrb { + struct utp_transfer_req_desc *utr_descriptor_ptr; + struct utp_upiu_cmd *ucd_cmd_ptr; + struct utp_upiu_rsp *ucd_rsp_ptr; + struct ufshcd_sg_entry *ucd_prdt_ptr; + + struct scsi_cmnd *cmd; + u8 *sense_buffer; + unsigned int sense_bufflen; + int scsi_status; + + int command_type; + int task_tag; + unsigned int lun; +}; + + +/** + * struct ufs_hba - per adapter private structure + * @mmio_base: UFSHCI base register address + * @ucdl_base_addr: UFS Command Descriptor base address + * @utrdl_base_addr: UTP Transfer Request Descriptor base address + * @utmrdl_base_addr: UTP Task Management Descriptor base address + * @ucdl_dma_addr: UFS Command Descriptor DMA address + * @utrdl_dma_addr: UTRDL DMA address + * @utmrdl_dma_addr: UTMRDL DMA address + * @host: Scsi_Host instance of the driver + * @dev: device handle + * @lrb: local reference block + * @outstanding_tasks: Bits representing outstanding task requests + * @outstanding_reqs: Bits representing outstanding transfer requests + * @capabilities: UFS Controller Capabilities + * @nutrs: Transfer Request Queue depth supported by controller + * @nutmrs: Task Management Queue depth supported by controller + * @ufs_version: UFS Version to which controller complies + * @irq: Irq number of the controller + * @active_uic_cmd: handle of active UIC command + * @ufshcd_tm_wait_queue: wait queue for task management + * @tm_condition: condition variable for task management + * @ufshcd_state: UFSHCD states + * @int_enable_mask: Interrupt Mask Bits + * @uic_workq: Work queue for UIC completion handling + * @feh_workq: Work queue for fatal controller error handling + * @errors: HBA errors + */ +struct ufs_hba { + void __iomem *mmio_base; + + /* Virtual memory reference */ + struct utp_transfer_cmd_desc *ucdl_base_addr; + struct utp_transfer_req_desc *utrdl_base_addr; + struct utp_task_req_desc *utmrdl_base_addr; + + /* DMA memory reference */ + dma_addr_t ucdl_dma_addr; + dma_addr_t utrdl_dma_addr; + dma_addr_t utmrdl_dma_addr; + + struct Scsi_Host *host; + struct device *dev; + + struct ufshcd_lrb *lrb; + + unsigned long outstanding_tasks; + unsigned long outstanding_reqs; + + u32 capabilities; + int nutrs; + int nutmrs; + u32 ufs_version; + unsigned int irq; + + struct uic_command active_uic_cmd; + wait_queue_head_t ufshcd_tm_wait_queue; + unsigned long tm_condition; + + u32 ufshcd_state; + u32 int_enable_mask; + + /* Work Queues */ + struct work_struct uic_workq; + struct work_struct feh_workq; + + /* HBA Errors */ + u32 errors; +}; + +int ufshcd_init(struct device *, struct ufs_hba ** , void __iomem * , + unsigned int); +void ufshcd_remove(struct ufs_hba *); + +/** + * ufshcd_hba_stop - Send controller to reset state + * @hba: per adapter instance + */ +static inline void ufshcd_hba_stop(struct ufs_hba *hba) +{ + writel(CONTROLLER_DISABLE, (hba->mmio_base + REG_CONTROLLER_ENABLE)); +} + +#endif /* End of Header */ diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 6e3510f71167..0c164847a3ef 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -2,45 +2,35 @@ * Universal Flash Storage Host controller driver * * This code is based on drivers/scsi/ufs/ufshci.h - * Copyright (C) 2011-2012 Samsung India Software Operations + * Copyright (C) 2011-2013 Samsung India Software Operations * - * Santosh Yaraganavi <santosh.sy@samsung.com> - * Vinayak Holikatti <h.vinayak@samsung.com> + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * NO WARRANTY - * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT - * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is - * solely responsible for determining the appropriateness of using and - * distributing the Program and assumes all risks associated with its - * exercise of rights under this Agreement, including but not limited to - * the risks and costs of program errors, damage to or loss of data, - * programs or equipment, and unavailability or interruption of operations. - - * DISCLAIMER OF LIABILITY - * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED - * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - * USA. + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. */ #ifndef _UFSHCI_H diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c index a43415a7fbed..4c0f6d883dd3 100644 --- a/drivers/ssb/driver_chipcommon_pmu.c +++ b/drivers/ssb/driver_chipcommon_pmu.c @@ -14,7 +14,7 @@ #include <linux/delay.h> #include <linux/export.h> #ifdef CONFIG_BCM47XX -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> #endif #include "ssb_private.h" @@ -322,7 +322,7 @@ static void ssb_pmu_pll_init(struct ssb_chipcommon *cc) if (bus->bustype == SSB_BUSTYPE_SSB) { #ifdef CONFIG_BCM47XX char buf[20]; - if (nvram_getenv("xtalfreq", buf, sizeof(buf)) >= 0) + if (bcm47xx_nvram_getenv("xtalfreq", buf, sizeof(buf)) >= 0) crystalfreq = simple_strtoul(buf, NULL, 0); #endif } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 9435a3d369a7..7ea246a07731 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3584,6 +3584,10 @@ check_rsp_state: spin_lock_bh(&cmd->istate_lock); cmd->i_state = ISTATE_SENT_STATUS; spin_unlock_bh(&cmd->istate_lock); + + if (atomic_read(&conn->check_immediate_queue)) + return 1; + continue; } else if (ret == 2) { /* Still must send status, @@ -3673,7 +3677,7 @@ check_rsp_state: } if (atomic_read(&conn->check_immediate_queue)) - break; + return 1; } return 0; @@ -3717,12 +3721,15 @@ restart: signal_pending(current)) goto transport_err; +get_immediate: ret = handle_immediate_queue(conn); if (ret < 0) goto transport_err; ret = handle_response_queue(conn); - if (ret == -EAGAIN) + if (ret == 1) + goto get_immediate; + else if (ret == -EAGAIN) goto restart; else if (ret < 0) goto transport_err; diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 6917a9e938e7..d3536f57444f 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -2598,7 +2598,7 @@ static int __init sbp_init(void) return 0; }; -static void sbp_exit(void) +static void __exit sbp_exit(void) { sbp_deregister_configfs(); }; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index d226c10a985b..17a6acbc3ab0 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -631,7 +631,7 @@ static int __init fileio_module_init(void) return transport_subsystem_register(&fileio_template); } -static void fileio_module_exit(void) +static void __exit fileio_module_exit(void) { transport_subsystem_release(&fileio_template); } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index c73f4a950e23..8bcc514ec8b6 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -821,7 +821,7 @@ static int __init iblock_module_init(void) return transport_subsystem_register(&iblock_template); } -static void iblock_module_exit(void) +static void __exit iblock_module_exit(void) { transport_subsystem_release(&iblock_template); } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 2bcfd79cf595..82e78d72fdb6 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -840,14 +840,14 @@ static void pscsi_bi_endio(struct bio *bio, int error) bio_put(bio); } -static inline struct bio *pscsi_get_bio(int sg_num) +static inline struct bio *pscsi_get_bio(int nr_vecs) { struct bio *bio; /* * Use bio_malloc() following the comment in for bio -> struct request * in block/blk-core.c:blk_make_request() */ - bio = bio_kmalloc(GFP_KERNEL, sg_num); + bio = bio_kmalloc(GFP_KERNEL, nr_vecs); if (!bio) { pr_err("PSCSI: bio_kmalloc() failed\n"); return NULL; @@ -940,7 +940,6 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, bio = NULL; } - page++; len -= bytes; data_len -= bytes; off = 0; @@ -952,7 +951,6 @@ fail: while (*hbio) { bio = *hbio; *hbio = (*hbio)->bi_next; - bio->bi_next = NULL; bio_endio(bio, 0); /* XXX: should be error */ } return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -1092,7 +1090,6 @@ fail_free_bio: while (hbio) { struct bio *bio = hbio; hbio = hbio->bi_next; - bio->bi_next = NULL; bio_endio(bio, 0); /* XXX: should be error */ } ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -1178,7 +1175,7 @@ static int __init pscsi_module_init(void) return transport_subsystem_register(&pscsi_template); } -static void pscsi_module_exit(void) +static void __exit pscsi_module_exit(void) { transport_subsystem_release(&pscsi_template); } diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index a0162cbf0557..cf9210db9fa9 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -729,19 +729,19 @@ config SERIAL_SH_SCI_DMA config SERIAL_PNX8XXX bool "Enable PNX8XXX SoCs' UART Support" - depends on SOC_PNX8550 || SOC_PNX833X + depends on SOC_PNX833X select SERIAL_CORE help - If you have a MIPS-based Philips SoC such as PNX8550 or PNX8330 - and you want to use serial ports, say Y. Otherwise, say N. + If you have a MIPS-based Philips SoC such as PNX8330 and you want + to use serial ports, say Y. Otherwise, say N. config SERIAL_PNX8XXX_CONSOLE bool "Enable PNX8XX0 serial console" depends on SERIAL_PNX8XXX select SERIAL_CORE_CONSOLE help - If you have a MIPS-based Philips SoC such as PNX8550 or PNX8330 - and you want to use serial console, say Y. Otherwise, say N. + If you have a MIPS-based Philips SoC such as PNX8330 and you want + to use serial console, say Y. Otherwise, say N. config SERIAL_HS_LPC32XX tristate "LPC32XX high speed serial port support" diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c index f904071d70df..20dbdcbe9b0f 100644 --- a/drivers/usb/host/ehci-timer.c +++ b/drivers/usb/host/ehci-timer.c @@ -113,15 +113,14 @@ static void ehci_poll_ASS(struct ehci_hcd *ehci) if (want != actual) { - /* Poll again later */ - ehci_enable_event(ehci, EHCI_HRTIMER_POLL_ASS, true); - ++ehci->ASS_poll_count; - return; + /* Poll again later, but give up after about 20 ms */ + if (ehci->ASS_poll_count++ < 20) { + ehci_enable_event(ehci, EHCI_HRTIMER_POLL_ASS, true); + return; + } + ehci_dbg(ehci, "Waited too long for the async schedule status (%x/%x), giving up\n", + want, actual); } - - if (ehci->ASS_poll_count > 20) - ehci_dbg(ehci, "ASS poll count reached %d\n", - ehci->ASS_poll_count); ehci->ASS_poll_count = 0; /* The status is up-to-date; restart or stop the schedule as needed */ @@ -160,14 +159,14 @@ static void ehci_poll_PSS(struct ehci_hcd *ehci) if (want != actual) { - /* Poll again later */ - ehci_enable_event(ehci, EHCI_HRTIMER_POLL_PSS, true); - return; + /* Poll again later, but give up after about 20 ms */ + if (ehci->PSS_poll_count++ < 20) { + ehci_enable_event(ehci, EHCI_HRTIMER_POLL_PSS, true); + return; + } + ehci_dbg(ehci, "Waited too long for the periodic schedule status (%x/%x), giving up\n", + want, actual); } - - if (ehci->PSS_poll_count > 20) - ehci_dbg(ehci, "PSS poll count reached %d\n", - ehci->PSS_poll_count); ehci->PSS_poll_count = 0; /* The status is up-to-date; restart or stop the schedule as needed */ diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 26e1fdbddf69..9fcc70c11cea 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -79,6 +79,7 @@ config DA9052_WATCHDOG config DA9055_WATCHDOG tristate "Dialog Semiconductor DA9055 Watchdog" depends on MFD_DA9055 + select WATCHDOG_CORE help If you say yes here you get support for watchdog on the Dialog Semiconductor DA9055 PMIC. @@ -108,7 +109,7 @@ config WM8350_WATCHDOG config ARM_SP805_WATCHDOG tristate "ARM SP805 Watchdog" - depends on ARM_AMBA + depends on ARM && ARM_AMBA select WATCHDOG_CORE help ARM Primecell SP805 Watchdog timer. This will reboot your system when @@ -116,7 +117,7 @@ config ARM_SP805_WATCHDOG config AT91RM9200_WATCHDOG tristate "AT91RM9200 watchdog" - depends on ARCH_AT91RM9200 + depends on ARCH_AT91 help Watchdog timer embedded into AT91RM9200 chips. This will reboot your system when the timeout is reached. @@ -124,6 +125,7 @@ config AT91RM9200_WATCHDOG config AT91SAM9X_WATCHDOG tristate "AT91SAM9X / AT91CAP9 watchdog" depends on ARCH_AT91 && !ARCH_AT91RM9200 + select WATCHDOG_CORE help Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will reboot your system when the timeout is reached. @@ -316,14 +318,15 @@ config TWL4030_WATCHDOG Support for TI TWL4030 watchdog. Say 'Y' here to enable the watchdog timer support for TWL4030 chips. -config STMP3XXX_WATCHDOG - tristate "Freescale STMP3XXX watchdog" - depends on ARCH_STMP3XXX +config STMP3XXX_RTC_WATCHDOG + tristate "Freescale STMP3XXX & i.MX23/28 watchdog" + depends on RTC_DRV_STMP + select WATCHDOG_CORE help - Say Y here if to include support for the watchdog timer - for the Sigmatel STMP37XX/378X SoC. + Say Y here to include support for the watchdog timer inside + the RTC for the STMP37XX/378X or i.MX23/28 SoC. To compile this driver as a module, choose M here: the - module will be called stmp3xxx_wdt. + module will be called stmp3xxx_rtc_wdt. config NUC900_WATCHDOG tristate "Nuvoton NUC900 watchdog" @@ -376,6 +379,18 @@ config UX500_WATCHDOG To compile this driver as a module, choose M here: the module will be called ux500_wdt. +config RETU_WATCHDOG + tristate "Retu watchdog" + depends on MFD_RETU + select WATCHDOG_CORE + help + Retu watchdog driver for Nokia Internet Tablets (770, N800, + N810). At least on N800 the watchdog cannot be disabled, so + this driver is essential and you should enable it. + + To compile this driver as a module, choose M here: the + module will be called retu_wdt. + # AVR32 Architecture config AT32AP700X_WDT @@ -593,7 +608,7 @@ config IE6XX_WDT config INTEL_SCU_WATCHDOG bool "Intel SCU Watchdog for Mobile Platforms" - depends on X86_MRST + depends on X86_INTEL_MID ---help--- Hardware driver for the watchdog time built into the Intel SCU for Intel Mobile Platforms. @@ -983,6 +998,7 @@ config ATH79_WDT config BCM47XX_WDT tristate "Broadcom BCM47xx Watchdog Timer" depends on BCM47XX + select WATCHDOG_CORE help Hardware driver for the Broadcom BCM47xx Watchdog Timer. @@ -1131,6 +1147,7 @@ config PIKA_WDT config BOOKE_WDT tristate "PowerPC Book-E Watchdog Timer" depends on BOOKE || 4xx + select WATCHDOG_CORE ---help--- Watchdog driver for PowerPC Book-E chips, such as the Freescale MPC85xx SOCs and the IBM PowerPC 440. diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index bec86ee6e9e3..a300b948f254 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -48,11 +48,12 @@ obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o -obj-$(CONFIG_STMP3XXX_WATCHDOG) += stmp3xxx_wdt.o +obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o +obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o # AVR32 Architecture obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c index 89831ed24a4f..1c75260b987c 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -24,6 +24,8 @@ #include <linux/types.h> #include <linux/watchdog.h> #include <linux/uaccess.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <mach/at91_st.h> #define WDT_DEFAULT_TIME 5 /* seconds */ @@ -252,6 +254,12 @@ static int at91wdt_resume(struct platform_device *pdev) #define at91wdt_resume NULL #endif +static const struct of_device_id at91_wdt_dt_ids[] = { + { .compatible = "atmel,at91rm9200-wdt" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, at91_wdt_dt_ids); + static struct platform_driver at91wdt_driver = { .probe = at91wdt_probe, .remove = at91wdt_remove, @@ -261,6 +269,7 @@ static struct platform_driver at91wdt_driver = { .driver = { .name = "at91_wdt", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(at91_wdt_dt_ids), }, }; diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index c08933cc565e..be37dde4f864 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -18,11 +18,9 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/errno.h> -#include <linux/fs.h> #include <linux/init.h> #include <linux/io.h> #include <linux/kernel.h> -#include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/platform_device.h> @@ -58,7 +56,7 @@ /* User land timeout */ #define WDT_HEARTBEAT 15 -static int heartbeat = WDT_HEARTBEAT; +static int heartbeat; module_param(heartbeat, int, 0); MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. " "(default = " __MODULE_STRING(WDT_HEARTBEAT) ")"); @@ -68,19 +66,17 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +static struct watchdog_device at91_wdt_dev; static void at91_ping(unsigned long data); static struct { void __iomem *base; unsigned long next_heartbeat; /* the next_heartbeat for the timer */ - unsigned long open; - char expect_close; struct timer_list timer; /* The timer that pings the watchdog */ } at91wdt_private; /* ......................................................................... */ - /* * Reload the watchdog timer. (ie, pat the watchdog) */ @@ -95,39 +91,37 @@ static inline void at91_wdt_reset(void) static void at91_ping(unsigned long data) { if (time_before(jiffies, at91wdt_private.next_heartbeat) || - (!nowayout && !at91wdt_private.open)) { + (!watchdog_active(&at91_wdt_dev))) { at91_wdt_reset(); mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT); } else pr_crit("I will reset your machine !\n"); } -/* - * Watchdog device is opened, and watchdog starts running. - */ -static int at91_wdt_open(struct inode *inode, struct file *file) +static int at91_wdt_ping(struct watchdog_device *wdd) { - if (test_and_set_bit(0, &at91wdt_private.open)) - return -EBUSY; + /* calculate when the next userspace timeout will be */ + at91wdt_private.next_heartbeat = jiffies + wdd->timeout * HZ; + return 0; +} - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; +static int at91_wdt_start(struct watchdog_device *wdd) +{ + /* calculate the next userspace timeout and modify the timer */ + at91_wdt_ping(wdd); mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT); - - return nonseekable_open(inode, file); + return 0; } -/* - * Close the watchdog device. - */ -static int at91_wdt_close(struct inode *inode, struct file *file) +static int at91_wdt_stop(struct watchdog_device *wdd) { - clear_bit(0, &at91wdt_private.open); - - /* stop internal ping */ - if (!at91wdt_private.expect_close) - del_timer(&at91wdt_private.timer); + /* The watchdog timer hardware can not be stopped... */ + return 0; +} - at91wdt_private.expect_close = 0; +static int at91_wdt_set_timeout(struct watchdog_device *wdd, unsigned int new_timeout) +{ + wdd->timeout = new_timeout; return 0; } @@ -163,96 +157,28 @@ static int at91_wdt_settimeout(unsigned int timeout) return 0; } +/* ......................................................................... */ + static const struct watchdog_info at91_wdt_info = { .identity = DRV_NAME, .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, }; -/* - * Handle commands from user-space. - */ -static long at91_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &at91_wdt_info, - sizeof(at91_wdt_info)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_KEEPALIVE: - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - - heartbeat = new_value; - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; - - return put_user(new_value, p); /* return current value */ - - case WDIOC_GETTIMEOUT: - return put_user(heartbeat, p); - } - return -ENOTTY; -} - -/* - * Pat the watchdog whenever device is written to. - */ -static ssize_t at91_wdt_write(struct file *file, const char *data, size_t len, - loff_t *ppos) -{ - if (!len) - return 0; - - /* Scan for magic character */ - if (!nowayout) { - size_t i; - - at91wdt_private.expect_close = 0; - - for (i = 0; i < len; i++) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') { - at91wdt_private.expect_close = 42; - break; - } - } - } - - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; - - return len; -} - -/* ......................................................................... */ - -static const struct file_operations at91wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = at91_wdt_ioctl, - .open = at91_wdt_open, - .release = at91_wdt_close, - .write = at91_wdt_write, +static const struct watchdog_ops at91_wdt_ops = { + .owner = THIS_MODULE, + .start = at91_wdt_start, + .stop = at91_wdt_stop, + .ping = at91_wdt_ping, + .set_timeout = at91_wdt_set_timeout, }; -static struct miscdevice at91wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &at91wdt_fops, +static struct watchdog_device at91_wdt_dev = { + .info = &at91_wdt_info, + .ops = &at91_wdt_ops, + .timeout = WDT_HEARTBEAT, + .min_timeout = 1, + .max_timeout = 0xFFFF, }; static int __init at91wdt_probe(struct platform_device *pdev) @@ -260,10 +186,6 @@ static int __init at91wdt_probe(struct platform_device *pdev) struct resource *r; int res; - if (at91wdt_miscdev.parent) - return -EBUSY; - at91wdt_miscdev.parent = &pdev->dev; - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) return -ENODEV; @@ -273,38 +195,41 @@ static int __init at91wdt_probe(struct platform_device *pdev) return -ENOMEM; } + at91_wdt_dev.parent = &pdev->dev; + watchdog_init_timeout(&at91_wdt_dev, heartbeat, &pdev->dev); + watchdog_set_nowayout(&at91_wdt_dev, nowayout); + /* Set watchdog */ res = at91_wdt_settimeout(ms_to_ticks(WDT_HW_TIMEOUT * 1000)); if (res) return res; - res = misc_register(&at91wdt_miscdev); + res = watchdog_register_device(&at91_wdt_dev); if (res) return res; - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; + at91wdt_private.next_heartbeat = jiffies + at91_wdt_dev.timeout * HZ; setup_timer(&at91wdt_private.timer, at91_ping, 0); mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT); pr_info("enabled (heartbeat=%d sec, nowayout=%d)\n", - heartbeat, nowayout); + at91_wdt_dev.timeout, nowayout); return 0; } static int __exit at91wdt_remove(struct platform_device *pdev) { - int res; + watchdog_unregister_device(&at91_wdt_dev); - res = misc_deregister(&at91wdt_miscdev); - if (!res) - at91wdt_miscdev.parent = NULL; + pr_warn("I quit now, hardware will probably reboot!\n"); + del_timer(&at91wdt_private.timer); - return res; + return 0; } #if defined(CONFIG_OF) -static const struct of_device_id at91_wdt_dt_ids[] __initconst = { +static const struct of_device_id at91_wdt_dt_ids[] = { { .compatible = "atmel,at91sam9260-wdt" }, { /* sentinel */ } }; @@ -326,4 +251,3 @@ module_platform_driver_probe(at91wdt_driver, at91wdt_probe); MODULE_AUTHOR("Renaud CERRATO <r.cerrato@til-technologies.fr>"); MODULE_DESCRIPTION("Watchdog driver for Atmel AT91SAM9x processors"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index 38a999e60c0d..898799074a13 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -23,6 +23,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/init.h> +#include <linux/io.h> #include <linux/kernel.h> #include <linux/miscdevice.h> #include <linux/module.h> @@ -32,14 +33,16 @@ #include <linux/watchdog.h> #include <linux/clk.h> #include <linux/err.h> - -#include <asm/mach-ath79/ath79.h> -#include <asm/mach-ath79/ar71xx_regs.h> +#include <linux/of.h> +#include <linux/of_platform.h> #define DRIVER_NAME "ath79-wdt" #define WDT_TIMEOUT 15 /* seconds */ +#define WDOG_REG_CTRL 0x00 +#define WDOG_REG_TIMER 0x04 + #define WDOG_CTRL_LAST_RESET BIT(31) #define WDOG_CTRL_ACTION_MASK 3 #define WDOG_CTRL_ACTION_NONE 0 /* no action */ @@ -66,27 +69,38 @@ static struct clk *wdt_clk; static unsigned long wdt_freq; static int boot_status; static int max_timeout; +static void __iomem *wdt_base; + +static inline void ath79_wdt_wr(unsigned reg, u32 val) +{ + iowrite32(val, wdt_base + reg); +} + +static inline u32 ath79_wdt_rr(unsigned reg) +{ + return ioread32(wdt_base + reg); +} static inline void ath79_wdt_keepalive(void) { - ath79_reset_wr(AR71XX_RESET_REG_WDOG, wdt_freq * timeout); + ath79_wdt_wr(WDOG_REG_TIMER, wdt_freq * timeout); /* flush write */ - ath79_reset_rr(AR71XX_RESET_REG_WDOG); + ath79_wdt_rr(WDOG_REG_TIMER); } static inline void ath79_wdt_enable(void) { ath79_wdt_keepalive(); - ath79_reset_wr(AR71XX_RESET_REG_WDOG_CTRL, WDOG_CTRL_ACTION_FCR); + ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_FCR); /* flush write */ - ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL); + ath79_wdt_rr(WDOG_REG_CTRL); } static inline void ath79_wdt_disable(void) { - ath79_reset_wr(AR71XX_RESET_REG_WDOG_CTRL, WDOG_CTRL_ACTION_NONE); + ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_NONE); /* flush write */ - ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL); + ath79_wdt_rr(WDOG_REG_CTRL); } static int ath79_wdt_set_timeout(int val) @@ -226,16 +240,32 @@ static struct miscdevice ath79_wdt_miscdev = { static int ath79_wdt_probe(struct platform_device *pdev) { + struct resource *res; u32 ctrl; int err; - wdt_clk = clk_get(&pdev->dev, "wdt"); + if (wdt_base) + return -EBUSY; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "no memory resource found\n"); + return -EINVAL; + } + + wdt_base = devm_request_and_ioremap(&pdev->dev, res); + if (!wdt_base) { + dev_err(&pdev->dev, "unable to remap memory region\n"); + return -ENOMEM; + } + + wdt_clk = devm_clk_get(&pdev->dev, "wdt"); if (IS_ERR(wdt_clk)) return PTR_ERR(wdt_clk); err = clk_enable(wdt_clk); if (err) - goto err_clk_put; + return err; wdt_freq = clk_get_rate(wdt_clk); if (!wdt_freq) { @@ -251,7 +281,7 @@ static int ath79_wdt_probe(struct platform_device *pdev) max_timeout, timeout); } - ctrl = ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL); + ctrl = ath79_wdt_rr(WDOG_REG_CTRL); boot_status = (ctrl & WDOG_CTRL_LAST_RESET) ? WDIOF_CARDRESET : 0; err = misc_register(&ath79_wdt_miscdev); @@ -265,8 +295,6 @@ static int ath79_wdt_probe(struct platform_device *pdev) err_clk_disable: clk_disable(wdt_clk); -err_clk_put: - clk_put(wdt_clk); return err; } @@ -274,7 +302,6 @@ static int ath79_wdt_remove(struct platform_device *pdev) { misc_deregister(&ath79_wdt_miscdev); clk_disable(wdt_clk); - clk_put(wdt_clk); return 0; } @@ -283,6 +310,14 @@ static void ath97_wdt_shutdown(struct platform_device *pdev) ath79_wdt_disable(); } +#ifdef CONFIG_OF +static const struct of_device_id ath79_wdt_match[] = { + { .compatible = "qca,ar7130-wdt" }, + {}, +}; +MODULE_DEVICE_TABLE(of, ath79_wdt_match); +#endif + static struct platform_driver ath79_wdt_driver = { .probe = ath79_wdt_probe, .remove = ath79_wdt_remove, @@ -290,6 +325,7 @@ static struct platform_driver ath79_wdt_driver = { .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, + .of_match_table = of_match_ptr(ath79_wdt_match), }, }; diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c index bc0e91e78e86..b4021a2b459b 100644 --- a/drivers/watchdog/bcm47xx_wdt.c +++ b/drivers/watchdog/bcm47xx_wdt.c @@ -3,6 +3,7 @@ * * Copyright (C) 2008 Aleksandar Radovanovic <biblbroks@sezampro.rs> * Copyright (C) 2009 Matthieu CASTET <castet.matthieu@free.fr> + * Copyright (C) 2012-2013 Hauke Mehrtens <hauke@hauke-m.de> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -12,165 +13,143 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/bcm47xx_wdt.h> #include <linux/bitops.h> #include <linux/errno.h> -#include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/platform_device.h> #include <linux/reboot.h> #include <linux/types.h> -#include <linux/uaccess.h> #include <linux/watchdog.h> #include <linux/timer.h> #include <linux/jiffies.h> -#include <linux/ssb/ssb_embedded.h> -#include <asm/mach-bcm47xx/bcm47xx.h> #define DRV_NAME "bcm47xx_wdt" #define WDT_DEFAULT_TIME 30 /* seconds */ -#define WDT_MAX_TIME 255 /* seconds */ +#define WDT_SOFTTIMER_MAX 255 /* seconds */ +#define WDT_SOFTTIMER_THRESHOLD 60 /* seconds */ -static int wdt_time = WDT_DEFAULT_TIME; +static int timeout = WDT_DEFAULT_TIME; static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(wdt_time, int, 0); -MODULE_PARM_DESC(wdt_time, "Watchdog time in seconds. (default=" +module_param(timeout, int, 0); +MODULE_PARM_DESC(timeout, "Watchdog time in seconds. (default=" __MODULE_STRING(WDT_DEFAULT_TIME) ")"); -#ifdef CONFIG_WATCHDOG_NOWAYOUT module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -#endif -static unsigned long bcm47xx_wdt_busy; -static char expect_release; -static struct timer_list wdt_timer; -static atomic_t ticks; - -static inline void bcm47xx_wdt_hw_start(void) +static inline struct bcm47xx_wdt *bcm47xx_wdt_get(struct watchdog_device *wdd) { - /* this is 2,5s on 100Mhz clock and 2s on 133 Mhz */ - switch (bcm47xx_bus_type) { -#ifdef CONFIG_BCM47XX_SSB - case BCM47XX_BUS_TYPE_SSB: - ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0xfffffff); - break; -#endif -#ifdef CONFIG_BCM47XX_BCMA - case BCM47XX_BUS_TYPE_BCMA: - bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, - 0xfffffff); - break; -#endif - } + return container_of(wdd, struct bcm47xx_wdt, wdd); } -static inline int bcm47xx_wdt_hw_stop(void) +static int bcm47xx_wdt_hard_keepalive(struct watchdog_device *wdd) { - switch (bcm47xx_bus_type) { -#ifdef CONFIG_BCM47XX_SSB - case BCM47XX_BUS_TYPE_SSB: - return ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0); -#endif -#ifdef CONFIG_BCM47XX_BCMA - case BCM47XX_BUS_TYPE_BCMA: - bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, 0); - return 0; -#endif - } - return -EINVAL; -} + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); -static void bcm47xx_timer_tick(unsigned long unused) -{ - if (!atomic_dec_and_test(&ticks)) { - bcm47xx_wdt_hw_start(); - mod_timer(&wdt_timer, jiffies + HZ); - } else { - pr_crit("Watchdog will fire soon!!!\n"); - } + wdt->timer_set_ms(wdt, wdd->timeout * 1000); + + return 0; } -static inline void bcm47xx_wdt_pet(void) +static int bcm47xx_wdt_hard_start(struct watchdog_device *wdd) { - atomic_set(&ticks, wdt_time); + return 0; } -static void bcm47xx_wdt_start(void) +static int bcm47xx_wdt_hard_stop(struct watchdog_device *wdd) { - bcm47xx_wdt_pet(); - bcm47xx_timer_tick(0); + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + wdt->timer_set(wdt, 0); + + return 0; } -static void bcm47xx_wdt_pause(void) +static int bcm47xx_wdt_hard_set_timeout(struct watchdog_device *wdd, + unsigned int new_time) { - del_timer_sync(&wdt_timer); - bcm47xx_wdt_hw_stop(); + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + u32 max_timer = wdt->max_timer_ms; + + if (new_time < 1 || new_time > max_timer / 1000) { + pr_warn("timeout value must be 1<=x<=%d, using %d\n", + max_timer / 1000, new_time); + return -EINVAL; + } + + wdd->timeout = new_time; + return 0; } -static void bcm47xx_wdt_stop(void) +static struct watchdog_ops bcm47xx_wdt_hard_ops = { + .owner = THIS_MODULE, + .start = bcm47xx_wdt_hard_start, + .stop = bcm47xx_wdt_hard_stop, + .ping = bcm47xx_wdt_hard_keepalive, + .set_timeout = bcm47xx_wdt_hard_set_timeout, +}; + +static void bcm47xx_wdt_soft_timer_tick(unsigned long data) { - bcm47xx_wdt_pause(); + struct bcm47xx_wdt *wdt = (struct bcm47xx_wdt *)data; + u32 next_tick = min(wdt->wdd.timeout * 1000, wdt->max_timer_ms); + + if (!atomic_dec_and_test(&wdt->soft_ticks)) { + wdt->timer_set_ms(wdt, next_tick); + mod_timer(&wdt->soft_timer, jiffies + HZ); + } else { + pr_crit("Watchdog will fire soon!!!\n"); + } } -static int bcm47xx_wdt_settimeout(int new_time) +static int bcm47xx_wdt_soft_keepalive(struct watchdog_device *wdd) { - if ((new_time <= 0) || (new_time > WDT_MAX_TIME)) - return -EINVAL; + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + atomic_set(&wdt->soft_ticks, wdd->timeout); - wdt_time = new_time; return 0; } -static int bcm47xx_wdt_open(struct inode *inode, struct file *file) +static int bcm47xx_wdt_soft_start(struct watchdog_device *wdd) { - if (test_and_set_bit(0, &bcm47xx_wdt_busy)) - return -EBUSY; + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + bcm47xx_wdt_soft_keepalive(wdd); + bcm47xx_wdt_soft_timer_tick((unsigned long)wdt); - bcm47xx_wdt_start(); - return nonseekable_open(inode, file); + return 0; } -static int bcm47xx_wdt_release(struct inode *inode, struct file *file) +static int bcm47xx_wdt_soft_stop(struct watchdog_device *wdd) { - if (expect_release == 42) { - bcm47xx_wdt_stop(); - } else { - pr_crit("Unexpected close, not stopping watchdog!\n"); - bcm47xx_wdt_start(); - } + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + del_timer_sync(&wdt->soft_timer); + wdt->timer_set(wdt, 0); - clear_bit(0, &bcm47xx_wdt_busy); - expect_release = 0; return 0; } -static ssize_t bcm47xx_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) +static int bcm47xx_wdt_soft_set_timeout(struct watchdog_device *wdd, + unsigned int new_time) { - if (len) { - if (!nowayout) { - size_t i; - - expect_release = 0; - - for (i = 0; i != len; i++) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - expect_release = 42; - } - } - bcm47xx_wdt_pet(); + if (new_time < 1 || new_time > WDT_SOFTTIMER_MAX) { + pr_warn("timeout value must be 1<=x<=%d, using %d\n", + WDT_SOFTTIMER_MAX, new_time); + return -EINVAL; } - return len; + + wdd->timeout = new_time; + return 0; } static const struct watchdog_info bcm47xx_wdt_info = { @@ -180,130 +159,100 @@ static const struct watchdog_info bcm47xx_wdt_info = { WDIOF_MAGICCLOSE, }; -static long bcm47xx_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value, retval = -EINVAL; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &bcm47xx_wdt_info, - sizeof(bcm47xx_wdt_info)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_SETOPTIONS: - if (get_user(new_value, p)) - return -EFAULT; - - if (new_value & WDIOS_DISABLECARD) { - bcm47xx_wdt_stop(); - retval = 0; - } - - if (new_value & WDIOS_ENABLECARD) { - bcm47xx_wdt_start(); - retval = 0; - } - - return retval; - - case WDIOC_KEEPALIVE: - bcm47xx_wdt_pet(); - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - - if (bcm47xx_wdt_settimeout(new_value)) - return -EINVAL; - - bcm47xx_wdt_pet(); - - case WDIOC_GETTIMEOUT: - return put_user(wdt_time, p); - - default: - return -ENOTTY; - } -} - static int bcm47xx_wdt_notify_sys(struct notifier_block *this, - unsigned long code, void *unused) + unsigned long code, void *unused) { + struct bcm47xx_wdt *wdt; + + wdt = container_of(this, struct bcm47xx_wdt, notifier); if (code == SYS_DOWN || code == SYS_HALT) - bcm47xx_wdt_stop(); + wdt->wdd.ops->stop(&wdt->wdd); return NOTIFY_DONE; } -static const struct file_operations bcm47xx_wdt_fops = { +static struct watchdog_ops bcm47xx_wdt_soft_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = bcm47xx_wdt_ioctl, - .open = bcm47xx_wdt_open, - .release = bcm47xx_wdt_release, - .write = bcm47xx_wdt_write, -}; - -static struct miscdevice bcm47xx_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &bcm47xx_wdt_fops, -}; - -static struct notifier_block bcm47xx_wdt_notifier = { - .notifier_call = bcm47xx_wdt_notify_sys, + .start = bcm47xx_wdt_soft_start, + .stop = bcm47xx_wdt_soft_stop, + .ping = bcm47xx_wdt_soft_keepalive, + .set_timeout = bcm47xx_wdt_soft_set_timeout, }; -static int __init bcm47xx_wdt_init(void) +static int bcm47xx_wdt_probe(struct platform_device *pdev) { int ret; + bool soft; + struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev); - if (bcm47xx_wdt_hw_stop() < 0) - return -ENODEV; + if (!wdt) + return -ENXIO; - setup_timer(&wdt_timer, bcm47xx_timer_tick, 0L); + soft = wdt->max_timer_ms < WDT_SOFTTIMER_THRESHOLD * 1000; - if (bcm47xx_wdt_settimeout(wdt_time)) { - bcm47xx_wdt_settimeout(WDT_DEFAULT_TIME); - pr_info("wdt_time value must be 0 < wdt_time < %d, using %d\n", - (WDT_MAX_TIME + 1), wdt_time); + if (soft) { + wdt->wdd.ops = &bcm47xx_wdt_soft_ops; + setup_timer(&wdt->soft_timer, bcm47xx_wdt_soft_timer_tick, + (long unsigned int)wdt); + } else { + wdt->wdd.ops = &bcm47xx_wdt_hard_ops; } - ret = register_reboot_notifier(&bcm47xx_wdt_notifier); + wdt->wdd.info = &bcm47xx_wdt_info; + wdt->wdd.timeout = WDT_DEFAULT_TIME; + ret = wdt->wdd.ops->set_timeout(&wdt->wdd, timeout); if (ret) - return ret; + goto err_timer; + watchdog_set_nowayout(&wdt->wdd, nowayout); - ret = misc_register(&bcm47xx_wdt_miscdev); - if (ret) { - unregister_reboot_notifier(&bcm47xx_wdt_notifier); - return ret; - } + wdt->notifier.notifier_call = &bcm47xx_wdt_notify_sys; + + ret = register_reboot_notifier(&wdt->notifier); + if (ret) + goto err_timer; - pr_info("BCM47xx Watchdog Timer enabled (%d seconds%s)\n", - wdt_time, nowayout ? ", nowayout" : ""); + ret = watchdog_register_device(&wdt->wdd); + if (ret) + goto err_notifier; + + dev_info(&pdev->dev, "BCM47xx Watchdog Timer enabled (%d seconds%s%s)\n", + timeout, nowayout ? ", nowayout" : "", + soft ? ", Software Timer" : ""); return 0; + +err_notifier: + unregister_reboot_notifier(&wdt->notifier); +err_timer: + if (soft) + del_timer_sync(&wdt->soft_timer); + + return ret; } -static void __exit bcm47xx_wdt_exit(void) +static int bcm47xx_wdt_remove(struct platform_device *pdev) { - if (!nowayout) - bcm47xx_wdt_stop(); + struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev); - misc_deregister(&bcm47xx_wdt_miscdev); + if (!wdt) + return -ENXIO; - unregister_reboot_notifier(&bcm47xx_wdt_notifier); + watchdog_unregister_device(&wdt->wdd); + unregister_reboot_notifier(&wdt->notifier); + + return 0; } -module_init(bcm47xx_wdt_init); -module_exit(bcm47xx_wdt_exit); +static struct platform_driver bcm47xx_wdt_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "bcm47xx-wdt", + }, + .probe = bcm47xx_wdt_probe, + .remove = bcm47xx_wdt_remove, +}; + +module_platform_driver(bcm47xx_wdt_driver); MODULE_AUTHOR("Aleksandar Radovanovic"); +MODULE_AUTHOR("Hauke Mehrtens <hauke@hauke-m.de>"); MODULE_DESCRIPTION("Watchdog driver for Broadcom BCM47xx"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index c0bc92d8e438..a8dbceb32914 100644 --- a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c @@ -15,12 +15,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> -#include <linux/fs.h> #include <linux/smp.h> -#include <linux/miscdevice.h> -#include <linux/notifier.h> #include <linux/watchdog.h> -#include <linux/uaccess.h> #include <asm/reg_booke.h> #include <asm/time.h> @@ -45,7 +41,7 @@ u32 booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT; #define WDTP_MASK (TCR_WP_MASK) #endif -static DEFINE_SPINLOCK(booke_wdt_lock); +#ifdef CONFIG_PPC_FSL_BOOK3E /* For the specified period, determine the number of seconds * corresponding to the reset time. There will be a watchdog @@ -86,6 +82,24 @@ static unsigned int sec_to_period(unsigned int secs) return 0; } +#define MAX_WDT_TIMEOUT period_to_sec(1) + +#else /* CONFIG_PPC_FSL_BOOK3E */ + +static unsigned long long period_to_sec(unsigned int period) +{ + return period; +} + +static unsigned int sec_to_period(unsigned int secs) +{ + return secs; +} + +#define MAX_WDT_TIMEOUT 3 /* from Kconfig */ + +#endif /* !CONFIG_PPC_FSL_BOOK3E */ + static void __booke_wdt_set(void *data) { u32 val; @@ -107,9 +121,11 @@ static void __booke_wdt_ping(void *data) mtspr(SPRN_TSR, TSR_ENW|TSR_WIS); } -static void booke_wdt_ping(void) +static int booke_wdt_ping(struct watchdog_device *wdog) { on_each_cpu(__booke_wdt_ping, NULL, 0); + + return 0; } static void __booke_wdt_enable(void *data) @@ -146,152 +162,81 @@ static void __booke_wdt_disable(void *data) } -static ssize_t booke_wdt_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static void __booke_wdt_start(struct watchdog_device *wdog) { - booke_wdt_ping(); - return count; + on_each_cpu(__booke_wdt_enable, NULL, 0); + pr_debug("watchdog enabled (timeout = %u sec)\n", wdog->timeout); } -static struct watchdog_info ident = { - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, - .identity = "PowerPC Book-E Watchdog", -}; - -static long booke_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +static int booke_wdt_start(struct watchdog_device *wdog) { - u32 tmp = 0; - u32 __user *p = (u32 __user *)arg; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(p, &ident, sizeof(ident)) ? -EFAULT : 0; - case WDIOC_GETSTATUS: - return put_user(0, p); - case WDIOC_GETBOOTSTATUS: - /* XXX: something is clearing TSR */ - tmp = mfspr(SPRN_TSR) & TSR_WRS(3); - /* returns CARDRESET if last reset was caused by the WDT */ - return put_user((tmp ? WDIOF_CARDRESET : 0), p); - case WDIOC_SETOPTIONS: - if (get_user(tmp, p)) - return -EFAULT; - if (tmp == WDIOS_ENABLECARD) { - booke_wdt_ping(); - break; - } else - return -EINVAL; - return 0; - case WDIOC_KEEPALIVE: - booke_wdt_ping(); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(tmp, p)) - return -EFAULT; -#ifdef CONFIG_PPC_FSL_BOOK3E - /* period of 1 gives the largest possible timeout */ - if (tmp > period_to_sec(1)) - return -EINVAL; - booke_wdt_period = sec_to_period(tmp); -#else - booke_wdt_period = tmp; -#endif - booke_wdt_set(); - /* Fall */ - case WDIOC_GETTIMEOUT: -#ifdef CONFIG_FSL_BOOKE - return put_user(period_to_sec(booke_wdt_period), p); -#else - return put_user(booke_wdt_period, p); -#endif - default: - return -ENOTTY; - } - - return 0; -} - -/* wdt_is_active stores whether or not the /dev/watchdog device is opened */ -static unsigned long wdt_is_active; - -static int booke_wdt_open(struct inode *inode, struct file *file) -{ - /* /dev/watchdog can only be opened once */ - if (test_and_set_bit(0, &wdt_is_active)) - return -EBUSY; - - spin_lock(&booke_wdt_lock); if (booke_wdt_enabled == 0) { booke_wdt_enabled = 1; - on_each_cpu(__booke_wdt_enable, NULL, 0); - pr_debug("watchdog enabled (timeout = %llu sec)\n", - period_to_sec(booke_wdt_period)); + __booke_wdt_start(wdog); } - spin_unlock(&booke_wdt_lock); - - return nonseekable_open(inode, file); + return 0; } -static int booke_wdt_release(struct inode *inode, struct file *file) +static int booke_wdt_stop(struct watchdog_device *wdog) { -#ifndef CONFIG_WATCHDOG_NOWAYOUT - /* Normally, the watchdog is disabled when /dev/watchdog is closed, but - * if CONFIG_WATCHDOG_NOWAYOUT is defined, then it means that the - * watchdog should remain enabled. So we disable it only if - * CONFIG_WATCHDOG_NOWAYOUT is not defined. - */ on_each_cpu(__booke_wdt_disable, NULL, 0); booke_wdt_enabled = 0; pr_debug("watchdog disabled\n"); -#endif - clear_bit(0, &wdt_is_active); + return 0; +} + +static int booke_wdt_set_timeout(struct watchdog_device *wdt_dev, + unsigned int timeout) +{ + if (timeout > MAX_WDT_TIMEOUT) + return -EINVAL; + booke_wdt_period = sec_to_period(timeout); + wdt_dev->timeout = timeout; + booke_wdt_set(); return 0; } -static const struct file_operations booke_wdt_fops = { +static struct watchdog_info booke_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "PowerPC Book-E Watchdog", +}; + +static struct watchdog_ops booke_wdt_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, - .write = booke_wdt_write, - .unlocked_ioctl = booke_wdt_ioctl, - .open = booke_wdt_open, - .release = booke_wdt_release, + .start = booke_wdt_start, + .stop = booke_wdt_stop, + .ping = booke_wdt_ping, + .set_timeout = booke_wdt_set_timeout, }; -static struct miscdevice booke_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &booke_wdt_fops, +static struct watchdog_device booke_wdt_dev = { + .info = &booke_wdt_info, + .ops = &booke_wdt_ops, + .min_timeout = 1, + .max_timeout = 0xFFFF }; static void __exit booke_wdt_exit(void) { - misc_deregister(&booke_wdt_miscdev); + watchdog_unregister_device(&booke_wdt_dev); } static int __init booke_wdt_init(void) { int ret = 0; + bool nowayout = WATCHDOG_NOWAYOUT; pr_info("powerpc book-e watchdog driver loaded\n"); - ident.firmware_version = cur_cpu_spec->pvr_value; - - ret = misc_register(&booke_wdt_miscdev); - if (ret) { - pr_err("cannot register device (minor=%u, ret=%i)\n", - WATCHDOG_MINOR, ret); - return ret; - } - - spin_lock(&booke_wdt_lock); - if (booke_wdt_enabled == 1) { - pr_info("watchdog enabled (timeout = %llu sec)\n", - period_to_sec(booke_wdt_period)); - on_each_cpu(__booke_wdt_enable, NULL, 0); - } - spin_unlock(&booke_wdt_lock); + booke_wdt_info.firmware_version = cur_cpu_spec->pvr_value; + booke_wdt_set_timeout(&booke_wdt_dev, + period_to_sec(CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT)); + watchdog_set_nowayout(&booke_wdt_dev, nowayout); + if (booke_wdt_enabled) + __booke_wdt_start(&booke_wdt_dev); + + ret = watchdog_register_device(&booke_wdt_dev); return ret; } diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c index e8e87246ea6d..7df1fdca9e78 100644 --- a/drivers/watchdog/davinci_wdt.c +++ b/drivers/watchdog/davinci_wdt.c @@ -69,7 +69,6 @@ static unsigned long wdt_status; #define WDT_REGION_INITED 2 #define WDT_DEVICE_INITED 3 -static struct resource *wdt_mem; static void __iomem *wdt_base; struct clk *wdt_clk; @@ -201,10 +200,11 @@ static struct miscdevice davinci_wdt_miscdev = { static int davinci_wdt_probe(struct platform_device *pdev) { - int ret = 0, size; + int ret = 0; struct device *dev = &pdev->dev; + struct resource *wdt_mem; - wdt_clk = clk_get(dev, NULL); + wdt_clk = devm_clk_get(dev, NULL); if (WARN_ON(IS_ERR(wdt_clk))) return PTR_ERR(wdt_clk); @@ -221,43 +221,26 @@ static int davinci_wdt_probe(struct platform_device *pdev) return -ENOENT; } - size = resource_size(wdt_mem); - if (!request_mem_region(wdt_mem->start, size, pdev->name)) { - dev_err(dev, "failed to get memory region\n"); - return -ENOENT; - } - - wdt_base = ioremap(wdt_mem->start, size); + wdt_base = devm_request_and_ioremap(dev, wdt_mem); if (!wdt_base) { - dev_err(dev, "failed to map memory region\n"); - release_mem_region(wdt_mem->start, size); - wdt_mem = NULL; - return -ENOMEM; + dev_err(dev, "ioremap failed\n"); + return -EADDRNOTAVAIL; } ret = misc_register(&davinci_wdt_miscdev); if (ret < 0) { dev_err(dev, "cannot register misc device\n"); - release_mem_region(wdt_mem->start, size); - wdt_mem = NULL; } else { set_bit(WDT_DEVICE_INITED, &wdt_status); } - iounmap(wdt_base); return ret; } static int davinci_wdt_remove(struct platform_device *pdev) { misc_deregister(&davinci_wdt_miscdev); - if (wdt_mem) { - release_mem_region(wdt_mem->start, resource_size(wdt_mem)); - wdt_mem = NULL; - } - clk_disable_unprepare(wdt_clk); - clk_put(wdt_clk); return 0; } diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index b9c5b58e59d3..257cfbad21da 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -310,6 +310,7 @@ static struct platform_driver gef_wdt_driver = { .of_match_table = gef_wdt_ids, }, .probe = gef_wdt_probe, + .remove = gef_wdt_remove, }; static int __init gef_wdt_init(void) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index b0e541d022e6..af88ffd1068f 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -45,6 +45,11 @@ #include "omap_wdt.h" +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " + "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + static unsigned timer_margin; module_param(timer_margin, uint, 0); MODULE_PARM_DESC(timer_margin, "initial watchdog timeout (in seconds)"); @@ -201,7 +206,6 @@ static const struct watchdog_ops omap_wdt_ops = { static int omap_wdt_probe(struct platform_device *pdev) { struct omap_wd_timer_platform_data *pdata = pdev->dev.platform_data; - bool nowayout = WATCHDOG_NOWAYOUT; struct watchdog_device *omap_wdt; struct resource *res, *mem; struct omap_wdt_dev *wdev; diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index 7c18b3bffcf7..da577980d390 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -140,6 +140,7 @@ static const struct watchdog_ops orion_wdt_ops = { static struct watchdog_device orion_wdt = { .info = &orion_wdt_info, .ops = &orion_wdt_ops, + .min_timeout = 1, }; static int orion_wdt_probe(struct platform_device *pdev) @@ -164,12 +165,9 @@ static int orion_wdt_probe(struct platform_device *pdev) wdt_max_duration = WDT_MAX_CYCLE_COUNT / wdt_tclk; - if ((heartbeat < 1) || (heartbeat > wdt_max_duration)) - heartbeat = wdt_max_duration; - - orion_wdt.timeout = heartbeat; - orion_wdt.min_timeout = 1; + orion_wdt.timeout = wdt_max_duration; orion_wdt.max_timeout = wdt_max_duration; + watchdog_init_timeout(&orion_wdt, heartbeat, &pdev->dev); watchdog_set_nowayout(&orion_wdt, nowayout); ret = watchdog_register_device(&orion_wdt); @@ -179,7 +177,7 @@ static int orion_wdt_probe(struct platform_device *pdev) } pr_info("Initial timeout %d sec%s\n", - heartbeat, nowayout ? ", nowayout" : ""); + orion_wdt.timeout, nowayout ? ", nowayout" : ""); return 0; } @@ -225,4 +223,5 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:orion_wdt"); MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c index de1f3fa1d787..a3684a30eb69 100644 --- a/drivers/watchdog/pnx4008_wdt.c +++ b/drivers/watchdog/pnx4008_wdt.c @@ -142,6 +142,7 @@ static const struct watchdog_ops pnx4008_wdt_ops = { static struct watchdog_device pnx4008_wdd = { .info = &pnx4008_wdt_ident, .ops = &pnx4008_wdt_ops, + .timeout = DEFAULT_HEARTBEAT, .min_timeout = 1, .max_timeout = MAX_HEARTBEAT, }; @@ -151,8 +152,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev) struct resource *r; int ret = 0; - if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT) - heartbeat = DEFAULT_HEARTBEAT; + watchdog_init_timeout(&pnx4008_wdd, heartbeat, &pdev->dev); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); wdt_base = devm_ioremap_resource(&pdev->dev, r); @@ -167,7 +167,6 @@ static int pnx4008_wdt_probe(struct platform_device *pdev) if (ret) goto out; - pnx4008_wdd.timeout = heartbeat; pnx4008_wdd.bootstatus = (readl(WDTIM_RES(wdt_base)) & WDOG_RESET) ? WDIOF_CARDRESET : 0; watchdog_set_nowayout(&pnx4008_wdd, nowayout); @@ -181,7 +180,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev) } dev_info(&pdev->dev, "PNX4008 Watchdog Timer: heartbeat %d sec\n", - heartbeat); + pnx4008_wdd.timeout); return 0; diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c new file mode 100644 index 000000000000..f53615dc633d --- /dev/null +++ b/drivers/watchdog/retu_wdt.c @@ -0,0 +1,178 @@ +/* + * Retu watchdog driver + * + * Copyright (C) 2004, 2005 Nokia Corporation + * + * Based on code written by Amit Kucheria and Michael Buesch. + * Rewritten by Aaro Koskinen. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of this + * archive for more details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mfd/retu.h> +#include <linux/watchdog.h> +#include <linux/platform_device.h> + +/* Watchdog timer values in seconds */ +#define RETU_WDT_MAX_TIMER 63 + +struct retu_wdt_dev { + struct retu_dev *rdev; + struct device *dev; + struct delayed_work ping_work; +}; + +/* + * Since Retu watchdog cannot be disabled in hardware, we must kick it + * with a timer until userspace watchdog software takes over. If + * CONFIG_WATCHDOG_NOWAYOUT is set, we never start the feeding. + */ +static void retu_wdt_ping_enable(struct retu_wdt_dev *wdev) +{ + retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER); + schedule_delayed_work(&wdev->ping_work, + round_jiffies_relative(RETU_WDT_MAX_TIMER * HZ / 2)); +} + +static void retu_wdt_ping_disable(struct retu_wdt_dev *wdev) +{ + retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER); + cancel_delayed_work_sync(&wdev->ping_work); +} + +static void retu_wdt_ping_work(struct work_struct *work) +{ + struct retu_wdt_dev *wdev = container_of(to_delayed_work(work), + struct retu_wdt_dev, ping_work); + retu_wdt_ping_enable(wdev); +} + +static int retu_wdt_start(struct watchdog_device *wdog) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + retu_wdt_ping_disable(wdev); + + return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); +} + +static int retu_wdt_stop(struct watchdog_device *wdog) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + retu_wdt_ping_enable(wdev); + + return 0; +} + +static int retu_wdt_ping(struct watchdog_device *wdog) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); +} + +static int retu_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int timeout) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + wdog->timeout = timeout; + return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); +} + +static const struct watchdog_info retu_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "Retu watchdog", +}; + +static const struct watchdog_ops retu_wdt_ops = { + .owner = THIS_MODULE, + .start = retu_wdt_start, + .stop = retu_wdt_stop, + .ping = retu_wdt_ping, + .set_timeout = retu_wdt_set_timeout, +}; + +static int retu_wdt_probe(struct platform_device *pdev) +{ + struct retu_dev *rdev = dev_get_drvdata(pdev->dev.parent); + bool nowayout = WATCHDOG_NOWAYOUT; + struct watchdog_device *retu_wdt; + struct retu_wdt_dev *wdev; + int ret; + + retu_wdt = devm_kzalloc(&pdev->dev, sizeof(*retu_wdt), GFP_KERNEL); + if (!retu_wdt) + return -ENOMEM; + + wdev = devm_kzalloc(&pdev->dev, sizeof(*wdev), GFP_KERNEL); + if (!wdev) + return -ENOMEM; + + retu_wdt->info = &retu_wdt_info; + retu_wdt->ops = &retu_wdt_ops; + retu_wdt->timeout = RETU_WDT_MAX_TIMER; + retu_wdt->min_timeout = 0; + retu_wdt->max_timeout = RETU_WDT_MAX_TIMER; + + watchdog_set_drvdata(retu_wdt, wdev); + watchdog_set_nowayout(retu_wdt, nowayout); + + wdev->rdev = rdev; + wdev->dev = &pdev->dev; + + INIT_DELAYED_WORK(&wdev->ping_work, retu_wdt_ping_work); + + ret = watchdog_register_device(retu_wdt); + if (ret < 0) + return ret; + + if (nowayout) + retu_wdt_ping(retu_wdt); + else + retu_wdt_ping_enable(wdev); + + platform_set_drvdata(pdev, retu_wdt); + + return 0; +} + +static int retu_wdt_remove(struct platform_device *pdev) +{ + struct watchdog_device *wdog = platform_get_drvdata(pdev); + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + watchdog_unregister_device(wdog); + cancel_delayed_work_sync(&wdev->ping_work); + + return 0; +} + +static struct platform_driver retu_wdt_driver = { + .probe = retu_wdt_probe, + .remove = retu_wdt_remove, + .driver = { + .name = "retu-wdt", + }, +}; +module_platform_driver(retu_wdt_driver); + +MODULE_ALIAS("platform:retu-wdt"); +MODULE_DESCRIPTION("Retu watchdog"); +MODULE_AUTHOR("Amit Kucheria"); +MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 27bcd4e2c4a4..c1a221cbeae4 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -53,7 +53,7 @@ #define CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME (15) static bool nowayout = WATCHDOG_NOWAYOUT; -static int tmr_margin = CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME; +static int tmr_margin; static int tmr_atboot = CONFIG_S3C2410_WATCHDOG_ATBOOT; static int soft_noboot; static int debug; @@ -226,6 +226,7 @@ static struct watchdog_ops s3c2410wdt_ops = { static struct watchdog_device s3c2410_wdd = { .info = &s3c2410_wdt_ident, .ops = &s3c2410wdt_ops, + .timeout = CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME, }; /* interrupt handler code */ @@ -309,7 +310,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev) unsigned int wtcon; int started = 0; int ret; - int size; DBG("%s: probe=%p\n", __func__, pdev); @@ -330,28 +330,20 @@ static int s3c2410wdt_probe(struct platform_device *pdev) } /* get the memory region for the watchdog timer */ - - size = resource_size(wdt_mem); - if (!request_mem_region(wdt_mem->start, size, pdev->name)) { - dev_err(dev, "failed to get memory region\n"); - ret = -EBUSY; - goto err; - } - - wdt_base = ioremap(wdt_mem->start, size); + wdt_base = devm_request_and_ioremap(dev, wdt_mem); if (wdt_base == NULL) { - dev_err(dev, "failed to ioremap() region\n"); - ret = -EINVAL; - goto err_req; + dev_err(dev, "failed to devm_request_and_ioremap() region\n"); + ret = -ENOMEM; + goto err; } DBG("probe: mapped wdt_base=%p\n", wdt_base); - wdt_clock = clk_get(&pdev->dev, "watchdog"); + wdt_clock = devm_clk_get(dev, "watchdog"); if (IS_ERR(wdt_clock)) { dev_err(dev, "failed to find watchdog clock source\n"); ret = PTR_ERR(wdt_clock); - goto err_map; + goto err; } clk_prepare_enable(wdt_clock); @@ -365,7 +357,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev) /* see if we can actually set the requested timer margin, and if * not, try the default value */ - if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, tmr_margin)) { + watchdog_init_timeout(&s3c2410_wdd, tmr_margin, &pdev->dev); + if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout)) { started = s3c2410wdt_set_heartbeat(&s3c2410_wdd, CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME); @@ -378,7 +371,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev) "cannot start\n"); } - ret = request_irq(wdt_irq->start, s3c2410wdt_irq, 0, pdev->name, pdev); + ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0, + pdev->name, pdev); if (ret != 0) { dev_err(dev, "failed to install irq (%d)\n", ret); goto err_cpufreq; @@ -389,7 +383,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev) ret = watchdog_register_device(&s3c2410_wdd); if (ret) { dev_err(dev, "cannot register watchdog (%d)\n", ret); - goto err_irq; + goto err_cpufreq; } if (tmr_atboot && started == 0) { @@ -414,23 +408,13 @@ static int s3c2410wdt_probe(struct platform_device *pdev) return 0; - err_irq: - free_irq(wdt_irq->start, pdev); - err_cpufreq: s3c2410wdt_cpufreq_deregister(); err_clk: clk_disable_unprepare(wdt_clock); - clk_put(wdt_clock); wdt_clock = NULL; - err_map: - iounmap(wdt_base); - - err_req: - release_mem_region(wdt_mem->start, size); - err: wdt_irq = NULL; wdt_mem = NULL; @@ -441,17 +425,11 @@ static int s3c2410wdt_remove(struct platform_device *dev) { watchdog_unregister_device(&s3c2410_wdd); - free_irq(wdt_irq->start, dev); - s3c2410wdt_cpufreq_deregister(); clk_disable_unprepare(wdt_clock); - clk_put(wdt_clock); wdt_clock = NULL; - iounmap(wdt_base); - - release_mem_region(wdt_mem->start, resource_size(wdt_mem)); wdt_irq = NULL; wdt_mem = NULL; return 0; diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 2b0e000d4377..e3b8f757d2d3 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -361,7 +361,7 @@ static unsigned char sp5100_tco_setupdevice(void) { struct pci_dev *dev = NULL; const char *dev_name = NULL; - u32 val; + u32 val, tmp_val; u32 index_reg, data_reg, base_addr; /* Match the PCI device */ @@ -497,30 +497,19 @@ static unsigned char sp5100_tco_setupdevice(void) pr_debug("Got 0x%04x from resource tree\n", val); } - /* Restore to the low three bits, if chipset is SB8x0(or later) */ - if (sp5100_tco_pci->revision >= 0x40) { - u8 reserved_bit; - reserved_bit = inb(base_addr) & 0x7; - val |= (u32)reserved_bit; - } + /* Restore to the low three bits */ + outb(base_addr+0, index_reg); + tmp_val = val | (inb(data_reg) & 0x7); /* Re-programming the watchdog timer base address */ outb(base_addr+0, index_reg); - /* Low three bits of BASE are reserved */ - outb((val >> 0) & 0xf8, data_reg); + outb((tmp_val >> 0) & 0xff, data_reg); outb(base_addr+1, index_reg); - outb((val >> 8) & 0xff, data_reg); + outb((tmp_val >> 8) & 0xff, data_reg); outb(base_addr+2, index_reg); - outb((val >> 16) & 0xff, data_reg); + outb((tmp_val >> 16) & 0xff, data_reg); outb(base_addr+3, index_reg); - outb((val >> 24) & 0xff, data_reg); - - /* - * Clear unnecessary the low three bits, - * if chipset is SB8x0(or later) - */ - if (sp5100_tco_pci->revision >= 0x40) - val &= ~0x7; + outb((tmp_val >> 24) & 0xff, data_reg); if (!request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE, dev_name)) { diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c new file mode 100644 index 000000000000..c97e98dcde62 --- /dev/null +++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c @@ -0,0 +1,111 @@ +/* + * Watchdog driver for the RTC based watchdog in STMP3xxx and i.MX23/28 + * + * Author: Wolfram Sang <w.sang@pengutronix.de> + * + * Copyright (C) 2011-12 Wolfram Sang, Pengutronix + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/miscdevice.h> +#include <linux/watchdog.h> +#include <linux/platform_device.h> +#include <linux/stmp3xxx_rtc_wdt.h> + +#define WDOG_TICK_RATE 1000 /* 1 kHz clock */ +#define STMP3XXX_DEFAULT_TIMEOUT 19 +#define STMP3XXX_MAX_TIMEOUT (UINT_MAX / WDOG_TICK_RATE) + +static int heartbeat = STMP3XXX_DEFAULT_TIMEOUT; +module_param(heartbeat, uint, 0); +MODULE_PARM_DESC(heartbeat, "Watchdog heartbeat period in seconds from 1 to " + __MODULE_STRING(STMP3XXX_MAX_TIMEOUT) ", default " + __MODULE_STRING(STMP3XXX_DEFAULT_TIMEOUT)); + +static int wdt_start(struct watchdog_device *wdd) +{ + struct device *dev = watchdog_get_drvdata(wdd); + struct stmp3xxx_wdt_pdata *pdata = dev->platform_data; + + pdata->wdt_set_timeout(dev->parent, wdd->timeout * WDOG_TICK_RATE); + return 0; +} + +static int wdt_stop(struct watchdog_device *wdd) +{ + struct device *dev = watchdog_get_drvdata(wdd); + struct stmp3xxx_wdt_pdata *pdata = dev->platform_data; + + pdata->wdt_set_timeout(dev->parent, 0); + return 0; +} + +static int wdt_set_timeout(struct watchdog_device *wdd, unsigned new_timeout) +{ + wdd->timeout = new_timeout; + return wdt_start(wdd); +} + +static const struct watchdog_info stmp3xxx_wdt_ident = { + .options = WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "STMP3XXX RTC Watchdog", +}; + +static const struct watchdog_ops stmp3xxx_wdt_ops = { + .owner = THIS_MODULE, + .start = wdt_start, + .stop = wdt_stop, + .set_timeout = wdt_set_timeout, +}; + +static struct watchdog_device stmp3xxx_wdd = { + .info = &stmp3xxx_wdt_ident, + .ops = &stmp3xxx_wdt_ops, + .min_timeout = 1, + .max_timeout = STMP3XXX_MAX_TIMEOUT, + .status = WATCHDOG_NOWAYOUT_INIT_STATUS, +}; + +static int stmp3xxx_wdt_probe(struct platform_device *pdev) +{ + int ret; + + watchdog_set_drvdata(&stmp3xxx_wdd, &pdev->dev); + + stmp3xxx_wdd.timeout = clamp_t(unsigned, heartbeat, 1, STMP3XXX_MAX_TIMEOUT); + + ret = watchdog_register_device(&stmp3xxx_wdd); + if (ret < 0) { + dev_err(&pdev->dev, "cannot register watchdog device\n"); + return ret; + } + + dev_info(&pdev->dev, "initialized watchdog with heartbeat %ds\n", + stmp3xxx_wdd.timeout); + return 0; +} + +static int stmp3xxx_wdt_remove(struct platform_device *pdev) +{ + watchdog_unregister_device(&stmp3xxx_wdd); + return 0; +} + +static struct platform_driver stmp3xxx_wdt_driver = { + .driver = { + .name = "stmp3xxx_rtc_wdt", + }, + .probe = stmp3xxx_wdt_probe, + .remove = stmp3xxx_wdt_remove, +}; +module_platform_driver(stmp3xxx_wdt_driver); + +MODULE_DESCRIPTION("STMP3XXX RTC Watchdog Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>"); +MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/stmp3xxx_wdt.c b/drivers/watchdog/stmp3xxx_wdt.c deleted file mode 100644 index 1f4f69728fee..000000000000 --- a/drivers/watchdog/stmp3xxx_wdt.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Watchdog driver for Freescale STMP37XX/STMP378X - * - * Author: Vitaly Wool <vital@embeddedalley.com> - * - * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. - * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/miscdevice.h> -#include <linux/watchdog.h> -#include <linux/platform_device.h> -#include <linux/spinlock.h> -#include <linux/uaccess.h> -#include <linux/module.h> - -#include <mach/platform.h> -#include <mach/regs-rtc.h> - -#define DEFAULT_HEARTBEAT 19 -#define MAX_HEARTBEAT (0x10000000 >> 6) - -/* missing bitmask in headers */ -#define BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER 0x80000000 - -#define WDT_IN_USE 0 -#define WDT_OK_TO_CLOSE 1 - -#define WDOG_COUNTER_RATE 1000 /* 1 kHz clock */ - -static DEFINE_SPINLOCK(stmp3xxx_wdt_io_lock); -static unsigned long wdt_status; -static const bool nowayout = WATCHDOG_NOWAYOUT; -static int heartbeat = DEFAULT_HEARTBEAT; -static unsigned long boot_status; - -static void wdt_enable(u32 value) -{ - spin_lock(&stmp3xxx_wdt_io_lock); - __raw_writel(value, REGS_RTC_BASE + HW_RTC_WATCHDOG); - stmp3xxx_setl(BM_RTC_CTRL_WATCHDOGEN, REGS_RTC_BASE + HW_RTC_CTRL); - stmp3xxx_setl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER, - REGS_RTC_BASE + HW_RTC_PERSISTENT1); - spin_unlock(&stmp3xxx_wdt_io_lock); -} - -static void wdt_disable(void) -{ - spin_lock(&stmp3xxx_wdt_io_lock); - stmp3xxx_clearl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER, - REGS_RTC_BASE + HW_RTC_PERSISTENT1); - stmp3xxx_clearl(BM_RTC_CTRL_WATCHDOGEN, REGS_RTC_BASE + HW_RTC_CTRL); - spin_unlock(&stmp3xxx_wdt_io_lock); -} - -static void wdt_ping(void) -{ - wdt_enable(heartbeat * WDOG_COUNTER_RATE); -} - -static int stmp3xxx_wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(WDT_IN_USE, &wdt_status)) - return -EBUSY; - - clear_bit(WDT_OK_TO_CLOSE, &wdt_status); - wdt_ping(); - - return nonseekable_open(inode, file); -} - -static ssize_t stmp3xxx_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) -{ - if (len) { - if (!nowayout) { - size_t i; - - clear_bit(WDT_OK_TO_CLOSE, &wdt_status); - - for (i = 0; i != len; i++) { - char c; - - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - set_bit(WDT_OK_TO_CLOSE, &wdt_status); - } - } - wdt_ping(); - } - - return len; -} - -static const struct watchdog_info ident = { - .options = WDIOF_CARDRESET | - WDIOF_MAGICCLOSE | - WDIOF_SETTIMEOUT | - WDIOF_KEEPALIVEPING, - .identity = "STMP3XXX Watchdog", -}; - -static long stmp3xxx_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_heartbeat, opts; - int ret = -ENOTTY; - - switch (cmd) { - case WDIOC_GETSUPPORT: - ret = copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; - break; - - case WDIOC_GETSTATUS: - ret = put_user(0, p); - break; - - case WDIOC_GETBOOTSTATUS: - ret = put_user(boot_status, p); - break; - - case WDIOC_SETOPTIONS: - if (get_user(opts, p)) { - ret = -EFAULT; - break; - } - if (opts & WDIOS_DISABLECARD) - wdt_disable(); - else if (opts & WDIOS_ENABLECARD) - wdt_ping(); - else { - pr_debug("%s: unknown option 0x%x\n", __func__, opts); - ret = -EINVAL; - break; - } - ret = 0; - break; - - case WDIOC_KEEPALIVE: - wdt_ping(); - ret = 0; - break; - - case WDIOC_SETTIMEOUT: - if (get_user(new_heartbeat, p)) { - ret = -EFAULT; - break; - } - if (new_heartbeat <= 0 || new_heartbeat > MAX_HEARTBEAT) { - ret = -EINVAL; - break; - } - - heartbeat = new_heartbeat; - wdt_ping(); - /* Fall through */ - - case WDIOC_GETTIMEOUT: - ret = put_user(heartbeat, p); - break; - } - return ret; -} - -static int stmp3xxx_wdt_release(struct inode *inode, struct file *file) -{ - int ret = 0; - - if (!nowayout) { - if (!test_bit(WDT_OK_TO_CLOSE, &wdt_status)) { - wdt_ping(); - pr_debug("%s: Device closed unexpectedly\n", __func__); - ret = -EINVAL; - } else { - wdt_disable(); - clear_bit(WDT_OK_TO_CLOSE, &wdt_status); - } - } - clear_bit(WDT_IN_USE, &wdt_status); - - return ret; -} - -static const struct file_operations stmp3xxx_wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .write = stmp3xxx_wdt_write, - .unlocked_ioctl = stmp3xxx_wdt_ioctl, - .open = stmp3xxx_wdt_open, - .release = stmp3xxx_wdt_release, -}; - -static struct miscdevice stmp3xxx_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &stmp3xxx_wdt_fops, -}; - -static int stmp3xxx_wdt_probe(struct platform_device *pdev) -{ - int ret = 0; - - if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT) - heartbeat = DEFAULT_HEARTBEAT; - - boot_status = __raw_readl(REGS_RTC_BASE + HW_RTC_PERSISTENT1) & - BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER; - boot_status = !!boot_status; - stmp3xxx_clearl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER, - REGS_RTC_BASE + HW_RTC_PERSISTENT1); - wdt_disable(); /* disable for now */ - - ret = misc_register(&stmp3xxx_wdt_miscdev); - if (ret < 0) { - dev_err(&pdev->dev, "cannot register misc device\n"); - return ret; - } - - pr_info("initialized, heartbeat %d sec\n", heartbeat); - - return ret; -} - -static int stmp3xxx_wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&stmp3xxx_wdt_miscdev); - return 0; -} - -#ifdef CONFIG_PM -static int wdt_suspended; -static u32 wdt_saved_time; - -static int stmp3xxx_wdt_suspend(struct platform_device *pdev, - pm_message_t state) -{ - if (__raw_readl(REGS_RTC_BASE + HW_RTC_CTRL) & - BM_RTC_CTRL_WATCHDOGEN) { - wdt_suspended = 1; - wdt_saved_time = __raw_readl(REGS_RTC_BASE + HW_RTC_WATCHDOG); - wdt_disable(); - } - return 0; -} - -static int stmp3xxx_wdt_resume(struct platform_device *pdev) -{ - if (wdt_suspended) { - wdt_enable(wdt_saved_time); - wdt_suspended = 0; - } - return 0; -} -#else -#define stmp3xxx_wdt_suspend NULL -#define stmp3xxx_wdt_resume NULL -#endif - -static struct platform_driver platform_wdt_driver = { - .driver = { - .name = "stmp3xxx_wdt", - }, - .probe = stmp3xxx_wdt_probe, - .remove = stmp3xxx_wdt_remove, - .suspend = stmp3xxx_wdt_suspend, - .resume = stmp3xxx_wdt_resume, -}; - -module_platform_driver(platform_wdt_driver); - -MODULE_DESCRIPTION("STMP3XXX Watchdog Driver"); -MODULE_LICENSE("GPL"); - -module_param(heartbeat, int, 0); -MODULE_PARM_DESC(heartbeat, - "Watchdog heartbeat period in seconds from 1 to " - __MODULE_STRING(MAX_HEARTBEAT) ", default " - __MODULE_STRING(DEFAULT_HEARTBEAT)); - -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 3796434991fa..05d18b4c661b 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -36,12 +36,68 @@ #include <linux/init.h> /* For __init/__exit/... */ #include <linux/idr.h> /* For ida_* macros */ #include <linux/err.h> /* For IS_ERR macros */ +#include <linux/of.h> /* For of_get_timeout_sec */ #include "watchdog_core.h" /* For watchdog_dev_register/... */ static DEFINE_IDA(watchdog_ida); static struct class *watchdog_class; +static void watchdog_check_min_max_timeout(struct watchdog_device *wdd) +{ + /* + * Check that we have valid min and max timeout values, if + * not reset them both to 0 (=not used or unknown) + */ + if (wdd->min_timeout > wdd->max_timeout) { + pr_info("Invalid min and max timeout values, resetting to 0!\n"); + wdd->min_timeout = 0; + wdd->max_timeout = 0; + } +} + +/** + * watchdog_init_timeout() - initialize the timeout field + * @timeout_parm: timeout module parameter + * @dev: Device that stores the timeout-sec property + * + * Initialize the timeout field of the watchdog_device struct with either the + * timeout module parameter (if it is valid value) or the timeout-sec property + * (only if it is a valid value and the timeout_parm is out of bounds). + * If none of them are valid then we keep the old value (which should normally + * be the default timeout value. + * + * A zero is returned on success and -EINVAL for failure. + */ +int watchdog_init_timeout(struct watchdog_device *wdd, + unsigned int timeout_parm, struct device *dev) +{ + unsigned int t = 0; + int ret = 0; + + watchdog_check_min_max_timeout(wdd); + + /* try to get the tiemout module parameter first */ + if (!watchdog_timeout_invalid(wdd, timeout_parm)) { + wdd->timeout = timeout_parm; + return ret; + } + if (timeout_parm) + ret = -EINVAL; + + /* try to get the timeout_sec property */ + if (dev == NULL || dev->of_node == NULL) + return ret; + of_property_read_u32(dev->of_node, "timeout-sec", &t); + if (!watchdog_timeout_invalid(wdd, t)) + wdd->timeout = t; + else + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(watchdog_init_timeout); + /** * watchdog_register_device() - register a watchdog device * @wdd: watchdog device @@ -63,15 +119,7 @@ int watchdog_register_device(struct watchdog_device *wdd) if (wdd->ops->start == NULL || wdd->ops->stop == NULL) return -EINVAL; - /* - * Check that we have valid min and max timeout values, if - * not reset them both to 0 (=not used or unknown) - */ - if (wdd->min_timeout > wdd->max_timeout) { - pr_info("Invalid min and max timeout values, resetting to 0!\n"); - wdd->min_timeout = 0; - wdd->max_timeout = 0; - } + watchdog_check_min_max_timeout(wdd); /* * Note: now that all watchdog_device data has been verified, we diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index ef8edecfc526..08b48bbf9f4b 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -200,8 +200,7 @@ static int watchdog_set_timeout(struct watchdog_device *wddev, !(wddev->info->options & WDIOF_SETTIMEOUT)) return -EOPNOTSUPP; - if ((wddev->max_timeout != 0) && - (timeout < wddev->min_timeout || timeout > wddev->max_timeout)) + if (watchdog_timeout_invalid(wddev, timeout)) return -EINVAL; mutex_lock(&wddev->lock); |