diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/dma/Kconfig | 8 | ||||
-rw-r--r-- | drivers/dma/Makefile | 1 | ||||
-rw-r--r-- | drivers/dma/amba-pl08x.c | 2167 | ||||
-rw-r--r-- | drivers/dma/dmaengine.c | 2 | ||||
-rw-r--r-- | drivers/dma/fsldma.c | 328 | ||||
-rw-r--r-- | drivers/dma/intel_mid_dma.c | 476 | ||||
-rw-r--r-- | drivers/dma/intel_mid_dma_regs.h | 53 | ||||
-rw-r--r-- | drivers/dma/mv_xor.c | 2 | ||||
-rw-r--r-- | drivers/dma/shdma.c | 3 | ||||
-rw-r--r-- | drivers/dma/ste_dma40.c | 975 | ||||
-rw-r--r-- | drivers/dma/ste_dma40_ll.c | 172 | ||||
-rw-r--r-- | drivers/dma/ste_dma40_ll.h | 79 |
12 files changed, 3388 insertions, 878 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index e0e1f40a9fc8..ab28f6093414 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -49,6 +49,14 @@ config INTEL_MID_DMAC config ASYNC_TX_DISABLE_CHANNEL_SWITCH bool +config AMBA_PL08X + bool "ARM PrimeCell PL080 or PL081 support" + depends on ARM_AMBA && EXPERIMENTAL + select DMA_ENGINE + help + Platform has a PL08x DMAC device + which can provide DMA engine support + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 79ac75b51065..a8a84f4587f2 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -27,3 +27,4 @@ obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_PL330_DMA) += pl330.o obj-$(CONFIG_PCH_DMA) += pch_dma.o +obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c new file mode 100644 index 000000000000..b605cc9ac3a2 --- /dev/null +++ b/drivers/dma/amba-pl08x.c @@ -0,0 +1,2167 @@ +/* + * Copyright (c) 2006 ARM Ltd. + * Copyright (c) 2010 ST-Ericsson SA + * + * Author: Peter Pearse <peter.pearse@arm.com> + * Author: Linus Walleij <linus.walleij@stericsson.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is iin this distribution in the + * file called COPYING. + * + * Documentation: ARM DDI 0196G == PL080 + * Documentation: ARM DDI 0218E == PL081 + * + * PL080 & PL081 both have 16 sets of DMA signals that can be routed to + * any channel. + * + * The PL080 has 8 channels available for simultaneous use, and the PL081 + * has only two channels. So on these DMA controllers the number of channels + * and the number of incoming DMA signals are two totally different things. + * It is usually not possible to theoretically handle all physical signals, + * so a multiplexing scheme with possible denial of use is necessary. + * + * The PL080 has a dual bus master, PL081 has a single master. + * + * Memory to peripheral transfer may be visualized as + * Get data from memory to DMAC + * Until no data left + * On burst request from peripheral + * Destination burst from DMAC to peripheral + * Clear burst request + * Raise terminal count interrupt + * + * For peripherals with a FIFO: + * Source burst size == half the depth of the peripheral FIFO + * Destination burst size == the depth of the peripheral FIFO + * + * (Bursts are irrelevant for mem to mem transfers - there are no burst + * signals, the DMA controller will simply facilitate its AHB master.) + * + * ASSUMES default (little) endianness for DMA transfers + * + * Only DMAC flow control is implemented + * + * Global TODO: + * - Break out common code from arch/arm/mach-s3c64xx and share + */ +#include <linux/device.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/dmapool.h> +#include <linux/amba/bus.h> +#include <linux/dmaengine.h> +#include <linux/amba/pl08x.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include <asm/hardware/pl080.h> +#include <asm/dma.h> +#include <asm/mach/dma.h> +#include <asm/atomic.h> +#include <asm/processor.h> +#include <asm/cacheflush.h> + +#define DRIVER_NAME "pl08xdmac" + +/** + * struct vendor_data - vendor-specific config parameters + * for PL08x derivates + * @name: the name of this specific variant + * @channels: the number of channels available in this variant + * @dualmaster: whether this version supports dual AHB masters + * or not. + */ +struct vendor_data { + char *name; + u8 channels; + bool dualmaster; +}; + +/* + * PL08X private data structures + * An LLI struct - see pl08x TRM + * Note that next uses bit[0] as a bus bit, + * start & end do not - their bus bit info + * is in cctl + */ +struct lli { + dma_addr_t src; + dma_addr_t dst; + dma_addr_t next; + u32 cctl; +}; + +/** + * struct pl08x_driver_data - the local state holder for the PL08x + * @slave: slave engine for this instance + * @memcpy: memcpy engine for this instance + * @base: virtual memory base (remapped) for the PL08x + * @adev: the corresponding AMBA (PrimeCell) bus entry + * @vd: vendor data for this PL08x variant + * @pd: platform data passed in from the platform/machine + * @phy_chans: array of data for the physical channels + * @pool: a pool for the LLI descriptors + * @pool_ctr: counter of LLIs in the pool + * @lock: a spinlock for this struct + */ +struct pl08x_driver_data { + struct dma_device slave; + struct dma_device memcpy; + void __iomem *base; + struct amba_device *adev; + struct vendor_data *vd; + struct pl08x_platform_data *pd; + struct pl08x_phy_chan *phy_chans; + struct dma_pool *pool; + int pool_ctr; + spinlock_t lock; +}; + +/* + * PL08X specific defines + */ + +/* + * Memory boundaries: the manual for PL08x says that the controller + * cannot read past a 1KiB boundary, so these defines are used to + * create transfer LLIs that do not cross such boundaries. + */ +#define PL08X_BOUNDARY_SHIFT (10) /* 1KB 0x400 */ +#define PL08X_BOUNDARY_SIZE (1 << PL08X_BOUNDARY_SHIFT) + +/* Minimum period between work queue runs */ +#define PL08X_WQ_PERIODMIN 20 + +/* Size (bytes) of each LLI buffer allocated for one transfer */ +# define PL08X_LLI_TSFR_SIZE 0x2000 + +/* Maximimum times we call dma_pool_alloc on this pool without freeing */ +#define PL08X_MAX_ALLOCS 0x40 +#define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct lli)) +#define PL08X_ALIGN 8 + +static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan) +{ + return container_of(chan, struct pl08x_dma_chan, chan); +} + +/* + * Physical channel handling + */ + +/* Whether a certain channel is busy or not */ +static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch) +{ + unsigned int val; + + val = readl(ch->base + PL080_CH_CONFIG); + return val & PL080_CONFIG_ACTIVE; +} + +/* + * Set the initial DMA register values i.e. those for the first LLI + * The next lli pointer and the configuration interrupt bit have + * been set when the LLIs were constructed + */ +static void pl08x_set_cregs(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *ch) +{ + /* Wait for channel inactive */ + while (pl08x_phy_channel_busy(ch)) + ; + + dev_vdbg(&pl08x->adev->dev, + "WRITE channel %d: csrc=%08x, cdst=%08x, " + "cctl=%08x, clli=%08x, ccfg=%08x\n", + ch->id, + ch->csrc, + ch->cdst, + ch->cctl, + ch->clli, + ch->ccfg); + + writel(ch->csrc, ch->base + PL080_CH_SRC_ADDR); + writel(ch->cdst, ch->base + PL080_CH_DST_ADDR); + writel(ch->clli, ch->base + PL080_CH_LLI); + writel(ch->cctl, ch->base + PL080_CH_CONTROL); + writel(ch->ccfg, ch->base + PL080_CH_CONFIG); +} + +static inline void pl08x_config_phychan_for_txd(struct pl08x_dma_chan *plchan) +{ + struct pl08x_channel_data *cd = plchan->cd; + struct pl08x_phy_chan *phychan = plchan->phychan; + struct pl08x_txd *txd = plchan->at; + + /* Copy the basic control register calculated at transfer config */ + phychan->csrc = txd->csrc; + phychan->cdst = txd->cdst; + phychan->clli = txd->clli; + phychan->cctl = txd->cctl; + + /* Assign the signal to the proper control registers */ + phychan->ccfg = cd->ccfg; + phychan->ccfg &= ~PL080_CONFIG_SRC_SEL_MASK; + phychan->ccfg &= ~PL080_CONFIG_DST_SEL_MASK; + /* If it wasn't set from AMBA, ignore it */ + if (txd->direction == DMA_TO_DEVICE) + /* Select signal as destination */ + phychan->ccfg |= + (phychan->signal << PL080_CONFIG_DST_SEL_SHIFT); + else if (txd->direction == DMA_FROM_DEVICE) + /* Select signal as source */ + phychan->ccfg |= + (phychan->signal << PL080_CONFIG_SRC_SEL_SHIFT); + /* Always enable error interrupts */ + phychan->ccfg |= PL080_CONFIG_ERR_IRQ_MASK; + /* Always enable terminal interrupts */ + phychan->ccfg |= PL080_CONFIG_TC_IRQ_MASK; +} + +/* + * Enable the DMA channel + * Assumes all other configuration bits have been set + * as desired before this code is called + */ +static void pl08x_enable_phy_chan(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *ch) +{ + u32 val; + + /* + * Do not access config register until channel shows as disabled + */ + while (readl(pl08x->base + PL080_EN_CHAN) & (1 << ch->id)) + ; + + /* + * Do not access config register until channel shows as inactive + */ + val = readl(ch->base + PL080_CH_CONFIG); + while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE)) + val = readl(ch->base + PL080_CH_CONFIG); + + writel(val | PL080_CONFIG_ENABLE, ch->base + PL080_CH_CONFIG); +} + +/* + * Overall DMAC remains enabled always. + * + * Disabling individual channels could lose data. + * + * Disable the peripheral DMA after disabling the DMAC + * in order to allow the DMAC FIFO to drain, and + * hence allow the channel to show inactive + * + */ +static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch) +{ + u32 val; + + /* Set the HALT bit and wait for the FIFO to drain */ + val = readl(ch->base + PL080_CH_CONFIG); + val |= PL080_CONFIG_HALT; + writel(val, ch->base + PL080_CH_CONFIG); + + /* Wait for channel inactive */ + while (pl08x_phy_channel_busy(ch)) + ; +} + +static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch) +{ + u32 val; + + /* Clear the HALT bit */ + val = readl(ch->base + PL080_CH_CONFIG); + val &= ~PL080_CONFIG_HALT; + writel(val, ch->base + PL080_CH_CONFIG); +} + + +/* Stops the channel */ +static void pl08x_stop_phy_chan(struct pl08x_phy_chan *ch) +{ + u32 val; + + pl08x_pause_phy_chan(ch); + + /* Disable channel */ + val = readl(ch->base + PL080_CH_CONFIG); + val &= ~PL080_CONFIG_ENABLE; + val &= ~PL080_CONFIG_ERR_IRQ_MASK; + val &= ~PL080_CONFIG_TC_IRQ_MASK; + writel(val, ch->base + PL080_CH_CONFIG); +} + +static inline u32 get_bytes_in_cctl(u32 cctl) +{ + /* The source width defines the number of bytes */ + u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK; + + switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) { + case PL080_WIDTH_8BIT: + break; + case PL080_WIDTH_16BIT: + bytes *= 2; + break; + case PL080_WIDTH_32BIT: + bytes *= 4; + break; + } + return bytes; +} + +/* The channel should be paused when calling this */ +static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan) +{ + struct pl08x_phy_chan *ch; + struct pl08x_txd *txdi = NULL; + struct pl08x_txd *txd; + unsigned long flags; + u32 bytes = 0; + + spin_lock_irqsave(&plchan->lock, flags); + + ch = plchan->phychan; + txd = plchan->at; + + /* + * Next follow the LLIs to get the number of pending bytes in the + * currently active transaction. + */ + if (ch && txd) { + struct lli *llis_va = txd->llis_va; + struct lli *llis_bus = (struct lli *) txd->llis_bus; + u32 clli = readl(ch->base + PL080_CH_LLI); + + /* First get the bytes in the current active LLI */ + bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL)); + + if (clli) { + int i = 0; + + /* Forward to the LLI pointed to by clli */ + while ((clli != (u32) &(llis_bus[i])) && + (i < MAX_NUM_TSFR_LLIS)) + i++; + + while (clli) { + bytes += get_bytes_in_cctl(llis_va[i].cctl); + /* + * A clli of 0x00000000 will terminate the + * LLI list + */ + clli = llis_va[i].next; + i++; + } + } + } + + /* Sum up all queued transactions */ + if (!list_empty(&plchan->desc_list)) { + list_for_each_entry(txdi, &plchan->desc_list, node) { + bytes += txdi->len; + } + + } + + spin_unlock_irqrestore(&plchan->lock, flags); + + return bytes; +} + +/* + * Allocate a physical channel for a virtual channel + */ +static struct pl08x_phy_chan * +pl08x_get_phy_channel(struct pl08x_driver_data *pl08x, + struct pl08x_dma_chan *virt_chan) +{ + struct pl08x_phy_chan *ch = NULL; + unsigned long flags; + int i; + + /* + * Try to locate a physical channel to be used for + * this transfer. If all are taken return NULL and + * the requester will have to cope by using some fallback + * PIO mode or retrying later. + */ + for (i = 0; i < pl08x->vd->channels; i++) { + ch = &pl08x->phy_chans[i]; + + spin_lock_irqsave(&ch->lock, flags); + + if (!ch->serving) { + ch->serving = virt_chan; + ch->signal = -1; + spin_unlock_irqrestore(&ch->lock, flags); + break; + } + + spin_unlock_irqrestore(&ch->lock, flags); + } + + if (i == pl08x->vd->channels) { + /* No physical channel available, cope with it */ + return NULL; + } + + return ch; +} + +static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *ch) +{ + unsigned long flags; + + /* Stop the channel and clear its interrupts */ + pl08x_stop_phy_chan(ch); + writel((1 << ch->id), pl08x->base + PL080_ERR_CLEAR); + writel((1 << ch->id), pl08x->base + PL080_TC_CLEAR); + + /* Mark it as free */ + spin_lock_irqsave(&ch->lock, flags); + ch->serving = NULL; + spin_unlock_irqrestore(&ch->lock, flags); +} + +/* + * LLI handling + */ + +static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded) +{ + switch (coded) { + case PL080_WIDTH_8BIT: + return 1; + case PL080_WIDTH_16BIT: + return 2; + case PL080_WIDTH_32BIT: + return 4; + default: + break; + } + BUG(); + return 0; +} + +static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, + u32 tsize) +{ + u32 retbits = cctl; + + /* Remove all src, dst and transfersize bits */ + retbits &= ~PL080_CONTROL_DWIDTH_MASK; + retbits &= ~PL080_CONTROL_SWIDTH_MASK; + retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK; + + /* Then set the bits according to the parameters */ + switch (srcwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + switch (dstwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT; + return retbits; +} + +/* + * Autoselect a master bus to use for the transfer + * this prefers the destination bus if both available + * if fixed address on one bus the other will be chosen + */ +void pl08x_choose_master_bus(struct pl08x_bus_data *src_bus, + struct pl08x_bus_data *dst_bus, struct pl08x_bus_data **mbus, + struct pl08x_bus_data **sbus, u32 cctl) +{ + if (!(cctl & PL080_CONTROL_DST_INCR)) { + *mbus = src_bus; + *sbus = dst_bus; + } else if (!(cctl & PL080_CONTROL_SRC_INCR)) { + *mbus = dst_bus; + *sbus = src_bus; + } else { + if (dst_bus->buswidth == 4) { + *mbus = dst_bus; + *sbus = src_bus; + } else if (src_bus->buswidth == 4) { + *mbus = src_bus; + *sbus = dst_bus; + } else if (dst_bus->buswidth == 2) { + *mbus = dst_bus; + *sbus = src_bus; + } else if (src_bus->buswidth == 2) { + *mbus = src_bus; + *sbus = dst_bus; + } else { + /* src_bus->buswidth == 1 */ + *mbus = dst_bus; + *sbus = src_bus; + } + } +} + +/* + * Fills in one LLI for a certain transfer descriptor + * and advance the counter + */ +int pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x, + struct pl08x_txd *txd, int num_llis, int len, + u32 cctl, u32 *remainder) +{ + struct lli *llis_va = txd->llis_va; + struct lli *llis_bus = (struct lli *) txd->llis_bus; + + BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS); + + llis_va[num_llis].cctl = cctl; + llis_va[num_llis].src = txd->srcbus.addr; + llis_va[num_llis].dst = txd->dstbus.addr; + + /* + * On versions with dual masters, you can optionally AND on + * PL080_LLI_LM_AHB2 to the LLI to tell the hardware to read + * in new LLIs with that controller, but we always try to + * choose AHB1 to point into memory. The idea is to have AHB2 + * fixed on the peripheral and AHB1 messing around in the + * memory. So we don't manipulate this bit currently. + */ + + llis_va[num_llis].next = + (dma_addr_t)((u32) &(llis_bus[num_llis + 1])); + + if (cctl & PL080_CONTROL_SRC_INCR) + txd->srcbus.addr += len; + if (cctl & PL080_CONTROL_DST_INCR) + txd->dstbus.addr += len; + + *remainder -= len; + + return num_llis + 1; +} + +/* + * Return number of bytes to fill to boundary, or len + */ +static inline u32 pl08x_pre_boundary(u32 addr, u32 len) +{ + u32 boundary; + + boundary = ((addr >> PL08X_BOUNDARY_SHIFT) + 1) + << PL08X_BOUNDARY_SHIFT; + + if (boundary < addr + len) + return boundary - addr; + else + return len; +} + +/* + * This fills in the table of LLIs for the transfer descriptor + * Note that we assume we never have to change the burst sizes + * Return 0 for error + */ +static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, + struct pl08x_txd *txd) +{ + struct pl08x_channel_data *cd = txd->cd; + struct pl08x_bus_data *mbus, *sbus; + u32 remainder; + int num_llis = 0; + u32 cctl; + int max_bytes_per_lli; + int total_bytes = 0; + struct lli *llis_va; + struct lli *llis_bus; + + if (!txd) { + dev_err(&pl08x->adev->dev, "%s no descriptor\n", __func__); + return 0; + } + + txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, + &txd->llis_bus); + if (!txd->llis_va) { + dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__); + return 0; + } + + pl08x->pool_ctr++; + + /* + * Initialize bus values for this transfer + * from the passed optimal values + */ + if (!cd) { + dev_err(&pl08x->adev->dev, "%s no channel data\n", __func__); + return 0; + } + + /* Get the default CCTL from the platform data */ + cctl = cd->cctl; + + /* + * On the PL080 we have two bus masters and we + * should select one for source and one for + * destination. We try to use AHB2 for the + * bus which does not increment (typically the + * peripheral) else we just choose something. + */ + cctl &= ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2); + if (pl08x->vd->dualmaster) { + if (cctl & PL080_CONTROL_SRC_INCR) + /* Source increments, use AHB2 for destination */ + cctl |= PL080_CONTROL_DST_AHB2; + else if (cctl & PL080_CONTROL_DST_INCR) + /* Destination increments, use AHB2 for source */ + cctl |= PL080_CONTROL_SRC_AHB2; + else + /* Just pick something, source AHB1 dest AHB2 */ + cctl |= PL080_CONTROL_DST_AHB2; + } + + /* Find maximum width of the source bus */ + txd->srcbus.maxwidth = + pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >> + PL080_CONTROL_SWIDTH_SHIFT); + + /* Find maximum width of the destination bus */ + txd->dstbus.maxwidth = + pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >> + PL080_CONTROL_DWIDTH_SHIFT); + + /* Set up the bus widths to the maximum */ + txd->srcbus.buswidth = txd->srcbus.maxwidth; + txd->dstbus.buswidth = txd->dstbus.maxwidth; + dev_vdbg(&pl08x->adev->dev, + "%s source bus is %d bytes wide, dest bus is %d bytes wide\n", + __func__, txd->srcbus.buswidth, txd->dstbus.buswidth); + + + /* + * Bytes transferred == tsize * MIN(buswidths), not max(buswidths) + */ + max_bytes_per_lli = min(txd->srcbus.buswidth, txd->dstbus.buswidth) * + PL080_CONTROL_TRANSFER_SIZE_MASK; + dev_vdbg(&pl08x->adev->dev, + "%s max bytes per lli = %d\n", + __func__, max_bytes_per_lli); + + /* We need to count this down to zero */ + remainder = txd->len; + dev_vdbg(&pl08x->adev->dev, + "%s remainder = %d\n", + __func__, remainder); + + /* + * Choose bus to align to + * - prefers destination bus if both available + * - if fixed address on one bus chooses other + * - modifies cctl to choose an apropriate master + */ + pl08x_choose_master_bus(&txd->srcbus, &txd->dstbus, + &mbus, &sbus, cctl); + + + /* + * The lowest bit of the LLI register + * is also used to indicate which master to + * use for reading the LLIs. + */ + + if (txd->len < mbus->buswidth) { + /* + * Less than a bus width available + * - send as single bytes + */ + while (remainder) { + dev_vdbg(&pl08x->adev->dev, + "%s single byte LLIs for a transfer of " + "less than a bus width (remain %08x)\n", + __func__, remainder); + cctl = pl08x_cctl_bits(cctl, 1, 1, 1); + num_llis = + pl08x_fill_lli_for_desc(pl08x, txd, num_llis, 1, + cctl, &remainder); + total_bytes++; + } + } else { + /* + * Make one byte LLIs until master bus is aligned + * - slave will then be aligned also + */ + while ((mbus->addr) % (mbus->buswidth)) { + dev_vdbg(&pl08x->adev->dev, + "%s adjustment lli for less than bus width " + "(remain %08x)\n", + __func__, remainder); + cctl = pl08x_cctl_bits(cctl, 1, 1, 1); + num_llis = pl08x_fill_lli_for_desc + (pl08x, txd, num_llis, 1, cctl, &remainder); + total_bytes++; + } + + /* + * Master now aligned + * - if slave is not then we must set its width down + */ + if (sbus->addr % sbus->buswidth) { + dev_dbg(&pl08x->adev->dev, + "%s set down bus width to one byte\n", + __func__); + + sbus->buswidth = 1; + } + + /* + * Make largest possible LLIs until less than one bus + * width left + */ + while (remainder > (mbus->buswidth - 1)) { + int lli_len, target_len; + int tsize; + int odd_bytes; + + /* + * If enough left try to send max possible, + * otherwise try to send the remainder + */ + target_len = remainder; + if (remainder > max_bytes_per_lli) + target_len = max_bytes_per_lli; + + /* + * Set bus lengths for incrementing busses + * to number of bytes which fill to next memory + * boundary + */ + if (cctl & PL080_CONTROL_SRC_INCR) + txd->srcbus.fill_bytes = + pl08x_pre_boundary( + txd->srcbus.addr, + remainder); + else + txd->srcbus.fill_bytes = + max_bytes_per_lli; + + if (cctl & PL080_CONTROL_DST_INCR) + txd->dstbus.fill_bytes = + pl08x_pre_boundary( + txd->dstbus.addr, + remainder); + else + txd->dstbus.fill_bytes = + max_bytes_per_lli; + + /* + * Find the nearest + */ + lli_len = min(txd->srcbus.fill_bytes, + txd->dstbus.fill_bytes); + + BUG_ON(lli_len > remainder); + + if (lli_len <= 0) { + dev_err(&pl08x->adev->dev, + "%s lli_len is %d, <= 0\n", + __func__, lli_len); + return 0; + } + + if (lli_len == target_len) { + /* + * Can send what we wanted + */ + /* + * Maintain alignment + */ + lli_len = (lli_len/mbus->buswidth) * + mbus->buswidth; + odd_bytes = 0; + } else { + /* + * So now we know how many bytes to transfer + * to get to the nearest boundary + * The next lli will past the boundary + * - however we may be working to a boundary + * on the slave bus + * We need to ensure the master stays aligned + */ + odd_bytes = lli_len % mbus->buswidth; + /* + * - and that we are working in multiples + * of the bus widths + */ + lli_len -= odd_bytes; + + } + + if (lli_len) { + /* + * Check against minimum bus alignment: + * Calculate actual transfer size in relation + * to bus width an get a maximum remainder of + * the smallest bus width - 1 + */ + /* FIXME: use round_down()? */ + tsize = lli_len / min(mbus->buswidth, + sbus->buswidth); + lli_len = tsize * min(mbus->buswidth, + sbus->buswidth); + + if (target_len != lli_len) { + dev_vdbg(&pl08x->adev->dev, + "%s can't send what we want. Desired %08x, lli of %08x bytes in txd of %08x\n", + __func__, target_len, lli_len, txd->len); + } + + cctl = pl08x_cctl_bits(cctl, + txd->srcbus.buswidth, + txd->dstbus.buswidth, + tsize); + + dev_vdbg(&pl08x->adev->dev, + "%s fill lli with single lli chunk of size %08x (remainder %08x)\n", + __func__, lli_len, remainder); + num_llis = pl08x_fill_lli_for_desc(pl08x, txd, + num_llis, lli_len, cctl, + &remainder); + total_bytes += lli_len; + } + + + if (odd_bytes) { + /* + * Creep past the boundary, + * maintaining master alignment + */ + int j; + for (j = 0; (j < mbus->buswidth) + && (remainder); j++) { + cctl = pl08x_cctl_bits(cctl, 1, 1, 1); + dev_vdbg(&pl08x->adev->dev, + "%s align with boundardy, single byte (remain %08x)\n", + __func__, remainder); + num_llis = + pl08x_fill_lli_for_desc(pl08x, + txd, num_llis, 1, + cctl, &remainder); + total_bytes++; + } + } + } + + /* + * Send any odd bytes + */ + if (remainder < 0) { + dev_err(&pl08x->adev->dev, "%s remainder not fitted 0x%08x bytes\n", + __func__, remainder); + return 0; + } + + while (remainder) { + cctl = pl08x_cctl_bits(cctl, 1, 1, 1); + dev_vdbg(&pl08x->adev->dev, + "%s align with boundardy, single odd byte (remain %d)\n", + __func__, remainder); + num_llis = pl08x_fill_lli_for_desc(pl08x, txd, num_llis, + 1, cctl, &remainder); + total_bytes++; + } + } + if (total_bytes != txd->len) { + dev_err(&pl08x->adev->dev, + "%s size of encoded lli:s don't match total txd, transferred 0x%08x from size 0x%08x\n", + __func__, total_bytes, txd->len); + return 0; + } + + if (num_llis >= MAX_NUM_TSFR_LLIS) { + dev_err(&pl08x->adev->dev, + "%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n", + __func__, (u32) MAX_NUM_TSFR_LLIS); + return 0; + } + /* + * Decide whether this is a loop or a terminated transfer + */ + llis_va = txd->llis_va; + llis_bus = (struct lli *) txd->llis_bus; + + if (cd->circular_buffer) { + /* + * Loop the circular buffer so that the next element + * points back to the beginning of the LLI. + */ + llis_va[num_llis - 1].next = + (dma_addr_t)((unsigned int)&(llis_bus[0])); + } else { + /* + * On non-circular buffers, the final LLI terminates + * the LLI. + */ + llis_va[num_llis - 1].next = 0; + /* + * The final LLI element shall also fire an interrupt + */ + llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN; + } + + /* Now store the channel register values */ + txd->csrc = llis_va[0].src; + txd->cdst = llis_va[0].dst; + if (num_llis > 1) + txd->clli = llis_va[0].next; + else + txd->clli = 0; + + txd->cctl = llis_va[0].cctl; + /* ccfg will be set at physical channel allocation time */ + +#ifdef VERBOSE_DEBUG + { + int i; + + for (i = 0; i < num_llis; i++) { + dev_vdbg(&pl08x->adev->dev, + "lli %d @%p: csrc=%08x, cdst=%08x, cctl=%08x, clli=%08x\n", + i, + &llis_va[i], + llis_va[i].src, + llis_va[i].dst, + llis_va[i].cctl, + llis_va[i].next + ); + } + } +#endif + + return num_llis; +} + +/* You should call this with the struct pl08x lock held */ +static void pl08x_free_txd(struct pl08x_driver_data *pl08x, + struct pl08x_txd *txd) +{ + if (!txd) + dev_err(&pl08x->adev->dev, + "%s no descriptor to free\n", + __func__); + + /* Free the LLI */ + dma_pool_free(pl08x->pool, txd->llis_va, + txd->llis_bus); + + pl08x->pool_ctr--; + + kfree(txd); +} + +static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x, + struct pl08x_dma_chan *plchan) +{ + struct pl08x_txd *txdi = NULL; + struct pl08x_txd *next; + + if (!list_empty(&plchan->desc_list)) { + list_for_each_entry_safe(txdi, + next, &plchan->desc_list, node) { + list_del(&txdi->node); + pl08x_free_txd(pl08x, txdi); + } + + } +} + +/* + * The DMA ENGINE API + */ +static int pl08x_alloc_chan_resources(struct dma_chan *chan) +{ + return 0; +} + +static void pl08x_free_chan_resources(struct dma_chan *chan) +{ +} + +/* + * This should be called with the channel plchan->lock held + */ +static int prep_phy_channel(struct pl08x_dma_chan *plchan, + struct pl08x_txd *txd) +{ + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_phy_chan *ch; + int ret; + + /* Check if we already have a channel */ + if (plchan->phychan) + return 0; + + ch = pl08x_get_phy_channel(pl08x, plchan); + if (!ch) { + /* No physical channel available, cope with it */ + dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name); + return -EBUSY; + } + + /* + * OK we have a physical channel: for memcpy() this is all we + * need, but for slaves the physical signals may be muxed! + * Can the platform allow us to use this channel? + */ + if (plchan->slave && + ch->signal < 0 && + pl08x->pd->get_signal) { + ret = pl08x->pd->get_signal(plchan); + if (ret < 0) { + dev_dbg(&pl08x->adev->dev, + "unable to use physical channel %d for transfer on %s due to platform restrictions\n", + ch->id, plchan->name); + /* Release physical channel & return */ + pl08x_put_phy_channel(pl08x, ch); + return -EBUSY; + } + ch->signal = ret; + } + + dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n", + ch->id, + ch->signal, + plchan->name); + + plchan->phychan = ch; + + return 0; +} + +static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan); + + atomic_inc(&plchan->last_issued); + tx->cookie = atomic_read(&plchan->last_issued); + /* This unlock follows the lock in the prep() function */ + spin_unlock_irqrestore(&plchan->lock, plchan->lockflags); + + return tx->cookie; +} + +static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt( + struct dma_chan *chan, unsigned long flags) +{ + struct dma_async_tx_descriptor *retval = NULL; + + return retval; +} + +/* + * Code accessing dma_async_is_complete() in a tight loop + * may give problems - could schedule where indicated. + * If slaves are relying on interrupts to signal completion this + * function must not be called with interrupts disabled + */ +static enum dma_status +pl08x_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + u32 bytesleft = 0; + + last_used = atomic_read(&plchan->last_issued); + last_complete = plchan->lc; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) { + dma_set_tx_state(txstate, last_complete, last_used, 0); + return ret; + } + + /* + * schedule(); could be inserted here + */ + + /* + * This cookie not complete yet + */ + last_used = atomic_read(&plchan->last_issued); + last_complete = plchan->lc; + + /* Get number of bytes left in the active transactions and queue */ + bytesleft = pl08x_getbytes_chan(plchan); + + dma_set_tx_state(txstate, last_complete, last_used, + bytesleft); + + if (plchan->state == PL08X_CHAN_PAUSED) + return DMA_PAUSED; + + /* Whether waiting or running, we're in progress */ + return DMA_IN_PROGRESS; +} + +/* PrimeCell DMA extension */ +struct burst_table { + int burstwords; + u32 reg; +}; + +static const struct burst_table burst_sizes[] = { + { + .burstwords = 256, + .reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT), + }, + { + .burstwords = 128, + .reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT), + }, + { + .burstwords = 64, + .reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT), + }, + { + .burstwords = 32, + .reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT), + }, + { + .burstwords = 16, + .reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT), + }, + { + .burstwords = 8, + .reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT), + }, + { + .burstwords = 4, + .reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT), + }, + { + .burstwords = 1, + .reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT), + }, +}; + +static void dma_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_channel_data *cd = plchan->cd; + enum dma_slave_buswidth addr_width; + u32 maxburst; + u32 cctl = 0; + /* Mask out all except src and dst channel */ + u32 ccfg = cd->ccfg & 0x000003DEU; + int i = 0; + + /* Transfer direction */ + plchan->runtime_direction = config->direction; + if (config->direction == DMA_TO_DEVICE) { + plchan->runtime_addr = config->dst_addr; + cctl |= PL080_CONTROL_SRC_INCR; + ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; + addr_width = config->dst_addr_width; + maxburst = config->dst_maxburst; + } else if (config->direction == DMA_FROM_DEVICE) { + plchan->runtime_addr = config->src_addr; + cctl |= PL080_CONTROL_DST_INCR; + ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; + addr_width = config->src_addr_width; + maxburst = config->src_maxburst; + } else { + dev_err(&pl08x->adev->dev, + "bad runtime_config: alien transfer direction\n"); + return; + } + + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) | + (PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT); + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) | + (PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT); + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) | + (PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT); + break; + default: + dev_err(&pl08x->adev->dev, + "bad runtime_config: alien address width\n"); + return; + } + + /* + * Now decide on a maxburst: + * If this channel will only request single transfers, set + * this down to ONE element. + */ + if (plchan->cd->single) { + cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | + (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT); + } else { + while (i < ARRAY_SIZE(burst_sizes)) { + if (burst_sizes[i].burstwords <= maxburst) + break; + i++; + } + cctl |= burst_sizes[i].reg; + } + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + cctl &= ~PL080_CONTROL_PROT_MASK; + cctl |= PL080_CONTROL_PROT_SYS; + + /* Modify the default channel data to fit PrimeCell request */ + cd->cctl = cctl; + cd->ccfg = ccfg; + + dev_dbg(&pl08x->adev->dev, + "configured channel %s (%s) for %s, data width %d, " + "maxburst %d words, LE, CCTL=%08x, CCFG=%08x\n", + dma_chan_name(chan), plchan->name, + (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", + addr_width, + maxburst, + cctl, ccfg); +} + +/* + * Slave transactions callback to the slave device to allow + * synchronization of slave DMA signals with the DMAC enable + */ +static void pl08x_issue_pending(struct dma_chan *chan) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + unsigned long flags; + + spin_lock_irqsave(&plchan->lock, flags); + /* Something is already active */ + if (plchan->at) { + spin_unlock_irqrestore(&plchan->lock, flags); + return; + } + + /* Didn't get a physical channel so waiting for it ... */ + if (plchan->state == PL08X_CHAN_WAITING) + return; + + /* Take the first element in the queue and execute it */ + if (!list_empty(&plchan->desc_list)) { + struct pl08x_txd *next; + + next = list_first_entry(&plchan->desc_list, + struct pl08x_txd, + node); + list_del(&next->node); + plchan->at = next; + plchan->state = PL08X_CHAN_RUNNING; + + /* Configure the physical channel for the active txd */ + pl08x_config_phychan_for_txd(plchan); + pl08x_set_cregs(pl08x, plchan->phychan); + pl08x_enable_phy_chan(pl08x, plchan->phychan); + } + + spin_unlock_irqrestore(&plchan->lock, flags); +} + +static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, + struct pl08x_txd *txd) +{ + int num_llis; + struct pl08x_driver_data *pl08x = plchan->host; + int ret; + + num_llis = pl08x_fill_llis_for_desc(pl08x, txd); + + if (!num_llis) + return -EINVAL; + + spin_lock_irqsave(&plchan->lock, plchan->lockflags); + + /* + * If this device is not using a circular buffer then + * queue this new descriptor for transfer. + * The descriptor for a circular buffer continues + * to be used until the channel is freed. + */ + if (txd->cd->circular_buffer) + dev_err(&pl08x->adev->dev, + "%s attempting to queue a circular buffer\n", + __func__); + else + list_add_tail(&txd->node, + &plchan->desc_list); + + /* + * See if we already have a physical channel allocated, + * else this is the time to try to get one. + */ + ret = prep_phy_channel(plchan, txd); + if (ret) { + /* + * No physical channel available, we will + * stack up the memcpy channels until there is a channel + * available to handle it whereas slave transfers may + * have been denied due to platform channel muxing restrictions + * and since there is no guarantee that this will ever be + * resolved, and since the signal must be aquired AFTER + * aquiring the physical channel, we will let them be NACK:ed + * with -EBUSY here. The drivers can alway retry the prep() + * call if they are eager on doing this using DMA. + */ + if (plchan->slave) { + pl08x_free_txd_list(pl08x, plchan); + spin_unlock_irqrestore(&plchan->lock, plchan->lockflags); + return -EBUSY; + } + /* Do this memcpy whenever there is a channel ready */ + plchan->state = PL08X_CHAN_WAITING; + plchan->waiting = txd; + } else + /* + * Else we're all set, paused and ready to roll, + * status will switch to PL08X_CHAN_RUNNING when + * we call issue_pending(). If there is something + * running on the channel already we don't change + * its state. + */ + if (plchan->state == PL08X_CHAN_IDLE) + plchan->state = PL08X_CHAN_PAUSED; + + /* + * Notice that we leave plchan->lock locked on purpose: + * it will be unlocked in the subsequent tx_submit() + * call. This is a consequence of the current API. + */ + + return 0; +} + +/* + * Initialize a descriptor to be used by memcpy submit + */ +static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + int ret; + + txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT); + if (!txd) { + dev_err(&pl08x->adev->dev, + "%s no memory for descriptor\n", __func__); + return NULL; + } + + dma_async_tx_descriptor_init(&txd->tx, chan); + txd->direction = DMA_NONE; + txd->srcbus.addr = src; + txd->dstbus.addr = dest; + + /* Set platform data for m2m */ + txd->cd = &pl08x->pd->memcpy_channel; + /* Both to be incremented or the code will break */ + txd->cd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; + txd->tx.tx_submit = pl08x_tx_submit; + txd->tx.callback = NULL; + txd->tx.callback_param = NULL; + txd->len = len; + + INIT_LIST_HEAD(&txd->node); + ret = pl08x_prep_channel_resources(plchan, txd); + if (ret) + return NULL; + /* + * NB: the channel lock is held at this point so tx_submit() + * must be called in direct succession. + */ + + return &txd->tx; +} + +struct dma_async_tx_descriptor *pl08x_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + int ret; + + /* + * Current implementation ASSUMES only one sg + */ + if (sg_len != 1) { + dev_err(&pl08x->adev->dev, "%s prepared too long sglist\n", + __func__); + BUG(); + } + + dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n", + __func__, sgl->length, plchan->name); + + txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT); + if (!txd) { + dev_err(&pl08x->adev->dev, "%s no txd\n", __func__); + return NULL; + } + + dma_async_tx_descriptor_init(&txd->tx, chan); + + if (direction != plchan->runtime_direction) + dev_err(&pl08x->adev->dev, "%s DMA setup does not match " + "the direction configured for the PrimeCell\n", + __func__); + + /* + * Set up addresses, the PrimeCell configured address + * will take precedence since this may configure the + * channel target address dynamically at runtime. + */ + txd->direction = direction; + if (direction == DMA_TO_DEVICE) { + txd->srcbus.addr = sgl->dma_address; + if (plchan->runtime_addr) + txd->dstbus.addr = plchan->runtime_addr; + else + txd->dstbus.addr = plchan->cd->addr; + } else if (direction == DMA_FROM_DEVICE) { + if (plchan->runtime_addr) + txd->srcbus.addr = plchan->runtime_addr; + else + txd->srcbus.addr = plchan->cd->addr; + txd->dstbus.addr = sgl->dma_address; + } else { + dev_err(&pl08x->adev->dev, + "%s direction unsupported\n", __func__); + return NULL; + } + txd->cd = plchan->cd; + txd->tx.tx_submit = pl08x_tx_submit; + txd->tx.callback = NULL; + txd->tx.callback_param = NULL; + txd->len = sgl->length; + INIT_LIST_HEAD(&txd->node); + + ret = pl08x_prep_channel_resources(plchan, txd); + if (ret) + return NULL; + /* + * NB: the channel lock is held at this point so tx_submit() + * must be called in direct succession. + */ + + return &txd->tx; +} + +static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + unsigned long flags; + int ret = 0; + + /* Controls applicable to inactive channels */ + if (cmd == DMA_SLAVE_CONFIG) { + dma_set_runtime_config(chan, + (struct dma_slave_config *) + arg); + return 0; + } + + /* + * Anything succeeds on channels with no physical allocation and + * no queued transfers. + */ + spin_lock_irqsave(&plchan->lock, flags); + if (!plchan->phychan && !plchan->at) { + spin_unlock_irqrestore(&plchan->lock, flags); + return 0; + } + + switch (cmd) { + case DMA_TERMINATE_ALL: + plchan->state = PL08X_CHAN_IDLE; + + if (plchan->phychan) { + pl08x_stop_phy_chan(plchan->phychan); + + /* + * Mark physical channel as free and free any slave + * signal + */ + if ((plchan->phychan->signal >= 0) && + pl08x->pd->put_signal) { + pl08x->pd->put_signal(plchan); + plchan->phychan->signal = -1; + } + pl08x_put_phy_channel(pl08x, plchan->phychan); + plchan->phychan = NULL; + } + /* Stop any pending tasklet */ + tasklet_disable(&plchan->tasklet); + /* Dequeue jobs and free LLIs */ + if (plchan->at) { + pl08x_free_txd(pl08x, plchan->at); + plchan->at = NULL; + } + /* Dequeue jobs not yet fired as well */ + pl08x_free_txd_list(pl08x, plchan); + break; + case DMA_PAUSE: + pl08x_pause_phy_chan(plchan->phychan); + plchan->state = PL08X_CHAN_PAUSED; + break; + case DMA_RESUME: + pl08x_resume_phy_chan(plchan->phychan); + plchan->state = PL08X_CHAN_RUNNING; + break; + default: + /* Unknown command */ + ret = -ENXIO; + break; + } + + spin_unlock_irqrestore(&plchan->lock, flags); + + return ret; +} + +bool pl08x_filter_id(struct dma_chan *chan, void *chan_id) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + char *name = chan_id; + + /* Check that the channel is not taken! */ + if (!strcmp(plchan->name, name)) + return true; + + return false; +} + +/* + * Just check that the device is there and active + * TODO: turn this bit on/off depending on the number of + * physical channels actually used, if it is zero... well + * shut it off. That will save some power. Cut the clock + * at the same time. + */ +static void pl08x_ensure_on(struct pl08x_driver_data *pl08x) +{ + u32 val; + + val = readl(pl08x->base + PL080_CONFIG); + val &= ~(PL080_CONFIG_M2_BE | PL080_CONFIG_M1_BE | PL080_CONFIG_ENABLE); + /* We implictly clear bit 1 and that means little-endian mode */ + val |= PL080_CONFIG_ENABLE; + writel(val, pl08x->base + PL080_CONFIG); +} + +static void pl08x_tasklet(unsigned long data) +{ + struct pl08x_dma_chan *plchan = (struct pl08x_dma_chan *) data; + struct pl08x_phy_chan *phychan = plchan->phychan; + struct pl08x_driver_data *pl08x = plchan->host; + + if (!plchan) + BUG(); + + spin_lock(&plchan->lock); + + if (plchan->at) { + dma_async_tx_callback callback = + plchan->at->tx.callback; + void *callback_param = + plchan->at->tx.callback_param; + + /* + * Update last completed + */ + plchan->lc = + (plchan->at->tx.cookie); + + /* + * Callback to signal completion + */ + if (callback) + callback(callback_param); + + /* + * Device callbacks should NOT clear + * the current transaction on the channel + * Linus: sometimes they should? + */ + if (!plchan->at) + BUG(); + + /* + * Free the descriptor if it's not for a device + * using a circular buffer + */ + if (!plchan->at->cd->circular_buffer) { + pl08x_free_txd(pl08x, plchan->at); + plchan->at = NULL; + } + /* + * else descriptor for circular + * buffers only freed when + * client has disabled dma + */ + } + /* + * If a new descriptor is queued, set it up + * plchan->at is NULL here + */ + if (!list_empty(&plchan->desc_list)) { + struct pl08x_txd *next; + + next = list_first_entry(&plchan->desc_list, + struct pl08x_txd, + node); + list_del(&next->node); + plchan->at = next; + /* Configure the physical channel for the next txd */ + pl08x_config_phychan_for_txd(plchan); + pl08x_set_cregs(pl08x, plchan->phychan); + pl08x_enable_phy_chan(pl08x, plchan->phychan); + } else { + struct pl08x_dma_chan *waiting = NULL; + + /* + * No more jobs, so free up the physical channel + * Free any allocated signal on slave transfers too + */ + if ((phychan->signal >= 0) && pl08x->pd->put_signal) { + pl08x->pd->put_signal(plchan); + phychan->signal = -1; + } + pl08x_put_phy_channel(pl08x, phychan); + plchan->phychan = NULL; + plchan->state = PL08X_CHAN_IDLE; + + /* + * And NOW before anyone else can grab that free:d + * up physical channel, see if there is some memcpy + * pending that seriously needs to start because of + * being stacked up while we were choking the + * physical channels with data. + */ + list_for_each_entry(waiting, &pl08x->memcpy.channels, + chan.device_node) { + if (waiting->state == PL08X_CHAN_WAITING && + waiting->waiting != NULL) { + int ret; + + /* This should REALLY not fail now */ + ret = prep_phy_channel(waiting, + waiting->waiting); + BUG_ON(ret); + waiting->state = PL08X_CHAN_RUNNING; + waiting->waiting = NULL; + pl08x_issue_pending(&waiting->chan); + break; + } + } + } + + spin_unlock(&plchan->lock); +} + +static irqreturn_t pl08x_irq(int irq, void *dev) +{ + struct pl08x_driver_data *pl08x = dev; + u32 mask = 0; + u32 val; + int i; + + val = readl(pl08x->base + PL080_ERR_STATUS); + if (val) { + /* + * An error interrupt (on one or more channels) + */ + dev_err(&pl08x->adev->dev, + "%s error interrupt, register value 0x%08x\n", + __func__, val); + /* + * Simply clear ALL PL08X error interrupts, + * regardless of channel and cause + * FIXME: should be 0x00000003 on PL081 really. + */ + writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR); + } + val = readl(pl08x->base + PL080_INT_STATUS); + for (i = 0; i < pl08x->vd->channels; i++) { + if ((1 << i) & val) { + /* Locate physical channel */ + struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i]; + struct pl08x_dma_chan *plchan = phychan->serving; + + /* Schedule tasklet on this channel */ + tasklet_schedule(&plchan->tasklet); + + mask |= (1 << i); + } + } + /* + * Clear only the terminal interrupts on channels we processed + */ + writel(mask, pl08x->base + PL080_TC_CLEAR); + + return mask ? IRQ_HANDLED : IRQ_NONE; +} + +/* + * Initialise the DMAC memcpy/slave channels. + * Make a local wrapper to hold required data + */ +static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, + struct dma_device *dmadev, + unsigned int channels, + bool slave) +{ + struct pl08x_dma_chan *chan; + int i; + + INIT_LIST_HEAD(&dmadev->channels); + /* + * Register as many many memcpy as we have physical channels, + * we won't always be able to use all but the code will have + * to cope with that situation. + */ + for (i = 0; i < channels; i++) { + chan = kzalloc(sizeof(struct pl08x_dma_chan), GFP_KERNEL); + if (!chan) { + dev_err(&pl08x->adev->dev, + "%s no memory for channel\n", __func__); + return -ENOMEM; + } + + chan->host = pl08x; + chan->state = PL08X_CHAN_IDLE; + + if (slave) { + chan->slave = true; + chan->name = pl08x->pd->slave_channels[i].bus_id; + chan->cd = &pl08x->pd->slave_channels[i]; + } else { + chan->cd = &pl08x->pd->memcpy_channel; + chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); + if (!chan->name) { + kfree(chan); + return -ENOMEM; + } + } + dev_info(&pl08x->adev->dev, + "initialize virtual channel \"%s\"\n", + chan->name); + + chan->chan.device = dmadev; + atomic_set(&chan->last_issued, 0); + chan->lc = atomic_read(&chan->last_issued); + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->desc_list); + tasklet_init(&chan->tasklet, pl08x_tasklet, + (unsigned long) chan); + + list_add_tail(&chan->chan.device_node, &dmadev->channels); + } + dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n", + i, slave ? "slave" : "memcpy"); + return i; +} + +static void pl08x_free_virtual_channels(struct dma_device *dmadev) +{ + struct pl08x_dma_chan *chan = NULL; + struct pl08x_dma_chan *next; + + list_for_each_entry_safe(chan, + next, &dmadev->channels, chan.device_node) { + list_del(&chan->chan.device_node); + kfree(chan); + } +} + +#ifdef CONFIG_DEBUG_FS +static const char *pl08x_state_str(enum pl08x_dma_chan_state state) +{ + switch (state) { + case PL08X_CHAN_IDLE: + return "idle"; + case PL08X_CHAN_RUNNING: + return "running"; + case PL08X_CHAN_PAUSED: + return "paused"; + case PL08X_CHAN_WAITING: + return "waiting"; + default: + break; + } + return "UNKNOWN STATE"; +} + +static int pl08x_debugfs_show(struct seq_file *s, void *data) +{ + struct pl08x_driver_data *pl08x = s->private; + struct pl08x_dma_chan *chan; + struct pl08x_phy_chan *ch; + unsigned long flags; + int i; + + seq_printf(s, "PL08x physical channels:\n"); + seq_printf(s, "CHANNEL:\tUSER:\n"); + seq_printf(s, "--------\t-----\n"); + for (i = 0; i < pl08x->vd->channels; i++) { + struct pl08x_dma_chan *virt_chan; + + ch = &pl08x->phy_chans[i]; + + spin_lock_irqsave(&ch->lock, flags); + virt_chan = ch->serving; + + seq_printf(s, "%d\t\t%s\n", + ch->id, virt_chan ? virt_chan->name : "(none)"); + + spin_unlock_irqrestore(&ch->lock, flags); + } + + seq_printf(s, "\nPL08x virtual memcpy channels:\n"); + seq_printf(s, "CHANNEL:\tSTATE:\n"); + seq_printf(s, "--------\t------\n"); + list_for_each_entry(chan, &pl08x->memcpy.channels, chan.device_node) { + seq_printf(s, "%s\t\t\%s\n", chan->name, + pl08x_state_str(chan->state)); + } + + seq_printf(s, "\nPL08x virtual slave channels:\n"); + seq_printf(s, "CHANNEL:\tSTATE:\n"); + seq_printf(s, "--------\t------\n"); + list_for_each_entry(chan, &pl08x->slave.channels, chan.device_node) { + seq_printf(s, "%s\t\t\%s\n", chan->name, + pl08x_state_str(chan->state)); + } + + return 0; +} + +static int pl08x_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, pl08x_debugfs_show, inode->i_private); +} + +static const struct file_operations pl08x_debugfs_operations = { + .open = pl08x_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) +{ + /* Expose a simple debugfs interface to view all clocks */ + (void) debugfs_create_file(dev_name(&pl08x->adev->dev), S_IFREG | S_IRUGO, + NULL, pl08x, + &pl08x_debugfs_operations); +} + +#else +static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) +{ +} +#endif + +static int pl08x_probe(struct amba_device *adev, struct amba_id *id) +{ + struct pl08x_driver_data *pl08x; + struct vendor_data *vd = id->data; + int ret = 0; + int i; + + ret = amba_request_regions(adev, NULL); + if (ret) + return ret; + + /* Create the driver state holder */ + pl08x = kzalloc(sizeof(struct pl08x_driver_data), GFP_KERNEL); + if (!pl08x) { + ret = -ENOMEM; + goto out_no_pl08x; + } + + /* Initialize memcpy engine */ + dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); + pl08x->memcpy.dev = &adev->dev; + pl08x->memcpy.device_alloc_chan_resources = pl08x_alloc_chan_resources; + pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources; + pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy; + pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt; + pl08x->memcpy.device_tx_status = pl08x_dma_tx_status; + pl08x->memcpy.device_issue_pending = pl08x_issue_pending; + pl08x->memcpy.device_control = pl08x_control; + + /* Initialize slave engine */ + dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask); + pl08x->slave.dev = &adev->dev; + pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources; + pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources; + pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt; + pl08x->slave.device_tx_status = pl08x_dma_tx_status; + pl08x->slave.device_issue_pending = pl08x_issue_pending; + pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg; + pl08x->slave.device_control = pl08x_control; + + /* Get the platform data */ + pl08x->pd = dev_get_platdata(&adev->dev); + if (!pl08x->pd) { + dev_err(&adev->dev, "no platform data supplied\n"); + goto out_no_platdata; + } + + /* Assign useful pointers to the driver state */ + pl08x->adev = adev; + pl08x->vd = vd; + + /* A DMA memory pool for LLIs, align on 1-byte boundary */ + pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev, + PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0); + if (!pl08x->pool) { + ret = -ENOMEM; + goto out_no_lli_pool; + } + + spin_lock_init(&pl08x->lock); + + pl08x->base = ioremap(adev->res.start, resource_size(&adev->res)); + if (!pl08x->base) { + ret = -ENOMEM; + goto out_no_ioremap; + } + + /* Turn on the PL08x */ + pl08x_ensure_on(pl08x); + + /* + * Attach the interrupt handler + */ + writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR); + writel(0x000000FF, pl08x->base + PL080_TC_CLEAR); + + ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED, + vd->name, pl08x); + if (ret) { + dev_err(&adev->dev, "%s failed to request interrupt %d\n", + __func__, adev->irq[0]); + goto out_no_irq; + } + + /* Initialize physical channels */ + pl08x->phy_chans = kmalloc((vd->channels * sizeof(struct pl08x_phy_chan)), + GFP_KERNEL); + if (!pl08x->phy_chans) { + dev_err(&adev->dev, "%s failed to allocate " + "physical channel holders\n", + __func__); + goto out_no_phychans; + } + + for (i = 0; i < vd->channels; i++) { + struct pl08x_phy_chan *ch = &pl08x->phy_chans[i]; + + ch->id = i; + ch->base = pl08x->base + PL080_Cx_BASE(i); + spin_lock_init(&ch->lock); + ch->serving = NULL; + ch->signal = -1; + dev_info(&adev->dev, + "physical channel %d is %s\n", i, + pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE"); + } + + /* Register as many memcpy channels as there are physical channels */ + ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy, + pl08x->vd->channels, false); + if (ret <= 0) { + dev_warn(&pl08x->adev->dev, + "%s failed to enumerate memcpy channels - %d\n", + __func__, ret); + goto out_no_memcpy; + } + pl08x->memcpy.chancnt = ret; + + /* Register slave channels */ + ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave, + pl08x->pd->num_slave_channels, + true); + if (ret <= 0) { + dev_warn(&pl08x->adev->dev, + "%s failed to enumerate slave channels - %d\n", + __func__, ret); + goto out_no_slave; + } + pl08x->slave.chancnt = ret; + + ret = dma_async_device_register(&pl08x->memcpy); + if (ret) { + dev_warn(&pl08x->adev->dev, + "%s failed to register memcpy as an async device - %d\n", + __func__, ret); + goto out_no_memcpy_reg; + } + + ret = dma_async_device_register(&pl08x->slave); + if (ret) { + dev_warn(&pl08x->adev->dev, + "%s failed to register slave as an async device - %d\n", + __func__, ret); + goto out_no_slave_reg; + } + + amba_set_drvdata(adev, pl08x); + init_pl08x_debugfs(pl08x); + dev_info(&pl08x->adev->dev, "ARM(R) %s DMA block initialized @%08x\n", + vd->name, adev->res.start); + return 0; + +out_no_slave_reg: + dma_async_device_unregister(&pl08x->memcpy); +out_no_memcpy_reg: + pl08x_free_virtual_channels(&pl08x->slave); +out_no_slave: + pl08x_free_virtual_channels(&pl08x->memcpy); +out_no_memcpy: + kfree(pl08x->phy_chans); +out_no_phychans: + free_irq(adev->irq[0], pl08x); +out_no_irq: + iounmap(pl08x->base); +out_no_ioremap: + dma_pool_destroy(pl08x->pool); +out_no_lli_pool: +out_no_platdata: + kfree(pl08x); +out_no_pl08x: + amba_release_regions(adev); + return ret; +} + +/* PL080 has 8 channels and the PL080 have just 2 */ +static struct vendor_data vendor_pl080 = { + .name = "PL080", + .channels = 8, + .dualmaster = true, +}; + +static struct vendor_data vendor_pl081 = { + .name = "PL081", + .channels = 2, + .dualmaster = false, +}; + +static struct amba_id pl08x_ids[] = { + /* PL080 */ + { + .id = 0x00041080, + .mask = 0x000fffff, + .data = &vendor_pl080, + }, + /* PL081 */ + { + .id = 0x00041081, + .mask = 0x000fffff, + .data = &vendor_pl081, + }, + /* Nomadik 8815 PL080 variant */ + { + .id = 0x00280880, + .mask = 0x00ffffff, + .data = &vendor_pl080, + }, + { 0, 0 }, +}; + +static struct amba_driver pl08x_amba_driver = { + .drv.name = DRIVER_NAME, + .id_table = pl08x_ids, + .probe = pl08x_probe, +}; + +static int __init pl08x_init(void) +{ + int retval; + retval = amba_driver_register(&pl08x_amba_driver); + if (retval) + printk(KERN_WARNING DRIVER_NAME + "failed to register as an amba device (%d)\n", + retval); + return retval; +} +subsys_initcall(pl08x_init); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index e5e79ced4f4b..235153cd7ac5 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -690,6 +690,8 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); + BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && + !device->device_prep_dma_sg); BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && !device->device_prep_slave_sg); BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) && diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index cea08bed9cf9..286c3ac6bdcc 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -35,9 +35,10 @@ #include <linux/dmapool.h> #include <linux/of_platform.h> -#include <asm/fsldma.h> #include "fsldma.h" +static const char msg_ld_oom[] = "No free memory for link descriptor\n"; + static void dma_init(struct fsldma_chan *chan) { /* Reset the channel */ @@ -499,7 +500,7 @@ fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags) new = fsl_dma_alloc_descriptor(chan); if (!new) { - dev_err(chan->dev, "No free memory for link descriptor\n"); + dev_err(chan->dev, msg_ld_oom); return NULL; } @@ -536,8 +537,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( /* Allocate the link descriptor from DMA pool */ new = fsl_dma_alloc_descriptor(chan); if (!new) { - dev_err(chan->dev, - "No free memory for link descriptor\n"); + dev_err(chan->dev, msg_ld_oom); goto fail; } #ifdef FSL_DMA_LD_DEBUG @@ -583,223 +583,205 @@ fail: return NULL; } -/** - * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction - * @chan: DMA channel - * @sgl: scatterlist to transfer to/from - * @sg_len: number of entries in @scatterlist - * @direction: DMA direction - * @flags: DMAEngine flags - * - * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the - * DMA_SLAVE API, this gets the device-specific information from the - * chan->private variable. - */ -static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg( - struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, - enum dma_data_direction direction, unsigned long flags) +static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags) { - struct fsldma_chan *chan; struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL; - struct fsl_dma_slave *slave; - size_t copy; - - int i; - struct scatterlist *sg; - size_t sg_used; - size_t hw_used; - struct fsl_dma_hw_addr *hw; - dma_addr_t dma_dst, dma_src; + struct fsldma_chan *chan = to_fsl_chan(dchan); + size_t dst_avail, src_avail; + dma_addr_t dst, src; + size_t len; - if (!dchan) + /* basic sanity checks */ + if (dst_nents == 0 || src_nents == 0) return NULL; - if (!dchan->private) + if (dst_sg == NULL || src_sg == NULL) return NULL; - chan = to_fsl_chan(dchan); - slave = dchan->private; + /* + * TODO: should we check that both scatterlists have the same + * TODO: number of bytes in total? Is that really an error? + */ - if (list_empty(&slave->addresses)) - return NULL; + /* get prepared for the loop */ + dst_avail = sg_dma_len(dst_sg); + src_avail = sg_dma_len(src_sg); - hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry); - hw_used = 0; + /* run until we are out of scatterlist entries */ + while (true) { - /* - * Build the hardware transaction to copy from the scatterlist to - * the hardware, or from the hardware to the scatterlist - * - * If you are copying from the hardware to the scatterlist and it - * takes two hardware entries to fill an entire page, then both - * hardware entries will be coalesced into the same page - * - * If you are copying from the scatterlist to the hardware and a - * single page can fill two hardware entries, then the data will - * be read out of the page into the first hardware entry, and so on - */ - for_each_sg(sgl, sg, sg_len, i) { - sg_used = 0; - - /* Loop until the entire scatterlist entry is used */ - while (sg_used < sg_dma_len(sg)) { - - /* - * If we've used up the current hardware address/length - * pair, we need to load a new one - * - * This is done in a while loop so that descriptors with - * length == 0 will be skipped - */ - while (hw_used >= hw->length) { - - /* - * If the current hardware entry is the last - * entry in the list, we're finished - */ - if (list_is_last(&hw->entry, &slave->addresses)) - goto finished; - - /* Get the next hardware address/length pair */ - hw = list_entry(hw->entry.next, - struct fsl_dma_hw_addr, entry); - hw_used = 0; - } - - /* Allocate the link descriptor from DMA pool */ - new = fsl_dma_alloc_descriptor(chan); - if (!new) { - dev_err(chan->dev, "No free memory for " - "link descriptor\n"); - goto fail; - } + /* create the largest transaction possible */ + len = min_t(size_t, src_avail, dst_avail); + len = min_t(size_t, len, FSL_DMA_BCR_MAX_CNT); + if (len == 0) + goto fetch; + + dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail; + src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail; + + /* allocate and populate the descriptor */ + new = fsl_dma_alloc_descriptor(chan); + if (!new) { + dev_err(chan->dev, msg_ld_oom); + goto fail; + } #ifdef FSL_DMA_LD_DEBUG - dev_dbg(chan->dev, "new link desc alloc %p\n", new); + dev_dbg(chan->dev, "new link desc alloc %p\n", new); #endif - /* - * Calculate the maximum number of bytes to transfer, - * making sure it is less than the DMA controller limit - */ - copy = min_t(size_t, sg_dma_len(sg) - sg_used, - hw->length - hw_used); - copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT); - - /* - * DMA_FROM_DEVICE - * from the hardware to the scatterlist - * - * DMA_TO_DEVICE - * from the scatterlist to the hardware - */ - if (direction == DMA_FROM_DEVICE) { - dma_src = hw->address + hw_used; - dma_dst = sg_dma_address(sg) + sg_used; - } else { - dma_src = sg_dma_address(sg) + sg_used; - dma_dst = hw->address + hw_used; - } - - /* Fill in the descriptor */ - set_desc_cnt(chan, &new->hw, copy); - set_desc_src(chan, &new->hw, dma_src); - set_desc_dst(chan, &new->hw, dma_dst); - - /* - * If this is not the first descriptor, chain the - * current descriptor after the previous descriptor - */ - if (!first) { - first = new; - } else { - set_desc_next(chan, &prev->hw, - new->async_tx.phys); - } - - new->async_tx.cookie = 0; - async_tx_ack(&new->async_tx); - - prev = new; - sg_used += copy; - hw_used += copy; - - /* Insert the link descriptor into the LD ring */ - list_add_tail(&new->node, &first->tx_list); - } - } + set_desc_cnt(chan, &new->hw, len); + set_desc_src(chan, &new->hw, src); + set_desc_dst(chan, &new->hw, dst); -finished: + if (!first) + first = new; + else + set_desc_next(chan, &prev->hw, new->async_tx.phys); - /* All of the hardware address/length pairs had length == 0 */ - if (!first || !new) - return NULL; + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + prev = new; - new->async_tx.flags = flags; - new->async_tx.cookie = -EBUSY; + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); - /* Set End-of-link to the last link descriptor of new list */ - set_ld_eol(chan, new); + /* update metadata */ + dst_avail -= len; + src_avail -= len; + +fetch: + /* fetch the next dst scatterlist entry */ + if (dst_avail == 0) { + + /* no more entries: we're done */ + if (dst_nents == 0) + break; + + /* fetch the next entry: if there are no more: done */ + dst_sg = sg_next(dst_sg); + if (dst_sg == NULL) + break; + + dst_nents--; + dst_avail = sg_dma_len(dst_sg); + } - /* Enable extra controller features */ - if (chan->set_src_loop_size) - chan->set_src_loop_size(chan, slave->src_loop_size); + /* fetch the next src scatterlist entry */ + if (src_avail == 0) { - if (chan->set_dst_loop_size) - chan->set_dst_loop_size(chan, slave->dst_loop_size); + /* no more entries: we're done */ + if (src_nents == 0) + break; - if (chan->toggle_ext_start) - chan->toggle_ext_start(chan, slave->external_start); + /* fetch the next entry: if there are no more: done */ + src_sg = sg_next(src_sg); + if (src_sg == NULL) + break; - if (chan->toggle_ext_pause) - chan->toggle_ext_pause(chan, slave->external_pause); + src_nents--; + src_avail = sg_dma_len(src_sg); + } + } - if (chan->set_request_count) - chan->set_request_count(chan, slave->request_count); + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(chan, new); return &first->async_tx; fail: - /* If first was not set, then we failed to allocate the very first - * descriptor, and we're done */ if (!first) return NULL; + fsldma_free_desc_list_reverse(chan, &first->tx_list); + return NULL; +} + +/** + * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: DMAEngine flags + * + * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the + * DMA_SLAVE API, this gets the device-specific information from the + * chan->private variable. + */ +static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_data_direction direction, unsigned long flags) +{ /* - * First is set, so all of the descriptors we allocated have been added - * to first->tx_list, INCLUDING "first" itself. Therefore we - * must traverse the list backwards freeing each descriptor in turn + * This operation is not supported on the Freescale DMA controller * - * We're re-using variables for the loop, oh well + * However, we need to provide the function pointer to allow the + * device_control() method to work. */ - fsldma_free_desc_list_reverse(chan, &first->tx_list); return NULL; } static int fsl_dma_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd, unsigned long arg) { + struct dma_slave_config *config; struct fsldma_chan *chan; unsigned long flags; - - /* Only supports DMA_TERMINATE_ALL */ - if (cmd != DMA_TERMINATE_ALL) - return -ENXIO; + int size; if (!dchan) return -EINVAL; chan = to_fsl_chan(dchan); - /* Halt the DMA engine */ - dma_halt(chan); + switch (cmd) { + case DMA_TERMINATE_ALL: + /* Halt the DMA engine */ + dma_halt(chan); - spin_lock_irqsave(&chan->desc_lock, flags); + spin_lock_irqsave(&chan->desc_lock, flags); - /* Remove and free all of the descriptors in the LD queue */ - fsldma_free_desc_list(chan, &chan->ld_pending); - fsldma_free_desc_list(chan, &chan->ld_running); + /* Remove and free all of the descriptors in the LD queue */ + fsldma_free_desc_list(chan, &chan->ld_pending); + fsldma_free_desc_list(chan, &chan->ld_running); - spin_unlock_irqrestore(&chan->desc_lock, flags); + spin_unlock_irqrestore(&chan->desc_lock, flags); + return 0; + + case DMA_SLAVE_CONFIG: + config = (struct dma_slave_config *)arg; + + /* make sure the channel supports setting burst size */ + if (!chan->set_request_count) + return -ENXIO; + + /* we set the controller burst size depending on direction */ + if (config->direction == DMA_TO_DEVICE) + size = config->dst_addr_width * config->dst_maxburst; + else + size = config->src_addr_width * config->src_maxburst; + + chan->set_request_count(chan, size); + return 0; + + case FSLDMA_EXTERNAL_START: + + /* make sure the channel supports external start */ + if (!chan->toggle_ext_start) + return -ENXIO; + + chan->toggle_ext_start(chan, arg); + return 0; + + default: + return -ENXIO; + } return 0; } @@ -1327,11 +1309,13 @@ static int __devinit fsldma_of_probe(struct platform_device *op, dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); + dma_cap_set(DMA_SG, fdev->common.cap_mask); dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; + fdev->common.device_prep_dma_sg = fsl_dma_prep_sg; fdev->common.device_tx_status = fsl_tx_status; fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg; diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index c2591e8d9b6e..338bc4eed1f3 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -25,6 +25,7 @@ */ #include <linux/pci.h> #include <linux/interrupt.h> +#include <linux/pm_runtime.h> #include <linux/intel_mid_dma.h> #define MAX_CHAN 4 /*max ch across controllers*/ @@ -91,13 +92,13 @@ static int get_block_ts(int len, int tx_width, int block_size) int byte_width = 0, block_ts = 0; switch (tx_width) { - case LNW_DMA_WIDTH_8BIT: + case DMA_SLAVE_BUSWIDTH_1_BYTE: byte_width = 1; break; - case LNW_DMA_WIDTH_16BIT: + case DMA_SLAVE_BUSWIDTH_2_BYTES: byte_width = 2; break; - case LNW_DMA_WIDTH_32BIT: + case DMA_SLAVE_BUSWIDTH_4_BYTES: default: byte_width = 4; break; @@ -247,16 +248,17 @@ static void midc_dostart(struct intel_mid_dma_chan *midc, struct middma_device *mid = to_middma_device(midc->chan.device); /* channel is idle */ - if (midc->in_use && test_ch_en(midc->dma_base, midc->ch_id)) { + if (midc->busy && test_ch_en(midc->dma_base, midc->ch_id)) { /*error*/ pr_err("ERR_MDMA: channel is busy in start\n"); /* The tasklet will hopefully advance the queue... */ return; } - + midc->busy = true; /*write registers and en*/ iowrite32(first->sar, midc->ch_regs + SAR); iowrite32(first->dar, midc->ch_regs + DAR); + iowrite32(first->lli_phys, midc->ch_regs + LLP); iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH); iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW); iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW); @@ -264,9 +266,9 @@ static void midc_dostart(struct intel_mid_dma_chan *midc, pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n", (int)first->sar, (int)first->dar, first->cfg_hi, first->cfg_lo, first->ctl_hi, first->ctl_lo); + first->status = DMA_IN_PROGRESS; iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN); - first->status = DMA_IN_PROGRESS; } /** @@ -283,20 +285,36 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc, { struct dma_async_tx_descriptor *txd = &desc->txd; dma_async_tx_callback callback_txd = NULL; + struct intel_mid_dma_lli *llitem; void *param_txd = NULL; midc->completed = txd->cookie; callback_txd = txd->callback; param_txd = txd->callback_param; - list_move(&desc->desc_node, &midc->free_list); - + if (desc->lli != NULL) { + /*clear the DONE bit of completed LLI in memory*/ + llitem = desc->lli + desc->current_lli; + llitem->ctl_hi &= CLEAR_DONE; + if (desc->current_lli < desc->lli_length-1) + (desc->current_lli)++; + else + desc->current_lli = 0; + } spin_unlock_bh(&midc->lock); if (callback_txd) { pr_debug("MDMA: TXD callback set ... calling\n"); callback_txd(param_txd); - spin_lock_bh(&midc->lock); - return; + } + if (midc->raw_tfr) { + desc->status = DMA_SUCCESS; + if (desc->lli != NULL) { + pci_pool_free(desc->lli_pool, desc->lli, + desc->lli_phys); + pci_pool_destroy(desc->lli_pool); + } + list_move(&desc->desc_node, &midc->free_list); + midc->busy = false; } spin_lock_bh(&midc->lock); @@ -317,14 +335,89 @@ static void midc_scan_descriptors(struct middma_device *mid, /*tx is complete*/ list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { - if (desc->status == DMA_IN_PROGRESS) { - desc->status = DMA_SUCCESS; + if (desc->status == DMA_IN_PROGRESS) midc_descriptor_complete(midc, desc); - } } return; -} + } +/** + * midc_lli_fill_sg - Helper function to convert + * SG list to Linked List Items. + *@midc: Channel + *@desc: DMA descriptor + *@sglist: Pointer to SG list + *@sglen: SG list length + *@flags: DMA transaction flags + * + * Walk through the SG list and convert the SG list into Linked + * List Items (LLI). + */ +static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *desc, + struct scatterlist *sglist, + unsigned int sglen, + unsigned int flags) +{ + struct intel_mid_dma_slave *mids; + struct scatterlist *sg; + dma_addr_t lli_next, sg_phy_addr; + struct intel_mid_dma_lli *lli_bloc_desc; + union intel_mid_dma_ctl_lo ctl_lo; + union intel_mid_dma_ctl_hi ctl_hi; + int i; + pr_debug("MDMA: Entered midc_lli_fill_sg\n"); + mids = midc->mid_slave; + + lli_bloc_desc = desc->lli; + lli_next = desc->lli_phys; + + ctl_lo.ctl_lo = desc->ctl_lo; + ctl_hi.ctl_hi = desc->ctl_hi; + for_each_sg(sglist, sg, sglen, i) { + /*Populate CTL_LOW and LLI values*/ + if (i != sglen - 1) { + lli_next = lli_next + + sizeof(struct intel_mid_dma_lli); + } else { + /*Check for circular list, otherwise terminate LLI to ZERO*/ + if (flags & DMA_PREP_CIRCULAR_LIST) { + pr_debug("MDMA: LLI is configured in circular mode\n"); + lli_next = desc->lli_phys; + } else { + lli_next = 0; + ctl_lo.ctlx.llp_dst_en = 0; + ctl_lo.ctlx.llp_src_en = 0; + } + } + /*Populate CTL_HI values*/ + ctl_hi.ctlx.block_ts = get_block_ts(sg->length, + desc->width, + midc->dma->block_size); + /*Populate SAR and DAR values*/ + sg_phy_addr = sg_phys(sg); + if (desc->dirn == DMA_TO_DEVICE) { + lli_bloc_desc->sar = sg_phy_addr; + lli_bloc_desc->dar = mids->dma_slave.dst_addr; + } else if (desc->dirn == DMA_FROM_DEVICE) { + lli_bloc_desc->sar = mids->dma_slave.src_addr; + lli_bloc_desc->dar = sg_phy_addr; + } + /*Copy values into block descriptor in system memroy*/ + lli_bloc_desc->llp = lli_next; + lli_bloc_desc->ctl_lo = ctl_lo.ctl_lo; + lli_bloc_desc->ctl_hi = ctl_hi.ctl_hi; + + lli_bloc_desc++; + } + /*Copy very first LLI values to descriptor*/ + desc->ctl_lo = desc->lli->ctl_lo; + desc->ctl_hi = desc->lli->ctl_hi; + desc->sar = desc->lli->sar; + desc->dar = desc->lli->dar; + + return 0; +} /***************************************************************************** DMA engine callback Functions*/ /** @@ -349,12 +442,12 @@ static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx) desc->txd.cookie = cookie; - if (list_empty(&midc->active_list)) { - midc_dostart(midc, desc); + if (list_empty(&midc->active_list)) list_add_tail(&desc->desc_node, &midc->active_list); - } else { + else list_add_tail(&desc->desc_node, &midc->queue); - } + + midc_dostart(midc, desc); spin_unlock_bh(&midc->lock); return cookie; @@ -414,6 +507,23 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan, return ret; } +static int dma_slave_control(struct dma_chan *chan, unsigned long arg) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct dma_slave_config *slave = (struct dma_slave_config *)arg; + struct intel_mid_dma_slave *mid_slave; + + BUG_ON(!midc); + BUG_ON(!slave); + pr_debug("MDMA: slave control called\n"); + + mid_slave = to_intel_mid_dma_slave(slave); + + BUG_ON(!mid_slave); + + midc->mid_slave = mid_slave; + return 0; +} /** * intel_mid_dma_device_control - DMA device control * @chan: chan for DMA control @@ -428,49 +538,41 @@ static int intel_mid_dma_device_control(struct dma_chan *chan, struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); struct middma_device *mid = to_middma_device(chan->device); struct intel_mid_dma_desc *desc, *_desc; - LIST_HEAD(list); + union intel_mid_dma_cfg_lo cfg_lo; + + if (cmd == DMA_SLAVE_CONFIG) + return dma_slave_control(chan, arg); if (cmd != DMA_TERMINATE_ALL) return -ENXIO; spin_lock_bh(&midc->lock); - if (midc->in_use == false) { + if (midc->busy == false) { spin_unlock_bh(&midc->lock); return 0; } - list_splice_init(&midc->free_list, &list); - midc->descs_allocated = 0; - midc->slave = NULL; - + /*Suspend and disable the channel*/ + cfg_lo.cfg_lo = ioread32(midc->ch_regs + CFG_LOW); + cfg_lo.cfgx.ch_susp = 1; + iowrite32(cfg_lo.cfg_lo, midc->ch_regs + CFG_LOW); + iowrite32(DISABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN); + midc->busy = false; /* Disable interrupts */ disable_dma_interrupt(midc); + midc->descs_allocated = 0; spin_unlock_bh(&midc->lock); - list_for_each_entry_safe(desc, _desc, &list, desc_node) { - pr_debug("MDMA: freeing descriptor %p\n", desc); - pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { + if (desc->lli != NULL) { + pci_pool_free(desc->lli_pool, desc->lli, + desc->lli_phys); + pci_pool_destroy(desc->lli_pool); + } + list_move(&desc->desc_node, &midc->free_list); } return 0; } -/** - * intel_mid_dma_prep_slave_sg - Prep slave sg txn - * @chan: chan for DMA transfer - * @sgl: scatter gather list - * @sg_len: length of sg txn - * @direction: DMA transfer dirtn - * @flags: DMA flags - * - * Do DMA sg txn: NOT supported now - */ -static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg( - struct dma_chan *chan, struct scatterlist *sgl, - unsigned int sg_len, enum dma_data_direction direction, - unsigned long flags) -{ - /*not supported now*/ - return NULL; -} /** * intel_mid_dma_prep_memcpy - Prep memcpy txn @@ -495,23 +597,24 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( union intel_mid_dma_ctl_hi ctl_hi; union intel_mid_dma_cfg_lo cfg_lo; union intel_mid_dma_cfg_hi cfg_hi; - enum intel_mid_dma_width width = 0; + enum dma_slave_buswidth width; pr_debug("MDMA: Prep for memcpy\n"); - WARN_ON(!chan); + BUG_ON(!chan); if (!len) return NULL; - mids = chan->private; - WARN_ON(!mids); - midc = to_intel_mid_dma_chan(chan); - WARN_ON(!midc); + BUG_ON(!midc); + + mids = midc->mid_slave; + BUG_ON(!mids); pr_debug("MDMA:called for DMA %x CH %d Length %zu\n", midc->dma->pci_id, midc->ch_id, len); pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n", - mids->cfg_mode, mids->dirn, mids->hs_mode, mids->src_width); + mids->cfg_mode, mids->dma_slave.direction, + mids->hs_mode, mids->dma_slave.src_addr_width); /*calculate CFG_LO*/ if (mids->hs_mode == LNW_DMA_SW_HS) { @@ -530,13 +633,13 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( if (midc->dma->pimr_mask) { cfg_hi.cfgx.protctl = 0x0; /*default value*/ cfg_hi.cfgx.fifo_mode = 1; - if (mids->dirn == DMA_TO_DEVICE) { + if (mids->dma_slave.direction == DMA_TO_DEVICE) { cfg_hi.cfgx.src_per = 0; if (mids->device_instance == 0) cfg_hi.cfgx.dst_per = 3; if (mids->device_instance == 1) cfg_hi.cfgx.dst_per = 1; - } else if (mids->dirn == DMA_FROM_DEVICE) { + } else if (mids->dma_slave.direction == DMA_FROM_DEVICE) { if (mids->device_instance == 0) cfg_hi.cfgx.src_per = 2; if (mids->device_instance == 1) @@ -552,7 +655,8 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( /*calculate CTL_HI*/ ctl_hi.ctlx.reser = 0; - width = mids->src_width; + ctl_hi.ctlx.done = 0; + width = mids->dma_slave.src_addr_width; ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size); pr_debug("MDMA:calc len %d for block size %d\n", @@ -560,21 +664,21 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( /*calculate CTL_LO*/ ctl_lo.ctl_lo = 0; ctl_lo.ctlx.int_en = 1; - ctl_lo.ctlx.dst_tr_width = mids->dst_width; - ctl_lo.ctlx.src_tr_width = mids->src_width; - ctl_lo.ctlx.dst_msize = mids->src_msize; - ctl_lo.ctlx.src_msize = mids->dst_msize; + ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width; + ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width; + ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst; + ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst; if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) { ctl_lo.ctlx.tt_fc = 0; ctl_lo.ctlx.sinc = 0; ctl_lo.ctlx.dinc = 0; } else { - if (mids->dirn == DMA_TO_DEVICE) { + if (mids->dma_slave.direction == DMA_TO_DEVICE) { ctl_lo.ctlx.sinc = 0; ctl_lo.ctlx.dinc = 2; ctl_lo.ctlx.tt_fc = 1; - } else if (mids->dirn == DMA_FROM_DEVICE) { + } else if (mids->dma_slave.direction == DMA_FROM_DEVICE) { ctl_lo.ctlx.sinc = 2; ctl_lo.ctlx.dinc = 0; ctl_lo.ctlx.tt_fc = 2; @@ -597,7 +701,10 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( desc->ctl_lo = ctl_lo.ctl_lo; desc->ctl_hi = ctl_hi.ctl_hi; desc->width = width; - desc->dirn = mids->dirn; + desc->dirn = mids->dma_slave.direction; + desc->lli_phys = 0; + desc->lli = NULL; + desc->lli_pool = NULL; return &desc->txd; err_desc_get: @@ -605,6 +712,85 @@ err_desc_get: midc_desc_put(midc, desc); return NULL; } +/** + * intel_mid_dma_prep_slave_sg - Prep slave sg txn + * @chan: chan for DMA transfer + * @sgl: scatter gather list + * @sg_len: length of sg txn + * @direction: DMA transfer dirtn + * @flags: DMA flags + * + * Prepares LLI based periphral transfer + */ +static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags) +{ + struct intel_mid_dma_chan *midc = NULL; + struct intel_mid_dma_slave *mids = NULL; + struct intel_mid_dma_desc *desc = NULL; + struct dma_async_tx_descriptor *txd = NULL; + union intel_mid_dma_ctl_lo ctl_lo; + + pr_debug("MDMA: Prep for slave SG\n"); + + if (!sg_len) { + pr_err("MDMA: Invalid SG length\n"); + return NULL; + } + midc = to_intel_mid_dma_chan(chan); + BUG_ON(!midc); + + mids = midc->mid_slave; + BUG_ON(!mids); + + if (!midc->dma->pimr_mask) { + pr_debug("MDMA: SG list is not supported by this controller\n"); + return NULL; + } + + pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n", + sg_len, direction, flags); + + txd = intel_mid_dma_prep_memcpy(chan, 0, 0, sgl->length, flags); + if (NULL == txd) { + pr_err("MDMA: Prep memcpy failed\n"); + return NULL; + } + desc = to_intel_mid_dma_desc(txd); + desc->dirn = direction; + ctl_lo.ctl_lo = desc->ctl_lo; + ctl_lo.ctlx.llp_dst_en = 1; + ctl_lo.ctlx.llp_src_en = 1; + desc->ctl_lo = ctl_lo.ctl_lo; + desc->lli_length = sg_len; + desc->current_lli = 0; + /* DMA coherent memory pool for LLI descriptors*/ + desc->lli_pool = pci_pool_create("intel_mid_dma_lli_pool", + midc->dma->pdev, + (sizeof(struct intel_mid_dma_lli)*sg_len), + 32, 0); + if (NULL == desc->lli_pool) { + pr_err("MID_DMA:LLI pool create failed\n"); + return NULL; + } + + desc->lli = pci_pool_alloc(desc->lli_pool, GFP_KERNEL, &desc->lli_phys); + if (!desc->lli) { + pr_err("MID_DMA: LLI alloc failed\n"); + pci_pool_destroy(desc->lli_pool); + return NULL; + } + + midc_lli_fill_sg(midc, desc, sgl, sg_len, flags); + if (flags & DMA_PREP_INTERRUPT) { + iowrite32(UNMASK_INTR_REG(midc->ch_id), + midc->dma_base + MASK_BLOCK); + pr_debug("MDMA:Enabled Block interrupt\n"); + } + return &desc->txd; +} /** * intel_mid_dma_free_chan_resources - Frees dma resources @@ -618,11 +804,11 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan) struct middma_device *mid = to_middma_device(chan->device); struct intel_mid_dma_desc *desc, *_desc; - if (true == midc->in_use) { + if (true == midc->busy) { /*trying to free ch in use!!!!!*/ pr_err("ERR_MDMA: trying to free ch in use\n"); } - + pm_runtime_put(&mid->pdev->dev); spin_lock_bh(&midc->lock); midc->descs_allocated = 0; list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { @@ -639,6 +825,7 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan) } spin_unlock_bh(&midc->lock); midc->in_use = false; + midc->busy = false; /* Disable CH interrupts */ iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK); iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR); @@ -659,11 +846,20 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan) dma_addr_t phys; int i = 0; + pm_runtime_get_sync(&mid->pdev->dev); + + if (mid->state == SUSPENDED) { + if (dma_resume(mid->pdev)) { + pr_err("ERR_MDMA: resume failed"); + return -EFAULT; + } + } /* ASSERT: channel is idle */ if (test_ch_en(mid->dma_base, midc->ch_id)) { /*ch is not idle*/ pr_err("ERR_MDMA: ch not idle\n"); + pm_runtime_put(&mid->pdev->dev); return -EIO; } midc->completed = chan->cookie = 1; @@ -674,6 +870,7 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan) desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys); if (!desc) { pr_err("ERR_MDMA: desc failed\n"); + pm_runtime_put(&mid->pdev->dev); return -ENOMEM; /*check*/ } @@ -686,7 +883,8 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan) list_add_tail(&desc->desc_node, &midc->free_list); } spin_unlock_bh(&midc->lock); - midc->in_use = false; + midc->in_use = true; + midc->busy = false; pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i); return i; } @@ -715,7 +913,7 @@ static void dma_tasklet(unsigned long data) { struct middma_device *mid = NULL; struct intel_mid_dma_chan *midc = NULL; - u32 status; + u32 status, raw_tfr, raw_block; int i; mid = (struct middma_device *)data; @@ -724,8 +922,9 @@ static void dma_tasklet(unsigned long data) return; } pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id); - status = ioread32(mid->dma_base + RAW_TFR); - pr_debug("MDMA:RAW_TFR %x\n", status); + raw_tfr = ioread32(mid->dma_base + RAW_TFR); + raw_block = ioread32(mid->dma_base + RAW_BLOCK); + status = raw_tfr | raw_block; status &= mid->intr_mask; while (status) { /*txn interrupt*/ @@ -741,15 +940,23 @@ static void dma_tasklet(unsigned long data) } pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n", status, midc->ch_id, i); + midc->raw_tfr = raw_tfr; + midc->raw_block = raw_block; + spin_lock_bh(&midc->lock); /*clearing this interrupts first*/ iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR); - iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_BLOCK); - - spin_lock_bh(&midc->lock); + if (raw_block) { + iowrite32((1 << midc->ch_id), + mid->dma_base + CLEAR_BLOCK); + } midc_scan_descriptors(mid, midc); pr_debug("MDMA:Scan of desc... complete, unmasking\n"); iowrite32(UNMASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_TFR); + if (raw_block) { + iowrite32(UNMASK_INTR_REG(midc->ch_id), + mid->dma_base + MASK_BLOCK); + } spin_unlock_bh(&midc->lock); } @@ -804,9 +1011,14 @@ static void dma_tasklet2(unsigned long data) static irqreturn_t intel_mid_dma_interrupt(int irq, void *data) { struct middma_device *mid = data; - u32 status; + u32 tfr_status, err_status; int call_tasklet = 0; + tfr_status = ioread32(mid->dma_base + RAW_TFR); + err_status = ioread32(mid->dma_base + RAW_ERR); + if (!tfr_status && !err_status) + return IRQ_NONE; + /*DMA Interrupt*/ pr_debug("MDMA:Got an interrupt on irq %d\n", irq); if (!mid) { @@ -814,19 +1026,18 @@ static irqreturn_t intel_mid_dma_interrupt(int irq, void *data) return -EINVAL; } - status = ioread32(mid->dma_base + RAW_TFR); - pr_debug("MDMA: Status %x, Mask %x\n", status, mid->intr_mask); - status &= mid->intr_mask; - if (status) { + pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask); + tfr_status &= mid->intr_mask; + if (tfr_status) { /*need to disable intr*/ - iowrite32((status << 8), mid->dma_base + MASK_TFR); - pr_debug("MDMA: Calling tasklet %x\n", status); + iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_TFR); + iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_BLOCK); + pr_debug("MDMA: Calling tasklet %x\n", tfr_status); call_tasklet = 1; } - status = ioread32(mid->dma_base + RAW_ERR); - status &= mid->intr_mask; - if (status) { - iowrite32(MASK_INTR_REG(status), mid->dma_base + MASK_ERR); + err_status &= mid->intr_mask; + if (err_status) { + iowrite32(MASK_INTR_REG(err_status), mid->dma_base + MASK_ERR); call_tasklet = 1; } if (call_tasklet) @@ -856,7 +1067,6 @@ static int mid_setup_dma(struct pci_dev *pdev) { struct middma_device *dma = pci_get_drvdata(pdev); int err, i; - unsigned int irq_level; /* DMA coherent memory pool for DMA descriptor allocations */ dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev, @@ -884,6 +1094,7 @@ static int mid_setup_dma(struct pci_dev *pdev) pr_debug("MDMA:Adding %d channel for this controller\n", dma->max_chan); /*init CH structures*/ dma->intr_mask = 0; + dma->state = RUNNING; for (i = 0; i < dma->max_chan; i++) { struct intel_mid_dma_chan *midch = &dma->ch[i]; @@ -943,7 +1154,6 @@ static int mid_setup_dma(struct pci_dev *pdev) /*register irq */ if (dma->pimr_mask) { - irq_level = IRQF_SHARED; pr_debug("MDMA:Requesting irq shared for DMAC1\n"); err = request_irq(pdev->irq, intel_mid_dma_interrupt1, IRQF_SHARED, "INTEL_MID_DMAC1", dma); @@ -951,10 +1161,9 @@ static int mid_setup_dma(struct pci_dev *pdev) goto err_irq; } else { dma->intr_mask = 0x03; - irq_level = 0; pr_debug("MDMA:Requesting irq for DMAC2\n"); err = request_irq(pdev->irq, intel_mid_dma_interrupt2, - 0, "INTEL_MID_DMAC2", dma); + IRQF_SHARED, "INTEL_MID_DMAC2", dma); if (0 != err) goto err_irq; } @@ -1070,6 +1279,9 @@ static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, if (err) goto err_dma; + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_allow(&pdev->dev); return 0; err_dma: @@ -1104,6 +1316,85 @@ static void __devexit intel_mid_dma_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +/* Power Management */ +/* +* dma_suspend - PCI suspend function +* +* @pci: PCI device structure +* @state: PM message +* +* This function is called by OS when a power event occurs +*/ +int dma_suspend(struct pci_dev *pci, pm_message_t state) +{ + int i; + struct middma_device *device = pci_get_drvdata(pci); + pr_debug("MDMA: dma_suspend called\n"); + + for (i = 0; i < device->max_chan; i++) { + if (device->ch[i].in_use) + return -EAGAIN; + } + device->state = SUSPENDED; + pci_set_drvdata(pci, device); + pci_save_state(pci); + pci_disable_device(pci); + pci_set_power_state(pci, PCI_D3hot); + return 0; +} + +/** +* dma_resume - PCI resume function +* +* @pci: PCI device structure +* +* This function is called by OS when a power event occurs +*/ +int dma_resume(struct pci_dev *pci) +{ + int ret; + struct middma_device *device = pci_get_drvdata(pci); + + pr_debug("MDMA: dma_resume called\n"); + pci_set_power_state(pci, PCI_D0); + pci_restore_state(pci); + ret = pci_enable_device(pci); + if (ret) { + pr_err("MDMA: device cant be enabled for %x\n", pci->device); + return ret; + } + device->state = RUNNING; + iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + pci_set_drvdata(pci, device); + return 0; +} + +static int dma_runtime_suspend(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + return dma_suspend(pci_dev, PMSG_SUSPEND); +} + +static int dma_runtime_resume(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + return dma_resume(pci_dev); +} + +static int dma_runtime_idle(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct middma_device *device = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < device->max_chan; i++) { + if (device->ch[i].in_use) + return -EAGAIN; + } + + return pm_schedule_suspend(dev, 0); +} + /****************************************************************************** * PCI stuff */ @@ -1116,11 +1407,24 @@ static struct pci_device_id intel_mid_dma_ids[] = { }; MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids); +static const struct dev_pm_ops intel_mid_dma_pm = { + .runtime_suspend = dma_runtime_suspend, + .runtime_resume = dma_runtime_resume, + .runtime_idle = dma_runtime_idle, +}; + static struct pci_driver intel_mid_dma_pci = { .name = "Intel MID DMA", .id_table = intel_mid_dma_ids, .probe = intel_mid_dma_probe, .remove = __devexit_p(intel_mid_dma_remove), +#ifdef CONFIG_PM + .suspend = dma_suspend, + .resume = dma_resume, + .driver = { + .pm = &intel_mid_dma_pm, + }, +#endif }; static int __init intel_mid_dma_init(void) diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h index d81aa658ab09..709fecbdde79 100644 --- a/drivers/dma/intel_mid_dma_regs.h +++ b/drivers/dma/intel_mid_dma_regs.h @@ -29,11 +29,12 @@ #include <linux/dmapool.h> #include <linux/pci_ids.h> -#define INTEL_MID_DMA_DRIVER_VERSION "1.0.5" +#define INTEL_MID_DMA_DRIVER_VERSION "1.1.0" #define REG_BIT0 0x00000001 #define REG_BIT8 0x00000100 - +#define INT_MASK_WE 0x8 +#define CLEAR_DONE 0xFFFFEFFF #define UNMASK_INTR_REG(chan_num) \ ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num)) #define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num) @@ -41,6 +42,9 @@ #define ENABLE_CHANNEL(chan_num) \ ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num)) +#define DISABLE_CHANNEL(chan_num) \ + (REG_BIT8 << chan_num) + #define DESCS_PER_CHANNEL 16 /*DMA Registers*/ /*registers associated with channel programming*/ @@ -50,6 +54,7 @@ /*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/ #define SAR 0x00 /* Source Address Register*/ #define DAR 0x08 /* Destination Address Register*/ +#define LLP 0x10 /* Linked List Pointer Register*/ #define CTL_LOW 0x18 /* Control Register*/ #define CTL_HIGH 0x1C /* Control Register*/ #define CFG_LOW 0x40 /* Configuration Register Low*/ @@ -112,8 +117,8 @@ union intel_mid_dma_ctl_lo { union intel_mid_dma_ctl_hi { struct { u32 block_ts:12; /*block transfer size*/ - /*configured by DMAC*/ - u32 reser:20; + u32 done:1; /*Done - updated by DMAC*/ + u32 reser:19; /*configured by DMAC*/ } ctlx; u32 ctl_hi; @@ -152,6 +157,7 @@ union intel_mid_dma_cfg_hi { u32 cfg_hi; }; + /** * struct intel_mid_dma_chan - internal mid representation of a DMA channel * @chan: dma_chan strcture represetation for mid chan @@ -166,7 +172,10 @@ union intel_mid_dma_cfg_hi { * @slave: dma slave struture * @descs_allocated: total number of decsiptors allocated * @dma: dma device struture pointer + * @busy: bool representing if ch is busy (active txn) or not * @in_use: bool representing if ch is in use or not + * @raw_tfr: raw trf interrupt recieved + * @raw_block: raw block interrupt recieved */ struct intel_mid_dma_chan { struct dma_chan chan; @@ -178,10 +187,13 @@ struct intel_mid_dma_chan { struct list_head active_list; struct list_head queue; struct list_head free_list; - struct intel_mid_dma_slave *slave; unsigned int descs_allocated; struct middma_device *dma; + bool busy; bool in_use; + u32 raw_tfr; + u32 raw_block; + struct intel_mid_dma_slave *mid_slave; }; static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan( @@ -190,6 +202,10 @@ static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan( return container_of(chan, struct intel_mid_dma_chan, chan); } +enum intel_mid_dma_state { + RUNNING = 0, + SUSPENDED, +}; /** * struct middma_device - internal representation of a DMA device * @pdev: PCI device @@ -205,6 +221,7 @@ static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan( * @max_chan: max number of chs supported (from drv_data) * @block_size: Block size of DMA transfer supported (from drv_data) * @pimr_mask: MMIO register addr for periphral interrupt (from drv_data) + * @state: dma PM device state */ struct middma_device { struct pci_dev *pdev; @@ -220,6 +237,7 @@ struct middma_device { int max_chan; int block_size; unsigned int pimr_mask; + enum intel_mid_dma_state state; }; static inline struct middma_device *to_middma_device(struct dma_device *common) @@ -238,14 +256,27 @@ struct intel_mid_dma_desc { u32 cfg_lo; u32 ctl_lo; u32 ctl_hi; + struct pci_pool *lli_pool; + struct intel_mid_dma_lli *lli; + dma_addr_t lli_phys; + unsigned int lli_length; + unsigned int current_lli; dma_addr_t next; enum dma_data_direction dirn; enum dma_status status; - enum intel_mid_dma_width width; /*width of DMA txn*/ + enum dma_slave_buswidth width; /*width of DMA txn*/ enum intel_mid_dma_mode cfg_mode; /*mode configuration*/ }; +struct intel_mid_dma_lli { + dma_addr_t sar; + dma_addr_t dar; + dma_addr_t llp; + u32 ctl_lo; + u32 ctl_hi; +} __attribute__ ((packed)); + static inline int test_ch_en(void __iomem *dma, u32 ch_no) { u32 en_reg = ioread32(dma + DMA_CHAN_EN); @@ -257,4 +288,14 @@ static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc { return container_of(txd, struct intel_mid_dma_desc, txd); } + +static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave + (struct dma_slave_config *slave) +{ + return container_of(slave, struct intel_mid_dma_slave, dma_slave); +} + + +int dma_resume(struct pci_dev *pci); + #endif /*__INTEL_MID_DMAC_REGS_H__*/ diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 86c5ae9fde34..411d5bf50fc4 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -162,7 +162,7 @@ static int mv_is_err_intr(u32 intr_cause) static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) { - u32 val = (1 << (1 + (chan->idx * 16))); + u32 val = ~(1 << (chan->idx * 16)); dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val); __raw_writel(val, XOR_INTR_CAUSE(chan)); } diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index fb64cf36ba61..eb6b54dbb806 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -580,7 +580,6 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg( sh_chan = to_sh_chan(chan); param = chan->private; - slave_addr = param->config->addr; /* Someone calling slave DMA on a public channel? */ if (!param || !sg_len) { @@ -589,6 +588,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg( return NULL; } + slave_addr = param->config->addr; + /* * if (param != NULL), this is a successfully requested slave channel, * therefore param->config != NULL too. diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 17e2600a00cf..3f76cd9af7c3 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1,11 +1,8 @@ /* - * driver/dma/ste_dma40.c - * - * Copyright (C) ST-Ericsson 2007-2010 + * Copyright (C) ST-Ericsson SA 2007-2010 + * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson * License terms: GNU General Public License (GPL) version 2 - * Author: Per Friden <per.friden@stericsson.com> - * Author: Jonas Aaberg <jonas.aberg@stericsson.com> - * */ #include <linux/kernel.h> @@ -14,6 +11,7 @@ #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/delay.h> +#include <linux/err.h> #include <plat/ste_dma40.h> @@ -32,6 +30,11 @@ /* Hardware requirement on LCLA alignment */ #define LCLA_ALIGNMENT 0x40000 + +/* Max number of links per event group */ +#define D40_LCLA_LINK_PER_EVENT_GRP 128 +#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP + /* Attempts before giving up to trying to get pages that are aligned */ #define MAX_LCLA_ALLOC_ATTEMPTS 256 @@ -41,7 +44,7 @@ #define D40_ALLOC_LOG_FREE 0 /* Hardware designer of the block */ -#define D40_PERIPHID2_DESIGNER 0x8 +#define D40_HW_DESIGNER 0x8 /** * enum 40_command - The different commands and/or statuses. @@ -84,18 +87,17 @@ struct d40_lli_pool { * @lli_log: Same as above but for logical channels. * @lli_pool: The pool with two entries pre-allocated. * @lli_len: Number of llis of current descriptor. - * @lli_count: Number of transfered llis. - * @lli_tx_len: Max number of LLIs per transfer, there can be - * many transfer for one descriptor. + * @lli_current: Number of transfered llis. + * @lcla_alloc: Number of LCLA entries allocated. * @txd: DMA engine struct. Used for among other things for communication * during a transfer. * @node: List entry. - * @dir: The transfer direction of this job. * @is_in_client_list: true if the client owns this descriptor. + * @is_hw_linked: true if this job will automatically be continued for + * the previous one. * * This descriptor is used for both logical and physical transfers. */ - struct d40_desc { /* LLI physical */ struct d40_phy_lli_bidir lli_phy; @@ -104,14 +106,14 @@ struct d40_desc { struct d40_lli_pool lli_pool; int lli_len; - int lli_count; - u32 lli_tx_len; + int lli_current; + int lcla_alloc; struct dma_async_tx_descriptor txd; struct list_head node; - enum dma_data_direction dir; bool is_in_client_list; + bool is_hw_linked; }; /** @@ -123,17 +125,14 @@ struct d40_desc { * @pages: The number of pages needed for all physical channels. * Only used later for clean-up on error * @lock: Lock to protect the content in this struct. - * @alloc_map: Bitmap mapping between physical channel and LCLA entries. - * @num_blocks: The number of entries of alloc_map. Equals to the - * number of physical channels. + * @alloc_map: big map over which LCLA entry is own by which job. */ struct d40_lcla_pool { void *base; void *base_unaligned; int pages; spinlock_t lock; - u32 *alloc_map; - int num_blocks; + struct d40_desc **alloc_map; }; /** @@ -146,9 +145,7 @@ struct d40_lcla_pool { * this physical channel. Can also be free or physically allocated. * @allocated_dst: Same as for src but is dst. * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as - * event line number. Both allocated_src and allocated_dst can not be - * allocated to a physical channel, since the interrupt handler has then - * no way of figure out which one the interrupt belongs to. + * event line number. */ struct d40_phy_res { spinlock_t lock; @@ -206,7 +203,6 @@ struct d40_chan { u32 src_def_cfg; u32 dst_def_cfg; struct d40_def_lcsp log_def; - struct d40_lcla_elem lcla; struct d40_log_lli_full *lcpa; /* Runtime reconfiguration */ dma_addr_t runtime_addr; @@ -234,7 +230,6 @@ struct d40_chan { * @dma_both: dma_device channels that can do both memcpy and slave transfers. * @dma_slave: dma_device channels that can do only do slave transfers. * @dma_memcpy: dma_device channels that can do only do memcpy transfers. - * @phy_chans: Room for all possible physical channels in system. * @log_chans: Room for all possible logical channels in system. * @lookup_log_chans: Used to map interrupt number to logical channel. Points * to log_chans entries. @@ -340,9 +335,6 @@ static int d40_pool_lli_alloc(struct d40_desc *d40d, align); d40d->lli_phy.dst = PTR_ALIGN(d40d->lli_phy.src + lli_len, align); - - d40d->lli_phy.src_addr = virt_to_phys(d40d->lli_phy.src); - d40d->lli_phy.dst_addr = virt_to_phys(d40d->lli_phy.dst); } return 0; @@ -357,22 +349,67 @@ static void d40_pool_lli_free(struct d40_desc *d40d) d40d->lli_log.dst = NULL; d40d->lli_phy.src = NULL; d40d->lli_phy.dst = NULL; - d40d->lli_phy.src_addr = 0; - d40d->lli_phy.dst_addr = 0; } -static dma_cookie_t d40_assign_cookie(struct d40_chan *d40c, - struct d40_desc *desc) +static int d40_lcla_alloc_one(struct d40_chan *d40c, + struct d40_desc *d40d) { - dma_cookie_t cookie = d40c->chan.cookie; + unsigned long flags; + int i; + int ret = -EINVAL; + int p; - if (++cookie < 0) - cookie = 1; + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); + + p = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP; - d40c->chan.cookie = cookie; - desc->txd.cookie = cookie; + /* + * Allocate both src and dst at the same time, therefore the half + * start on 1 since 0 can't be used since zero is used as end marker. + */ + for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) { + if (!d40c->base->lcla_pool.alloc_map[p + i]) { + d40c->base->lcla_pool.alloc_map[p + i] = d40d; + d40d->lcla_alloc++; + ret = i; + break; + } + } + + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); + + return ret; +} + +static int d40_lcla_free_all(struct d40_chan *d40c, + struct d40_desc *d40d) +{ + unsigned long flags; + int i; + int ret = -EINVAL; + + if (d40c->log_num == D40_PHY_CHAN) + return 0; + + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); + + for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) { + if (d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num * + D40_LCLA_LINK_PER_EVENT_GRP + i] == d40d) { + d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num * + D40_LCLA_LINK_PER_EVENT_GRP + i] = NULL; + d40d->lcla_alloc--; + if (d40d->lcla_alloc == 0) { + ret = 0; + break; + } + } + } + + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); + + return ret; - return cookie; } static void d40_desc_remove(struct d40_desc *d40d) @@ -382,28 +419,35 @@ static void d40_desc_remove(struct d40_desc *d40d) static struct d40_desc *d40_desc_get(struct d40_chan *d40c) { - struct d40_desc *d; - struct d40_desc *_d; + struct d40_desc *desc = NULL; if (!list_empty(&d40c->client)) { + struct d40_desc *d; + struct d40_desc *_d; + list_for_each_entry_safe(d, _d, &d40c->client, node) if (async_tx_test_ack(&d->txd)) { d40_pool_lli_free(d); d40_desc_remove(d); + desc = d; + memset(desc, 0, sizeof(*desc)); break; } - } else { - d = kmem_cache_alloc(d40c->base->desc_slab, GFP_NOWAIT); - if (d != NULL) { - memset(d, 0, sizeof(struct d40_desc)); - INIT_LIST_HEAD(&d->node); - } } - return d; + + if (!desc) + desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT); + + if (desc) + INIT_LIST_HEAD(&desc->node); + + return desc; } static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d) { + + d40_lcla_free_all(d40c, d40d); kmem_cache_free(d40c->base->desc_slab, d40d); } @@ -412,6 +456,59 @@ static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc) list_add_tail(&desc->node, &d40c->active); } +static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) +{ + int curr_lcla = -EINVAL, next_lcla; + + if (d40c->log_num == D40_PHY_CHAN) { + d40_phy_lli_write(d40c->base->virtbase, + d40c->phy_chan->num, + d40d->lli_phy.dst, + d40d->lli_phy.src); + d40d->lli_current = d40d->lli_len; + } else { + + if ((d40d->lli_len - d40d->lli_current) > 1) + curr_lcla = d40_lcla_alloc_one(d40c, d40d); + + d40_log_lli_lcpa_write(d40c->lcpa, + &d40d->lli_log.dst[d40d->lli_current], + &d40d->lli_log.src[d40d->lli_current], + curr_lcla); + + d40d->lli_current++; + for (; d40d->lli_current < d40d->lli_len; d40d->lli_current++) { + struct d40_log_lli *lcla; + + if (d40d->lli_current + 1 < d40d->lli_len) + next_lcla = d40_lcla_alloc_one(d40c, d40d); + else + next_lcla = -EINVAL; + + lcla = d40c->base->lcla_pool.base + + d40c->phy_chan->num * 1024 + + 8 * curr_lcla * 2; + + d40_log_lli_lcla_write(lcla, + &d40d->lli_log.dst[d40d->lli_current], + &d40d->lli_log.src[d40d->lli_current], + next_lcla); + + (void) dma_map_single(d40c->base->dev, lcla, + 2 * sizeof(struct d40_log_lli), + DMA_TO_DEVICE); + + curr_lcla = next_lcla; + + if (curr_lcla == -EINVAL) { + d40d->lli_current++; + break; + } + + } + } +} + static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) { struct d40_desc *d; @@ -443,68 +540,26 @@ static struct d40_desc *d40_first_queued(struct d40_chan *d40c) return d; } -/* Support functions for logical channels */ - -static int d40_lcla_id_get(struct d40_chan *d40c) +static struct d40_desc *d40_last_queued(struct d40_chan *d40c) { - int src_id = 0; - int dst_id = 0; - struct d40_log_lli *lcla_lidx_base = - d40c->base->lcla_pool.base + d40c->phy_chan->num * 1024; - int i; - int lli_per_log = d40c->base->plat_data->llis_per_log; - unsigned long flags; - - if (d40c->lcla.src_id >= 0 && d40c->lcla.dst_id >= 0) - return 0; - - if (d40c->base->lcla_pool.num_blocks > 32) - return -EINVAL; - - spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); - - for (i = 0; i < d40c->base->lcla_pool.num_blocks; i++) { - if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] & - (0x1 << i))) { - d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |= - (0x1 << i); - break; - } - } - src_id = i; - if (src_id >= d40c->base->lcla_pool.num_blocks) - goto err; + struct d40_desc *d; - for (; i < d40c->base->lcla_pool.num_blocks; i++) { - if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] & - (0x1 << i))) { - d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |= - (0x1 << i); + if (list_empty(&d40c->queue)) + return NULL; + list_for_each_entry(d, &d40c->queue, node) + if (list_is_last(&d->node, &d40c->queue)) break; - } - } - - dst_id = i; - if (dst_id == src_id) - goto err; - - d40c->lcla.src_id = src_id; - d40c->lcla.dst_id = dst_id; - d40c->lcla.dst = lcla_lidx_base + dst_id * lli_per_log + 1; - d40c->lcla.src = lcla_lidx_base + src_id * lli_per_log + 1; - - spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); - return 0; -err: - spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); - return -EINVAL; + return d; } +/* Support functions for logical channels */ + static int d40_channel_execute_command(struct d40_chan *d40c, enum d40_command command) { - int status, i; + u32 status; + int i; void __iomem *active_reg; int ret = 0; unsigned long flags; @@ -567,35 +622,19 @@ done: static void d40_term_all(struct d40_chan *d40c) { struct d40_desc *d40d; - unsigned long flags; /* Release active descriptors */ while ((d40d = d40_first_active_get(d40c))) { d40_desc_remove(d40d); - - /* Return desc to free-list */ d40_desc_free(d40c, d40d); } /* Release queued descriptors waiting for transfer */ while ((d40d = d40_first_queued(d40c))) { d40_desc_remove(d40d); - - /* Return desc to free-list */ d40_desc_free(d40c, d40d); } - spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); - - d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &= - (~(0x1 << d40c->lcla.dst_id)); - d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &= - (~(0x1 << d40c->lcla.src_id)); - - d40c->lcla.src_id = -1; - d40c->lcla.dst_id = -1; - - spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); d40c->pending_tx = 0; d40c->busy = false; @@ -640,45 +679,22 @@ static void d40_config_set_event(struct d40_chan *d40c, bool do_enable) static u32 d40_chan_has_events(struct d40_chan *d40c) { - u32 val = 0; + u32 val; - /* If SSLNK or SDLNK is zero all events are disabled */ - if ((d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) || - (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) - val = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSLNK); - - if (d40c->dma_cfg.dir != STEDMA40_PERIPH_TO_MEM) - val = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDLNK); - return val; -} + val = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSLNK); -static void d40_config_enable_lidx(struct d40_chan *d40c) -{ - /* Set LIDX for lcla */ - writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & - D40_SREG_ELEM_LOG_LIDX_MASK, - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SDELT); - - writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & - D40_SREG_ELEM_LOG_LIDX_MASK, - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SSELT); + val |= readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDLNK); + return val; } -static int d40_config_write(struct d40_chan *d40c) +static void d40_config_write(struct d40_chan *d40c) { u32 addr_base; u32 var; - int res; - - res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); - if (res) - return res; /* Odd addresses are even addresses + 4 */ addr_base = (d40c->phy_chan->num % 2) * 4; @@ -704,41 +720,181 @@ static int d40_config_write(struct d40_chan *d40c) d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SDCFG); - d40_config_enable_lidx(d40c); + /* Set LIDX for lcla */ + writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & + D40_SREG_ELEM_LOG_LIDX_MASK, + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDELT); + + writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & + D40_SREG_ELEM_LOG_LIDX_MASK, + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSELT); + } +} + +static u32 d40_residue(struct d40_chan *d40c) +{ + u32 num_elt; + + if (d40c->log_num != D40_PHY_CHAN) + num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) + >> D40_MEM_LCSP2_ECNT_POS; + else + num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDELT) & + D40_SREG_ELEM_PHY_ECNT_MASK) >> + D40_SREG_ELEM_PHY_ECNT_POS; + return num_elt * (1 << d40c->dma_cfg.dst_info.data_width); +} + +static bool d40_tx_is_linked(struct d40_chan *d40c) +{ + bool is_link; + + if (d40c->log_num != D40_PHY_CHAN) + is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK; + else + is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDLNK) & + D40_SREG_LNK_PHYS_LNK_MASK; + return is_link; +} + +static int d40_pause(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int res = 0; + unsigned long flags; + + if (!d40c->busy) + return 0; + + spin_lock_irqsave(&d40c->lock, flags); + + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (res == 0) { + if (d40c->log_num != D40_PHY_CHAN) { + d40_config_set_event(d40c, false); + /* Resume the other logical channels if any */ + if (d40_chan_has_events(d40c)) + res = d40_channel_execute_command(d40c, + D40_DMA_RUN); + } + } + + spin_unlock_irqrestore(&d40c->lock, flags); return res; } -static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) +static int d40_resume(struct dma_chan *chan) { - if (d40d->lli_phy.dst && d40d->lli_phy.src) { - d40_phy_lli_write(d40c->base->virtbase, - d40c->phy_chan->num, - d40d->lli_phy.dst, - d40d->lli_phy.src); - } else if (d40d->lli_log.dst && d40d->lli_log.src) { - struct d40_log_lli *src = d40d->lli_log.src; - struct d40_log_lli *dst = d40d->lli_log.dst; - int s; - - src += d40d->lli_count; - dst += d40d->lli_count; - s = d40_log_lli_write(d40c->lcpa, - d40c->lcla.src, d40c->lcla.dst, - dst, src, - d40c->base->plat_data->llis_per_log); - - /* If s equals to zero, the job is not linked */ - if (s > 0) { - (void) dma_map_single(d40c->base->dev, d40c->lcla.src, - s * sizeof(struct d40_log_lli), - DMA_TO_DEVICE); - (void) dma_map_single(d40c->base->dev, d40c->lcla.dst, - s * sizeof(struct d40_log_lli), - DMA_TO_DEVICE); + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int res = 0; + unsigned long flags; + + if (!d40c->busy) + return 0; + + spin_lock_irqsave(&d40c->lock, flags); + + if (d40c->base->rev == 0) + if (d40c->log_num != D40_PHY_CHAN) { + res = d40_channel_execute_command(d40c, + D40_DMA_SUSPEND_REQ); + goto no_suspend; } + + /* If bytes left to transfer or linked tx resume job */ + if (d40_residue(d40c) || d40_tx_is_linked(d40c)) { + + if (d40c->log_num != D40_PHY_CHAN) + d40_config_set_event(d40c, true); + + res = d40_channel_execute_command(d40c, D40_DMA_RUN); + } + +no_suspend: + spin_unlock_irqrestore(&d40c->lock, flags); + return res; +} + +static void d40_tx_submit_log(struct d40_chan *d40c, struct d40_desc *d40d) +{ + /* TODO: Write */ +} + +static void d40_tx_submit_phy(struct d40_chan *d40c, struct d40_desc *d40d) +{ + struct d40_desc *d40d_prev = NULL; + int i; + u32 val; + + if (!list_empty(&d40c->queue)) + d40d_prev = d40_last_queued(d40c); + else if (!list_empty(&d40c->active)) + d40d_prev = d40_first_active_get(d40c); + + if (!d40d_prev) + return; + + /* Here we try to join this job with previous jobs */ + val = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSLNK); + + /* Figure out which link we're currently transmitting */ + for (i = 0; i < d40d_prev->lli_len; i++) + if (val == d40d_prev->lli_phy.src[i].reg_lnk) + break; + + val = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSELT) >> D40_SREG_ELEM_LOG_ECNT_POS; + + if (i == (d40d_prev->lli_len - 1) && val > 0) { + /* Change the current one */ + writel(virt_to_phys(d40d->lli_phy.src), + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSLNK); + writel(virt_to_phys(d40d->lli_phy.dst), + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDLNK); + + d40d->is_hw_linked = true; + + } else if (i < d40d_prev->lli_len) { + (void) dma_unmap_single(d40c->base->dev, + virt_to_phys(d40d_prev->lli_phy.src), + d40d_prev->lli_pool.size, + DMA_TO_DEVICE); + + /* Keep the settings */ + val = d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk & + ~D40_SREG_LNK_PHYS_LNK_MASK; + d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk = + val | virt_to_phys(d40d->lli_phy.src); + + val = d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk & + ~D40_SREG_LNK_PHYS_LNK_MASK; + d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk = + val | virt_to_phys(d40d->lli_phy.dst); + + (void) dma_map_single(d40c->base->dev, + d40d_prev->lli_phy.src, + d40d_prev->lli_pool.size, + DMA_TO_DEVICE); + d40d->is_hw_linked = true; } - d40d->lli_count += d40d->lli_tx_len; } static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) @@ -749,14 +905,28 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) struct d40_desc *d40d = container_of(tx, struct d40_desc, txd); unsigned long flags; + (void) d40_pause(&d40c->chan); + spin_lock_irqsave(&d40c->lock, flags); - tx->cookie = d40_assign_cookie(d40c, d40d); + d40c->chan.cookie++; + + if (d40c->chan.cookie < 0) + d40c->chan.cookie = 1; + + d40d->txd.cookie = d40c->chan.cookie; + + if (d40c->log_num == D40_PHY_CHAN) + d40_tx_submit_phy(d40c, d40d); + else + d40_tx_submit_log(d40c, d40d); d40_desc_queue(d40c, d40d); spin_unlock_irqrestore(&d40c->lock, flags); + (void) d40_resume(&d40c->chan); + return tx->cookie; } @@ -796,14 +966,21 @@ static struct d40_desc *d40_queue_start(struct d40_chan *d40c) /* Add to active queue */ d40_desc_submit(d40c, d40d); - /* Initiate DMA job */ - d40_desc_load(d40c, d40d); + /* + * If this job is already linked in hw, + * do not submit it. + */ - /* Start dma job */ - err = d40_start(d40c); + if (!d40d->is_hw_linked) { + /* Initiate DMA job */ + d40_desc_load(d40c, d40d); - if (err) - return NULL; + /* Start dma job */ + err = d40_start(d40c); + + if (err) + return NULL; + } } return d40d; @@ -814,17 +991,15 @@ static void dma_tc_handle(struct d40_chan *d40c) { struct d40_desc *d40d; - if (!d40c->phy_chan) - return; - /* Get first active entry from list */ d40d = d40_first_active_get(d40c); if (d40d == NULL) return; - if (d40d->lli_count < d40d->lli_len) { + d40_lcla_free_all(d40c, d40d); + if (d40d->lli_current < d40d->lli_len) { d40_desc_load(d40c, d40d); /* Start dma job */ (void) d40_start(d40c); @@ -842,7 +1017,7 @@ static void dma_tc_handle(struct d40_chan *d40c) static void dma_tasklet(unsigned long data) { struct d40_chan *d40c = (struct d40_chan *) data; - struct d40_desc *d40d_fin; + struct d40_desc *d40d; unsigned long flags; dma_async_tx_callback callback; void *callback_param; @@ -850,12 +1025,12 @@ static void dma_tasklet(unsigned long data) spin_lock_irqsave(&d40c->lock, flags); /* Get first active entry from list */ - d40d_fin = d40_first_active_get(d40c); + d40d = d40_first_active_get(d40c); - if (d40d_fin == NULL) + if (d40d == NULL) goto err; - d40c->completed = d40d_fin->txd.cookie; + d40c->completed = d40d->txd.cookie; /* * If terminating a channel pending_tx is set to zero. @@ -867,19 +1042,19 @@ static void dma_tasklet(unsigned long data) } /* Callback to client */ - callback = d40d_fin->txd.callback; - callback_param = d40d_fin->txd.callback_param; - - if (async_tx_test_ack(&d40d_fin->txd)) { - d40_pool_lli_free(d40d_fin); - d40_desc_remove(d40d_fin); - /* Return desc to free-list */ - d40_desc_free(d40c, d40d_fin); + callback = d40d->txd.callback; + callback_param = d40d->txd.callback_param; + + if (async_tx_test_ack(&d40d->txd)) { + d40_pool_lli_free(d40d); + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); } else { - if (!d40d_fin->is_in_client_list) { - d40_desc_remove(d40d_fin); - list_add_tail(&d40d_fin->node, &d40c->client); - d40d_fin->is_in_client_list = true; + if (!d40d->is_in_client_list) { + d40_desc_remove(d40d); + d40_lcla_free_all(d40c, d40d); + list_add_tail(&d40d->node, &d40c->client); + d40d->is_in_client_list = true; } } @@ -890,7 +1065,7 @@ static void dma_tasklet(unsigned long data) spin_unlock_irqrestore(&d40c->lock, flags); - if (callback) + if (callback && (d40d->txd.flags & DMA_PREP_INTERRUPT)) callback(callback_param); return; @@ -919,7 +1094,6 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data) int i; u32 regs[ARRAY_SIZE(il)]; - u32 tmp; u32 idx; u32 row; long chan = -1; @@ -946,9 +1120,7 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data) idx = chan & (BITS_PER_LONG - 1); /* ACK interrupt */ - tmp = readl(base->virtbase + il[row].clr); - tmp |= 1 << idx; - writel(tmp, base->virtbase + il[row].clr); + writel(1 << idx, base->virtbase + il[row].clr); if (il[row].offset == D40_PHY_CHAN) d40c = base->lookup_phy_chans[idx]; @@ -971,7 +1143,6 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data) return IRQ_HANDLED; } - static int d40_validate_conf(struct d40_chan *d40c, struct stedma40_chan_cfg *conf) { @@ -981,14 +1152,39 @@ static int d40_validate_conf(struct d40_chan *d40c, bool is_log = (conf->channel_type & STEDMA40_CHANNEL_IN_OPER_MODE) == STEDMA40_CHANNEL_IN_LOG_MODE; - if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH && + if (!conf->dir) { + dev_err(&d40c->chan.dev->device, "[%s] Invalid direction.\n", + __func__); + res = -EINVAL; + } + + if (conf->dst_dev_type != STEDMA40_DEV_DST_MEMORY && + d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 && + d40c->runtime_addr == 0) { + + dev_err(&d40c->chan.dev->device, + "[%s] Invalid TX channel address (%d)\n", + __func__, conf->dst_dev_type); + res = -EINVAL; + } + + if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY && + d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 && + d40c->runtime_addr == 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Invalid RX channel address (%d)\n", + __func__, conf->src_dev_type); + res = -EINVAL; + } + + if (conf->dir == STEDMA40_MEM_TO_PERIPH && dst_event_group == STEDMA40_DEV_DST_MEMORY) { dev_err(&d40c->chan.dev->device, "[%s] Invalid dst\n", __func__); res = -EINVAL; } - if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM && + if (conf->dir == STEDMA40_PERIPH_TO_MEM && src_event_group == STEDMA40_DEV_SRC_MEMORY) { dev_err(&d40c->chan.dev->device, "[%s] Invalid src\n", __func__); @@ -1082,7 +1278,6 @@ static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src, spin_lock_irqsave(&phy->lock, flags); if (!log_event_line) { - /* Physical interrupts are masked per physical full channel */ phy->allocated_dst = D40_ALLOC_FREE; phy->allocated_src = D40_ALLOC_FREE; is_free = true; @@ -1251,7 +1446,6 @@ static int d40_free_dma(struct d40_chan *d40c) list_for_each_entry_safe(d, _d, &d40c->client, node) { d40_pool_lli_free(d); d40_desc_remove(d); - /* Return desc to free-list */ d40_desc_free(d40c, d); } @@ -1331,30 +1525,6 @@ static int d40_free_dma(struct d40_chan *d40c) return 0; } -static int d40_pause(struct dma_chan *chan) -{ - struct d40_chan *d40c = - container_of(chan, struct d40_chan, chan); - int res; - unsigned long flags; - - spin_lock_irqsave(&d40c->lock, flags); - - res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); - if (res == 0) { - if (d40c->log_num != D40_PHY_CHAN) { - d40_config_set_event(d40c, false); - /* Resume the other logical channels if any */ - if (d40_chan_has_events(d40c)) - res = d40_channel_execute_command(d40c, - D40_DMA_RUN); - } - } - - spin_unlock_irqrestore(&d40c->lock, flags); - return res; -} - static bool d40_is_paused(struct d40_chan *d40c) { bool is_paused = false; @@ -1381,16 +1551,22 @@ static bool d40_is_paused(struct d40_chan *d40c) } if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || - d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) + d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); - else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) + status = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDLNK); + } else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); - else { + status = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSLNK); + } else { dev_err(&d40c->chan.dev->device, "[%s] Unknown direction\n", __func__); goto _exit; } - status = d40_chan_has_events(d40c); + status = (status & D40_EVENTLINE_MASK(event)) >> D40_EVENTLINE_POS(event); @@ -1403,64 +1579,6 @@ _exit: } -static bool d40_tx_is_linked(struct d40_chan *d40c) -{ - bool is_link; - - if (d40c->log_num != D40_PHY_CHAN) - is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK; - else - is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDLNK) & - D40_SREG_LNK_PHYS_LNK_MASK; - return is_link; -} - -static u32 d40_residue(struct d40_chan *d40c) -{ - u32 num_elt; - - if (d40c->log_num != D40_PHY_CHAN) - num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) - >> D40_MEM_LCSP2_ECNT_POS; - else - num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDELT) & - D40_SREG_ELEM_PHY_ECNT_MASK) >> - D40_SREG_ELEM_PHY_ECNT_POS; - return num_elt * (1 << d40c->dma_cfg.dst_info.data_width); -} - -static int d40_resume(struct dma_chan *chan) -{ - struct d40_chan *d40c = - container_of(chan, struct d40_chan, chan); - int res = 0; - unsigned long flags; - - spin_lock_irqsave(&d40c->lock, flags); - - if (d40c->base->rev == 0) - if (d40c->log_num != D40_PHY_CHAN) { - res = d40_channel_execute_command(d40c, - D40_DMA_SUSPEND_REQ); - goto no_suspend; - } - - /* If bytes left to transfer or linked tx resume job */ - if (d40_residue(d40c) || d40_tx_is_linked(d40c)) { - if (d40c->log_num != D40_PHY_CHAN) - d40_config_set_event(d40c, true); - res = d40_channel_execute_command(d40c, D40_DMA_RUN); - } - -no_suspend: - spin_unlock_irqrestore(&d40c->lock, flags); - return res; -} - static u32 stedma40_residue(struct dma_chan *chan) { struct d40_chan *d40c = @@ -1475,51 +1593,6 @@ static u32 stedma40_residue(struct dma_chan *chan) return bytes_left; } -/* Public DMA functions in addition to the DMA engine framework */ - -int stedma40_set_psize(struct dma_chan *chan, - int src_psize, - int dst_psize) -{ - struct d40_chan *d40c = - container_of(chan, struct d40_chan, chan); - unsigned long flags; - - spin_lock_irqsave(&d40c->lock, flags); - - if (d40c->log_num != D40_PHY_CHAN) { - d40c->log_def.lcsp1 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK; - d40c->log_def.lcsp3 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK; - d40c->log_def.lcsp1 |= src_psize << - D40_MEM_LCSP1_SCFG_PSIZE_POS; - d40c->log_def.lcsp3 |= dst_psize << - D40_MEM_LCSP1_SCFG_PSIZE_POS; - goto out; - } - - if (src_psize == STEDMA40_PSIZE_PHY_1) - d40c->src_def_cfg &= ~(1 << D40_SREG_CFG_PHY_PEN_POS); - else { - d40c->src_def_cfg |= 1 << D40_SREG_CFG_PHY_PEN_POS; - d40c->src_def_cfg &= ~(STEDMA40_PSIZE_PHY_16 << - D40_SREG_CFG_PSIZE_POS); - d40c->src_def_cfg |= src_psize << D40_SREG_CFG_PSIZE_POS; - } - - if (dst_psize == STEDMA40_PSIZE_PHY_1) - d40c->dst_def_cfg &= ~(1 << D40_SREG_CFG_PHY_PEN_POS); - else { - d40c->dst_def_cfg |= 1 << D40_SREG_CFG_PHY_PEN_POS; - d40c->dst_def_cfg &= ~(STEDMA40_PSIZE_PHY_16 << - D40_SREG_CFG_PSIZE_POS); - d40c->dst_def_cfg |= dst_psize << D40_SREG_CFG_PSIZE_POS; - } -out: - spin_unlock_irqrestore(&d40c->lock, flags); - return 0; -} -EXPORT_SYMBOL(stedma40_set_psize); - struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, struct scatterlist *sgl_dst, struct scatterlist *sgl_src, @@ -1545,21 +1618,10 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, goto err; d40d->lli_len = sgl_len; - d40d->lli_tx_len = d40d->lli_len; + d40d->lli_current = 0; d40d->txd.flags = dma_flags; if (d40c->log_num != D40_PHY_CHAN) { - if (d40d->lli_len > d40c->base->plat_data->llis_per_log) - d40d->lli_tx_len = d40c->base->plat_data->llis_per_log; - - if (sgl_len > 1) - /* - * Check if there is space available in lcla. If not, - * split list into 1-length and run only in lcpa - * space. - */ - if (d40_lcla_id_get(d40c) != 0) - d40d->lli_tx_len = 1; if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) { dev_err(&d40c->chan.dev->device, @@ -1567,27 +1629,17 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, goto err; } - (void) d40_log_sg_to_lli(d40c->lcla.src_id, - sgl_src, + (void) d40_log_sg_to_lli(sgl_src, sgl_len, d40d->lli_log.src, d40c->log_def.lcsp1, - d40c->dma_cfg.src_info.data_width, - dma_flags & DMA_PREP_INTERRUPT, - d40d->lli_tx_len, - d40c->base->plat_data->llis_per_log); + d40c->dma_cfg.src_info.data_width); - (void) d40_log_sg_to_lli(d40c->lcla.dst_id, - sgl_dst, + (void) d40_log_sg_to_lli(sgl_dst, sgl_len, d40d->lli_log.dst, d40c->log_def.lcsp3, - d40c->dma_cfg.dst_info.data_width, - dma_flags & DMA_PREP_INTERRUPT, - d40d->lli_tx_len, - d40c->base->plat_data->llis_per_log); - - + d40c->dma_cfg.dst_info.data_width); } else { if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) { dev_err(&d40c->chan.dev->device, @@ -1599,11 +1651,10 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, sgl_len, 0, d40d->lli_phy.src, - d40d->lli_phy.src_addr, + virt_to_phys(d40d->lli_phy.src), d40c->src_def_cfg, d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.src_info.psize, - true); + d40c->dma_cfg.src_info.psize); if (res < 0) goto err; @@ -1612,11 +1663,10 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, sgl_len, 0, d40d->lli_phy.dst, - d40d->lli_phy.dst_addr, + virt_to_phys(d40d->lli_phy.dst), d40c->dst_def_cfg, d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.dst_info.psize, - true); + d40c->dma_cfg.dst_info.psize); if (res < 0) goto err; @@ -1633,6 +1683,8 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, return &d40d->txd; err: + if (d40d) + d40_desc_free(d40c, d40d); spin_unlock_irqrestore(&d40c->lock, flags); return NULL; } @@ -1673,6 +1725,7 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) * use default configuration (memcpy) */ if (d40c->dma_cfg.channel_type == 0) { + err = d40_config_memcpy(d40c); if (err) { dev_err(&d40c->chan.dev->device, @@ -1712,14 +1765,8 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) * resource is free. In case of multiple logical channels * on the same physical resource, only the first write is necessary. */ - if (is_free_phy) { - err = d40_config_write(d40c); - if (err) { - dev_err(&d40c->chan.dev->device, - "[%s] Failed to configure channel\n", - __func__); - } - } + if (is_free_phy) + d40_config_write(d40c); fail: spin_unlock_irqrestore(&d40c->lock, flags); return err; @@ -1790,23 +1837,21 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, goto err; } d40d->lli_len = 1; - d40d->lli_tx_len = 1; + d40d->lli_current = 0; d40_log_fill_lli(d40d->lli_log.src, src, size, - 0, d40c->log_def.lcsp1, d40c->dma_cfg.src_info.data_width, - false, true); + true); d40_log_fill_lli(d40d->lli_log.dst, dst, size, - 0, d40c->log_def.lcsp3, d40c->dma_cfg.dst_info.data_width, - true, true); + true); } else { @@ -1851,12 +1896,25 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, err_fill_lli: dev_err(&d40c->chan.dev->device, "[%s] Failed filling in PHY LLI\n", __func__); - d40_pool_lli_free(d40d); err: + if (d40d) + d40_desc_free(d40c, d40d); spin_unlock_irqrestore(&d40c->lock, flags); return NULL; } +static struct dma_async_tx_descriptor * +d40_prep_sg(struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long dma_flags) +{ + if (dst_nents != src_nents) + return NULL; + + return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags); +} + static int d40_prep_slave_sg_log(struct d40_desc *d40d, struct d40_chan *d40c, struct scatterlist *sgl, @@ -1874,19 +1932,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, } d40d->lli_len = sg_len; - if (d40d->lli_len <= d40c->base->plat_data->llis_per_log) - d40d->lli_tx_len = d40d->lli_len; - else - d40d->lli_tx_len = d40c->base->plat_data->llis_per_log; - - if (sg_len > 1) - /* - * Check if there is space available in lcla. - * If not, split list into 1-length and run only - * in lcpa space. - */ - if (d40_lcla_id_get(d40c) != 0) - d40d->lli_tx_len = 1; + d40d->lli_current = 0; if (direction == DMA_FROM_DEVICE) if (d40c->runtime_addr) @@ -1902,16 +1948,13 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, else return -EINVAL; - total_size = d40_log_sg_to_dev(&d40c->lcla, - sgl, sg_len, + total_size = d40_log_sg_to_dev(sgl, sg_len, &d40d->lli_log, &d40c->log_def, d40c->dma_cfg.src_info.data_width, d40c->dma_cfg.dst_info.data_width, direction, - dma_flags & DMA_PREP_INTERRUPT, - dev_addr, d40d->lli_tx_len, - d40c->base->plat_data->llis_per_log); + dev_addr); if (total_size < 0) return -EINVAL; @@ -1937,7 +1980,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, } d40d->lli_len = sgl_len; - d40d->lli_tx_len = sgl_len; + d40d->lli_current = 0; if (direction == DMA_FROM_DEVICE) { dst_dev_addr = 0; @@ -1958,11 +2001,10 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, sgl_len, src_dev_addr, d40d->lli_phy.src, - d40d->lli_phy.src_addr, + virt_to_phys(d40d->lli_phy.src), d40c->src_def_cfg, d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.src_info.psize, - true); + d40c->dma_cfg.src_info.psize); if (res < 0) return res; @@ -1970,11 +2012,10 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, sgl_len, dst_dev_addr, d40d->lli_phy.dst, - d40d->lli_phy.dst_addr, + virt_to_phys(d40d->lli_phy.dst), d40c->dst_def_cfg, d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.dst_info.psize, - true); + d40c->dma_cfg.dst_info.psize); if (res < 0) return res; @@ -2001,17 +2042,11 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, return ERR_PTR(-EINVAL); } - if (d40c->dma_cfg.pre_transfer) - d40c->dma_cfg.pre_transfer(chan, - d40c->dma_cfg.pre_transfer_data, - sg_dma_len(sgl)); - spin_lock_irqsave(&d40c->lock, flags); d40d = d40_desc_get(d40c); - spin_unlock_irqrestore(&d40c->lock, flags); if (d40d == NULL) - return NULL; + goto err; if (d40c->log_num != D40_PHY_CHAN) err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len, @@ -2024,7 +2059,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, "[%s] Failed to prepare %s slave sg job: %d\n", __func__, d40c->log_num != D40_PHY_CHAN ? "log" : "phy", err); - return NULL; + goto err; } d40d->txd.flags = dma_flags; @@ -2033,7 +2068,14 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, d40d->txd.tx_submit = d40_tx_submit; + spin_unlock_irqrestore(&d40c->lock, flags); return &d40d->txd; + +err: + if (d40d) + d40_desc_free(d40c, d40d); + spin_unlock_irqrestore(&d40c->lock, flags); + return NULL; } static enum dma_status d40_tx_status(struct dma_chan *chan, @@ -2166,14 +2208,25 @@ static void d40_set_runtime_config(struct dma_chan *chan, return; } - if (config_maxburst >= 16) - psize = STEDMA40_PSIZE_LOG_16; - else if (config_maxburst >= 8) - psize = STEDMA40_PSIZE_LOG_8; - else if (config_maxburst >= 4) - psize = STEDMA40_PSIZE_LOG_4; - else - psize = STEDMA40_PSIZE_LOG_1; + if (d40c->log_num != D40_PHY_CHAN) { + if (config_maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (config_maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (config_maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + } else { + if (config_maxburst >= 16) + psize = STEDMA40_PSIZE_PHY_16; + else if (config_maxburst >= 8) + psize = STEDMA40_PSIZE_PHY_8; + else if (config_maxburst >= 4) + psize = STEDMA40_PSIZE_PHY_4; + else + psize = STEDMA40_PSIZE_PHY_1; + } /* Set up all the endpoint configs */ cfg->src_info.data_width = addr_width; @@ -2185,6 +2238,13 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dst_info.endianess = STEDMA40_LITTLE_ENDIAN; cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; + /* Fill in register values */ + if (d40c->log_num != D40_PHY_CHAN) + d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); + else + d40_phy_cfg(cfg, &d40c->src_def_cfg, + &d40c->dst_def_cfg, false); + /* These settings will take precedence later */ d40c->runtime_addr = config_addr; d40c->runtime_direction = config->direction; @@ -2247,10 +2307,6 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, d40c->base = base; d40c->chan.device = dma; - /* Invalidate lcla element */ - d40c->lcla.src_id = -1; - d40c->lcla.dst_id = -1; - spin_lock_init(&d40c->lock); d40c->log_num = D40_PHY_CHAN; @@ -2281,6 +2337,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources; base->dma_slave.device_free_chan_resources = d40_free_chan_resources; base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy; + base->dma_slave.device_prep_dma_sg = d40_prep_sg; base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg; base->dma_slave.device_tx_status = d40_tx_status; base->dma_slave.device_issue_pending = d40_issue_pending; @@ -2301,10 +2358,12 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_zero(base->dma_memcpy.cap_mask); dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); + dma_cap_set(DMA_SG, base->dma_slave.cap_mask); base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources; base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources; base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy; + base->dma_slave.device_prep_dma_sg = d40_prep_sg; base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg; base->dma_memcpy.device_tx_status = d40_tx_status; base->dma_memcpy.device_issue_pending = d40_issue_pending; @@ -2331,10 +2390,12 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_zero(base->dma_both.cap_mask); dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask); dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask); + dma_cap_set(DMA_SG, base->dma_slave.cap_mask); base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources; base->dma_both.device_free_chan_resources = d40_free_chan_resources; base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy; + base->dma_slave.device_prep_dma_sg = d40_prep_sg; base->dma_both.device_prep_slave_sg = d40_prep_slave_sg; base->dma_both.device_tx_status = d40_tx_status; base->dma_both.device_issue_pending = d40_issue_pending; @@ -2387,9 +2448,11 @@ static int __init d40_phy_res_init(struct d40_base *base) /* Mark disabled channels as occupied */ for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) { - base->phy_res[i].allocated_src = D40_ALLOC_PHY; - base->phy_res[i].allocated_dst = D40_ALLOC_PHY; - num_phy_chans_avail--; + int chan = base->plat_data->disabled_channels[i]; + + base->phy_res[chan].allocated_src = D40_ALLOC_PHY; + base->phy_res[chan].allocated_dst = D40_ALLOC_PHY; + num_phy_chans_avail--; } dev_info(base->dev, "%d of %d physical DMA channels available\n", @@ -2441,6 +2504,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) int num_phy_chans; int i; u32 val; + u32 rev; clk = clk_get(&pdev->dev, NULL); @@ -2479,21 +2543,26 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) } } - /* Get silicon revision */ + /* Get silicon revision and designer */ val = readl(virtbase + D40_DREG_PERIPHID2); - if ((val & 0xf) != D40_PERIPHID2_DESIGNER) { + if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) != + D40_HW_DESIGNER) { dev_err(&pdev->dev, "[%s] Unknown designer! Got %x wanted %x\n", - __func__, val & 0xf, D40_PERIPHID2_DESIGNER); + __func__, val & D40_DREG_PERIPHID2_DESIGNER_MASK, + D40_HW_DESIGNER); goto failure; } + rev = (val & D40_DREG_PERIPHID2_REV_MASK) >> + D40_DREG_PERIPHID2_REV_POS; + /* The number of physical channels on this HW */ num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n", - (val >> 4) & 0xf, res->start); + rev, res->start); plat_data = pdev->dev.platform_data; @@ -2515,7 +2584,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) goto failure; } - base->rev = (val >> 4) & 0xf; + base->rev = rev; base->clk = clk; base->num_phy_chans = num_phy_chans; base->num_log_chans = num_log_chans; @@ -2549,7 +2618,10 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) if (!base->lookup_log_chans) goto failure; } - base->lcla_pool.alloc_map = kzalloc(num_phy_chans * sizeof(u32), + + base->lcla_pool.alloc_map = kzalloc(num_phy_chans * + sizeof(struct d40_desc *) * + D40_LCLA_LINK_PER_EVENT_GRP, GFP_KERNEL); if (!base->lcla_pool.alloc_map) goto failure; @@ -2563,7 +2635,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) return base; failure: - if (clk) { + if (!IS_ERR(clk)) { clk_disable(clk); clk_put(clk); } @@ -2700,8 +2772,10 @@ static int __init d40_lcla_allocate(struct d40_base *base) if (i < MAX_LCLA_ALLOC_ATTEMPTS) { base->lcla_pool.base = (void *)page_list[i]; } else { - /* After many attempts, no succees with finding the correct - * alignment try with allocating a big buffer */ + /* + * After many attempts and no succees with finding the correct + * alignment, try with allocating a big buffer. + */ dev_warn(base->dev, "[%s] Failed to get %d pages @ 18 bit align.\n", __func__, base->lcla_pool.pages); @@ -2794,8 +2868,6 @@ static int __init d40_probe(struct platform_device *pdev) spin_lock_init(&base->lcla_pool.lock); - base->lcla_pool.num_blocks = base->num_phy_chans; - base->irq = platform_get_irq(pdev, 0); ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base); @@ -2823,8 +2895,9 @@ failure: if (!base->lcla_pool.base_unaligned && base->lcla_pool.base) free_pages((unsigned long)base->lcla_pool.base, base->lcla_pool.pages); - if (base->lcla_pool.base_unaligned) - kfree(base->lcla_pool.base_unaligned); + + kfree(base->lcla_pool.base_unaligned); + if (base->phy_lcpa) release_mem_region(base->phy_lcpa, base->lcpa_size); diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index d937f76d6e2e..86a306dbe1b4 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -1,10 +1,8 @@ /* - * driver/dma/ste_dma40_ll.c - * - * Copyright (C) ST-Ericsson 2007-2010 + * Copyright (C) ST-Ericsson SA 2007-2010 + * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson * License terms: GNU General Public License (GPL) version 2 - * Author: Per Friden <per.friden@stericsson.com> - * Author: Jonas Aaberg <jonas.aberg@stericsson.com> */ #include <linux/kernel.h> @@ -39,16 +37,13 @@ void d40_log_cfg(struct stedma40_chan_cfg *cfg, cfg->dir == STEDMA40_PERIPH_TO_PERIPH) l3 |= 1 << D40_MEM_LCSP3_DCFG_MST_POS; - l3 |= 1 << D40_MEM_LCSP3_DCFG_TIM_POS; l3 |= 1 << D40_MEM_LCSP3_DCFG_EIM_POS; l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS; l3 |= cfg->dst_info.data_width << D40_MEM_LCSP3_DCFG_ESIZE_POS; - l3 |= 1 << D40_MEM_LCSP3_DTCP_POS; l1 |= 1 << D40_MEM_LCSP1_SCFG_EIM_POS; l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; l1 |= cfg->src_info.data_width << D40_MEM_LCSP1_SCFG_ESIZE_POS; - l1 |= 1 << D40_MEM_LCSP1_STCP_POS; *lcsp1 = l1; *lcsp3 = l3; @@ -197,8 +192,7 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, dma_addr_t lli_phys, u32 reg_cfg, u32 data_width, - int psize, - bool term_int) + int psize) { int total_size = 0; int i; @@ -238,7 +232,7 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, } return total_size; - err: +err: return err; } @@ -271,11 +265,59 @@ void d40_phy_lli_write(void __iomem *virtbase, /* DMA logical lli operations */ +static void d40_log_lli_link(struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next) +{ + u32 slos = 0; + u32 dlos = 0; + + if (next != -EINVAL) { + slos = next * 2; + dlos = next * 2 + 1; + } else { + lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK; + lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK; + } + + lli_src->lcsp13 = (lli_src->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) | + (slos << D40_MEM_LCSP1_SLOS_POS); + + lli_dst->lcsp13 = (lli_dst->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) | + (dlos << D40_MEM_LCSP1_SLOS_POS); +} + +void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next) +{ + d40_log_lli_link(lli_dst, lli_src, next); + + writel(lli_src->lcsp02, &lcpa[0].lcsp0); + writel(lli_src->lcsp13, &lcpa[0].lcsp1); + writel(lli_dst->lcsp02, &lcpa[0].lcsp2); + writel(lli_dst->lcsp13, &lcpa[0].lcsp3); +} + +void d40_log_lli_lcla_write(struct d40_log_lli *lcla, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next) +{ + d40_log_lli_link(lli_dst, lli_src, next); + + writel(lli_src->lcsp02, &lcla[0].lcsp02); + writel(lli_src->lcsp13, &lcla[0].lcsp13); + writel(lli_dst->lcsp02, &lcla[1].lcsp02); + writel(lli_dst->lcsp13, &lcla[1].lcsp13); +} + void d40_log_fill_lli(struct d40_log_lli *lli, dma_addr_t data, u32 data_size, - u32 lli_next_off, u32 reg_cfg, + u32 reg_cfg, u32 data_width, - bool term_int, bool addr_inc) + bool addr_inc) { lli->lcsp13 = reg_cfg; @@ -290,165 +332,69 @@ void d40_log_fill_lli(struct d40_log_lli *lli, if (addr_inc) lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK; - lli->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK; - /* If this scatter list entry is the last one, no next link */ - lli->lcsp13 |= (lli_next_off << D40_MEM_LCSP1_SLOS_POS) & - D40_MEM_LCSP1_SLOS_MASK; - - if (term_int) - lli->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK; - else - lli->lcsp13 &= ~D40_MEM_LCSP1_SCFG_TIM_MASK; } -int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, - struct scatterlist *sg, +int d40_log_sg_to_dev(struct scatterlist *sg, int sg_len, struct d40_log_lli_bidir *lli, struct d40_def_lcsp *lcsp, u32 src_data_width, u32 dst_data_width, enum dma_data_direction direction, - bool term_int, dma_addr_t dev_addr, int max_len, - int llis_per_log) + dma_addr_t dev_addr) { int total_size = 0; struct scatterlist *current_sg = sg; int i; - u32 next_lli_off_dst = 0; - u32 next_lli_off_src = 0; for_each_sg(sg, current_sg, sg_len, i) { total_size += sg_dma_len(current_sg); - /* - * If this scatter list entry is the last one or - * max length, terminate link. - */ - if (sg_len - 1 == i || ((i+1) % max_len == 0)) { - next_lli_off_src = 0; - next_lli_off_dst = 0; - } else { - if (next_lli_off_dst == 0 && - next_lli_off_src == 0) { - /* The first lli will be at next_lli_off */ - next_lli_off_dst = (lcla->dst_id * - llis_per_log + 1); - next_lli_off_src = (lcla->src_id * - llis_per_log + 1); - } else { - next_lli_off_dst++; - next_lli_off_src++; - } - } - if (direction == DMA_TO_DEVICE) { d40_log_fill_lli(&lli->src[i], sg_phys(current_sg), sg_dma_len(current_sg), - next_lli_off_src, lcsp->lcsp1, src_data_width, - false, true); d40_log_fill_lli(&lli->dst[i], dev_addr, sg_dma_len(current_sg), - next_lli_off_dst, lcsp->lcsp3, dst_data_width, - /* No next == terminal interrupt */ - term_int && !next_lli_off_dst, false); } else { d40_log_fill_lli(&lli->dst[i], sg_phys(current_sg), sg_dma_len(current_sg), - next_lli_off_dst, lcsp->lcsp3, dst_data_width, - /* No next == terminal interrupt */ - term_int && !next_lli_off_dst, true); d40_log_fill_lli(&lli->src[i], dev_addr, sg_dma_len(current_sg), - next_lli_off_src, lcsp->lcsp1, src_data_width, - false, false); } } return total_size; } -int d40_log_sg_to_lli(int lcla_id, - struct scatterlist *sg, +int d40_log_sg_to_lli(struct scatterlist *sg, int sg_len, struct d40_log_lli *lli_sg, u32 lcsp13, /* src or dst*/ - u32 data_width, - bool term_int, int max_len, int llis_per_log) + u32 data_width) { int total_size = 0; struct scatterlist *current_sg = sg; int i; - u32 next_lli_off = 0; for_each_sg(sg, current_sg, sg_len, i) { total_size += sg_dma_len(current_sg); - /* - * If this scatter list entry is the last one or - * max length, terminate link. - */ - if (sg_len - 1 == i || ((i+1) % max_len == 0)) - next_lli_off = 0; - else { - if (next_lli_off == 0) - /* The first lli will be at next_lli_off */ - next_lli_off = lcla_id * llis_per_log + 1; - else - next_lli_off++; - } - d40_log_fill_lli(&lli_sg[i], sg_phys(current_sg), sg_dma_len(current_sg), - next_lli_off, lcsp13, data_width, - term_int && !next_lli_off, true); } return total_size; } - -int d40_log_lli_write(struct d40_log_lli_full *lcpa, - struct d40_log_lli *lcla_src, - struct d40_log_lli *lcla_dst, - struct d40_log_lli *lli_dst, - struct d40_log_lli *lli_src, - int llis_per_log) -{ - u32 slos; - u32 dlos; - int i; - - writel(lli_src->lcsp02, &lcpa->lcsp0); - writel(lli_src->lcsp13, &lcpa->lcsp1); - writel(lli_dst->lcsp02, &lcpa->lcsp2); - writel(lli_dst->lcsp13, &lcpa->lcsp3); - - slos = lli_src->lcsp13 & D40_MEM_LCSP1_SLOS_MASK; - dlos = lli_dst->lcsp13 & D40_MEM_LCSP3_DLOS_MASK; - - for (i = 0; (i < llis_per_log) && slos && dlos; i++) { - writel(lli_src[i + 1].lcsp02, &lcla_src[i].lcsp02); - writel(lli_src[i + 1].lcsp13, &lcla_src[i].lcsp13); - writel(lli_dst[i + 1].lcsp02, &lcla_dst[i].lcsp02); - writel(lli_dst[i + 1].lcsp13, &lcla_dst[i].lcsp13); - - slos = lli_src[i + 1].lcsp13 & D40_MEM_LCSP1_SLOS_MASK; - dlos = lli_dst[i + 1].lcsp13 & D40_MEM_LCSP3_DLOS_MASK; - } - - return i; - -} diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 9c0fa2f5fe57..37f81e84cd13 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -1,10 +1,8 @@ /* - * driver/dma/ste_dma40_ll.h - * - * Copyright (C) ST-Ericsson 2007-2010 + * Copyright (C) ST-Ericsson SA 2007-2010 + * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson SA + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson SA * License terms: GNU General Public License (GPL) version 2 - * Author: Per Friden <per.friden@stericsson.com> - * Author: Jonas Aaberg <jonas.aberg@stericsson.com> */ #ifndef STE_DMA40_LL_H #define STE_DMA40_LL_H @@ -163,6 +161,9 @@ #define D40_DREG_PERIPHID0 0xFE0 #define D40_DREG_PERIPHID1 0xFE4 #define D40_DREG_PERIPHID2 0xFE8 +#define D40_DREG_PERIPHID2_REV_POS 4 +#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS) +#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf #define D40_DREG_PERIPHID3 0xFEC #define D40_DREG_CELLID0 0xFF0 #define D40_DREG_CELLID1 0xFF4 @@ -199,8 +200,6 @@ struct d40_phy_lli { * * @src: Register settings for src channel. * @dst: Register settings for dst channel. - * @dst_addr: Physical destination address. - * @src_addr: Physical source address. * * All DMA transfers have a source and a destination. */ @@ -208,8 +207,6 @@ struct d40_phy_lli { struct d40_phy_lli_bidir { struct d40_phy_lli *src; struct d40_phy_lli *dst; - dma_addr_t dst_addr; - dma_addr_t src_addr; }; @@ -271,29 +268,16 @@ struct d40_def_lcsp { u32 lcsp1; }; -/** - * struct d40_lcla_elem - Info for one LCA element. - * - * @src_id: logical channel src id - * @dst_id: logical channel dst id - * @src: LCPA formated src parameters - * @dst: LCPA formated dst parameters - * - */ -struct d40_lcla_elem { - int src_id; - int dst_id; - struct d40_log_lli *src; - struct d40_log_lli *dst; -}; - /* Physical channels */ void d40_phy_cfg(struct stedma40_chan_cfg *cfg, - u32 *src_cfg, u32 *dst_cfg, bool is_log); + u32 *src_cfg, + u32 *dst_cfg, + bool is_log); void d40_log_cfg(struct stedma40_chan_cfg *cfg, - u32 *lcsp1, u32 *lcsp2); + u32 *lcsp1, + u32 *lcsp2); int d40_phy_sg_to_lli(struct scatterlist *sg, int sg_len, @@ -302,8 +286,7 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, dma_addr_t lli_phys, u32 reg_cfg, u32 data_width, - int psize, - bool term_int); + int psize); int d40_phy_fill_lli(struct d40_phy_lli *lli, dma_addr_t data, @@ -323,35 +306,35 @@ void d40_phy_lli_write(void __iomem *virtbase, /* Logical channels */ void d40_log_fill_lli(struct d40_log_lli *lli, - dma_addr_t data, u32 data_size, - u32 lli_next_off, u32 reg_cfg, + dma_addr_t data, + u32 data_size, + u32 reg_cfg, u32 data_width, - bool term_int, bool addr_inc); + bool addr_inc); -int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, - struct scatterlist *sg, +int d40_log_sg_to_dev(struct scatterlist *sg, int sg_len, struct d40_log_lli_bidir *lli, struct d40_def_lcsp *lcsp, u32 src_data_width, u32 dst_data_width, enum dma_data_direction direction, - bool term_int, dma_addr_t dev_addr, int max_len, - int llis_per_log); - -int d40_log_lli_write(struct d40_log_lli_full *lcpa, - struct d40_log_lli *lcla_src, - struct d40_log_lli *lcla_dst, - struct d40_log_lli *lli_dst, - struct d40_log_lli *lli_src, - int llis_per_log); - -int d40_log_sg_to_lli(int lcla_id, - struct scatterlist *sg, + dma_addr_t dev_addr); + +int d40_log_sg_to_lli(struct scatterlist *sg, int sg_len, struct d40_log_lli *lli_sg, u32 lcsp13, /* src or dst*/ - u32 data_width, - bool term_int, int max_len, int llis_per_log); + u32 data_width); + +void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next); + +void d40_log_lli_lcla_write(struct d40_log_lli *lcla, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next); #endif /* STE_DMA40_LLI_H */ |