From c6303ab9b91e7ca20a49ff494338309259ed7c65 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 24 Aug 2020 11:03:08 +1200 Subject: arm64: mm: reserve per-numa CMA to localize coherent dma buffers Right now, smmu is using dma_alloc_coherent() to get memory to save queues and tables. Typically, on ARM64 server, there is a default CMA located at node0, which could be far away from node2, node3 etc. with this patch, smmu will get memory from local numa node to save command queues and page tables. that means dma_unmap latency will be shrunk much. Meanwhile, when iommu.passthrough is on, device drivers which call dma_ alloc_coherent() will also get local memory and avoid the travel between numa nodes. Acked-by: Will Deacon Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- arch/arm64/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64/mm/init.c') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 481d22c32a2e..f1c75957ff3c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -429,6 +429,8 @@ void __init bootmem_init(void) arm64_hugetlb_cma_reserve(); #endif + dma_pernuma_cma_reserve(); + /* * sparse_init() tries to allocate memory from memblock, so must be * done after the fixed reservations -- cgit v1.2.3 From 0b1abd1fb7efafc25231c54a67c6fbb3d3127efd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:56:52 +0200 Subject: dma-mapping: merge into Merge dma-contiguous.h into dma-map-ops.h, after removing the comment describing the contiguous allocator into kernel/dma/contigous.c. Signed-off-by: Christoph Hellwig --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/arm/mach-davinci/devices-da8xx.c | 2 +- arch/arm/mach-shmobile/setup-rcar-gen2.c | 2 +- arch/arm/mm/dma-mapping.c | 1 - arch/arm/mm/init.c | 2 +- arch/arm64/mm/init.c | 3 +- arch/csky/kernel/setup.c | 2 +- arch/csky/mm/dma-mapping.c | 3 +- arch/microblaze/mm/init.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/mips/mm/dma-noncoherent.c | 1 - arch/s390/kernel/setup.c | 2 +- arch/x86/include/asm/dma-mapping.h | 1 - arch/x86/kernel/setup.c | 2 +- arch/xtensa/kernel/pci-dma.c | 2 +- arch/xtensa/mm/init.c | 2 +- drivers/dma-buf/heaps/cma_heap.c | 2 +- drivers/iommu/amd/iommu.c | 3 +- drivers/iommu/dma-iommu.c | 1 - drivers/iommu/intel/iommu.c | 2 +- drivers/media/platform/exynos4-is/fimc-is.c | 1 - include/linux/dma-contiguous.h | 135 ------------------------ include/linux/dma-map-ops.h | 65 ++++++++++++ kernel/dma/Kconfig | 2 +- kernel/dma/contiguous.c | 30 +++++- kernel/dma/direct.c | 1 - kernel/dma/ops_helpers.c | 1 - kernel/dma/pool.c | 2 +- 28 files changed, 112 insertions(+), 164 deletions(-) delete mode 100644 include/linux/dma-contiguous.h (limited to 'arch/arm64/mm/init.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e464cf0b5025..7657e00e83ca 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -597,7 +597,7 @@ placement constraint by the physical address range of memory allocations. A value of 0 disables CMA altogether. For more information, see - include/linux/dma-contiguous.h + kernel/dma/contiguous.c cma_pernuma=nn[MG] [ARM64,KNL] diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 1207eabe8d1c..bb368938fc49 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index c42ff8c314c8..e00f5b3b9293 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8bf0bc6bc311..154c24cec94c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 000c1b48e973..ab1d1931a352 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f1c75957ff3c..1b591ddb28b0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 0481f4e34538..e4cab16056d6 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 8f6571ae27c8..3d9ff26c4bb4 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -2,8 +2,7 @@ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. #include -#include -#include +#include #include #include #include diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 3344d4a1fe89..7659a8b86580 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -7,7 +7,7 @@ * for more details. */ -#include +#include #include #include #include diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index bf5f5acab0a8..464bfd3957ae 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index f4e8404ee049..22d99ccc70c8 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c2c1b4e723ea..151092565a27 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index fed67eafcacc..e0c380b3ec14 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -11,7 +11,6 @@ #include #include #include -#include extern int iommu_merge; extern int panic_on_overflow; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9286fa9d575e..e8155e85bd8f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 17c4384f8495..790dbe022a07 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -11,7 +11,7 @@ * Joe Taylor */ -#include +#include #include #include #include diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index a05b306cf371..8079007842ac 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 626cf7fd033a..e55384dc115b 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 10e4200d3552..5396eb8d730b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d2e3f2622815..22c221bba13e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index bd3470142b06..0c5b4500ae83 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index a474014f0a0f..32f5982afa1a 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h deleted file mode 100644 index f9ce1ee58d41..000000000000 --- a/include/linux/dma-contiguous.h +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef __LINUX_CMA_H -#define __LINUX_CMA_H - -/* - * Contiguous Memory Allocator for DMA mapping framework - * Copyright (c) 2010-2011 by Samsung Electronics. - * Written by: - * Marek Szyprowski - * Michal Nazarewicz - */ - -/* - * Contiguous Memory Allocator - * - * The Contiguous Memory Allocator (CMA) makes it possible to - * allocate big contiguous chunks of memory after the system has - * booted. - * - * Why is it needed? - * - * Various devices on embedded systems have no scatter-getter and/or - * IO map support and require contiguous blocks of memory to - * operate. They include devices such as cameras, hardware video - * coders, etc. - * - * Such devices often require big memory buffers (a full HD frame - * is, for instance, more then 2 mega pixels large, i.e. more than 6 - * MB of memory), which makes mechanisms such as kmalloc() or - * alloc_page() ineffective. - * - * At the same time, a solution where a big memory region is - * reserved for a device is suboptimal since often more memory is - * reserved then strictly required and, moreover, the memory is - * inaccessible to page system even if device drivers don't use it. - * - * CMA tries to solve this issue by operating on memory regions - * where only movable pages can be allocated from. This way, kernel - * can use the memory for pagecache and when device driver requests - * it, allocated pages can be migrated. - * - * Driver usage - * - * CMA should not be used by the device drivers directly. It is - * only a helper framework for dma-mapping subsystem. - * - * For more information, see kernel-docs in kernel/dma/contiguous.c - */ - -#ifdef __KERNEL__ - -#include -#include - -struct cma; -struct page; - -#ifdef CONFIG_DMA_CMA - -extern struct cma *dma_contiguous_default_area; - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} - -void dma_contiguous_reserve(phys_addr_t addr_limit); - -int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed); - -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn); -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count); -struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); -void dma_free_contiguous(struct device *dev, struct page *page, size_t size); - -#else - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - return NULL; -} - -static inline void dma_contiguous_reserve(phys_addr_t limit) { } - -static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed) -{ - return -ENOSYS; -} - -static inline -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn) -{ - return NULL; -} - -static inline -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) -{ - return false; -} - -/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ -static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, - gfp_t gfp) -{ - return NULL; -} - -static inline void dma_free_contiguous(struct device *dev, struct page *page, - size_t size) -{ - __free_pages(page, get_order(size)); -} - -#endif - -#ifdef CONFIG_DMA_PERNUMA_CMA -void dma_pernuma_cma_reserve(void); -#else -static inline void dma_pernuma_cma_reserve(void) { } -#endif - -#endif - -#endif diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4b4ba5bdcf6a..474fc81bd492 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -8,6 +8,8 @@ #include +struct cma; + struct dma_map_ops { void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, @@ -94,6 +96,69 @@ static inline void set_dma_ops(struct device *dev, } #endif /* CONFIG_DMA_OPS */ +#ifdef CONFIG_DMA_CMA +extern struct cma *dma_contiguous_default_area; + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +void dma_contiguous_reserve(phys_addr_t addr_limit); +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma, bool fixed); + +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, + unsigned int order, bool no_warn); +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count); +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); +void dma_free_contiguous(struct device *dev, struct page *page, size_t size); +#else /* CONFIG_DMA_CMA */ +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + return NULL; +} +static inline void dma_contiguous_reserve(phys_addr_t limit) +{ +} +static inline int dma_contiguous_reserve_area(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, struct cma **res_cma, + bool fixed) +{ + return -ENOSYS; +} +static inline struct page *dma_alloc_from_contiguous(struct device *dev, + size_t count, unsigned int order, bool no_warn) +{ + return NULL; +} +static inline bool dma_release_from_contiguous(struct device *dev, + struct page *pages, int count) +{ + return false; +} +/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ +static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, + gfp_t gfp) +{ + return NULL; +} +static inline void dma_free_contiguous(struct device *dev, struct page *page, + size_t size) +{ + __free_pages(page, get_order(size)); +} +#endif /* CONFIG_DMA_CMA*/ + +#ifdef CONFIG_DMA_PERNUMA_CMA +void dma_pernuma_cma_reserve(void); +#else +static inline void dma_pernuma_cma_reserve(void) { } +#endif /* CONFIG_DMA_PERNUMA_CMA */ + #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 647996702bc9..c99de4a21458 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -118,7 +118,7 @@ config DMA_CMA You can disable CMA by specifying "cma=0" on the kernel's command line. - For more information see . + For more information see . If unsure, say "n". if DMA_CMA diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index bf05ec2256e1..6bfb763fff6f 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -5,6 +5,34 @@ * Written by: * Marek Szyprowski * Michal Nazarewicz + * + * Contiguous Memory Allocator + * + * The Contiguous Memory Allocator (CMA) makes it possible to + * allocate big contiguous chunks of memory after the system has + * booted. + * + * Why is it needed? + * + * Various devices on embedded systems have no scatter-getter and/or + * IO map support and require contiguous blocks of memory to + * operate. They include devices such as cameras, hardware video + * coders, etc. + * + * Such devices often require big memory buffers (a full HD frame + * is, for instance, more then 2 mega pixels large, i.e. more than 6 + * MB of memory), which makes mechanisms such as kmalloc() or + * alloc_page() ineffective. + * + * At the same time, a solution where a big memory region is + * reserved for a device is suboptimal since often more memory is + * reserved then strictly required and, moreover, the memory is + * inaccessible to page system even if device drivers don't use it. + * + * CMA tries to solve this issue by operating on memory regions + * where only movable pages can be allocated from. This way, kernel + * can use the memory for pagecache and when device driver requests + * it, allocated pages can be migrated. */ #define pr_fmt(fmt) "cma: " fmt @@ -21,7 +49,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_CMA_SIZE_MBYTES diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 8cf5689a8c40..87697c86f0b8 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index 60d7b6cdfd8d..bc80fd2648bc 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -3,7 +3,6 @@ * Helpers for DMA ops implementations. These generally rely on the fact that * the allocated memory contains normal pages in the direct kernel mapping. */ -#include #include #include diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index fe11643ff9cc..c9fb5c3d8bd0 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -5,7 +5,7 @@ */ #include #include -#include +#include #include #include #include -- cgit v1.2.3