From 66d7780f18eae0232827fcffeaded39a6a168236 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Jul 2019 11:51:56 -0700 Subject: [PATCH 1/2] dma-mapping: check pfn validity in dma_common_{mmap,get_sgtable} Check that the pfn returned from arch_dma_coherent_to_pfn refers to a valid page and reject the mmap / get_sgtable requests otherwise. Based on the arm implementation of the mmap and get_sgtable methods. Signed-off-by: Christoph Hellwig Tested-by: Vignesh Raghavendra --- kernel/dma/mapping.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 1f628e7ac7097a..b945239621d862 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -116,11 +116,16 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, int ret; if (!dev_is_dma_coherent(dev)) { + unsigned long pfn; + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) return -ENXIO; - page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr, - dma_addr)); + /* If the PFN is not valid, we do not have a struct page */ + pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + if (!pfn_valid(pfn)) + return -ENXIO; + page = pfn_to_page(pfn); } else { page = virt_to_page(cpu_addr); } @@ -170,7 +175,11 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, if (!dev_is_dma_coherent(dev)) { if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) return -ENXIO; + + /* If the PFN is not valid, we do not have a struct page */ pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + if (!pfn_valid(pfn)) + return -ENXIO; } else { pfn = page_to_pfn(virt_to_page(cpu_addr)); } From ad3c7b18c5b362be5dbd0f2c0bcf1fd5fd659315 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Jul 2019 11:33:12 +0200 Subject: [PATCH 2/2] arm: use swiotlb for bounce buffering on LPAE configs The DMA API requires that 32-bit DMA masks are always supported, but on arm LPAE configs they do not currently work when memory is present above 4GB. Wire up the swiotlb code like for all other architectures to provide the bounce buffering in that case. Fixes: 21e07dba9fb11 ("scsi: reduce use of block bounce buffers"). Reported-by: Roger Quadros Signed-off-by: Christoph Hellwig Tested-by: Vignesh Raghavendra --- arch/arm/include/asm/dma-mapping.h | 4 +- arch/arm/mm/Kconfig | 5 +++ arch/arm/mm/dma-mapping.c | 61 ++++++++++++++++++++++++++++++ arch/arm/mm/init.c | 5 +++ 4 files changed, 74 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 7e0486ad1318cd..dba9355e24849c 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -18,7 +18,9 @@ extern const struct dma_map_ops arm_coherent_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : NULL; + if (IS_ENABLED(CONFIG_MMU) && !IS_ENABLED(CONFIG_ARM_LPAE)) + return &arm_dma_ops; + return NULL; } #ifdef __arch_page_to_dma diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 820b60a50125b9..c54cd7ed90ba5e 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -663,6 +663,11 @@ config ARM_LPAE depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ !CPU_32v4 && !CPU_32v3 select PHYS_ADDR_T_64BIT + select SWIOTLB + select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_MMAP_PGPROT + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_SYNC_DMA_FOR_CPU help Say Y if you have an ARMv7 processor supporting the LPAE page table format and you would like to access memory beyond the diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4789c60a86e345..6774b03aa405ca 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1125,6 +1126,19 @@ int arm_dma_supported(struct device *dev, u64 mask) static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) { + /* + * When CONFIG_ARM_LPAE is set, physical address can extend above + * 32-bits, which then can't be addressed by devices that only support + * 32-bit DMA. + * Use the generic dma-direct / swiotlb ops code in that case, as that + * handles bounce buffering for us. + * + * Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the + * latter is also selected by the Xen code, but that code for now relies + * on non-NULL dev_dma_ops. To be cleaned up later. + */ + if (IS_ENABLED(CONFIG_ARM_LPAE)) + return NULL; return coherent ? &arm_coherent_dma_ops : &arm_dma_ops; } @@ -2329,6 +2343,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct dma_map_ops *dma_ops; dev->archdata.dma_coherent = coherent; +#ifdef CONFIG_SWIOTLB + dev->dma_coherent = coherent; +#endif /* * Don't override the dma_ops if they have already been set. Ideally @@ -2363,3 +2380,47 @@ void arch_teardown_dma_ops(struct device *dev) /* Let arch_setup_dma_ops() start again from scratch upon re-probe */ set_dma_ops(dev, NULL); } + +#ifdef CONFIG_SWIOTLB +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + __dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), + size, dir); +} + +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + __dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), + size, dir); +} + +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) +{ + return dma_to_pfn(dev, dma_addr); +} + +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + return __get_dma_pgprot(attrs, prot); + return prot; +} + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + return __dma_alloc(dev, size, dma_handle, gfp, + __get_dma_pgprot(attrs, PAGE_KERNEL), false, + attrs, __builtin_return_address(0)); +} + +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + __arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false); +} +#endif /* CONFIG_SWIOTLB */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 4920a206dce936..16d373d587c476 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -463,6 +464,10 @@ static void __init free_highpages(void) */ void __init mem_init(void) { +#ifdef CONFIG_ARM_LPAE + swiotlb_init(1); +#endif + set_max_mapnr(pfn_to_page(max_pfn) - mem_map); /* this will put all unused low memory onto the freelists */