From 30d697fa3a25fed809a873b17531a00282dc1234 Mon Sep 17 00:00:00 2001 From: Salman Qazi Date: Mon, 23 Feb 2009 18:03:04 -0800 Subject: x86: fix performance regression in write() syscall While the introduction of __copy_from_user_nocache (see commit: 0812a579c92fefa57506821fa08e90f47cb6dbdd) may have been an improvement for sufficiently large writes, there is evidence to show that it is deterimental for small writes. Unixbench's fstime test gives the following results for 256 byte writes with MAX_BLOCK of 2000: 2.6.29-rc6 ( 5 samples, each in KB/sec ): 283750, 295200, 294500, 293000, 293300 2.6.29-rc6 + this patch (5 samples, each in KB/sec): 313050, 3106750, 293350, 306300, 307900 2.6.18 395700, 342000, 399100, 366050, 359850 See w_test() in src/fstime.c in unixbench version 4.1.0. Basically, the above test consists of counting how much we can write in this manner: alarm(10); while (!sigalarm) { for (f_blocks = 0; f_blocks < 2000; ++f_blocks) { write(f, buf, 256); } lseek(f, 0L, 0); } Note, there are other components to the write syscall regression that are not addressed here. Signed-off-by: Salman Qazi Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm/uaccess_64.h') diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 84210c479fca..987a2c10fe20 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -192,14 +192,26 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) { might_sleep(); - return __copy_user_nocache(dst, src, size, 1); + /* + * In practice this limit means that large file write()s + * which get chunked to 4K copies get handled via + * non-temporal stores here. Smaller writes get handled + * via regular __copy_from_user(): + */ + if (likely(size >= PAGE_SIZE)) + return __copy_user_nocache(dst, src, size, 1); + else + return __copy_from_user(dst, src, size); } static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { - return __copy_user_nocache(dst, src, size, 0); + if (likely(size >= PAGE_SIZE)) + return __copy_user_nocache(dst, src, size, 0); + else + return __copy_from_user_inatomic(dst, src, size); } unsigned long -- cgit v1.2.3 From 3255aa2eb636a508fc82a73fabbb8aaf2ff23c0f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 08:21:52 +0100 Subject: x86, mm: pass in 'total' to __copy_from_user_*nocache() Impact: cleanup, enable future change Add a 'total bytes copied' parameter to __copy_from_user_*nocache(), and update all the callsites. The parameter is not used yet - architecture code can use it to more intelligently decide whether the copy should be cached or non-temporal. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++-- arch/x86/include/asm/uaccess_64.h | 5 ++--- drivers/gpu/drm/i915/i915_gem.c | 2 +- include/linux/uaccess.h | 4 ++-- mm/filemap.c | 10 ++++++---- mm/filemap_xip.c | 2 +- 6 files changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/x86/include/asm/uaccess_64.h') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 5e06259e90e5..a0ba61386972 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) } static __always_inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { might_fault(); if (__builtin_constant_p(n)) { @@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, - unsigned long n) + unsigned long n, unsigned long total) { return __copy_from_user_ll_nocache_nozero(to, from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 987a2c10fe20..a748253db0c9 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -189,7 +189,7 @@ extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); static inline int __copy_from_user_nocache(void *dst, const void __user *src, - unsigned size) + unsigned size, unsigned long total) { might_sleep(); /* @@ -205,8 +205,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, } static inline int __copy_from_user_inatomic_nocache(void *dst, - const void __user *src, - unsigned size) + const void __user *src, unsigned size, unsigned total) { if (likely(size >= PAGE_SIZE)) return __copy_user_nocache(dst, src, size, 0); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 818576654092..6b209db8370d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -215,7 +215,7 @@ fast_user_write(struct io_mapping *mapping, vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, - user_data, length); + user_data, length, length); io_mapping_unmap_atomic(vaddr_atomic); if (unwritten) return -EFAULT; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145e..6f3c603b0d67 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user(to, from, n); } diff --git a/mm/filemap.c b/mm/filemap.c index 23acefe51808..60fd56772cc6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1816,14 +1816,14 @@ EXPORT_SYMBOL(file_remove_suid); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { - size_t copied = 0, left = 0; + size_t copied = 0, left = 0, total = bytes; while (bytes) { char __user *buf = iov->iov_base + base; int copy = min(bytes, iov->iov_len - base); base = 0; - left = __copy_from_user_inatomic_nocache(vaddr, buf, copy); + left = __copy_from_user_inatomic_nocache(vaddr, buf, copy, total); copied += copy; bytes -= copy; vaddr += copy; @@ -1851,8 +1851,9 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; + left = __copy_from_user_inatomic_nocache(kaddr + offset, - buf, bytes); + buf, bytes, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, @@ -1880,7 +1881,8 @@ size_t iov_iter_copy_from_user(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - left = __copy_from_user_nocache(kaddr + offset, buf, bytes); + + left = __copy_from_user_nocache(kaddr + offset, buf, bytes, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 0c04615651b7..bf54f8a2cf1d 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -354,7 +354,7 @@ __xip_file_write(struct file *filp, const char __user *buf, break; copied = bytes - - __copy_from_user_nocache(xip_mem + offset, buf, bytes); + __copy_from_user_nocache(xip_mem + offset, buf, bytes, bytes); if (likely(copied > 0)) { status = copied; -- cgit v1.2.3 From 95108fa34a83ffd97e0af959e4b28d7c62008781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 08:22:20 +0100 Subject: x86: usercopy: check for total size when deciding non-temporal cutoff Impact: make more types of copies non-temporal This change makes the following simple fix: 30d697f: x86: fix performance regression in write() syscall A bit more sophisticated: we check the 'total' number of bytes written to decide whether to copy in a cached or a non-temporal way. This will for example cause the tail (modulo 4096 bytes) chunk of a large write() to be non-temporal too - not just the page-sized chunks. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm/uaccess_64.h') diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index a748253db0c9..dcaa0404cf7b 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -198,7 +198,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, * non-temporal stores here. Smaller writes get handled * via regular __copy_from_user(): */ - if (likely(size >= PAGE_SIZE)) + if (likely(total >= PAGE_SIZE)) return __copy_user_nocache(dst, src, size, 1); else return __copy_from_user(dst, src, size); @@ -207,7 +207,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size, unsigned total) { - if (likely(size >= PAGE_SIZE)) + if (likely(total >= PAGE_SIZE)) return __copy_user_nocache(dst, src, size, 0); else return __copy_from_user_inatomic(dst, src, size); -- cgit v1.2.3 From f180053694b43d5714bf56cb95499a3c32ff155c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Mar 2009 11:00:57 +0100 Subject: x86, mm: dont use non-temporal stores in pagecache accesses Impact: standardize IO on cached ops On modern CPUs it is almost always a bad idea to use non-temporal stores, as the regression in this commit has shown it: 30d697f: x86: fix performance regression in write() syscall The kernel simply has no good information about whether using non-temporal stores is a good idea or not - and trying to add heuristics only increases complexity and inserts fragility. The regression on cached write()s took very long to be found - over two years. So dont take any chances and let the hardware decide how it makes use of its caches. The only exception is drivers/gpu/drm/i915/i915_gem.c: there were we are absolutely sure that another entity (the GPU) will pick up the dirty data immediately and that the CPU will not touch that data before the GPU will. Also, keep the _nocache() primitives to make it easier for people to experiment with these details. There may be more clear-cut cases where non-cached copies can be used, outside of filemap.c. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++-- arch/x86/include/asm/uaccess_64.h | 25 +++++++------------------ drivers/gpu/drm/i915/i915_gem.c | 2 +- include/linux/uaccess.h | 4 ++-- mm/filemap.c | 11 ++++------- mm/filemap_xip.c | 2 +- 6 files changed, 17 insertions(+), 31 deletions(-) (limited to 'arch/x86/include/asm/uaccess_64.h') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index a0ba61386972..5e06259e90e5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) } static __always_inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { might_fault(); if (__builtin_constant_p(n)) { @@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, - unsigned long n, unsigned long total) + unsigned long n) { return __copy_from_user_ll_nocache_nozero(to, from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index dcaa0404cf7b..8cc687326eb8 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -188,29 +188,18 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); -static inline int __copy_from_user_nocache(void *dst, const void __user *src, - unsigned size, unsigned long total) +static inline int +__copy_from_user_nocache(void *dst, const void __user *src, unsigned size) { might_sleep(); - /* - * In practice this limit means that large file write()s - * which get chunked to 4K copies get handled via - * non-temporal stores here. Smaller writes get handled - * via regular __copy_from_user(): - */ - if (likely(total >= PAGE_SIZE)) - return __copy_user_nocache(dst, src, size, 1); - else - return __copy_from_user(dst, src, size); + return __copy_user_nocache(dst, src, size, 1); } -static inline int __copy_from_user_inatomic_nocache(void *dst, - const void __user *src, unsigned size, unsigned total) +static inline int +__copy_from_user_inatomic_nocache(void *dst, const void __user *src, + unsigned size) { - if (likely(total >= PAGE_SIZE)) - return __copy_user_nocache(dst, src, size, 0); - else - return __copy_from_user_inatomic(dst, src, size); + return __copy_user_nocache(dst, src, size, 0); } unsigned long diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6b209db8370d..818576654092 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -215,7 +215,7 @@ fast_user_write(struct io_mapping *mapping, vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, - user_data, length, length); + user_data, length); io_mapping_unmap_atomic(vaddr_atomic); if (unwritten) return -EFAULT; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6f3c603b0d67..6b58367d145e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user(to, from, n); } diff --git a/mm/filemap.c b/mm/filemap.c index 60fd56772cc6..126d3973b3d1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1816,14 +1816,14 @@ EXPORT_SYMBOL(file_remove_suid); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { - size_t copied = 0, left = 0, total = bytes; + size_t copied = 0, left = 0; while (bytes) { char __user *buf = iov->iov_base + base; int copy = min(bytes, iov->iov_len - base); base = 0; - left = __copy_from_user_inatomic_nocache(vaddr, buf, copy, total); + left = __copy_from_user_inatomic(vaddr, buf, copy); copied += copy; bytes -= copy; vaddr += copy; @@ -1851,9 +1851,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - - left = __copy_from_user_inatomic_nocache(kaddr + offset, - buf, bytes, bytes); + left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, @@ -1881,8 +1879,7 @@ size_t iov_iter_copy_from_user(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - - left = __copy_from_user_nocache(kaddr + offset, buf, bytes, bytes); + left = __copy_from_user(kaddr + offset, buf, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index bf54f8a2cf1d..0c04615651b7 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -354,7 +354,7 @@ __xip_file_write(struct file *filp, const char __user *buf, break; copied = bytes - - __copy_from_user_nocache(xip_mem + offset, buf, bytes, bytes); + __copy_from_user_nocache(xip_mem + offset, buf, bytes); if (likely(copied > 0)) { status = copied; -- cgit v1.2.3