diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 18 | ||||
-rw-r--r-- | lib/Makefile | 1 | ||||
-rw-r--r-- | lib/dynamic_debug.c | 8 | ||||
-rw-r--r-- | lib/genalloc.c | 4 | ||||
-rw-r--r-- | lib/idr.c | 2 | ||||
-rw-r--r-- | lib/iov_iter.c | 31 | ||||
-rw-r--r-- | lib/kobject.c | 62 | ||||
-rw-r--r-- | lib/kunit/executor.c | 48 | ||||
-rw-r--r-- | lib/kunit/executor_test.c | 13 | ||||
-rw-r--r-- | lib/kunit/test.c | 3 | ||||
-rw-r--r-- | lib/kunit_iov_iter.c | 777 | ||||
-rw-r--r-- | lib/math/Makefile | 2 | ||||
-rw-r--r-- | lib/math/int_log.c | 133 | ||||
-rw-r--r-- | lib/percpu_counter.c | 62 | ||||
-rw-r--r-- | lib/raid6/Makefile | 1 | ||||
-rw-r--r-- | lib/raid6/algos.c | 16 | ||||
-rw-r--r-- | lib/raid6/loongarch.h | 38 | ||||
-rw-r--r-- | lib/raid6/loongarch_simd.c | 422 | ||||
-rw-r--r-- | lib/raid6/recov_loongarch_simd.c | 513 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 12 | ||||
-rw-r--r-- | lib/string_helpers.c | 15 | ||||
-rw-r--r-- | lib/test_scanf.c | 2 | ||||
-rw-r--r-- | lib/xarray.c | 8 |
23 files changed, 2097 insertions, 94 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a8a1b0ac8b22..fa307f93fa2e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -355,6 +355,11 @@ endchoice # "Compressed Debug information" config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" depends on $(cc-option,-gsplit-dwarf) + # RISC-V linker relaxation + -gsplit-dwarf has issues with LLVM and GCC + # prior to 12.x: + # https://github.com/llvm/llvm-project/issues/56642 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99090 + depends on !RISCV || GCC_VERSION >= 120000 help Generate debug info into separate .dwo files. This significantly reduces the build directory size for builds with DEBUG_INFO, @@ -502,7 +507,7 @@ config SECTION_MISMATCH_WARN_ONLY config DEBUG_FORCE_FUNCTION_ALIGN_64B bool "Force all function address 64B aligned" - depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC || S390) + depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC || RISCV || S390) select FUNCTION_ALIGNMENT_64B help There are cases that a commit from one domain changes the function @@ -2232,6 +2237,17 @@ config TEST_DIV64 If unsure, say N. +config TEST_IOV_ITER + tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this to turn on testing of the operation of the I/O iterator + (iov_iter). This test is executed only once during system boot (so + affects only boot time), or at module load time. + + If unsure, say N. + config KPROBES_SANITY_TEST tristate "Kprobes sanity tests" if !KUNIT_ALL_TESTS depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index 2e08397f6210..740109b6e2c8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o +obj-$(CONFIG_TEST_IOV_ITER) += kunit_iov_iter.o obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o obj-$(CONFIG_TEST_IDA) += test_ida.o obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index fdd6d9800a70..6fba6423cc10 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -88,10 +88,11 @@ static inline const char *trim_prefix(const char *path) return path + skip; } -static struct { unsigned flag:8; char opt_char; } opt_array[] = { +static const struct { unsigned flag:8; char opt_char; } opt_array[] = { { _DPRINTK_FLAGS_PRINT, 'p' }, { _DPRINTK_FLAGS_INCL_MODNAME, 'm' }, { _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' }, + { _DPRINTK_FLAGS_INCL_SOURCENAME, 's' }, { _DPRINTK_FLAGS_INCL_LINENO, 'l' }, { _DPRINTK_FLAGS_INCL_TID, 't' }, { _DPRINTK_FLAGS_NONE, '_' }, @@ -808,7 +809,7 @@ const struct kernel_param_ops param_ops_dyndbg_classes = { }; EXPORT_SYMBOL(param_ops_dyndbg_classes); -#define PREFIX_SIZE 64 +#define PREFIX_SIZE 128 static int remaining(int wrote) { @@ -836,6 +837,9 @@ static char *__dynamic_emit_prefix(const struct _ddebug *desc, char *buf) if (desc->flags & _DPRINTK_FLAGS_INCL_FUNCNAME) pos += snprintf(buf + pos, remaining(pos), "%s:", desc->function); + if (desc->flags & _DPRINTK_FLAGS_INCL_SOURCENAME) + pos += snprintf(buf + pos, remaining(pos), "%s:", + trim_prefix(desc->filename)); if (desc->flags & _DPRINTK_FLAGS_INCL_LINENO) pos += snprintf(buf + pos, remaining(pos), "%d:", desc->lineno); diff --git a/lib/genalloc.c b/lib/genalloc.c index 6c644f954bc5..4fa5635bf81b 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -32,7 +32,9 @@ #include <linux/rculist.h> #include <linux/interrupt.h> #include <linux/genalloc.h> -#include <linux/of_device.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/vmalloc.h> static inline size_t chunk_size(const struct gen_pool_chunk *chunk) diff --git a/lib/idr.c b/lib/idr.c index 7ecdfdb5309e..13f2758c2377 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(idr_alloc); * @end: The maximum ID (exclusive). * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @nextid and @end. If + * Allocates an unused ID in the range specified by @start and @end. If * @end is <= 0, it is treated as one larger than %INT_MAX. This allows * callers to use @start + N as @end as long as N is within integer range. * The search for an unused ID will start at the last ID allocated and will diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 424737045b97..27234a820eeb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1557,6 +1557,7 @@ int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) iov_iter_ubuf(i, rw, buf, len); return 0; } +EXPORT_SYMBOL_GPL(import_ubuf); /** * iov_iter_restore() - Restore a &struct iov_iter to the same state as when @@ -1653,14 +1654,14 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, size_t *offset0) { struct page **p, *page; - size_t skip = i->iov_offset, offset; + size_t skip = i->iov_offset, offset, size; int k; for (;;) { if (i->nr_segs == 0) return 0; - maxsize = min(maxsize, i->bvec->bv_len - skip); - if (maxsize) + size = min(maxsize, i->bvec->bv_len - skip); + if (size) break; i->iov_offset = 0; i->nr_segs--; @@ -1673,16 +1674,16 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, offset = skip % PAGE_SIZE; *offset0 = offset; - maxpages = want_pages_array(pages, maxsize, offset, maxpages); + maxpages = want_pages_array(pages, size, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; for (k = 0; k < maxpages; k++) p[k] = page + k; - maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset); - iov_iter_advance(i, maxsize); - return maxsize; + size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); + iov_iter_advance(i, size); + return size; } /* @@ -1697,14 +1698,14 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i, { struct page **p, *page; const void *kaddr; - size_t skip = i->iov_offset, offset, len; + size_t skip = i->iov_offset, offset, len, size; int k; for (;;) { if (i->nr_segs == 0) return 0; - maxsize = min(maxsize, i->kvec->iov_len - skip); - if (maxsize) + size = min(maxsize, i->kvec->iov_len - skip); + if (size) break; i->iov_offset = 0; i->nr_segs--; @@ -1716,13 +1717,13 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i, offset = (unsigned long)kaddr & ~PAGE_MASK; *offset0 = offset; - maxpages = want_pages_array(pages, maxsize, offset, maxpages); + maxpages = want_pages_array(pages, size, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; kaddr -= offset; - len = offset + maxsize; + len = offset + size; for (k = 0; k < maxpages; k++) { size_t seg = min_t(size_t, len, PAGE_SIZE); @@ -1736,9 +1737,9 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i, kaddr += PAGE_SIZE; } - maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset); - iov_iter_advance(i, maxsize); - return maxsize; + size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); + iov_iter_advance(i, size); + return size; } /* diff --git a/lib/kobject.c b/lib/kobject.c index 16d530f9c174..59dbcbdb1c91 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -56,6 +56,14 @@ void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) kobj->ktype->get_ownership(kobj, uid, gid); } +static bool kobj_ns_type_is_valid(enum kobj_ns_type type) +{ + if ((type <= KOBJ_NS_TYPE_NONE) || (type >= KOBJ_NS_TYPES)) + return false; + + return true; +} + static int create_dir(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); @@ -66,12 +74,10 @@ static int create_dir(struct kobject *kobj) if (error) return error; - if (ktype) { - error = sysfs_create_groups(kobj, ktype->default_groups); - if (error) { - sysfs_remove_dir(kobj); - return error; - } + error = sysfs_create_groups(kobj, ktype->default_groups); + if (error) { + sysfs_remove_dir(kobj); + return error; } /* @@ -86,8 +92,7 @@ static int create_dir(struct kobject *kobj) */ ops = kobj_child_ns_ops(kobj); if (ops) { - BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE); - BUG_ON(ops->type >= KOBJ_NS_TYPES); + BUG_ON(!kobj_ns_type_is_valid(ops->type)); BUG_ON(!kobj_ns_type_registered(ops->type)); sysfs_enable_ns(kobj->sd); @@ -584,8 +589,7 @@ static void __kobject_del(struct kobject *kobj) sd = kobj->sd; ktype = get_ktype(kobj); - if (ktype) - sysfs_remove_groups(kobj, ktype->default_groups); + sysfs_remove_groups(kobj, ktype->default_groups); /* send "remove" if the caller did not do it but sent "add" */ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { @@ -662,10 +666,6 @@ static void kobject_cleanup(struct kobject *kobj) pr_debug("'%s' (%p): %s, parent %p\n", kobject_name(kobj), kobj, __func__, kobj->parent); - if (t && !t->release) - pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", - kobject_name(kobj), kobj); - /* remove from sysfs if the caller did not do it */ if (kobj->state_in_sysfs) { pr_debug("'%s' (%p): auto cleanup kobject_del\n", @@ -676,10 +676,13 @@ static void kobject_cleanup(struct kobject *kobj) parent = NULL; } - if (t && t->release) { + if (t->release) { pr_debug("'%s' (%p): calling ktype release\n", kobject_name(kobj), kobj); t->release(kobj); + } else { + pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", + kobject_name(kobj), kobj); } /* free name if we allocated it */ @@ -854,6 +857,11 @@ int kset_register(struct kset *k) if (!k) return -EINVAL; + if (!k->kobj.ktype) { + pr_err("must have a ktype to be initialized properly!\n"); + return -EINVAL; + } + kset_init(k); err = kobject_add_internal(&k->kobj); if (err) { @@ -1017,11 +1025,7 @@ int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) spin_lock(&kobj_ns_type_lock); error = -EINVAL; - if (type >= KOBJ_NS_TYPES) - goto out; - - error = -EINVAL; - if (type <= KOBJ_NS_TYPE_NONE) + if (!kobj_ns_type_is_valid(type)) goto out; error = -EBUSY; @@ -1041,7 +1045,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type) int registered = 0; spin_lock(&kobj_ns_type_lock); - if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES)) + if (kobj_ns_type_is_valid(type)) registered = kobj_ns_ops_tbl[type] != NULL; spin_unlock(&kobj_ns_type_lock); @@ -1052,7 +1056,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa { const struct kobj_ns_type_operations *ops = NULL; - if (parent && parent->ktype && parent->ktype->child_ns_type) + if (parent && parent->ktype->child_ns_type) ops = parent->ktype->child_ns_type(parent); return ops; @@ -1068,8 +1072,7 @@ bool kobj_ns_current_may_mount(enum kobj_ns_type type) bool may_mount = true; spin_lock(&kobj_ns_type_lock); - if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && - kobj_ns_ops_tbl[type]) + if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) may_mount = kobj_ns_ops_tbl[type]->current_may_mount(); spin_unlock(&kobj_ns_type_lock); @@ -1081,8 +1084,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) void *ns = NULL; spin_lock(&kobj_ns_type_lock); - if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && - kobj_ns_ops_tbl[type]) + if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); @@ -1095,8 +1097,7 @@ const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) const void *ns = NULL; spin_lock(&kobj_ns_type_lock); - if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && - kobj_ns_ops_tbl[type]) + if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); spin_unlock(&kobj_ns_type_lock); @@ -1108,8 +1109,7 @@ const void *kobj_ns_initial(enum kobj_ns_type type) const void *ns = NULL; spin_lock(&kobj_ns_type_lock); - if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && - kobj_ns_ops_tbl[type]) + if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->initial_ns(); spin_unlock(&kobj_ns_type_lock); @@ -1119,7 +1119,7 @@ const void *kobj_ns_initial(enum kobj_ns_type type) void kobj_ns_drop(enum kobj_ns_type type, void *ns) { spin_lock(&kobj_ns_type_lock); - if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) kobj_ns_ops_tbl[type]->drop_ns(ns); spin_unlock(&kobj_ns_type_lock); diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 5181aa2e760b..a6348489d45f 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -65,7 +65,7 @@ struct kunit_glob_filter { }; /* Split "suite_glob.test_glob" into two. Assumes filter_glob is not empty. */ -static void kunit_parse_glob_filter(struct kunit_glob_filter *parsed, +static int kunit_parse_glob_filter(struct kunit_glob_filter *parsed, const char *filter_glob) { const int len = strlen(filter_glob); @@ -73,16 +73,28 @@ static void kunit_parse_glob_filter(struct kunit_glob_filter *parsed, if (!period) { parsed->suite_glob = kzalloc(len + 1, GFP_KERNEL); + if (!parsed->suite_glob) + return -ENOMEM; + parsed->test_glob = NULL; strcpy(parsed->suite_glob, filter_glob); - return; + return 0; } parsed->suite_glob = kzalloc(period - filter_glob + 1, GFP_KERNEL); + if (!parsed->suite_glob) + return -ENOMEM; + parsed->test_glob = kzalloc(len - (period - filter_glob) + 1, GFP_KERNEL); + if (!parsed->test_glob) { + kfree(parsed->suite_glob); + return -ENOMEM; + } strncpy(parsed->suite_glob, filter_glob, period - filter_glob); strncpy(parsed->test_glob, period + 1, len - (period - filter_glob)); + + return 0; } /* Create a copy of suite with only tests that match test_glob. */ @@ -152,21 +164,24 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set, } copy_start = copy; - if (filter_glob) - kunit_parse_glob_filter(&parsed_glob, filter_glob); + if (filter_glob) { + *err = kunit_parse_glob_filter(&parsed_glob, filter_glob); + if (*err) + goto free_copy; + } /* Parse attribute filters */ if (filters) { filter_count = kunit_get_filter_count(filters); parsed_filters = kcalloc(filter_count, sizeof(*parsed_filters), GFP_KERNEL); if (!parsed_filters) { - kfree(copy); - return filtered; + *err = -ENOMEM; + goto free_parsed_glob; } for (j = 0; j < filter_count; j++) parsed_filters[j] = kunit_next_attr_filter(&filters, err); if (*err) - goto err; + goto free_parsed_filters; } for (i = 0; &suite_set->start[i] != suite_set->end; i++) { @@ -178,7 +193,7 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set, parsed_glob.test_glob); if (IS_ERR(filtered_suite)) { *err = PTR_ERR(filtered_suite); - goto err; + goto free_parsed_filters; } } if (filter_count > 0 && parsed_filters != NULL) { @@ -195,10 +210,11 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set, filtered_suite = new_filtered_suite; if (*err) - goto err; + goto free_parsed_filters; + if (IS_ERR(filtered_suite)) { *err = PTR_ERR(filtered_suite); - goto err; + goto free_parsed_filters; } if (!filtered_suite) break; @@ -213,17 +229,19 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set, filtered.start = copy_start; filtered.end = copy; -err: - if (*err) - kfree(copy); +free_parsed_filters: + if (filter_count) + kfree(parsed_filters); +free_parsed_glob: if (filter_glob) { kfree(parsed_glob.suite_glob); kfree(parsed_glob.test_glob); } - if (filter_count) - kfree(parsed_filters); +free_copy: + if (*err) + kfree(copy); return filtered; } diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c index 4084071d0eb5..b4f6f96b2844 100644 --- a/lib/kunit/executor_test.c +++ b/lib/kunit/executor_test.c @@ -119,7 +119,7 @@ static void parse_filter_attr_test(struct kunit *test) { int j, filter_count; struct kunit_attr_filter *parsed_filters; - char *filters = "speed>slow, module!=example"; + char filters[] = "speed>slow, module!=example", *filter = filters; int err = 0; filter_count = kunit_get_filter_count(filters); @@ -128,7 +128,7 @@ static void parse_filter_attr_test(struct kunit *test) parsed_filters = kunit_kcalloc(test, filter_count, sizeof(*parsed_filters), GFP_KERNEL); for (j = 0; j < filter_count; j++) { - parsed_filters[j] = kunit_next_attr_filter(&filters, &err); + parsed_filters[j] = kunit_next_attr_filter(&filter, &err); KUNIT_ASSERT_EQ_MSG(test, err, 0, "failed to parse filter '%s'", filters[j]); } @@ -154,6 +154,7 @@ static void filter_attr_test(struct kunit *test) .start = subsuite, .end = &subsuite[2], }; struct kunit_suite_set got; + char filter[] = "speed>slow"; int err = 0; subsuite[0] = alloc_fake_suite(test, "normal_suite", dummy_attr_test_cases); @@ -168,7 +169,7 @@ static void filter_attr_test(struct kunit *test) * attribute is unset and thus, the filtering is based on the parent attribute * of slow. */ - got = kunit_filter_suites(&suite_set, NULL, "speed>slow", NULL, &err); + got = kunit_filter_suites(&suite_set, NULL, filter, NULL, &err); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start); KUNIT_ASSERT_EQ(test, err, 0); kfree_at_end(test, got.start); @@ -191,12 +192,13 @@ static void filter_attr_empty_test(struct kunit *test) .start = subsuite, .end = &subsuite[2], }; struct kunit_suite_set got; + char filter[] = "module!=dummy"; int err = 0; subsuite[0] = alloc_fake_suite(test, "suite1", dummy_attr_test_cases); subsuite[1] = alloc_fake_suite(test, "suite2", dummy_attr_test_cases); - got = kunit_filter_suites(&suite_set, NULL, "module!=dummy", NULL, &err); + got = kunit_filter_suites(&suite_set, NULL, filter, NULL, &err); KUNIT_ASSERT_EQ(test, err, 0); kfree_at_end(test, got.start); /* just in case */ @@ -211,12 +213,13 @@ static void filter_attr_skip_test(struct kunit *test) .start = subsuite, .end = &subsuite[1], }; struct kunit_suite_set got; + char filter[] = "speed>slow"; int err = 0; subsuite[0] = alloc_fake_suite(test, "suite", dummy_attr_test_cases); /* Want: suite(slow, normal), NULL -> suite(slow with SKIP, normal), NULL */ - got = kunit_filter_suites(&suite_set, NULL, "speed>slow", "skip", &err); + got = kunit_filter_suites(&suite_set, NULL, filter, "skip", &err); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start); KUNIT_ASSERT_EQ(test, err, 0); kfree_at_end(test, got.start); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 49698a168437..421f13981412 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -784,12 +784,13 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val, switch (val) { case MODULE_STATE_LIVE: - kunit_module_init(mod); break; case MODULE_STATE_GOING: kunit_module_exit(mod); break; case MODULE_STATE_COMING: + kunit_module_init(mod); + break; case MODULE_STATE_UNFORMED: break; } diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c new file mode 100644 index 000000000000..859b67c4d697 --- /dev/null +++ b/lib/kunit_iov_iter.c @@ -0,0 +1,777 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* I/O iterator tests. This can only test kernel-backed iterator types. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/uio.h> +#include <linux/bvec.h> +#include <kunit/test.h> + +MODULE_DESCRIPTION("iov_iter testing"); +MODULE_AUTHOR("David Howells <dhowells@redhat.com>"); +MODULE_LICENSE("GPL"); + +struct kvec_test_range { + int from, to; +}; + +static const struct kvec_test_range kvec_test_ranges[] = { + { 0x00002, 0x00002 }, + { 0x00027, 0x03000 }, + { 0x05193, 0x18794 }, + { 0x20000, 0x20000 }, + { 0x20000, 0x24000 }, + { 0x24000, 0x27001 }, + { 0x29000, 0xffffb }, + { 0xffffd, 0xffffe }, + { -1 } +}; + +static inline u8 pattern(unsigned long x) +{ + return x & 0xff; +} + +static void iov_kunit_unmap(void *data) +{ + vunmap(data); +} + +static void *__init iov_kunit_create_buffer(struct kunit *test, + struct page ***ppages, + size_t npages) +{ + struct page **pages; + unsigned long got; + void *buffer; + + pages = kunit_kcalloc(test, npages, sizeof(struct page *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages); + *ppages = pages; + + got = alloc_pages_bulk_array(GFP_KERNEL, npages, pages); + if (got != npages) { + release_pages(pages, got); + KUNIT_ASSERT_EQ(test, got, npages); + } + + buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer); + + kunit_add_action_or_reset(test, iov_kunit_unmap, buffer); + return buffer; +} + +static void __init iov_kunit_load_kvec(struct kunit *test, + struct iov_iter *iter, int dir, + struct kvec *kvec, unsigned int kvmax, + void *buffer, size_t bufsize, + const struct kvec_test_range *pr) +{ + size_t size = 0; + int i; + + for (i = 0; i < kvmax; i++, pr++) { + if (pr->from < 0) + break; + KUNIT_ASSERT_GE(test, pr->to, pr->from); + KUNIT_ASSERT_LE(test, pr->to, bufsize); + kvec[i].iov_base = buffer + pr->from; + kvec[i].iov_len = pr->to - pr->from; + size += pr->to - pr->from; + } + KUNIT_ASSERT_LE(test, size, bufsize); + + iov_iter_kvec(iter, dir, kvec, i, size); +} + +/* + * Test copying to a ITER_KVEC-type iterator. + */ +static void __init iov_kunit_copy_to_kvec(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct page **spages, **bpages; + struct kvec kvec[8]; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, patt; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + scratch = iov_kunit_create_buffer(test, &spages, npages); + for (i = 0; i < bufsize; i++) + scratch[i] = pattern(i); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + memset(buffer, 0, bufsize); + + iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec), + buffer, bufsize, kvec_test_ranges); + size = iter.count; + + copied = copy_to_iter(scratch, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.nr_segs, 0); + + /* Build the expected image in the scratch buffer. */ + patt = 0; + memset(scratch, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) + for (i = pr->from; i < pr->to; i++) + scratch[i] = pattern(patt++); + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i); + if (buffer[i] != scratch[i]) + return; + } + + KUNIT_SUCCEED(); +} + +/* + * Test copying from a ITER_KVEC-type iterator. + */ +static void __init iov_kunit_copy_from_kvec(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct page **spages, **bpages; + struct kvec kvec[8]; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, j; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + for (i = 0; i < bufsize; i++) + buffer[i] = pattern(i); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + memset(scratch, 0, bufsize); + + iov_kunit_load_kvec(test, &iter, WRITE, kvec, ARRAY_SIZE(kvec), + buffer, bufsize, kvec_test_ranges); + size = min(iter.count, bufsize); + + copied = copy_from_iter(scratch, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.nr_segs, 0); + + /* Build the expected image in the main buffer. */ + i = 0; + memset(buffer, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + for (j = pr->from; j < pr->to; j++) { + buffer[i++] = pattern(j); + if (i >= bufsize) + goto stop; + } + } +stop: + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i); + if (scratch[i] != buffer[i]) + return; + } + + KUNIT_SUCCEED(); +} + +struct bvec_test_range { + int page, from, to; +}; + +static const struct bvec_test_range bvec_test_ranges[] = { + { 0, 0x0002, 0x0002 }, + { 1, 0x0027, 0x0893 }, + { 2, 0x0193, 0x0794 }, + { 3, 0x0000, 0x1000 }, + { 4, 0x0000, 0x1000 }, + { 5, 0x0000, 0x1000 }, + { 6, 0x0000, 0x0ffb }, + { 6, 0x0ffd, 0x0ffe }, + { -1, -1, -1 } +}; + +static void __init iov_kunit_load_bvec(struct kunit *test, + struct iov_iter *iter, int dir, + struct bio_vec *bvec, unsigned int bvmax, + struct page **pages, size_t npages, + size_t bufsize, + const struct bvec_test_range *pr) +{ + struct page *can_merge = NULL, *page; + size_t size = 0; + int i; + + for (i = 0; i < bvmax; i++, pr++) { + if (pr->from < 0) + break; + KUNIT_ASSERT_LT(test, pr->page, npages); + KUNIT_ASSERT_LT(test, pr->page * PAGE_SIZE, bufsize); + KUNIT_ASSERT_GE(test, pr->from, 0); + KUNIT_ASSERT_GE(test, pr->to, pr->from); + KUNIT_ASSERT_LE(test, pr->to, PAGE_SIZE); + + page = pages[pr->page]; + if (pr->from == 0 && pr->from != pr->to && page == can_merge) { + i--; + bvec[i].bv_len += pr->to; + } else { + bvec_set_page(&bvec[i], page, pr->to - pr->from, pr->from); + } + + size += pr->to - pr->from; + if ((pr->to & ~PAGE_MASK) == 0) + can_merge = page + pr->to / PAGE_SIZE; + else + can_merge = NULL; + } + + iov_iter_bvec(iter, dir, bvec, i, size); +} + +/* + * Test copying to a ITER_BVEC-type iterator. + */ +static void __init iov_kunit_copy_to_bvec(struct kunit *test) +{ + const struct bvec_test_range *pr; + struct iov_iter iter; + struct bio_vec bvec[8]; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, b, patt; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + scratch = iov_kunit_create_buffer(test, &spages, npages); + for (i = 0; i < bufsize; i++) + scratch[i] = pattern(i); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + memset(buffer, 0, bufsize); + + iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec), + bpages, npages, bufsize, bvec_test_ranges); + size = iter.count; + + copied = copy_to_iter(scratch, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.nr_segs, 0); + + /* Build the expected image in the scratch buffer. */ + b = 0; + patt = 0; + memset(scratch, 0, bufsize); + for (pr = bvec_test_ranges; pr->from >= 0; pr++, b++) { + u8 *p = scratch + pr->page * PAGE_SIZE; + + for (i = pr->from; i < pr->to; i++) + p[i] = pattern(patt++); + } + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i); + if (buffer[i] != scratch[i]) + return; + } + + KUNIT_SUCCEED(); +} + +/* + * Test copying from a ITER_BVEC-type iterator. + */ +static void __init iov_kunit_copy_from_bvec(struct kunit *test) +{ + const struct bvec_test_range *pr; + struct iov_iter iter; + struct bio_vec bvec[8]; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, j; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + for (i = 0; i < bufsize; i++) + buffer[i] = pattern(i); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + memset(scratch, 0, bufsize); + + iov_kunit_load_bvec(test, &iter, WRITE, bvec, ARRAY_SIZE(bvec), + bpages, npages, bufsize, bvec_test_ranges); + size = iter.count; + + copied = copy_from_iter(scratch, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.nr_segs, 0); + + /* Build the expected image in the main buffer. */ + i = 0; + memset(buffer, 0, bufsize); + for (pr = bvec_test_ranges; pr->from >= 0; pr++) { + size_t patt = pr->page * PAGE_SIZE; + + for (j = pr->from; j < pr->to; j++) { + buffer[i++] = pattern(patt + j); + if (i >= bufsize) + goto stop; + } + } +stop: + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i); + if (scratch[i] != buffer[i]) + return; + } + + KUNIT_SUCCEED(); +} + +static void iov_kunit_destroy_xarray(void *data) +{ + struct xarray *xarray = data; + + xa_destroy(xarray); + kfree(xarray); +} + +static void __init iov_kunit_load_xarray(struct kunit *test, + struct iov_iter *iter, int dir, + struct xarray *xarray, + struct page **pages, size_t npages) +{ + size_t size = 0; + int i; + + for (i = 0; i < npages; i++) { + void *x = xa_store(xarray, i, pages[i], GFP_KERNEL); + + KUNIT_ASSERT_FALSE(test, xa_is_err(x)); + size += PAGE_SIZE; + } + iov_iter_xarray(iter, dir, xarray, 0, size); +} + +static struct xarray *iov_kunit_create_xarray(struct kunit *test) +{ + struct xarray *xarray; + + xarray = kzalloc(sizeof(struct xarray), GFP_KERNEL); + xa_init(xarray); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xarray); + kunit_add_action_or_reset(test, iov_kunit_destroy_xarray, xarray); + return xarray; +} + +/* + * Test copying to a ITER_XARRAY-type iterator. + */ +static void __init iov_kunit_copy_to_xarray(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct xarray *xarray; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, patt; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + xarray = iov_kunit_create_xarray(test); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + for (i = 0; i < bufsize; i++) + scratch[i] = pattern(i); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + memset(buffer, 0, bufsize); + + iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages); + + i = 0; + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + size = pr->to - pr->from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_xarray(&iter, READ, xarray, pr->from, size); + copied = copy_to_iter(scratch + i, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.iov_offset, size); + i += size; + } + + /* Build the expected image in the scratch buffer. */ + patt = 0; + memset(scratch, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) + for (i = pr->from; i < pr->to; i++) + scratch[i] = pattern(patt++); + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i); + if (buffer[i] != scratch[i]) + return; + } + + KUNIT_SUCCEED(); +} + +/* + * Test copying from a ITER_XARRAY-type iterator. + */ +static void __init iov_kunit_copy_from_xarray(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct xarray *xarray; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, j; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + xarray = iov_kunit_create_xarray(test); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + for (i = 0; i < bufsize; i++) + buffer[i] = pattern(i); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + memset(scratch, 0, bufsize); + + iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages); + + i = 0; + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + size = pr->to - pr->from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_xarray(&iter, WRITE, xarray, pr->from, size); + copied = copy_from_iter(scratch + i, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.iov_offset, size); + i += size; + } + + /* Build the expected image in the main buffer. */ + i = 0; + memset(buffer, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + for (j = pr->from; j < pr->to; j++) { + buffer[i++] = pattern(j); + if (i >= bufsize) + goto stop; + } + } +stop: + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i); + if (scratch[i] != buffer[i]) + return; + } + + KUNIT_SUCCEED(); +} + +/* + * Test the extraction of ITER_KVEC-type iterators. + */ +static void __init iov_kunit_extract_pages_kvec(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct page **bpages, *pagelist[8], **pages = pagelist; + struct kvec kvec[8]; + u8 *buffer; + ssize_t len; + size_t bufsize, size = 0, npages; + int i, from; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + + iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec), + buffer, bufsize, kvec_test_ranges); + size = iter.count; + + pr = kvec_test_ranges; + from = pr->from; + do { + size_t offset0 = LONG_MAX; + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) + pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL; + + len = iov_iter_extract_pages(&iter, &pages, 100 * 1024, + ARRAY_SIZE(pagelist), 0, &offset0); + KUNIT_EXPECT_GE(test, len, 0); + if (len < 0) + break; + KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0); + KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE); + KUNIT_EXPECT_LE(test, len, size); + KUNIT_EXPECT_EQ(test, iter.count, size - len); + size -= len; + + if (len == 0) + break; + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) { + struct page *p; + ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); + int ix; + + KUNIT_ASSERT_GE(test, part, 0); + while (from == pr->to) { + pr++; + from = pr->from; + if (from < 0) + goto stop; + } + ix = from / PAGE_SIZE; + KUNIT_ASSERT_LT(test, ix, npages); + p = bpages[ix]; + KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p); + KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE); + from += part; + len -= part; + KUNIT_ASSERT_GE(test, len, 0); + if (len == 0) + break; + offset0 = 0; + } + + if (test->status == KUNIT_FAILURE) + break; + } while (iov_iter_count(&iter) > 0); + +stop: + KUNIT_EXPECT_EQ(test, size, 0); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_SUCCEED(); +} + +/* + * Test the extraction of ITER_BVEC-type iterators. + */ +static void __init iov_kunit_extract_pages_bvec(struct kunit *test) +{ + const struct bvec_test_range *pr; + struct iov_iter iter; + struct page **bpages, *pagelist[8], **pages = pagelist; + struct bio_vec bvec[8]; + ssize_t len; + size_t bufsize, size = 0, npages; + int i, from; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + iov_kunit_create_buffer(test, &bpages, npages); + iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec), + bpages, npages, bufsize, bvec_test_ranges); + size = iter.count; + + pr = bvec_test_ranges; + from = pr->from; + do { + size_t offset0 = LONG_MAX; + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) + pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL; + + len = iov_iter_extract_pages(&iter, &pages, 100 * 1024, + ARRAY_SIZE(pagelist), 0, &offset0); + KUNIT_EXPECT_GE(test, len, 0); + if (len < 0) + break; + KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0); + KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE); + KUNIT_EXPECT_LE(test, len, size); + KUNIT_EXPECT_EQ(test, iter.count, size - len); + size -= len; + + if (len == 0) + break; + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) { + struct page *p; + ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); + int ix; + + KUNIT_ASSERT_GE(test, part, 0); + while (from == pr->to) { + pr++; + from = pr->from; + if (from < 0) + goto stop; + } + ix = pr->page + from / PAGE_SIZE; + KUNIT_ASSERT_LT(test, ix, npages); + p = bpages[ix]; + KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p); + KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE); + from += part; + len -= part; + KUNIT_ASSERT_GE(test, len, 0); + if (len == 0) + break; + offset0 = 0; + } + + if (test->status == KUNIT_FAILURE) + break; + } while (iov_iter_count(&iter) > 0); + +stop: + KUNIT_EXPECT_EQ(test, size, 0); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_SUCCEED(); +} + +/* + * Test the extraction of ITER_XARRAY-type iterators. + */ +static void __init iov_kunit_extract_pages_xarray(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct xarray *xarray; + struct page **bpages, *pagelist[8], **pages = pagelist; + ssize_t len; + size_t bufsize, size = 0, npages; + int i, from; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + xarray = iov_kunit_create_xarray(test); + + iov_kunit_create_buffer(test, &bpages, npages); + iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages); + + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + from = pr->from; + size = pr->to - from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_xarray(&iter, WRITE, xarray, from, size); + + do { + size_t offset0 = LONG_MAX; + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) + pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL; + + len = iov_iter_extract_pages(&iter, &pages, 100 * 1024, + ARRAY_SIZE(pagelist), 0, &offset0); + KUNIT_EXPECT_GE(test, len, 0); + if (len < 0) + break; + KUNIT_EXPECT_LE(test, len, size); + KUNIT_EXPECT_EQ(test, iter.count, size - len); + if (len == 0) + break; + size -= len; + KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0); + KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE); + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) { + struct page *p; + ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); + int ix; + + KUNIT_ASSERT_GE(test, part, 0); + ix = from / PAGE_SIZE; + KUNIT_ASSERT_LT(test, ix, npages); + p = bpages[ix]; + KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p); + KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE); + from += part; + len -= part; + KUNIT_ASSERT_GE(test, len, 0); + if (len == 0) + break; + offset0 = 0; + } + + if (test->status == KUNIT_FAILURE) + goto stop; + } while (iov_iter_count(&iter) > 0); + + KUNIT_EXPECT_EQ(test, size, 0); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to - pr->from); + } + +stop: + KUNIT_SUCCEED(); +} + +static struct kunit_case __refdata iov_kunit_cases[] = { + KUNIT_CASE(iov_kunit_copy_to_kvec), + KUNIT_CASE(iov_kunit_copy_from_kvec), + KUNIT_CASE(iov_kunit_copy_to_bvec), + KUNIT_CASE(iov_kunit_copy_from_bvec), + KUNIT_CASE(iov_kunit_copy_to_xarray), + KUNIT_CASE(iov_kunit_copy_from_xarray), + KUNIT_CASE(iov_kunit_extract_pages_kvec), + KUNIT_CASE(iov_kunit_extract_pages_bvec), + KUNIT_CASE(iov_kunit_extract_pages_xarray), + {} +}; + +static struct kunit_suite iov_kunit_suite = { + .name = "iov_iter", + .test_cases = iov_kunit_cases, +}; + +kunit_test_suites(&iov_kunit_suite); diff --git a/lib/math/Makefile b/lib/math/Makefile index bfac26ddfc22..91fcdb0c9efe 100644 --- a/lib/math/Makefile +++ b/lib/math/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o +obj-y += div64.o gcd.o lcm.o int_log.o int_pow.o int_sqrt.o reciprocal_div.o obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o diff --git a/lib/math/int_log.c b/lib/math/int_log.c new file mode 100644 index 000000000000..8f9da3a2ad39 --- /dev/null +++ b/lib/math/int_log.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +/* + * Provides fixed-point logarithm operations. + * + * Copyright (C) 2006 Christoph Pfister (christophpfister@gmail.com) + */ + +#include <linux/bitops.h> +#include <linux/export.h> +#include <linux/int_log.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include <asm/bug.h> + +static const unsigned short logtable[256] = { + 0x0000, 0x0171, 0x02e0, 0x044e, 0x05ba, 0x0725, 0x088e, 0x09f7, + 0x0b5d, 0x0cc3, 0x0e27, 0x0f8a, 0x10eb, 0x124b, 0x13aa, 0x1508, + 0x1664, 0x17bf, 0x1919, 0x1a71, 0x1bc8, 0x1d1e, 0x1e73, 0x1fc6, + 0x2119, 0x226a, 0x23ba, 0x2508, 0x2656, 0x27a2, 0x28ed, 0x2a37, + 0x2b80, 0x2cc8, 0x2e0f, 0x2f54, 0x3098, 0x31dc, 0x331e, 0x345f, + 0x359f, 0x36de, 0x381b, 0x3958, 0x3a94, 0x3bce, 0x3d08, 0x3e41, + 0x3f78, 0x40af, 0x41e4, 0x4319, 0x444c, 0x457f, 0x46b0, 0x47e1, + 0x4910, 0x4a3f, 0x4b6c, 0x4c99, 0x4dc5, 0x4eef, 0x5019, 0x5142, + 0x526a, 0x5391, 0x54b7, 0x55dc, 0x5700, 0x5824, 0x5946, 0x5a68, + 0x5b89, 0x5ca8, 0x5dc7, 0x5ee5, 0x6003, 0x611f, 0x623a, 0x6355, + 0x646f, 0x6588, 0x66a0, 0x67b7, 0x68ce, 0x69e4, 0x6af8, 0x6c0c, + 0x6d20, 0x6e32, 0x6f44, 0x7055, 0x7165, 0x7274, 0x7383, 0x7490, + 0x759d, 0x76aa, 0x77b5, 0x78c0, 0x79ca, 0x7ad3, 0x7bdb, 0x7ce3, + 0x7dea, 0x7ef0, 0x7ff6, 0x80fb, 0x81ff, 0x8302, 0x8405, 0x8507, + 0x8608, 0x8709, 0x8809, 0x8908, 0x8a06, 0x8b04, 0x8c01, 0x8cfe, + 0x8dfa, 0x8ef5, 0x8fef, 0x90e9, 0x91e2, 0x92db, 0x93d2, 0x94ca, + 0x95c0, 0x96b6, 0x97ab, 0x98a0, 0x9994, 0x9a87, 0x9b7a, 0x9c6c, + 0x9d5e, 0x9e4f, 0x9f3f, 0xa02e, 0xa11e, 0xa20c, 0xa2fa, 0xa3e7, + 0xa4d4, 0xa5c0, 0xa6ab, 0xa796, 0xa881, 0xa96a, 0xaa53, 0xab3c, + 0xac24, 0xad0c, 0xadf2, 0xaed9, 0xafbe, 0xb0a4, 0xb188, 0xb26c, + 0xb350, 0xb433, 0xb515, 0xb5f7, 0xb6d9, 0xb7ba, 0xb89a, 0xb97a, + 0xba59, 0xbb38, 0xbc16, 0xbcf4, 0xbdd1, 0xbead, 0xbf8a, 0xc065, + 0xc140, 0xc21b, 0xc2f5, 0xc3cf, 0xc4a8, 0xc580, 0xc658, 0xc730, + 0xc807, 0xc8de, 0xc9b4, 0xca8a, 0xcb5f, 0xcc34, 0xcd08, 0xcddc, + 0xceaf, 0xcf82, 0xd054, 0xd126, 0xd1f7, 0xd2c8, 0xd399, 0xd469, + 0xd538, 0xd607, 0xd6d6, 0xd7a4, 0xd872, 0xd93f, 0xda0c, 0xdad9, + 0xdba5, 0xdc70, 0xdd3b, 0xde06, 0xded0, 0xdf9a, 0xe063, 0xe12c, + 0xe1f5, 0xe2bd, 0xe385, 0xe44c, 0xe513, 0xe5d9, 0xe69f, 0xe765, + 0xe82a, 0xe8ef, 0xe9b3, 0xea77, 0xeb3b, 0xebfe, 0xecc1, 0xed83, + 0xee45, 0xef06, 0xefc8, 0xf088, 0xf149, 0xf209, 0xf2c8, 0xf387, + 0xf446, 0xf505, 0xf5c3, 0xf680, 0xf73e, 0xf7fb, 0xf8b7, 0xf973, + 0xfa2f, 0xfaea, 0xfba5, 0xfc60, 0xfd1a, 0xfdd4, 0xfe8e, 0xff47, +}; + +unsigned int intlog2(u32 value) +{ + /** + * returns: log2(value) * 2^24 + * wrong result if value = 0 (log2(0) is undefined) + */ + unsigned int msb; + unsigned int logentry; + unsigned int significand; + unsigned int interpolation; + + if (unlikely(value == 0)) { + WARN_ON(1); + return 0; + } + + /* first detect the msb (count begins at 0) */ + msb = fls(value) - 1; + + /** + * now we use a logtable after the following method: + * + * log2(2^x * y) * 2^24 = x * 2^24 + log2(y) * 2^24 + * where x = msb and therefore 1 <= y < 2 + * first y is determined by shifting the value left + * so that msb is bit 31 + * 0x00231f56 -> 0x8C7D5800 + * the result is y * 2^31 -> "significand" + * then the highest 9 bits are used for a table lookup + * the highest bit is discarded because it's always set + * the highest nine bits in our example are 100011000 + * so we would use the entry 0x18 + */ + significand = value << (31 - msb); + logentry = (significand >> 23) % ARRAY_SIZE(logtable); + + /** + * last step we do is interpolation because of the + * limitations of the log table the error is that part of + * the significand which isn't used for lookup then we + * compute the ratio between the error and the next table entry + * and interpolate it between the log table entry used and the + * next one the biggest error possible is 0x7fffff + * (in our example it's 0x7D5800) + * needed value for next table entry is 0x800000 + * so the interpolation is + * (error / 0x800000) * (logtable_next - logtable_current) + * in the implementation the division is moved to the end for + * better accuracy there is also an overflow correction if + * logtable_next is 256 + */ + interpolation = ((significand & 0x7fffff) * + ((logtable[(logentry + 1) % ARRAY_SIZE(logtable)] - + logtable[logentry]) & 0xffff)) >> 15; + + /* now we return the result */ + return ((msb << 24) + (logtable[logentry] << 8) + interpolation); +} +EXPORT_SYMBOL(intlog2); + +unsigned int intlog10(u32 value) +{ + /** + * returns: log10(value) * 2^24 + * wrong result if value = 0 (log10(0) is undefined) + */ + u64 log; + + if (unlikely(value == 0)) { + WARN_ON(1); + return 0; + } + + log = intlog2(value); + + /** + * we use the following method: + * log10(x) = log2(x) * log10(2) + */ + + return (log * 646456993) >> 31; +} +EXPORT_SYMBOL(intlog10); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 5004463c4f9f..9073430dc865 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -151,48 +151,72 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, - struct lock_class_key *key) +int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, + gfp_t gfp, u32 nr_counters, + struct lock_class_key *key) { unsigned long flags __maybe_unused; - - raw_spin_lock_init(&fbc->lock); - lockdep_set_class(&fbc->lock, key); - fbc->count = amount; - fbc->counters = alloc_percpu_gfp(s32, gfp); - if (!fbc->counters) + size_t counter_size; + s32 __percpu *counters; + u32 i; + + counter_size = ALIGN(sizeof(*counters), __alignof__(*counters)); + counters = __alloc_percpu_gfp(nr_counters * counter_size, + __alignof__(*counters), gfp); + if (!counters) { + fbc[0].counters = NULL; return -ENOMEM; + } - debug_percpu_counter_activate(fbc); + for (i = 0; i < nr_counters; i++) { + raw_spin_lock_init(&fbc[i].lock); + lockdep_set_class(&fbc[i].lock, key); +#ifdef CONFIG_HOTPLUG_CPU + INIT_LIST_HEAD(&fbc[i].list); +#endif + fbc[i].count = amount; + fbc[i].counters = (void *)counters + (i * counter_size); + + debug_percpu_counter_activate(&fbc[i]); + } #ifdef CONFIG_HOTPLUG_CPU - INIT_LIST_HEAD(&fbc->list); spin_lock_irqsave(&percpu_counters_lock, flags); - list_add(&fbc->list, &percpu_counters); + for (i = 0; i < nr_counters; i++) + list_add(&fbc[i].list, &percpu_counters); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } -EXPORT_SYMBOL(__percpu_counter_init); +EXPORT_SYMBOL(__percpu_counter_init_many); -void percpu_counter_destroy(struct percpu_counter *fbc) +void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { unsigned long flags __maybe_unused; + u32 i; + + if (WARN_ON_ONCE(!fbc)) + return; - if (!fbc->counters) + if (!fbc[0].counters) return; - debug_percpu_counter_deactivate(fbc); + for (i = 0; i < nr_counters; i++) + debug_percpu_counter_deactivate(&fbc[i]); #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); - list_del(&fbc->list); + for (i = 0; i < nr_counters; i++) + list_del(&fbc[i].list); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif - free_percpu(fbc->counters); - fbc->counters = NULL; + + free_percpu(fbc[0].counters); + + for (i = 0; i < nr_counters; i++) + fbc[i].counters = NULL; } -EXPORT_SYMBOL(percpu_counter_destroy); +EXPORT_SYMBOL(percpu_counter_destroy_many); int percpu_counter_batch __read_mostly = 32; EXPORT_SYMBOL(percpu_counter_batch); diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 45e17619422b..035b0a4db476 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o +raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o hostprogs += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index a22a05c9af8a..0ec534faf019 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_neonx2, &raid6_neonx1, #endif +#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_CPU_HAS_LASX + &raid6_lasx, +#endif +#ifdef CONFIG_CPU_HAS_LSX + &raid6_lsx, +#endif +#endif #if defined(__ia64__) &raid6_intx32, &raid6_intx16, @@ -104,6 +112,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #if defined(CONFIG_KERNEL_MODE_NEON) &raid6_recov_neon, #endif +#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_CPU_HAS_LASX + &raid6_recov_lasx, +#endif +#ifdef CONFIG_CPU_HAS_LSX + &raid6_recov_lsx, +#endif +#endif &raid6_recov_intx1, NULL }; diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h new file mode 100644 index 000000000000..acfc33ce7056 --- /dev/null +++ b/lib/raid6/loongarch.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> + * + * raid6/loongarch.h + * + * Definitions common to LoongArch RAID-6 code only + */ + +#ifndef _LIB_RAID6_LOONGARCH_H +#define _LIB_RAID6_LOONGARCH_H + +#ifdef __KERNEL__ + +#include <asm/cpu-features.h> +#include <asm/fpu.h> + +#else /* for user-space testing */ + +#include <sys/auxv.h> + +/* have to supply these defines for glibc 2.37- and musl */ +#ifndef HWCAP_LOONGARCH_LSX +#define HWCAP_LOONGARCH_LSX (1 << 4) +#endif +#ifndef HWCAP_LOONGARCH_LASX +#define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +#define kernel_fpu_begin() +#define kernel_fpu_end() + +#define cpu_has_lsx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX) +#define cpu_has_lasx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX) + +#endif /* __KERNEL__ */ + +#endif /* _LIB_RAID6_LOONGARCH_H */ diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c new file mode 100644 index 000000000000..aa5d9f924ca3 --- /dev/null +++ b/lib/raid6/loongarch_simd.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX) + * + * Copyright 2023 WANG Xuerui <git@xen0n.name> + * + * Based on the generic RAID-6 code (int.uc): + * + * Copyright 2002-2004 H. Peter Anvin + */ + +#include <linux/raid/pq.h> +#include "loongarch.h" + +/* + * The vector algorithms are currently priority 0, which means the generic + * scalar algorithms are not being disabled if vector support is present. + * This is like the similar LoongArch RAID5 XOR code, with the main reason + * repeated here: it cannot be ruled out at this point of time, that some + * future (maybe reduced) models could run the vector algorithms slower than + * the scalar ones, maybe for errata or micro-op reasons. It may be + * appropriate to revisit this after one or two more uarch generations. + */ + +#ifdef CONFIG_CPU_HAS_LSX +#define NSIZE 16 + +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v $vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1])); + asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2])); + asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1])); + asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2])); + asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3])); + } + + kernel_fpu_end(); +} + +static void raid6_lsx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v $vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr12"); + asm volatile("vxor.v $vr5, $vr17, $vr13"); + asm volatile("vxor.v $vr6, $vr18, $vr14"); + asm volatile("vxor.v $vr7, $vr19, $vr15"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "vld $vr20, %0\n\t" + "vld $vr21, %1\n\t" + "vld $vr22, %2\n\t" + "vld $vr23, %3\n\t" + "vld $vr24, %4\n\t" + "vld $vr25, %5\n\t" + "vld $vr26, %6\n\t" + "vld $vr27, %7\n\t" + "vxor.v $vr20, $vr20, $vr0\n\t" + "vxor.v $vr21, $vr21, $vr1\n\t" + "vxor.v $vr22, $vr22, $vr2\n\t" + "vxor.v $vr23, $vr23, $vr3\n\t" + "vxor.v $vr24, $vr24, $vr4\n\t" + "vxor.v $vr25, $vr25, $vr5\n\t" + "vxor.v $vr26, $vr26, $vr6\n\t" + "vxor.v $vr27, $vr27, $vr7\n\t" + "vst $vr20, %0\n\t" + "vst $vr21, %1\n\t" + "vst $vr22, %2\n\t" + "vst $vr23, %3\n\t" + "vst $vr24, %4\n\t" + "vst $vr25, %5\n\t" + "vst $vr26, %6\n\t" + "vst $vr27, %7\n\t" + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]), + "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lsx = { + raid6_lsx_gen_syndrome, + raid6_lsx_xor_syndrome, + raid6_has_lsx, + "lsx", + .priority = 0 /* see the comment near the top of the file for reason */ +}; + +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +#define NSIZE 32 + +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v $xr9, $xr9, $xr7"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1])); + } + + kernel_fpu_end(); +} + +static void raid6_lasx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v $xr9, $xr9, $xr7"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr6"); + asm volatile("xvxor.v $xr3, $xr9, $xr7"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "xvld $xr10, %0\n\t" + "xvld $xr11, %1\n\t" + "xvld $xr12, %2\n\t" + "xvld $xr13, %3\n\t" + "xvxor.v $xr10, $xr10, $xr0\n\t" + "xvxor.v $xr11, $xr11, $xr1\n\t" + "xvxor.v $xr12, $xr12, $xr2\n\t" + "xvxor.v $xr13, $xr13, $xr3\n\t" + "xvst $xr10, %0\n\t" + "xvst $xr11, %1\n\t" + "xvst $xr12, %2\n\t" + "xvst $xr13, %3\n\t" + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lasx = { + raid6_lasx_gen_syndrome, + raid6_lasx_xor_syndrome, + raid6_has_lasx, + "lasx", + .priority = 0 /* see the comment near the top of the file for reason */ +}; +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c new file mode 100644 index 000000000000..94aeac85e6f7 --- /dev/null +++ b/lib/raid6/recov_loongarch_simd.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX) + * + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> + * + * Originally based on recov_avx2.c and recov_ssse3.c: + * + * Copyright (C) 2012 Intel Corporation + * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> + */ + +#include <linux/raid/pq.h> +#include "loongarch.h" + +/* + * Unlike with the syndrome calculation algorithms, there's no boot-time + * selection of recovery algorithms by benchmarking, so we have to specify + * the priorities and hope the future cores will all have decent vector + * support (i.e. no LASX slower than LSX, or even scalar code). + */ + +#ifdef CONFIG_CPU_HAS_LSX +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * vr20, vr21: qmul + * vr22, vr23: pbmul + */ + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + + while (bytes) { + /* vr4 - vr7: Q */ + asm volatile("vld $vr4, %0" : : "m" (q[0])); + asm volatile("vld $vr5, %0" : : "m" (q[16])); + asm volatile("vld $vr6, %0" : : "m" (q[32])); + asm volatile("vld $vr7, %0" : : "m" (q[48])); + /* vr4 - vr7: Q + Qxy */ + asm volatile("vld $vr8, %0" : : "m" (dq[0])); + asm volatile("vld $vr9, %0" : : "m" (dq[16])); + asm volatile("vld $vr10, %0" : : "m" (dq[32])); + asm volatile("vld $vr11, %0" : : "m" (dq[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + /* vr0 - vr3: P */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr0 - vr3: P + Pxy */ + asm volatile("vld $vr8, %0" : : "m" (dp[0])); + asm volatile("vld $vr9, %0" : : "m" (dp[16])); + asm volatile("vld $vr10, %0" : : "m" (dp[32])); + asm volatile("vld $vr11, %0" : : "m" (dp[48])); + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4"); + asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5"); + asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6"); + asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8"); + asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9"); + asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10"); + asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11"); + /* vr16 - vr19: B(Q + Qxy) */ + asm volatile("vxor.v $vr16, $vr8, $vr4"); + asm volatile("vxor.v $vr17, $vr9, $vr5"); + asm volatile("vxor.v $vr18, $vr10, $vr6"); + asm volatile("vxor.v $vr19, $vr11, $vr7"); + + /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("vsrli.b $vr4, $vr0, 4"); + asm volatile("vsrli.b $vr5, $vr1, 4"); + asm volatile("vsrli.b $vr6, $vr2, 4"); + asm volatile("vsrli.b $vr7, $vr3, 4"); + /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("vandi.b $vr12, $vr0, 0x0f"); + asm volatile("vandi.b $vr13, $vr1, 0x0f"); + asm volatile("vandi.b $vr14, $vr2, 0x0f"); + asm volatile("vandi.b $vr15, $vr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12"); + asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13"); + asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14"); + asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15"); + /* lookup from pbmul[16] */ + asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4"); + asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5"); + asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6"); + asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7"); + /* vr4 - vr7: A(P + Pxy) */ + asm volatile("vxor.v $vr4, $vr4, $vr12"); + asm volatile("vxor.v $vr5, $vr5, $vr13"); + asm volatile("vxor.v $vr6, $vr6, $vr14"); + asm volatile("vxor.v $vr7, $vr7, $vr15"); + + /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr16"); + asm volatile("vxor.v $vr5, $vr5, $vr17"); + asm volatile("vxor.v $vr6, $vr6, $vr18"); + asm volatile("vxor.v $vr7, $vr7, $vr19"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Pxy + Dx = Dy */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (dp[0])); + asm volatile("vst $vr1, %0" : "=m" (dp[16])); + asm volatile("vst $vr2, %0" : "=m" (dp[32])); + asm volatile("vst $vr3, %0" : "=m" (dp[48])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* vr22, vr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + + while (bytes) { + /* vr0 - vr3: P + Dx */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr4 - vr7: Qx */ + asm volatile("vld $vr4, %0" : : "m" (dq[0])); + asm volatile("vld $vr5, %0" : : "m" (dq[16])); + asm volatile("vld $vr6, %0" : : "m" (dq[32])); + asm volatile("vld $vr7, %0" : : "m" (dq[48])); + /* vr4 - vr7: Q + Qx */ + asm volatile("vld $vr8, %0" : : "m" (q[0])); + asm volatile("vld $vr9, %0" : : "m" (q[16])); + asm volatile("vld $vr10, %0" : : "m" (q[32])); + asm volatile("vld $vr11, %0" : : "m" (q[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4"); + asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5"); + asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6"); + asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8"); + asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9"); + asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10"); + asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11"); + /* vr4 - vr7: qmul(Q + Qx) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Dx + Dx = P */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (p[0])); + asm volatile("vst $vr1, %0" : "=m" (p[16])); + asm volatile("vst $vr2, %0" : "=m" (p[32])); + asm volatile("vst $vr3, %0" : "=m" (p[48])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lsx = { + .data2 = raid6_2data_recov_lsx, + .datap = raid6_datap_recov_lsx, + .valid = raid6_has_lsx, + .name = "lsx", + .priority = 1, +}; +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * xr20, xr21: qmul + * xr22, xr23: pbmul + */ + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + asm volatile("xvreplve0.q $xr20, $xr20"); + asm volatile("xvreplve0.q $xr21, $xr21"); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: Q */ + asm volatile("xvld $xr0, %0" : : "m" (q[0])); + asm volatile("xvld $xr1, %0" : : "m" (q[32])); + /* xr0, xr1: Q + Qxy */ + asm volatile("xvld $xr4, %0" : : "m" (dq[0])); + asm volatile("xvld $xr5, %0" : : "m" (dq[32])); + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* xr2, xr3: P */ + asm volatile("xvld $xr2, %0" : : "m" (p[0])); + asm volatile("xvld $xr3, %0" : : "m" (p[32])); + /* xr2, xr3: P + Pxy */ + asm volatile("xvld $xr4, %0" : : "m" (dp[0])); + asm volatile("xvld $xr5, %0" : : "m" (dp[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvsrli.b $xr4, $xr0, 4"); + asm volatile("xvsrli.b $xr5, $xr1, 4"); + /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvandi.b $xr0, $xr0, 0x0f"); + asm volatile("xvandi.b $xr1, $xr1, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0"); + asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4"); + asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5"); + /* xr6, xr7: B(Q + Qxy) */ + asm volatile("xvxor.v $xr6, $xr4, $xr0"); + asm volatile("xvxor.v $xr7, $xr5, $xr1"); + + /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("xvandi.b $xr0, $xr2, 0x0f"); + asm volatile("xvandi.b $xr1, $xr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0"); + asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1"); + /* lookup from pbmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr0, xr1: A(P + Pxy) */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + + /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("xvxor.v $xr0, $xr0, $xr6"); + asm volatile("xvxor.v $xr1, $xr1, $xr7"); + + /* xr2, xr3: P + Pxy + Dx = Dy */ + asm volatile("xvxor.v $xr2, $xr2, $xr0"); + asm volatile("xvxor.v $xr3, $xr3, $xr1"); + + asm volatile("xvst $xr0, %0" : "=m" (dq[0])); + asm volatile("xvst $xr1, %0" : "=m" (dq[32])); + asm volatile("xvst $xr2, %0" : "=m" (dp[0])); + asm volatile("xvst $xr3, %0" : "=m" (dp[32])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* xr22, xr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: P + Dx */ + asm volatile("xvld $xr0, %0" : : "m" (p[0])); + asm volatile("xvld $xr1, %0" : : "m" (p[32])); + /* xr2, xr3: Qx */ + asm volatile("xvld $xr2, %0" : : "m" (dq[0])); + asm volatile("xvld $xr3, %0" : : "m" (dq[32])); + /* xr2, xr3: Q + Qx */ + asm volatile("xvld $xr4, %0" : : "m" (q[0])); + asm volatile("xvld $xr5, %0" : : "m" (q[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ + asm volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("xvandi.b $xr2, $xr2, 0x0f"); + asm volatile("xvandi.b $xr3, $xr3, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2"); + asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr2, xr3: qmul(Q + Qx) = Dx */ + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr0, xr1: P + Dx + Dx = P */ + asm volatile("xvxor.v $xr0, $xr0, $xr2"); + asm volatile("xvxor.v $xr1, $xr1, $xr3"); + + asm volatile("xvst $xr2, %0" : "=m" (dq[0])); + asm volatile("xvst $xr3, %0" : "=m" (dq[32])); + asm volatile("xvst $xr0, %0" : "=m" (p[0])); + asm volatile("xvst $xr1, %0" : "=m" (p[32])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lasx = { + .data2 = raid6_2data_recov_lasx, + .datap = raid6_datap_recov_lasx, + .valid = raid6_has_lasx, + .name = "lasx", + .priority = 2, +}; +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 1f693ea3b980..2abe0076a636 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc) gcc -c -x c - >/dev/null && rm ./-.o && echo yes) endif +ifeq ($(ARCH),loongarch64) + CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1 + CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1) + CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1) +endif + ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o CFLAGS += -DCONFIG_X86 @@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -DCONFIG_ALTIVEC OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o +else ifeq ($(ARCH),loongarch64) + OBJS += loongarch_simd.o recov_loongarch_simd.o endif .c.o: diff --git a/lib/string_helpers.c b/lib/string_helpers.c index d3b1dd718daf..9982344cca34 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -719,6 +719,21 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp) } EXPORT_SYMBOL_GPL(kstrdup_quotable_file); +/* + * Returns duplicate string in which the @old characters are replaced by @new. + */ +char *kstrdup_and_replace(const char *src, char old, char new, gfp_t gfp) +{ + char *dst; + + dst = kstrdup(src, gfp); + if (!dst) + return NULL; + + return strreplace(dst, old, new); +} +EXPORT_SYMBOL_GPL(kstrdup_and_replace); + /** * kasprintf_strarray - allocate and fill array of sequential strings * @gfp: flags for the slab allocator diff --git a/lib/test_scanf.c b/lib/test_scanf.c index b620cf7de503..a2707af2951a 100644 --- a/lib/test_scanf.c +++ b/lib/test_scanf.c @@ -606,7 +606,7 @@ static void __init numbers_slice(void) #define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn) \ do { \ const T expect[2] = { expect0, expect1 }; \ - T result[2] = {~expect[0], ~expect[1]}; \ + T result[2] = { (T)~expect[0], (T)~expect[1] }; \ \ _test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]); \ } while (0) diff --git a/lib/xarray.c b/lib/xarray.c index 2071a3718f4e..39f07bfc4dcc 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -206,7 +206,7 @@ static void *xas_descend(struct xa_state *xas, struct xa_node *node) void *entry = xa_entry(xas->xa, node, offset); xas->xa_node = node; - if (xa_is_sibling(entry)) { + while (xa_is_sibling(entry)) { offset = xa_to_sibling(entry); entry = xa_entry(xas->xa, node, offset); if (node->shift && xa_is_node(entry)) @@ -1802,6 +1802,9 @@ EXPORT_SYMBOL(xa_get_order); * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -1850,6 +1853,9 @@ EXPORT_SYMBOL(__xa_alloc); * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the |