From 04dad6c6d37d741bad9946a92171bfa637e989f0 Mon Sep 17 00:00:00 2001 From: André Almeida Date: Mon, 21 Oct 2024 13:37:19 -0300 Subject: unicode: Export latest available UTF-8 version number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export latest available UTF-8 version number so filesystems can easily load the newest one. Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-3-f443d5814194@igalia.com Acked-by: Gabriel Krisman Bertazi Signed-off-by: Christian Brauner --- include/linux/unicode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/unicode.h') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 4d39e6e11a95..0c0ab04e84ee 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -16,6 +16,8 @@ struct utf8data_table; ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ ((unsigned int)(REV))) +#define UTF8_LATEST UNICODE_AGE(12, 1, 0) + static inline u8 unicode_major(unsigned int age) { return (age >> UNICODE_MAJ_SHIFT) & 0xff; -- cgit v1.2.3 From 142fa60f61f93805471012f24e029af6d113c5cc Mon Sep 17 00:00:00 2001 From: André Almeida Date: Mon, 21 Oct 2024 13:37:20 -0300 Subject: unicode: Recreate utf8_parse_version() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All filesystems that currently support UTF-8 casefold can fetch the UTF-8 version from the filesystem metadata stored on disk. They can get the data stored and directly match it to a integer, so they can skip the string parsing step, which motivated the removal of this function in the first place. However, for tmpfs, the only way to tell the kernel which UTF-8 version we are about to use is via mount options, using a string. Re-introduce utf8_parse_version() to be used by tmpfs. This version differs from the original by skipping the intermediate step of copying the version string to an auxiliary string before calling match_token(). This versions calls match_token() in the argument string. The paramenters are simpler now as well. utf8_parse_version() was created by 9d53690f0d4 ("unicode: implement higher level API for string handling") and later removed by 49bd03cc7e9 ("unicode: pass a UNICODE_AGE() tripple to utf8_load"). Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20241021-tonyk-tmpfs-v8-4-f443d5814194@igalia.com Reviewed-by: Theodore Ts'o Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Christian Brauner --- fs/unicode/utf8-core.c | 26 ++++++++++++++++++++++++++ include/linux/unicode.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'include/linux/unicode.h') diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 8395066341a4..7f7cb14e01ce 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -214,3 +214,29 @@ void utf8_unload(struct unicode_map *um) } EXPORT_SYMBOL(utf8_unload); +/** + * utf8_parse_version - Parse a UTF-8 version number from a string + * + * @version: input string + * + * Returns the parsed version on success, negative code on error + */ +int utf8_parse_version(char *version) +{ + substring_t args[3]; + unsigned int maj, min, rev; + static const struct match_token token[] = { + {1, "%d.%d.%d"}, + {0, NULL} + }; + + if (match_token(version, token, args) != 1) + return -EINVAL; + + if (match_int(&args[0], &maj) || match_int(&args[1], &min) || + match_int(&args[2], &rev)) + return -EINVAL; + + return UNICODE_AGE(maj, min, rev); +} +EXPORT_SYMBOL(utf8_parse_version); diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 0c0ab04e84ee..5e6b212a2aed 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -78,4 +78,6 @@ int utf8_casefold_hash(const struct unicode_map *um, const void *salt, struct unicode_map *utf8_load(unsigned int version); void utf8_unload(struct unicode_map *um); +int utf8_parse_version(char *version); + #endif /* _LINUX_UNICODE_H */ -- cgit v1.2.3