mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-16 17:23:55 +08:00
unicode patches for 5.17
This includes patches from Christoph Hellwig to split the large data tables of the unicode subsystem into a loadable module, which allows users to not have them around if case-insensitive filesystems are not to be used. It also includes minor code fixes to unicode and its users, from the same author. There is a trivial conflict in the function encoding_show in fs/f2fs/sysfs.c reported by linux-next between commit 84eab2a899
("f2fs: replace snprintf in show functions with sysfs_emit") and commit a440943e68
("unicode: remove the charset field from struct unicode_map"). from my tree. All the patches here have been on linux-next releases for the past months. Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com> -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8jAUPq50yNjPBCi4QEuZqsMcppQFAmHeLp0ACgkQQEuZqsMc ppRWdhAAstuibIlhUj1Vae070P92oaxM/Azz3IgyVFWensJyQV1PvbtFQDhyKM4w M3tQ45eK49vVHn+JpLHbiAdZV66rD/sMSsruCVIf/8KNVDisOBQtFar5yxVr0Ion AOMoG6/Xrk8BZlZH62fhtJGtu/EFmeFoGVdC81NdTSroe9G+26we3IULwHSE1lNH XMJFCgU6otuLDOna16U7kL77Tu7GXRJcQe1+2nRJ+u6Agxy2xTo/s4FHuxzRK0/e GsgO1scY6unWM23O6z+qJYazng2Zt3EOZtSGqU4TsvZwjUi2UtAYW1/vAQGc/q3Y hGxPYGgKC1VrXLfIcuyng7j0vFPtADbdHMbsJPoyy+Nz4znDJ81IAKAHMO1in3C8 CHKjW+6InmXNye/uwdRt8Tx49jxUHmWUbQRT5FwMDpzC7MAL+DVdPpVVQgpLVM/H gW3YpBEk5qQvVdh8DWZVW3rT3SnMX/v0+u+76FsMHKYNJMNrCnP6vXpCPQl/Gyut ycgK7qVF3o/bgNBf072H3ZBZajTv7ePvacP4Wth7m9I2ykk+p4IjQLpTC5rJK0By VC1xS4im2VqiIWE9eE5y9cXU1oa/AfOcOF+7FZcxT13IL6hKTtd4+H4yKgdcNsyk 7RjpGgjp+SU51/EilhEqMFgEe07CURxwGwhApizBSiTIOgZS96U= =4q9x -----END PGP SIGNATURE----- Merge tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode Pull unicode updates from Gabriel Krisman Bertazi: "This includes patches from Christoph Hellwig to split the large data tables of the unicode subsystem into a loadable module, which allow users to not have them around if case-insensitive filesystems are not to be used. It also includes minor code fixes to unicode and its users, from the same author. 
All the patches here have been on linux-next releases for the past months" * tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode: unicode: only export internal symbols for the selftests unicode: Add utf8-data module unicode: cache the normalization tables in struct unicode_map unicode: move utf8cursor to utf8-selftest.c unicode: simplify utf8len unicode: remove the unused utf8{,n}age{min,max} functions unicode: pass a UNICODE_AGE() tripple to utf8_load unicode: mark the version field in struct unicode_map unsigned unicode: remove the charset field from struct unicode_map f2fs: simplify f2fs_sb_read_encoding ext4: simplify ext4_sb_read_encoding
This commit is contained in:
commit
6661224e66
@ -1966,29 +1966,22 @@ static const struct mount_opts {
|
|||||||
static const struct ext4_sb_encodings {
|
static const struct ext4_sb_encodings {
|
||||||
__u16 magic;
|
__u16 magic;
|
||||||
char *name;
|
char *name;
|
||||||
char *version;
|
unsigned int version;
|
||||||
} ext4_sb_encoding_map[] = {
|
} ext4_sb_encoding_map[] = {
|
||||||
{EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
|
{EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
|
||||||
};
|
};
|
||||||
|
|
||||||
static int ext4_sb_read_encoding(const struct ext4_super_block *es,
|
static const struct ext4_sb_encodings *
|
||||||
const struct ext4_sb_encodings **encoding,
|
ext4_sb_read_encoding(const struct ext4_super_block *es)
|
||||||
__u16 *flags)
|
|
||||||
{
|
{
|
||||||
__u16 magic = le16_to_cpu(es->s_encoding);
|
__u16 magic = le16_to_cpu(es->s_encoding);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
|
for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
|
||||||
if (magic == ext4_sb_encoding_map[i].magic)
|
if (magic == ext4_sb_encoding_map[i].magic)
|
||||||
break;
|
return &ext4_sb_encoding_map[i];
|
||||||
|
|
||||||
if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
|
return NULL;
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
*encoding = &ext4_sb_encoding_map[i];
|
|
||||||
*flags = le16_to_cpu(es->s_encoding_flags);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -4624,10 +4617,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
|
|||||||
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
|
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
|
||||||
const struct ext4_sb_encodings *encoding_info;
|
const struct ext4_sb_encodings *encoding_info;
|
||||||
struct unicode_map *encoding;
|
struct unicode_map *encoding;
|
||||||
__u16 encoding_flags;
|
__u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
|
||||||
|
|
||||||
if (ext4_sb_read_encoding(es, &encoding_info,
|
encoding_info = ext4_sb_read_encoding(es);
|
||||||
&encoding_flags)) {
|
if (!encoding_info) {
|
||||||
ext4_msg(sb, KERN_ERR,
|
ext4_msg(sb, KERN_ERR,
|
||||||
"Encoding requested by superblock is unknown");
|
"Encoding requested by superblock is unknown");
|
||||||
goto failed_mount;
|
goto failed_mount;
|
||||||
@ -4636,15 +4629,21 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
|
|||||||
encoding = utf8_load(encoding_info->version);
|
encoding = utf8_load(encoding_info->version);
|
||||||
if (IS_ERR(encoding)) {
|
if (IS_ERR(encoding)) {
|
||||||
ext4_msg(sb, KERN_ERR,
|
ext4_msg(sb, KERN_ERR,
|
||||||
"can't mount with superblock charset: %s-%s "
|
"can't mount with superblock charset: %s-%u.%u.%u "
|
||||||
"not supported by the kernel. flags: 0x%x.",
|
"not supported by the kernel. flags: 0x%x.",
|
||||||
encoding_info->name, encoding_info->version,
|
encoding_info->name,
|
||||||
|
unicode_major(encoding_info->version),
|
||||||
|
unicode_minor(encoding_info->version),
|
||||||
|
unicode_rev(encoding_info->version),
|
||||||
encoding_flags);
|
encoding_flags);
|
||||||
goto failed_mount;
|
goto failed_mount;
|
||||||
}
|
}
|
||||||
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
|
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
|
||||||
"%s-%s with flags 0x%hx", encoding_info->name,
|
"%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
|
||||||
encoding_info->version?:"\b", encoding_flags);
|
unicode_major(encoding_info->version),
|
||||||
|
unicode_minor(encoding_info->version),
|
||||||
|
unicode_rev(encoding_info->version),
|
||||||
|
encoding_flags);
|
||||||
|
|
||||||
sb->s_encoding = encoding;
|
sb->s_encoding = encoding;
|
||||||
sb->s_encoding_flags = encoding_flags;
|
sb->s_encoding_flags = encoding_flags;
|
||||||
|
@ -260,29 +260,22 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
|
|||||||
static const struct f2fs_sb_encodings {
|
static const struct f2fs_sb_encodings {
|
||||||
__u16 magic;
|
__u16 magic;
|
||||||
char *name;
|
char *name;
|
||||||
char *version;
|
unsigned int version;
|
||||||
} f2fs_sb_encoding_map[] = {
|
} f2fs_sb_encoding_map[] = {
|
||||||
{F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
|
{F2FS_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
|
||||||
};
|
};
|
||||||
|
|
||||||
static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
|
static const struct f2fs_sb_encodings *
|
||||||
const struct f2fs_sb_encodings **encoding,
|
f2fs_sb_read_encoding(const struct f2fs_super_block *sb)
|
||||||
__u16 *flags)
|
|
||||||
{
|
{
|
||||||
__u16 magic = le16_to_cpu(sb->s_encoding);
|
__u16 magic = le16_to_cpu(sb->s_encoding);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
|
for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
|
||||||
if (magic == f2fs_sb_encoding_map[i].magic)
|
if (magic == f2fs_sb_encoding_map[i].magic)
|
||||||
break;
|
return &f2fs_sb_encoding_map[i];
|
||||||
|
|
||||||
if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
|
return NULL;
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
*encoding = &f2fs_sb_encoding_map[i];
|
|
||||||
*flags = le16_to_cpu(sb->s_encoding_flags);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct kmem_cache *f2fs_cf_name_slab;
|
struct kmem_cache *f2fs_cf_name_slab;
|
||||||
@ -3874,25 +3867,32 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
|
|||||||
struct unicode_map *encoding;
|
struct unicode_map *encoding;
|
||||||
__u16 encoding_flags;
|
__u16 encoding_flags;
|
||||||
|
|
||||||
if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
|
encoding_info = f2fs_sb_read_encoding(sbi->raw_super);
|
||||||
&encoding_flags)) {
|
if (!encoding_info) {
|
||||||
f2fs_err(sbi,
|
f2fs_err(sbi,
|
||||||
"Encoding requested by superblock is unknown");
|
"Encoding requested by superblock is unknown");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
encoding_flags = le16_to_cpu(sbi->raw_super->s_encoding_flags);
|
||||||
encoding = utf8_load(encoding_info->version);
|
encoding = utf8_load(encoding_info->version);
|
||||||
if (IS_ERR(encoding)) {
|
if (IS_ERR(encoding)) {
|
||||||
f2fs_err(sbi,
|
f2fs_err(sbi,
|
||||||
"can't mount with superblock charset: %s-%s "
|
"can't mount with superblock charset: %s-%u.%u.%u "
|
||||||
"not supported by the kernel. flags: 0x%x.",
|
"not supported by the kernel. flags: 0x%x.",
|
||||||
encoding_info->name, encoding_info->version,
|
encoding_info->name,
|
||||||
|
unicode_major(encoding_info->version),
|
||||||
|
unicode_minor(encoding_info->version),
|
||||||
|
unicode_rev(encoding_info->version),
|
||||||
encoding_flags);
|
encoding_flags);
|
||||||
return PTR_ERR(encoding);
|
return PTR_ERR(encoding);
|
||||||
}
|
}
|
||||||
f2fs_info(sbi, "Using encoding defined by superblock: "
|
f2fs_info(sbi, "Using encoding defined by superblock: "
|
||||||
"%s-%s with flags 0x%hx", encoding_info->name,
|
"%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
|
||||||
encoding_info->version?:"\b", encoding_flags);
|
unicode_major(encoding_info->version),
|
||||||
|
unicode_minor(encoding_info->version),
|
||||||
|
unicode_rev(encoding_info->version),
|
||||||
|
encoding_flags);
|
||||||
|
|
||||||
sbi->sb->s_encoding = encoding;
|
sbi->sb->s_encoding = encoding;
|
||||||
sbi->sb->s_encoding_flags = encoding_flags;
|
sbi->sb->s_encoding_flags = encoding_flags;
|
||||||
|
@ -196,8 +196,7 @@ static ssize_t encoding_show(struct f2fs_attr *a,
|
|||||||
struct super_block *sb = sbi->sb;
|
struct super_block *sb = sbi->sb;
|
||||||
|
|
||||||
if (f2fs_sb_has_casefold(sbi))
|
if (f2fs_sb_has_casefold(sbi))
|
||||||
return sysfs_emit(buf, "%s (%d.%d.%d)\n",
|
return sysfs_emit(buf, "UTF-8 (%d.%d.%d)\n",
|
||||||
sb->s_encoding->charset,
|
|
||||||
(sb->s_encoding->version >> 16) & 0xff,
|
(sb->s_encoding->version >> 16) & 0xff,
|
||||||
(sb->s_encoding->version >> 8) & 0xff,
|
(sb->s_encoding->version >> 8) & 0xff,
|
||||||
sb->s_encoding->version & 0xff);
|
sb->s_encoding->version & 0xff);
|
||||||
|
@ -8,7 +8,16 @@ config UNICODE
|
|||||||
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
|
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
|
||||||
support.
|
support.
|
||||||
|
|
||||||
|
config UNICODE_UTF8_DATA
|
||||||
|
tristate "UTF-8 normalization and casefolding tables"
|
||||||
|
depends on UNICODE
|
||||||
|
default UNICODE
|
||||||
|
help
|
||||||
|
This contains a large table of case foldings, which can be loaded as
|
||||||
|
a separate module if you say M here. To be on the safe side stick
|
||||||
|
to the default of Y. Saying N here makes no sense, if you do not want
|
||||||
|
utf8 casefolding support, disable CONFIG_UNICODE instead.
|
||||||
|
|
||||||
config UNICODE_NORMALIZATION_SELFTEST
|
config UNICODE_NORMALIZATION_SELFTEST
|
||||||
tristate "Test UTF-8 normalization support"
|
tristate "Test UTF-8 normalization support"
|
||||||
depends on UNICODE
|
depends on UNICODE_UTF8_DATA
|
||||||
default n
|
|
||||||
|
@ -2,14 +2,15 @@
|
|||||||
|
|
||||||
obj-$(CONFIG_UNICODE) += unicode.o
|
obj-$(CONFIG_UNICODE) += unicode.o
|
||||||
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
|
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
|
||||||
|
obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o
|
||||||
|
|
||||||
unicode-y := utf8-norm.o utf8-core.o
|
unicode-y := utf8-norm.o utf8-core.o
|
||||||
|
|
||||||
$(obj)/utf8-norm.o: $(obj)/utf8data.h
|
$(obj)/utf8-data.o: $(obj)/utf8data.c
|
||||||
|
|
||||||
# In the normal build, the checked-in utf8data.h is just shipped.
|
# In the normal build, the checked-in utf8data.c is just shipped.
|
||||||
#
|
#
|
||||||
# To generate utf8data.h from UCD, put *.txt files in this directory
|
# To generate utf8data.c from UCD, put *.txt files in this directory
|
||||||
# and pass REGENERATE_UTF8DATA=1 from the command line.
|
# and pass REGENERATE_UTF8DATA=1 from the command line.
|
||||||
ifdef REGENERATE_UTF8DATA
|
ifdef REGENERATE_UTF8DATA
|
||||||
|
|
||||||
@ -24,15 +25,15 @@ quiet_cmd_utf8data = GEN $@
|
|||||||
-t $(srctree)/$(src)/NormalizationTest.txt \
|
-t $(srctree)/$(src)/NormalizationTest.txt \
|
||||||
-o $@
|
-o $@
|
||||||
|
|
||||||
$(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
|
$(obj)/utf8data.c: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
|
||||||
$(call if_changed,utf8data)
|
$(call if_changed,utf8data)
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
||||||
$(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
|
$(obj)/utf8data.c: $(src)/utf8data.c_shipped FORCE
|
||||||
$(call if_changed,shipped)
|
$(call if_changed,shipped)
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
targets += utf8data.h
|
targets += utf8data.c
|
||||||
hostprogs += mkutf8data
|
hostprogs += mkutf8data
|
||||||
|
@ -3287,12 +3287,10 @@ static void write_file(void)
|
|||||||
open_fail(utf8_name, errno);
|
open_fail(utf8_name, errno);
|
||||||
|
|
||||||
fprintf(file, "/* This file is generated code, do not edit. */\n");
|
fprintf(file, "/* This file is generated code, do not edit. */\n");
|
||||||
fprintf(file, "#ifndef __INCLUDED_FROM_UTF8NORM_C__\n");
|
|
||||||
fprintf(file, "#error Only nls_utf8-norm.c should include this file.\n");
|
|
||||||
fprintf(file, "#endif\n");
|
|
||||||
fprintf(file, "\n");
|
fprintf(file, "\n");
|
||||||
fprintf(file, "static const unsigned int utf8vers = %#x;\n",
|
fprintf(file, "#include <linux/module.h>\n");
|
||||||
unicode_maxage);
|
fprintf(file, "#include <linux/kernel.h>\n");
|
||||||
|
fprintf(file, "#include \"utf8n.h\"\n");
|
||||||
fprintf(file, "\n");
|
fprintf(file, "\n");
|
||||||
fprintf(file, "static const unsigned int utf8agetab[] = {\n");
|
fprintf(file, "static const unsigned int utf8agetab[] = {\n");
|
||||||
for (i = 0; i != ages_count; i++)
|
for (i = 0; i != ages_count; i++)
|
||||||
@ -3339,6 +3337,22 @@ static void write_file(void)
|
|||||||
fprintf(file, "\n");
|
fprintf(file, "\n");
|
||||||
}
|
}
|
||||||
fprintf(file, "};\n");
|
fprintf(file, "};\n");
|
||||||
|
fprintf(file, "\n");
|
||||||
|
fprintf(file, "struct utf8data_table utf8_data_table = {\n");
|
||||||
|
fprintf(file, "\t.utf8agetab = utf8agetab,\n");
|
||||||
|
fprintf(file, "\t.utf8agetab_size = ARRAY_SIZE(utf8agetab),\n");
|
||||||
|
fprintf(file, "\n");
|
||||||
|
fprintf(file, "\t.utf8nfdicfdata = utf8nfdicfdata,\n");
|
||||||
|
fprintf(file, "\t.utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),\n");
|
||||||
|
fprintf(file, "\n");
|
||||||
|
fprintf(file, "\t.utf8nfdidata = utf8nfdidata,\n");
|
||||||
|
fprintf(file, "\t.utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),\n");
|
||||||
|
fprintf(file, "\n");
|
||||||
|
fprintf(file, "\t.utf8data = utf8data,\n");
|
||||||
|
fprintf(file, "};\n");
|
||||||
|
fprintf(file, "EXPORT_SYMBOL_GPL(utf8_data_table);");
|
||||||
|
fprintf(file, "\n");
|
||||||
|
fprintf(file, "MODULE_LICENSE(\"GPL v2\");\n");
|
||||||
fclose(file);
|
fclose(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,16 +5,13 @@
|
|||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/parser.h>
|
#include <linux/parser.h>
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
#include <linux/unicode.h>
|
|
||||||
#include <linux/stringhash.h>
|
#include <linux/stringhash.h>
|
||||||
|
|
||||||
#include "utf8n.h"
|
#include "utf8n.h"
|
||||||
|
|
||||||
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
|
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
|
||||||
{
|
{
|
||||||
const struct utf8data *data = utf8nfdi(um->version);
|
if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
|
||||||
|
|
||||||
if (utf8nlen(data, str->name, str->len) < 0)
|
|
||||||
return -1;
|
return -1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -23,14 +20,13 @@ EXPORT_SYMBOL(utf8_validate);
|
|||||||
int utf8_strncmp(const struct unicode_map *um,
|
int utf8_strncmp(const struct unicode_map *um,
|
||||||
const struct qstr *s1, const struct qstr *s2)
|
const struct qstr *s1, const struct qstr *s2)
|
||||||
{
|
{
|
||||||
const struct utf8data *data = utf8nfdi(um->version);
|
|
||||||
struct utf8cursor cur1, cur2;
|
struct utf8cursor cur1, cur2;
|
||||||
int c1, c2;
|
int c1, c2;
|
||||||
|
|
||||||
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
|
if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
|
if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@ -50,14 +46,13 @@ EXPORT_SYMBOL(utf8_strncmp);
|
|||||||
int utf8_strncasecmp(const struct unicode_map *um,
|
int utf8_strncasecmp(const struct unicode_map *um,
|
||||||
const struct qstr *s1, const struct qstr *s2)
|
const struct qstr *s1, const struct qstr *s2)
|
||||||
{
|
{
|
||||||
const struct utf8data *data = utf8nfdicf(um->version);
|
|
||||||
struct utf8cursor cur1, cur2;
|
struct utf8cursor cur1, cur2;
|
||||||
int c1, c2;
|
int c1, c2;
|
||||||
|
|
||||||
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
|
if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
|
if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@ -81,12 +76,11 @@ int utf8_strncasecmp_folded(const struct unicode_map *um,
|
|||||||
const struct qstr *cf,
|
const struct qstr *cf,
|
||||||
const struct qstr *s1)
|
const struct qstr *s1)
|
||||||
{
|
{
|
||||||
const struct utf8data *data = utf8nfdicf(um->version);
|
|
||||||
struct utf8cursor cur1;
|
struct utf8cursor cur1;
|
||||||
int c1, c2;
|
int c1, c2;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
|
if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@ -105,11 +99,10 @@ EXPORT_SYMBOL(utf8_strncasecmp_folded);
|
|||||||
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
|
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
|
||||||
unsigned char *dest, size_t dlen)
|
unsigned char *dest, size_t dlen)
|
||||||
{
|
{
|
||||||
const struct utf8data *data = utf8nfdicf(um->version);
|
|
||||||
struct utf8cursor cur;
|
struct utf8cursor cur;
|
||||||
size_t nlen = 0;
|
size_t nlen = 0;
|
||||||
|
|
||||||
if (utf8ncursor(&cur, data, str->name, str->len) < 0)
|
if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
for (nlen = 0; nlen < dlen; nlen++) {
|
for (nlen = 0; nlen < dlen; nlen++) {
|
||||||
@ -128,12 +121,11 @@ EXPORT_SYMBOL(utf8_casefold);
|
|||||||
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
|
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
|
||||||
struct qstr *str)
|
struct qstr *str)
|
||||||
{
|
{
|
||||||
const struct utf8data *data = utf8nfdicf(um->version);
|
|
||||||
struct utf8cursor cur;
|
struct utf8cursor cur;
|
||||||
int c;
|
int c;
|
||||||
unsigned long hash = init_name_hash(salt);
|
unsigned long hash = init_name_hash(salt);
|
||||||
|
|
||||||
if (utf8ncursor(&cur, data, str->name, str->len) < 0)
|
if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
while ((c = utf8byte(&cur))) {
|
while ((c = utf8byte(&cur))) {
|
||||||
@ -149,11 +141,10 @@ EXPORT_SYMBOL(utf8_casefold_hash);
|
|||||||
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
|
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
|
||||||
unsigned char *dest, size_t dlen)
|
unsigned char *dest, size_t dlen)
|
||||||
{
|
{
|
||||||
const struct utf8data *data = utf8nfdi(um->version);
|
|
||||||
struct utf8cursor cur;
|
struct utf8cursor cur;
|
||||||
ssize_t nlen = 0;
|
ssize_t nlen = 0;
|
||||||
|
|
||||||
if (utf8ncursor(&cur, data, str->name, str->len) < 0)
|
if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
for (nlen = 0; nlen < dlen; nlen++) {
|
for (nlen = 0; nlen < dlen; nlen++) {
|
||||||
@ -167,69 +158,59 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
|
|||||||
}
|
}
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPORT_SYMBOL(utf8_normalize);
|
EXPORT_SYMBOL(utf8_normalize);
|
||||||
|
|
||||||
static int utf8_parse_version(const char *version, unsigned int *maj,
|
static const struct utf8data *find_table_version(const struct utf8data *table,
|
||||||
unsigned int *min, unsigned int *rev)
|
size_t nr_entries, unsigned int version)
|
||||||
{
|
{
|
||||||
substring_t args[3];
|
size_t i = nr_entries - 1;
|
||||||
char version_string[12];
|
|
||||||
static const struct match_token token[] = {
|
|
||||||
{1, "%d.%d.%d"},
|
|
||||||
{0, NULL}
|
|
||||||
};
|
|
||||||
|
|
||||||
strncpy(version_string, version, sizeof(version_string));
|
while (version < table[i].maxage)
|
||||||
|
i--;
|
||||||
if (match_token(version_string, token, args) != 1)
|
if (version > table[i].maxage)
|
||||||
return -EINVAL;
|
return NULL;
|
||||||
|
return &table[i];
|
||||||
if (match_int(&args[0], maj) || match_int(&args[1], min) ||
|
|
||||||
match_int(&args[2], rev))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct unicode_map *utf8_load(const char *version)
|
struct unicode_map *utf8_load(unsigned int version)
|
||||||
{
|
{
|
||||||
struct unicode_map *um = NULL;
|
struct unicode_map *um;
|
||||||
int unicode_version;
|
|
||||||
|
|
||||||
if (version) {
|
|
||||||
unsigned int maj, min, rev;
|
|
||||||
|
|
||||||
if (utf8_parse_version(version, &maj, &min, &rev) < 0)
|
|
||||||
return ERR_PTR(-EINVAL);
|
|
||||||
|
|
||||||
if (!utf8version_is_supported(maj, min, rev))
|
|
||||||
return ERR_PTR(-EINVAL);
|
|
||||||
|
|
||||||
unicode_version = UNICODE_AGE(maj, min, rev);
|
|
||||||
} else {
|
|
||||||
unicode_version = utf8version_latest();
|
|
||||||
printk(KERN_WARNING"UTF-8 version not specified. "
|
|
||||||
"Assuming latest supported version (%d.%d.%d).",
|
|
||||||
(unicode_version >> 16) & 0xff,
|
|
||||||
(unicode_version >> 8) & 0xff,
|
|
||||||
(unicode_version & 0xff));
|
|
||||||
}
|
|
||||||
|
|
||||||
um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
|
um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
|
||||||
if (!um)
|
if (!um)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
um->version = version;
|
||||||
|
|
||||||
um->charset = "UTF-8";
|
um->tables = symbol_request(utf8_data_table);
|
||||||
um->version = unicode_version;
|
if (!um->tables)
|
||||||
|
goto out_free_um;
|
||||||
|
|
||||||
|
if (!utf8version_is_supported(um, version))
|
||||||
|
goto out_symbol_put;
|
||||||
|
um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata,
|
||||||
|
um->tables->utf8nfdidata_size, um->version);
|
||||||
|
if (!um->ntab[UTF8_NFDI])
|
||||||
|
goto out_symbol_put;
|
||||||
|
um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata,
|
||||||
|
um->tables->utf8nfdicfdata_size, um->version);
|
||||||
|
if (!um->ntab[UTF8_NFDICF])
|
||||||
|
goto out_symbol_put;
|
||||||
return um;
|
return um;
|
||||||
|
|
||||||
|
out_symbol_put:
|
||||||
|
symbol_put(um->tables);
|
||||||
|
out_free_um:
|
||||||
|
kfree(um);
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(utf8_load);
|
EXPORT_SYMBOL(utf8_load);
|
||||||
|
|
||||||
void utf8_unload(struct unicode_map *um)
|
void utf8_unload(struct unicode_map *um)
|
||||||
{
|
{
|
||||||
kfree(um);
|
if (um) {
|
||||||
|
symbol_put(utf8_data_table);
|
||||||
|
kfree(um);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(utf8_unload);
|
EXPORT_SYMBOL(utf8_unload);
|
||||||
|
|
||||||
|
@ -6,34 +6,17 @@
|
|||||||
|
|
||||||
#include "utf8n.h"
|
#include "utf8n.h"
|
||||||
|
|
||||||
struct utf8data {
|
int utf8version_is_supported(const struct unicode_map *um, unsigned int version)
|
||||||
unsigned int maxage;
|
|
||||||
unsigned int offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define __INCLUDED_FROM_UTF8NORM_C__
|
|
||||||
#include "utf8data.h"
|
|
||||||
#undef __INCLUDED_FROM_UTF8NORM_C__
|
|
||||||
|
|
||||||
int utf8version_is_supported(u8 maj, u8 min, u8 rev)
|
|
||||||
{
|
{
|
||||||
int i = ARRAY_SIZE(utf8agetab) - 1;
|
int i = um->tables->utf8agetab_size - 1;
|
||||||
unsigned int sb_utf8version = UNICODE_AGE(maj, min, rev);
|
|
||||||
|
|
||||||
while (i >= 0 && utf8agetab[i] != 0) {
|
while (i >= 0 && um->tables->utf8agetab[i] != 0) {
|
||||||
if (sb_utf8version == utf8agetab[i])
|
if (version == um->tables->utf8agetab[i])
|
||||||
return 1;
|
return 1;
|
||||||
i--;
|
i--;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(utf8version_is_supported);
|
|
||||||
|
|
||||||
int utf8version_latest(void)
|
|
||||||
{
|
|
||||||
return utf8vers;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8version_latest);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* UTF-8 valid ranges.
|
* UTF-8 valid ranges.
|
||||||
@ -168,7 +151,7 @@ typedef const unsigned char utf8trie_t;
|
|||||||
* underlying datatype: unsigned char.
|
* underlying datatype: unsigned char.
|
||||||
*
|
*
|
||||||
* leaf[0]: The unicode version, stored as a generation number that is
|
* leaf[0]: The unicode version, stored as a generation number that is
|
||||||
* an index into utf8agetab[]. With this we can filter code
|
* an index into ->utf8agetab[]. With this we can filter code
|
||||||
* points based on the unicode version in which they were
|
* points based on the unicode version in which they were
|
||||||
* defined. The CCC of a non-defined code point is 0.
|
* defined. The CCC of a non-defined code point is 0.
|
||||||
* leaf[1]: Canonical Combining Class. During normalization, we need
|
* leaf[1]: Canonical Combining Class. During normalization, we need
|
||||||
@ -316,21 +299,19 @@ utf8hangul(const char *str, unsigned char *hangul)
|
|||||||
* is well-formed and corresponds to a known unicode code point. The
|
* is well-formed and corresponds to a known unicode code point. The
|
||||||
* shorthand for this will be "is valid UTF-8 unicode".
|
* shorthand for this will be "is valid UTF-8 unicode".
|
||||||
*/
|
*/
|
||||||
static utf8leaf_t *utf8nlookup(const struct utf8data *data,
|
static utf8leaf_t *utf8nlookup(const struct unicode_map *um,
|
||||||
unsigned char *hangul, const char *s, size_t len)
|
enum utf8_normalization n, unsigned char *hangul, const char *s,
|
||||||
|
size_t len)
|
||||||
{
|
{
|
||||||
utf8trie_t *trie = NULL;
|
utf8trie_t *trie = um->tables->utf8data + um->ntab[n]->offset;
|
||||||
int offlen;
|
int offlen;
|
||||||
int offset;
|
int offset;
|
||||||
int mask;
|
int mask;
|
||||||
int node;
|
int node;
|
||||||
|
|
||||||
if (!data)
|
|
||||||
return NULL;
|
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
trie = utf8data + data->offset;
|
|
||||||
node = 1;
|
node = 1;
|
||||||
while (node) {
|
while (node) {
|
||||||
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
|
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
|
||||||
@ -392,172 +373,29 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data,
|
|||||||
*
|
*
|
||||||
* Forwards to utf8nlookup().
|
* Forwards to utf8nlookup().
|
||||||
*/
|
*/
|
||||||
static utf8leaf_t *utf8lookup(const struct utf8data *data,
|
static utf8leaf_t *utf8lookup(const struct unicode_map *um,
|
||||||
unsigned char *hangul, const char *s)
|
enum utf8_normalization n, unsigned char *hangul, const char *s)
|
||||||
{
|
{
|
||||||
return utf8nlookup(data, hangul, s, (size_t)-1);
|
return utf8nlookup(um, n, hangul, s, (size_t)-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Maximum age of any character in s.
|
|
||||||
* Return -1 if s is not valid UTF-8 unicode.
|
|
||||||
* Return 0 if only non-assigned code points are used.
|
|
||||||
*/
|
|
||||||
int utf8agemax(const struct utf8data *data, const char *s)
|
|
||||||
{
|
|
||||||
utf8leaf_t *leaf;
|
|
||||||
int age = 0;
|
|
||||||
int leaf_age;
|
|
||||||
unsigned char hangul[UTF8HANGULLEAF];
|
|
||||||
|
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
while (*s) {
|
|
||||||
leaf = utf8lookup(data, hangul, s);
|
|
||||||
if (!leaf)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
|
||||||
if (leaf_age <= data->maxage && leaf_age > age)
|
|
||||||
age = leaf_age;
|
|
||||||
s += utf8clen(s);
|
|
||||||
}
|
|
||||||
return age;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8agemax);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Minimum age of any character in s.
|
|
||||||
* Return -1 if s is not valid UTF-8 unicode.
|
|
||||||
* Return 0 if non-assigned code points are used.
|
|
||||||
*/
|
|
||||||
int utf8agemin(const struct utf8data *data, const char *s)
|
|
||||||
{
|
|
||||||
utf8leaf_t *leaf;
|
|
||||||
int age;
|
|
||||||
int leaf_age;
|
|
||||||
unsigned char hangul[UTF8HANGULLEAF];
|
|
||||||
|
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
age = data->maxage;
|
|
||||||
while (*s) {
|
|
||||||
leaf = utf8lookup(data, hangul, s);
|
|
||||||
if (!leaf)
|
|
||||||
return -1;
|
|
||||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
|
||||||
if (leaf_age <= data->maxage && leaf_age < age)
|
|
||||||
age = leaf_age;
|
|
||||||
s += utf8clen(s);
|
|
||||||
}
|
|
||||||
return age;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8agemin);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Maximum age of any character in s, touch at most len bytes.
|
|
||||||
* Return -1 if s is not valid UTF-8 unicode.
|
|
||||||
*/
|
|
||||||
int utf8nagemax(const struct utf8data *data, const char *s, size_t len)
|
|
||||||
{
|
|
||||||
utf8leaf_t *leaf;
|
|
||||||
int age = 0;
|
|
||||||
int leaf_age;
|
|
||||||
unsigned char hangul[UTF8HANGULLEAF];
|
|
||||||
|
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
while (len && *s) {
|
|
||||||
leaf = utf8nlookup(data, hangul, s, len);
|
|
||||||
if (!leaf)
|
|
||||||
return -1;
|
|
||||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
|
||||||
if (leaf_age <= data->maxage && leaf_age > age)
|
|
||||||
age = leaf_age;
|
|
||||||
len -= utf8clen(s);
|
|
||||||
s += utf8clen(s);
|
|
||||||
}
|
|
||||||
return age;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8nagemax);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Maximum age of any character in s, touch at most len bytes.
|
|
||||||
* Return -1 if s is not valid UTF-8 unicode.
|
|
||||||
*/
|
|
||||||
int utf8nagemin(const struct utf8data *data, const char *s, size_t len)
|
|
||||||
{
|
|
||||||
utf8leaf_t *leaf;
|
|
||||||
int leaf_age;
|
|
||||||
int age;
|
|
||||||
unsigned char hangul[UTF8HANGULLEAF];
|
|
||||||
|
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
age = data->maxage;
|
|
||||||
while (len && *s) {
|
|
||||||
leaf = utf8nlookup(data, hangul, s, len);
|
|
||||||
if (!leaf)
|
|
||||||
return -1;
|
|
||||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
|
||||||
if (leaf_age <= data->maxage && leaf_age < age)
|
|
||||||
age = leaf_age;
|
|
||||||
len -= utf8clen(s);
|
|
||||||
s += utf8clen(s);
|
|
||||||
}
|
|
||||||
return age;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8nagemin);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Length of the normalization of s.
|
|
||||||
* Return -1 if s is not valid UTF-8 unicode.
|
|
||||||
*
|
|
||||||
* A string of Default_Ignorable_Code_Point has length 0.
|
|
||||||
*/
|
|
||||||
ssize_t utf8len(const struct utf8data *data, const char *s)
|
|
||||||
{
|
|
||||||
utf8leaf_t *leaf;
|
|
||||||
size_t ret = 0;
|
|
||||||
unsigned char hangul[UTF8HANGULLEAF];
|
|
||||||
|
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
while (*s) {
|
|
||||||
leaf = utf8lookup(data, hangul, s);
|
|
||||||
if (!leaf)
|
|
||||||
return -1;
|
|
||||||
if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
|
|
||||||
ret += utf8clen(s);
|
|
||||||
else if (LEAF_CCC(leaf) == DECOMPOSE)
|
|
||||||
ret += strlen(LEAF_STR(leaf));
|
|
||||||
else
|
|
||||||
ret += utf8clen(s);
|
|
||||||
s += utf8clen(s);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8len);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Length of the normalization of s, touch at most len bytes.
|
* Length of the normalization of s, touch at most len bytes.
|
||||||
* Return -1 if s is not valid UTF-8 unicode.
|
* Return -1 if s is not valid UTF-8 unicode.
|
||||||
*/
|
*/
|
||||||
ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
|
ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
|
||||||
|
const char *s, size_t len)
|
||||||
{
|
{
|
||||||
utf8leaf_t *leaf;
|
utf8leaf_t *leaf;
|
||||||
size_t ret = 0;
|
size_t ret = 0;
|
||||||
unsigned char hangul[UTF8HANGULLEAF];
|
unsigned char hangul[UTF8HANGULLEAF];
|
||||||
|
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
while (len && *s) {
|
while (len && *s) {
|
||||||
leaf = utf8nlookup(data, hangul, s, len);
|
leaf = utf8nlookup(um, n, hangul, s, len);
|
||||||
if (!leaf)
|
if (!leaf)
|
||||||
return -1;
|
return -1;
|
||||||
if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
|
if (um->tables->utf8agetab[LEAF_GEN(leaf)] >
|
||||||
|
um->ntab[n]->maxage)
|
||||||
ret += utf8clen(s);
|
ret += utf8clen(s);
|
||||||
else if (LEAF_CCC(leaf) == DECOMPOSE)
|
else if (LEAF_CCC(leaf) == DECOMPOSE)
|
||||||
ret += strlen(LEAF_STR(leaf));
|
ret += strlen(LEAF_STR(leaf));
|
||||||
@ -568,7 +406,6 @@ ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
|
|||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(utf8nlen);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set up an utf8cursor for use by utf8byte().
|
* Set up an utf8cursor for use by utf8byte().
|
||||||
@ -580,14 +417,13 @@ EXPORT_SYMBOL(utf8nlen);
|
|||||||
*
|
*
|
||||||
* Returns -1 on error, 0 on success.
|
* Returns -1 on error, 0 on success.
|
||||||
*/
|
*/
|
||||||
int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
|
int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
|
||||||
const char *s, size_t len)
|
enum utf8_normalization n, const char *s, size_t len)
|
||||||
{
|
{
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
if (!s)
|
if (!s)
|
||||||
return -1;
|
return -1;
|
||||||
u8c->data = data;
|
u8c->um = um;
|
||||||
|
u8c->n = n;
|
||||||
u8c->s = s;
|
u8c->s = s;
|
||||||
u8c->p = NULL;
|
u8c->p = NULL;
|
||||||
u8c->ss = NULL;
|
u8c->ss = NULL;
|
||||||
@ -604,23 +440,6 @@ int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
|
|||||||
return -1;
|
return -1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(utf8ncursor);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set up an utf8cursor for use by utf8byte().
|
|
||||||
*
|
|
||||||
* u8c : pointer to cursor.
|
|
||||||
* data : const struct utf8data to use for normalization.
|
|
||||||
* s : NUL-terminated string.
|
|
||||||
*
|
|
||||||
* Returns -1 on error, 0 on success.
|
|
||||||
*/
|
|
||||||
int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
|
|
||||||
const char *s)
|
|
||||||
{
|
|
||||||
return utf8ncursor(u8c, data, s, (unsigned int)-1);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8cursor);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get one byte from the normalized form of the string described by u8c.
|
* Get one byte from the normalized form of the string described by u8c.
|
||||||
@ -678,9 +497,9 @@ int utf8byte(struct utf8cursor *u8c)
|
|||||||
|
|
||||||
/* Look up the data for the current character. */
|
/* Look up the data for the current character. */
|
||||||
if (u8c->p) {
|
if (u8c->p) {
|
||||||
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
|
leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
|
||||||
} else {
|
} else {
|
||||||
leaf = utf8nlookup(u8c->data, u8c->hangul,
|
leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul,
|
||||||
u8c->s, u8c->len);
|
u8c->s, u8c->len);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -690,7 +509,8 @@ int utf8byte(struct utf8cursor *u8c)
|
|||||||
|
|
||||||
ccc = LEAF_CCC(leaf);
|
ccc = LEAF_CCC(leaf);
|
||||||
/* Characters that are too new have CCC 0. */
|
/* Characters that are too new have CCC 0. */
|
||||||
if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) {
|
if (u8c->um->tables->utf8agetab[LEAF_GEN(leaf)] >
|
||||||
|
u8c->um->ntab[u8c->n]->maxage) {
|
||||||
ccc = STOPPER;
|
ccc = STOPPER;
|
||||||
} else if (ccc == DECOMPOSE) {
|
} else if (ccc == DECOMPOSE) {
|
||||||
u8c->len -= utf8clen(u8c->s);
|
u8c->len -= utf8clen(u8c->s);
|
||||||
@ -704,7 +524,7 @@ int utf8byte(struct utf8cursor *u8c)
|
|||||||
goto ccc_mismatch;
|
goto ccc_mismatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
|
leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
|
||||||
if (!leaf)
|
if (!leaf)
|
||||||
return -1;
|
return -1;
|
||||||
ccc = LEAF_CCC(leaf);
|
ccc = LEAF_CCC(leaf);
|
||||||
@ -765,28 +585,10 @@ ccc_mismatch:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(utf8byte);
|
|
||||||
|
|
||||||
const struct utf8data *utf8nfdi(unsigned int maxage)
|
#ifdef CONFIG_UNICODE_NORMALIZATION_SELFTEST_MODULE
|
||||||
{
|
EXPORT_SYMBOL_GPL(utf8version_is_supported);
|
||||||
int i = ARRAY_SIZE(utf8nfdidata) - 1;
|
EXPORT_SYMBOL_GPL(utf8nlen);
|
||||||
|
EXPORT_SYMBOL_GPL(utf8ncursor);
|
||||||
while (maxage < utf8nfdidata[i].maxage)
|
EXPORT_SYMBOL_GPL(utf8byte);
|
||||||
i--;
|
#endif
|
||||||
if (maxage > utf8nfdidata[i].maxage)
|
|
||||||
return NULL;
|
|
||||||
return &utf8nfdidata[i];
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8nfdi);
|
|
||||||
|
|
||||||
const struct utf8data *utf8nfdicf(unsigned int maxage)
|
|
||||||
{
|
|
||||||
int i = ARRAY_SIZE(utf8nfdicfdata) - 1;
|
|
||||||
|
|
||||||
while (maxage < utf8nfdicfdata[i].maxage)
|
|
||||||
i--;
|
|
||||||
if (maxage > utf8nfdicfdata[i].maxage)
|
|
||||||
return NULL;
|
|
||||||
return &utf8nfdicfdata[i];
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(utf8nfdicf);
|
|
||||||
|
@ -18,9 +18,7 @@ unsigned int failed_tests;
|
|||||||
unsigned int total_tests;
|
unsigned int total_tests;
|
||||||
|
|
||||||
/* Tests will be based on this version. */
|
/* Tests will be based on this version. */
|
||||||
#define latest_maj 12
|
#define UTF8_LATEST UNICODE_AGE(12, 1, 0)
|
||||||
#define latest_min 1
|
|
||||||
#define latest_rev 0
|
|
||||||
|
|
||||||
#define _test(cond, func, line, fmt, ...) do { \
|
#define _test(cond, func, line, fmt, ...) do { \
|
||||||
total_tests++; \
|
total_tests++; \
|
||||||
@ -160,18 +158,22 @@ static const struct {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void check_utf8_nfdi(void)
|
static ssize_t utf8len(const struct unicode_map *um, enum utf8_normalization n,
|
||||||
|
const char *s)
|
||||||
|
{
|
||||||
|
return utf8nlen(um, n, s, (size_t)-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int utf8cursor(struct utf8cursor *u8c, const struct unicode_map *um,
|
||||||
|
enum utf8_normalization n, const char *s)
|
||||||
|
{
|
||||||
|
return utf8ncursor(u8c, um, n, s, (unsigned int)-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_utf8_nfdi(struct unicode_map *um)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
struct utf8cursor u8c;
|
struct utf8cursor u8c;
|
||||||
const struct utf8data *data;
|
|
||||||
|
|
||||||
data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
|
|
||||||
if (!data) {
|
|
||||||
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
|
|
||||||
__func__, latest_maj, latest_min, latest_rev);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
|
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
|
||||||
int len = strlen(nfdi_test_data[i].str);
|
int len = strlen(nfdi_test_data[i].str);
|
||||||
@ -179,10 +181,11 @@ static void check_utf8_nfdi(void)
|
|||||||
int j = 0;
|
int j = 0;
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
|
|
||||||
test((utf8len(data, nfdi_test_data[i].str) == nlen));
|
test((utf8len(um, UTF8_NFDI, nfdi_test_data[i].str) == nlen));
|
||||||
test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
|
test((utf8nlen(um, UTF8_NFDI, nfdi_test_data[i].str, len) ==
|
||||||
|
nlen));
|
||||||
|
|
||||||
if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
|
if (utf8cursor(&u8c, um, UTF8_NFDI, nfdi_test_data[i].str) < 0)
|
||||||
pr_err("can't create cursor\n");
|
pr_err("can't create cursor\n");
|
||||||
|
|
||||||
while ((c = utf8byte(&u8c)) > 0) {
|
while ((c = utf8byte(&u8c)) > 0) {
|
||||||
@ -196,18 +199,10 @@ static void check_utf8_nfdi(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_utf8_nfdicf(void)
|
static void check_utf8_nfdicf(struct unicode_map *um)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
struct utf8cursor u8c;
|
struct utf8cursor u8c;
|
||||||
const struct utf8data *data;
|
|
||||||
|
|
||||||
data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
|
|
||||||
if (!data) {
|
|
||||||
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
|
|
||||||
__func__, latest_maj, latest_min, latest_rev);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
|
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
|
||||||
int len = strlen(nfdicf_test_data[i].str);
|
int len = strlen(nfdicf_test_data[i].str);
|
||||||
@ -215,10 +210,13 @@ static void check_utf8_nfdicf(void)
|
|||||||
int j = 0;
|
int j = 0;
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
|
|
||||||
test((utf8len(data, nfdicf_test_data[i].str) == nlen));
|
test((utf8len(um, UTF8_NFDICF, nfdicf_test_data[i].str) ==
|
||||||
test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
|
nlen));
|
||||||
|
test((utf8nlen(um, UTF8_NFDICF, nfdicf_test_data[i].str, len) ==
|
||||||
|
nlen));
|
||||||
|
|
||||||
if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
|
if (utf8cursor(&u8c, um, UTF8_NFDICF,
|
||||||
|
nfdicf_test_data[i].str) < 0)
|
||||||
pr_err("can't create cursor\n");
|
pr_err("can't create cursor\n");
|
||||||
|
|
||||||
while ((c = utf8byte(&u8c)) > 0) {
|
while ((c = utf8byte(&u8c)) > 0) {
|
||||||
@ -232,16 +230,9 @@ static void check_utf8_nfdicf(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_utf8_comparisons(void)
|
static void check_utf8_comparisons(struct unicode_map *table)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
struct unicode_map *table = utf8_load("12.1.0");
|
|
||||||
|
|
||||||
if (IS_ERR(table)) {
|
|
||||||
pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
|
|
||||||
__func__, latest_maj, latest_min, latest_rev);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
|
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
|
||||||
const struct qstr s1 = {.name = nfdi_test_data[i].str,
|
const struct qstr s1 = {.name = nfdi_test_data[i].str,
|
||||||
@ -262,42 +253,49 @@ static void check_utf8_comparisons(void)
|
|||||||
test_f(!utf8_strncasecmp(table, &s1, &s2),
|
test_f(!utf8_strncasecmp(table, &s1, &s2),
|
||||||
"%s %s comparison mismatch\n", s1.name, s2.name);
|
"%s %s comparison mismatch\n", s1.name, s2.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
utf8_unload(table);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_supported_versions(void)
|
static void check_supported_versions(struct unicode_map *um)
|
||||||
{
|
{
|
||||||
/* Unicode 7.0.0 should be supported. */
|
/* Unicode 7.0.0 should be supported. */
|
||||||
test(utf8version_is_supported(7, 0, 0));
|
test(utf8version_is_supported(um, UNICODE_AGE(7, 0, 0)));
|
||||||
|
|
||||||
/* Unicode 9.0.0 should be supported. */
|
/* Unicode 9.0.0 should be supported. */
|
||||||
test(utf8version_is_supported(9, 0, 0));
|
test(utf8version_is_supported(um, UNICODE_AGE(9, 0, 0)));
|
||||||
|
|
||||||
/* Unicode 1x.0.0 (the latest version) should be supported. */
|
/* Unicode 1x.0.0 (the latest version) should be supported. */
|
||||||
test(utf8version_is_supported(latest_maj, latest_min, latest_rev));
|
test(utf8version_is_supported(um, UTF8_LATEST));
|
||||||
|
|
||||||
/* Next versions don't exist. */
|
/* Next versions don't exist. */
|
||||||
test(!utf8version_is_supported(13, 0, 0));
|
test(!utf8version_is_supported(um, UNICODE_AGE(13, 0, 0)));
|
||||||
test(!utf8version_is_supported(0, 0, 0));
|
test(!utf8version_is_supported(um, UNICODE_AGE(0, 0, 0)));
|
||||||
test(!utf8version_is_supported(-1, -1, -1));
|
test(!utf8version_is_supported(um, UNICODE_AGE(-1, -1, -1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init init_test_ucd(void)
|
static int __init init_test_ucd(void)
|
||||||
{
|
{
|
||||||
|
struct unicode_map *um;
|
||||||
|
|
||||||
failed_tests = 0;
|
failed_tests = 0;
|
||||||
total_tests = 0;
|
total_tests = 0;
|
||||||
|
|
||||||
check_supported_versions();
|
um = utf8_load(UTF8_LATEST);
|
||||||
check_utf8_nfdi();
|
if (IS_ERR(um)) {
|
||||||
check_utf8_nfdicf();
|
pr_err("%s: Unable to load utf8 table.\n", __func__);
|
||||||
check_utf8_comparisons();
|
return PTR_ERR(um);
|
||||||
|
}
|
||||||
|
|
||||||
|
check_supported_versions(um);
|
||||||
|
check_utf8_nfdi(um);
|
||||||
|
check_utf8_nfdicf(um);
|
||||||
|
check_utf8_comparisons(um);
|
||||||
|
|
||||||
if (!failed_tests)
|
if (!failed_tests)
|
||||||
pr_info("All %u tests passed\n", total_tests);
|
pr_info("All %u tests passed\n", total_tests);
|
||||||
else
|
else
|
||||||
pr_err("%u out of %u tests failed\n", failed_tests,
|
pr_err("%u out of %u tests failed\n", failed_tests,
|
||||||
total_tests);
|
total_tests);
|
||||||
|
utf8_unload(um);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
/* This file is generated code, do not edit. */
|
/* This file is generated code, do not edit. */
|
||||||
#ifndef __INCLUDED_FROM_UTF8NORM_C__
|
|
||||||
#error Only nls_utf8-norm.c should include this file.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static const unsigned int utf8vers = 0xc0100;
|
#include <linux/module.h>
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include "utf8n.h"
|
||||||
|
|
||||||
static const unsigned int utf8agetab[] = {
|
static const unsigned int utf8agetab[] = {
|
||||||
0,
|
0,
|
||||||
@ -4107,3 +4106,18 @@ static const unsigned char utf8data[64256] = {
|
|||||||
0x52,0x04,0x00,0x00,0x11,0x04,0x00,0x00,0x02,0x00,0xcf,0x86,0xcf,0x06,0x02,0x00,
|
0x52,0x04,0x00,0x00,0x11,0x04,0x00,0x00,0x02,0x00,0xcf,0x86,0xcf,0x06,0x02,0x00,
|
||||||
0x81,0x80,0xcf,0x86,0x85,0x84,0xcf,0x86,0xcf,0x06,0x02,0x00,0x00,0x00,0x00,0x00
|
0x81,0x80,0xcf,0x86,0x85,0x84,0xcf,0x86,0xcf,0x06,0x02,0x00,0x00,0x00,0x00,0x00
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct utf8data_table utf8_data_table = {
|
||||||
|
.utf8agetab = utf8agetab,
|
||||||
|
.utf8agetab_size = ARRAY_SIZE(utf8agetab),
|
||||||
|
|
||||||
|
.utf8nfdicfdata = utf8nfdicfdata,
|
||||||
|
.utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),
|
||||||
|
|
||||||
|
.utf8nfdidata = utf8nfdidata,
|
||||||
|
.utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),
|
||||||
|
|
||||||
|
.utf8data = utf8data,
|
||||||
|
};
|
||||||
|
EXPORT_SYMBOL_GPL(utf8_data_table);
|
||||||
|
MODULE_LICENSE("GPL v2");
|
@ -11,53 +11,9 @@
|
|||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
#include <linux/string.h>
|
#include <linux/string.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
|
#include <linux/unicode.h>
|
||||||
|
|
||||||
/* Encoding a unicode version number as a single unsigned int. */
|
int utf8version_is_supported(const struct unicode_map *um, unsigned int version);
|
||||||
#define UNICODE_MAJ_SHIFT (16)
|
|
||||||
#define UNICODE_MIN_SHIFT (8)
|
|
||||||
|
|
||||||
#define UNICODE_AGE(MAJ, MIN, REV) \
|
|
||||||
(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
|
|
||||||
((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
|
|
||||||
((unsigned int)(REV)))
|
|
||||||
|
|
||||||
/* Highest unicode version supported by the data tables. */
|
|
||||||
extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
|
|
||||||
extern int utf8version_latest(void);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Look for the correct const struct utf8data for a unicode version.
|
|
||||||
* Returns NULL if the version requested is too new.
|
|
||||||
*
|
|
||||||
* Two normalization forms are supported: nfdi and nfdicf.
|
|
||||||
*
|
|
||||||
* nfdi:
|
|
||||||
* - Apply unicode normalization form NFD.
|
|
||||||
* - Remove any Default_Ignorable_Code_Point.
|
|
||||||
*
|
|
||||||
* nfdicf:
|
|
||||||
* - Apply unicode normalization form NFD.
|
|
||||||
* - Remove any Default_Ignorable_Code_Point.
|
|
||||||
* - Apply a full casefold (C + F).
|
|
||||||
*/
|
|
||||||
extern const struct utf8data *utf8nfdi(unsigned int maxage);
|
|
||||||
extern const struct utf8data *utf8nfdicf(unsigned int maxage);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Determine the maximum age of any unicode character in the string.
|
|
||||||
* Returns 0 if only unassigned code points are present.
|
|
||||||
* Returns -1 if the input is not valid UTF-8.
|
|
||||||
*/
|
|
||||||
extern int utf8agemax(const struct utf8data *data, const char *s);
|
|
||||||
extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Determine the minimum age of any unicode character in the string.
|
|
||||||
* Returns 0 if any unassigned code points are present.
|
|
||||||
* Returns -1 if the input is not valid UTF-8.
|
|
||||||
*/
|
|
||||||
extern int utf8agemin(const struct utf8data *data, const char *s);
|
|
||||||
extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Determine the length of the normalized form of the string,
|
* Determine the length of the normalized form of the string,
|
||||||
@ -65,8 +21,8 @@ extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
|
|||||||
* Returns 0 if only ignorable code points are present.
|
* Returns 0 if only ignorable code points are present.
|
||||||
* Returns -1 if the input is not valid UTF-8.
|
* Returns -1 if the input is not valid UTF-8.
|
||||||
*/
|
*/
|
||||||
extern ssize_t utf8len(const struct utf8data *data, const char *s);
|
ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
|
||||||
extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
|
const char *s, size_t len);
|
||||||
|
|
||||||
/* Needed in struct utf8cursor below. */
|
/* Needed in struct utf8cursor below. */
|
||||||
#define UTF8HANGULLEAF (12)
|
#define UTF8HANGULLEAF (12)
|
||||||
@ -75,7 +31,8 @@ extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
|
|||||||
* Cursor structure used by the normalizer.
|
* Cursor structure used by the normalizer.
|
||||||
*/
|
*/
|
||||||
struct utf8cursor {
|
struct utf8cursor {
|
||||||
const struct utf8data *data;
|
const struct unicode_map *um;
|
||||||
|
enum utf8_normalization n;
|
||||||
const char *s;
|
const char *s;
|
||||||
const char *p;
|
const char *p;
|
||||||
const char *ss;
|
const char *ss;
|
||||||
@ -92,10 +49,8 @@ struct utf8cursor {
|
|||||||
* Returns 0 on success.
|
* Returns 0 on success.
|
||||||
* Returns -1 on failure.
|
* Returns -1 on failure.
|
||||||
*/
|
*/
|
||||||
extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
|
int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
|
||||||
const char *s);
|
enum utf8_normalization n, const char *s, size_t len);
|
||||||
extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
|
|
||||||
const char *s, size_t len);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get the next byte in the normalization.
|
* Get the next byte in the normalization.
|
||||||
@ -105,4 +60,24 @@ extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
|
|||||||
*/
|
*/
|
||||||
extern int utf8byte(struct utf8cursor *u8c);
|
extern int utf8byte(struct utf8cursor *u8c);
|
||||||
|
|
||||||
|
struct utf8data {
|
||||||
|
unsigned int maxage;
|
||||||
|
unsigned int offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct utf8data_table {
|
||||||
|
const unsigned int *utf8agetab;
|
||||||
|
int utf8agetab_size;
|
||||||
|
|
||||||
|
const struct utf8data *utf8nfdicfdata;
|
||||||
|
int utf8nfdicfdata_size;
|
||||||
|
|
||||||
|
const struct utf8data *utf8nfdidata;
|
||||||
|
int utf8nfdidata_size;
|
||||||
|
|
||||||
|
const unsigned char *utf8data;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern struct utf8data_table utf8_data_table;
|
||||||
|
|
||||||
#endif /* UTF8NORM_H */
|
#endif /* UTF8NORM_H */
|
||||||
|
@ -5,9 +5,52 @@
|
|||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/dcache.h>
|
#include <linux/dcache.h>
|
||||||
|
|
||||||
|
struct utf8data;
|
||||||
|
struct utf8data_table;
|
||||||
|
|
||||||
|
#define UNICODE_MAJ_SHIFT 16
|
||||||
|
#define UNICODE_MIN_SHIFT 8
|
||||||
|
|
||||||
|
#define UNICODE_AGE(MAJ, MIN, REV) \
|
||||||
|
(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
|
||||||
|
((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
|
||||||
|
((unsigned int)(REV)))
|
||||||
|
|
||||||
|
static inline u8 unicode_major(unsigned int age)
|
||||||
|
{
|
||||||
|
return (age >> UNICODE_MAJ_SHIFT) & 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u8 unicode_minor(unsigned int age)
|
||||||
|
{
|
||||||
|
return (age >> UNICODE_MIN_SHIFT) & 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u8 unicode_rev(unsigned int age)
|
||||||
|
{
|
||||||
|
return age & 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Two normalization forms are supported:
|
||||||
|
* 1) NFDI
|
||||||
|
* - Apply unicode normalization form NFD.
|
||||||
|
* - Remove any Default_Ignorable_Code_Point.
|
||||||
|
* 2) NFDICF
|
||||||
|
* - Apply unicode normalization form NFD.
|
||||||
|
* - Remove any Default_Ignorable_Code_Point.
|
||||||
|
* - Apply a full casefold (C + F).
|
||||||
|
*/
|
||||||
|
enum utf8_normalization {
|
||||||
|
UTF8_NFDI = 0,
|
||||||
|
UTF8_NFDICF,
|
||||||
|
UTF8_NMAX,
|
||||||
|
};
|
||||||
|
|
||||||
struct unicode_map {
|
struct unicode_map {
|
||||||
const char *charset;
|
unsigned int version;
|
||||||
int version;
|
const struct utf8data *ntab[UTF8_NMAX];
|
||||||
|
const struct utf8data_table *tables;
|
||||||
};
|
};
|
||||||
|
|
||||||
int utf8_validate(const struct unicode_map *um, const struct qstr *str);
|
int utf8_validate(const struct unicode_map *um, const struct qstr *str);
|
||||||
@ -30,7 +73,7 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
|
|||||||
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
|
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
|
||||||
struct qstr *str);
|
struct qstr *str);
|
||||||
|
|
||||||
struct unicode_map *utf8_load(const char *version);
|
struct unicode_map *utf8_load(unsigned int version);
|
||||||
void utf8_unload(struct unicode_map *um);
|
void utf8_unload(struct unicode_map *um);
|
||||||
|
|
||||||
#endif /* _LINUX_UNICODE_H */
|
#endif /* _LINUX_UNICODE_H */
|
||||||
|
Loading…
Reference in New Issue
Block a user