Remove mbstring identify filters

mbstring had an 'identify filter' for almost every supported text encoding
which was used when auto-detecting the most likely encoding for a string.
It would run over the string and set a 'flag' if it saw anything which
did not appear likely to be the encoding in question.

One problem with this scheme was that encodings which merely appeared
less likely to be the correct one were completely rejected, even if there
was no better candidate. Another problem was that the 'identify filters'
had a huge amount of code duplication with the 'conversion filters'.

Eliminate the identify filters. Instead, when auto-detecting text
encoding, use conversion filters to see whether the input string is valid
in candidate encodings or not. At the same time, watch the type of
codepoints which the string decodes to and mark it as less likely if
non-printable characters (ESC, form feed, bell, etc.) or 'private use
area' codepoints are seen.

Interestingly, one old test case in which JIS text was misidentified
as UTF-8 (and this wrong behavior was enshrined in the test) was 'fixed'
and the JIS string is now auto-detected as JIS.
This commit is contained in:
Alex Dowad 2020-11-04 20:10:14 +02:00
parent a416f938f3
commit 3e7acf901d
121 changed files with 118 additions and 2817 deletions

View File

@ -161,7 +161,6 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/mbfl/mbfl_convert.c
libmbfl/mbfl/mbfl_encoding.c
libmbfl/mbfl/mbfl_filter_output.c
libmbfl/mbfl/mbfl_ident.c
libmbfl/mbfl/mbfl_language.c
libmbfl/mbfl/mbfl_memory_device.c
libmbfl/mbfl/mbfl_string.c
@ -177,7 +176,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/nls/nls_tr.c
libmbfl/nls/nls_ua.c
])
PHP_MBSTRING_ADD_INSTALL_HEADERS([libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_ident.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h])
PHP_MBSTRING_ADD_INSTALL_HEADERS([libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h])
])
dnl

View File

@ -40,14 +40,14 @@ if (PHP_MBSTRING != "no") {
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \
mbfl_filter_output.c mbfl_ident.c mbfl_language.c mbfl_memory_device.c \
mbfl_filter_output.c mbfl_language.c mbfl_memory_device.c \
mbfl_string.c", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \
nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \
nls_ua.c nls_tr.c", "mbstring");
PHP_INSTALL_HEADERS("ext/mbstring", "mbstring.h libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_ident.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h");
PHP_INSTALL_HEADERS("ext/mbstring", "mbstring.h libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h");
AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support');

View File

@ -31,8 +31,6 @@
#include "mbfilter.h"
#include "mbfilter_7bit.h"
static int mbfl_filt_ident_7bit(int c, mbfl_identify_filter *filter);
const mbfl_encoding mbfl_encoding_7bit = {
mbfl_no_encoding_7bit,
"7bit",
@ -44,12 +42,6 @@ const mbfl_encoding mbfl_encoding_7bit = {
NULL
};
const struct mbfl_identify_vtbl vtbl_identify_7bit = {
mbfl_no_encoding_7bit,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_7bit
};
const struct mbfl_convert_vtbl vtbl_8bit_7bit = {
mbfl_no_encoding_8bit,
mbfl_no_encoding_7bit,
@ -88,11 +80,3 @@ int mbfl_filt_conv_any_7bit(int c, mbfl_convert_filter *filter)
}
return c;
}
/*
 * Identify filter for the "7bit" encoding.
 *
 * Sets filter->flag to 1 as soon as a value outside the 7-bit range is seen.
 * The input value is always returned unchanged.
 */
static int mbfl_filt_ident_7bit(int c, mbfl_identify_filter *filter)
{
	if (c > 0x7f) {
		/* High bit set: this cannot be 7bit data */
		filter->flag = 1;
	}

	return c;
}

View File

@ -34,7 +34,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_7bit;
extern const struct mbfl_identify_vtbl vtbl_identify_7bit;
extern const struct mbfl_convert_vtbl vtbl_8bit_7bit;
extern const struct mbfl_convert_vtbl vtbl_7bit_8bit;

View File

@ -30,8 +30,6 @@
#include "mbfilter_armscii8.h"
#include "unicode_table_armscii8.h"
static int mbfl_filt_ident_armscii8(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_armscii8_aliases[] = {"ArmSCII-8", "ArmSCII8", "ARMSCII-8", "ARMSCII8", NULL};
const mbfl_encoding mbfl_encoding_armscii8 = {
@ -45,12 +43,6 @@ const mbfl_encoding mbfl_encoding_armscii8 = {
&vtbl_wchar_armscii8
};
const struct mbfl_identify_vtbl vtbl_identify_armscii8 = {
mbfl_no_encoding_armscii8,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_armscii8
};
const struct mbfl_convert_vtbl vtbl_wchar_armscii8 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_armscii8,
@ -108,11 +100,3 @@ int mbfl_filt_conv_wchar_armscii8(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for ArmSCII-8.
 *
 * Values below armscii8_ucs_table_min are accepted without a lookup. Any other
 * value is looked up in armscii8_ucs_table; a zero entry sets filter->flag
 * to 1. The input value is always returned unchanged.
 */
static int mbfl_filt_ident_armscii8(int c, mbfl_identify_filter *filter)
{
	if (c < armscii8_ucs_table_min) {
		return c;
	}

	if (armscii8_ucs_table[c - armscii8_ucs_table_min] == 0) {
		/* No mapping for this byte */
		filter->flag = 1;
	}

	return c;
}

View File

@ -28,7 +28,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_armscii8;
extern const struct mbfl_identify_vtbl vtbl_identify_armscii8;
extern const struct mbfl_convert_vtbl vtbl_wchar_armscii8;
extern const struct mbfl_convert_vtbl vtbl_armscii8_wchar;

View File

@ -31,8 +31,6 @@
#include "mbfilter.h"
#include "mbfilter_ascii.h"
static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "IBM-367", "cp367", "csASCII", NULL};
const mbfl_encoding mbfl_encoding_ascii = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_ascii = {
&vtbl_wchar_ascii
};
const struct mbfl_identify_vtbl vtbl_identify_ascii = {
mbfl_no_encoding_ascii,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_ascii
};
const struct mbfl_convert_vtbl vtbl_ascii_wchar = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_wchar,
@ -101,16 +93,3 @@ int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for ASCII.
 *
 * Accepts 0x20-0x7F plus NUL, HTAB, LF and CR. Anything else sets
 * filter->flag to 1. The input value is always returned unchanged.
 */
static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter)
{
	switch (c) {
	case 0x00: /* null */
	case 0x09: /* HTAB */
	case 0x0a: /* LF */
	case 0x0d: /* CR */
		return c;
	default:
		break;
	}

	if (c < 0x20 || c >= 0x80) {
		/* Other control characters, negative values, or 8-bit values */
		filter->flag = 1;
	}

	return c;
}

View File

@ -34,7 +34,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_ascii;
extern const struct mbfl_identify_vtbl vtbl_identify_ascii;
extern const struct mbfl_convert_vtbl vtbl_ascii_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ascii;

View File

@ -32,8 +32,6 @@
#include "unicode_table_big5.h"
static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -77,18 +75,6 @@ const mbfl_encoding mbfl_encoding_cp950 = {
&vtbl_wchar_cp950
};
const struct mbfl_identify_vtbl vtbl_identify_big5 = {
mbfl_no_encoding_big5,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_big5
};
const struct mbfl_identify_vtbl vtbl_identify_cp950 = {
mbfl_no_encoding_cp950,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_big5
};
const struct mbfl_convert_vtbl vtbl_big5_wchar = {
mbfl_no_encoding_big5,
mbfl_no_encoding_wchar,
@ -322,28 +308,3 @@ mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter shared by Big5 and CP950.
 *
 * filter->status is non-zero while the second byte of a double-byte character
 * is expected. The only difference between the two encodings here is the lower
 * bound for a lead byte: it must be above 0x80 for CP950 and above 0xA0 for
 * Big5. Unacceptable bytes set filter->flag to 1. The input value is always
 * returned unchanged.
 */
static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter)
{
	const int lead_floor =
		(filter->encoding->no_encoding == mbfl_no_encoding_cp950) ? 0x80 : 0xa0;

	if (filter->status) {
		/* Second byte: must fall in 0x40-0x7E or 0xA1-0xFE */
		const int in_low_range = (c >= 0x40 && c <= 0x7e);
		const int in_high_range = (c >= 0xa1 && c <= 0xfe);

		if (!in_low_range && !in_high_range) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if (c >= 0 && c < 0x80) {
		/* Single-byte latin: nothing to do */
		return c;
	}

	if (c > lead_floor && c < 0xff) {
		/* DBCS lead byte: expect a trail byte next */
		filter->status = 1;
	} else {
		filter->flag = 1;
	}

	return c;
}

View File

@ -33,12 +33,10 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_big5;
extern const struct mbfl_identify_vtbl vtbl_identify_big5;
extern const struct mbfl_convert_vtbl vtbl_big5_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_big5;
extern const mbfl_encoding mbfl_encoding_cp950;
extern const struct mbfl_identify_vtbl vtbl_identify_cp950;
extern const struct mbfl_convert_vtbl vtbl_cp950_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp950;

View File

@ -31,8 +31,6 @@
#include "mbfilter_cp1251.h"
#include "unicode_table_cp1251.h"
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL};
const mbfl_encoding mbfl_encoding_cp1251 = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp1251 = {
&vtbl_wchar_cp1251
};
const struct mbfl_identify_vtbl vtbl_identify_cp1251 = {
mbfl_no_encoding_cp1251,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp1251
};
const struct mbfl_convert_vtbl vtbl_wchar_cp1251 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp1251,
@ -107,12 +99,3 @@ int mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for CP1251.
 *
 * Byte 0x98 is the only one treated as unused; seeing it sets filter->flag
 * to 1. The input value is always returned unchanged.
 */
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter)
{
	if (c != 0x98) {
		return c;
	}

	/* The single unassigned byte of this single-byte encoding */
	filter->flag = 1;
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_cp1251;
extern const struct mbfl_identify_vtbl vtbl_identify_cp1251;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp1251;
extern const struct mbfl_convert_vtbl vtbl_cp1251_wchar;

View File

@ -31,8 +31,6 @@
#include "mbfilter_cp1252.h"
#include "unicode_table_cp1252.h"
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL};
const mbfl_encoding mbfl_encoding_cp1252 = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp1252 = {
&vtbl_wchar_cp1252
};
const struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
mbfl_no_encoding_cp1252,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp1252
};
const struct mbfl_convert_vtbl vtbl_cp1252_wchar = {
mbfl_no_encoding_cp1252,
mbfl_no_encoding_wchar,
@ -115,11 +107,3 @@ int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for CP1252.
 *
 * Only values in 0x80-0x9F are looked up in cp1252_ucs_table; a zero entry
 * there sets filter->flag to 1. Every other value is accepted. The input
 * value is always returned unchanged.
 */
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter)
{
	if (c < 0x80 || c >= 0xA0) {
		/* Outside the range that can contain unassigned bytes */
		return c;
	}

	if (cp1252_ucs_table[c - 0x80] == 0) {
		filter->flag = 1;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_cp1252;
extern const struct mbfl_identify_vtbl vtbl_identify_cp1252;
extern const struct mbfl_convert_vtbl vtbl_cp1252_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp1252;

View File

@ -31,8 +31,6 @@
#include "mbfilter_cp1254.h"
#include "unicode_table_cp1254.h"
static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
const mbfl_encoding mbfl_encoding_cp1254 = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp1254 = {
&vtbl_wchar_cp1254
};
const struct mbfl_identify_vtbl vtbl_identify_cp1254 = {
mbfl_no_encoding_cp1254,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp1254
};
const struct mbfl_convert_vtbl vtbl_cp1254_wchar = {
mbfl_no_encoding_cp1254,
mbfl_no_encoding_wchar,
@ -107,11 +99,3 @@ int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter)
CK((*filter->output_function)(s, filter->data));
return c;
}
/*
 * Identify filter for CP1254.
 *
 * Only values in 0x81-0x9E are looked up in cp1254_ucs_table (indexed relative
 * to cp1254_ucs_table_min); a zero entry there sets filter->flag to 1. Every
 * other value is accepted. The input value is always returned unchanged.
 */
static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter)
{
	if (c < 0x81 || c > 0x9E) {
		/* Outside the range that can contain unassigned bytes */
		return c;
	}

	if (cp1254_ucs_table[c - cp1254_ucs_table_min] == 0) {
		filter->flag = 1;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_cp1254;
extern const struct mbfl_identify_vtbl vtbl_identify_cp1254;
extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254;

View File

@ -36,10 +36,6 @@ typedef struct _mbfl_filt_conv_wchar_cp50220_ctx {
mbfl_convert_filter last;
} mbfl_filt_conv_wchar_cp50220_ctx;
static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter);
static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter);
static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter);
static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter);
static void mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt);
static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt);
static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest);
@ -99,36 +95,6 @@ const mbfl_encoding mbfl_encoding_cp50222 = {
&vtbl_wchar_cp50222
};
const struct mbfl_identify_vtbl vtbl_identify_jis_ms = {
mbfl_no_encoding_jis_ms,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_jis_ms
};
const struct mbfl_identify_vtbl vtbl_identify_cp50220 = {
mbfl_no_encoding_cp50220,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp50220
};
const struct mbfl_identify_vtbl vtbl_identify_cp50220raw = {
mbfl_no_encoding_cp50220raw,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp50220
};
const struct mbfl_identify_vtbl vtbl_identify_cp50221 = {
mbfl_no_encoding_cp50221,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp50221
};
const struct mbfl_identify_vtbl vtbl_identify_cp50222 = {
mbfl_no_encoding_cp50222,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp50222
};
const struct mbfl_convert_vtbl vtbl_jis_ms_wchar = {
mbfl_no_encoding_jis_ms,
mbfl_no_encoding_wchar,
@ -948,332 +914,3 @@ mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter)
return 0;
}
/*
 * Identify filter for JIS-ms.
 *
 * Consumes one input value per call and sets filter->flag to 1 whenever the
 * value (or an escape sequence it is part of) is not accepted by the state
 * machine below. The input value is always returned unchanged.
 *
 * filter->status packs two pieces of state:
 *   - upper bits: the currently selected character set (0x00, 0x10, 0x20,
 *     0x80 or 0x90; see the table at the top of the switch)
 *   - low nibble: parsing sub-state (0 = idle, 1 = second byte of a
 *     double-byte character expected, 2..5 = inside an escape sequence)
 */
static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter)
{
/* Jumped to after the low nibble has been cleared, so that `c` is examined
 * again from the idle sub-state */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x20: X 0201 kana */
/* case 0x80: X 0208 */
/* case 0x90: X 0212 */
case 0:
if (c == 0x1b) {
/* ESC: enter sub-state 2 while keeping the selected character set */
filter->status += 2;
} else if (c == 0x0e) { /* "kana in" */
filter->status = 0x20;
} else if (c == 0x0f) { /* "kana out" */
filter->status = 0;
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
/* case 0x91: X 0212 second char */
case 1:
/* Drop back to the idle sub-state before looking at the byte */
filter->status &= ~0xf;
if (c == 0x1b) {
/* ESC where a second byte was expected: handle it as ESC in the idle
 * sub-state; flag is not set on this path */
goto retry;
} else if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
/* Unknown escape sequence: flag it, then re-examine `c` as ordinary input */
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x28) { /* '(' */
filter->status++;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ ( */
case 4:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x44) { /* 'D' */
filter->status = 0x90;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else if (c == 0x49) { /* 'I' */
filter->status = 0x20;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* Any other low-nibble value: reset the whole state */
default:
filter->status = 0;
break;
}
return c;
}
/*
 * Identify filter for CP50220 (also referenced by the CP50220raw identify
 * vtable).
 *
 * Consumes one input value per call and sets filter->flag to 1 whenever the
 * value (or an escape sequence it is part of) is not accepted by the state
 * machine below. The input value is always returned unchanged.
 *
 * filter->status packs two pieces of state:
 *   - upper bits: the currently selected character set (0x00, 0x10 or 0x80;
 *     see the table at the top of the switch)
 *   - low nibble: parsing sub-state (0 = idle, 1 = second byte of a
 *     double-byte character expected, 2, 3, 5 = inside an escape sequence)
 */
static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter)
{
/* Jumped to after the low nibble has been cleared, so that `c` is examined
 * again from the idle sub-state */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x80: X 0208 */
case 0:
if (c == 0x1b) {
/* ESC: enter sub-state 2 while keeping the selected character set */
filter->status += 2;
} else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
case 1:
if (c == 0x1b) {
/* ESC where a second byte was expected: the increment moves the low
 * nibble from 1 to 2, the ESC sub-state; flag is not set on this path */
filter->status++;
} else {
filter->status &= ~0xf;
if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
/* Unknown escape sequence: flag it, then re-examine `c` as ordinary input */
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42) { /* 'B' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* Any other low-nibble value (including 4, which has no case here): reset
 * the whole state */
default:
filter->status = 0;
break;
}
return c;
}
/*
 * Identify filter for CP50221.
 *
 * Consumes one input value per call and sets filter->flag to 1 whenever the
 * value (or an escape sequence it is part of) is not accepted by the state
 * machine below. The input value is always returned unchanged.
 *
 * filter->status packs two pieces of state:
 *   - upper bits: the currently selected character set (0x00, 0x10, 0x20 or
 *     0x80; 0x20 is selected by "ESC ( I" below)
 *   - low nibble: parsing sub-state (0 = idle, 1 = second byte of a
 *     double-byte character expected, 2, 3, 5 = inside an escape sequence)
 */
static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter)
{
/* Jumped to after the low nibble has been cleared, so that `c` is examined
 * again from the idle sub-state */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x80: X 0208 */
case 0:
if (c == 0x1b) {
/* ESC: enter sub-state 2 while keeping the selected character set */
filter->status += 2;
} else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
case 1:
if (c == 0x1b) {
/* ESC where a second byte was expected: the increment moves the low
 * nibble from 1 to 2, the ESC sub-state; flag is not set on this path */
filter->status++;
} else {
filter->status &= ~0xf;
if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
/* Unknown escape sequence: flag it, then re-examine `c` as ordinary input */
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42) { /* 'B' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else if (c == 0x49) { /* 'I' */
filter->status = 0x20;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* Any other low-nibble value (including 4, which has no case here): reset
 * the whole state */
default:
filter->status = 0;
break;
}
return c;
}
/*
 * Identify filter for CP50222.
 *
 * Consumes one input value per call and sets filter->flag to 1 whenever the
 * value (or an escape sequence it is part of) is not accepted by the state
 * machine below. The input value is always returned unchanged.
 *
 * filter->status packs two pieces of state:
 *   - upper bits: the currently selected character set (0x00, 0x10 or 0x80;
 *     see the table at the top of the switch)
 *   - low nibble: parsing sub-state (0 = idle, 1 = second byte of a
 *     double-byte character expected, 2, 3, 5 = inside an escape sequence)
 *
 * NOTE(review): the body appears to be statement-for-statement the same as
 * mbfl_filt_ident_cp50220 -- confirm whether that is intended.
 */
static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter)
{
/* Jumped to after the low nibble has been cleared, so that `c` is examined
 * again from the idle sub-state */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x80: X 0208 */
case 0:
if (c == 0x1b) {
/* ESC: enter sub-state 2 while keeping the selected character set */
filter->status += 2;
} else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
case 1:
if (c == 0x1b) {
/* ESC where a second byte was expected: the increment moves the low
 * nibble from 1 to 2, the ESC sub-state; flag is not set on this path */
filter->status++;
} else {
filter->status &= ~0xf;
if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
/* Unknown escape sequence: flag it, then re-examine `c` as ordinary input */
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42) { /* 'B' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* Any other low-nibble value (including 4, which has no case here): reset
 * the whole state */
default:
filter->status = 0;
break;
}
return c;
}

View File

@ -37,11 +37,7 @@ extern const mbfl_encoding mbfl_encoding_cp50220;
extern const mbfl_encoding mbfl_encoding_cp50220raw;
extern const mbfl_encoding mbfl_encoding_cp50221;
extern const mbfl_encoding mbfl_encoding_cp50222;
extern const struct mbfl_identify_vtbl vtbl_identify_jis_ms;
extern const struct mbfl_identify_vtbl vtbl_identify_cp50220;
extern const struct mbfl_identify_vtbl vtbl_identify_cp50220raw;
extern const struct mbfl_identify_vtbl vtbl_identify_cp50221;
extern const struct mbfl_identify_vtbl vtbl_identify_cp50222;
extern const struct mbfl_convert_vtbl vtbl_jis_ms_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_jis_ms;
extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar;

View File

@ -34,8 +34,6 @@
#include "unicode_table_jis.h"
#include "cp932_table.h"
static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -55,15 +53,8 @@ static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL};
const struct mbfl_identify_vtbl vtbl_identify_cp51932 = {
mbfl_no_encoding_cp51932,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp51932
};
const mbfl_encoding mbfl_encoding_cp51932 = {
mbfl_no_encoding_cp51932,
"CP51932",
@ -299,40 +290,3 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for CP51932.
 *
 * filter->status tracks what is expected next:
 *   0 - start of a character
 *   1 - second byte of a two-byte kanji (0xA1-0xFE)
 *   2 - kana byte following 0x8E (0xA1-0xDF)
 * Unacceptable bytes set filter->flag to 1. The input value is always
 * returned unchanged.
 */
static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter)
{
	if (filter->status == 0) {
		if (c >= 0xa1 && c <= 0xfe) {
			/* kanji first char */
			filter->status = 1;
		} else if (c == 0x8e) {
			/* kana first char */
			filter->status = 2;
		} else if (c < 0 || c >= 0x80) {
			/* neither latin nor a valid lead byte */
			filter->flag = 1;
		}
		return c;
	}

	if (filter->status == 1 || filter->status == 2) {
		/* Trail byte: the upper bound depends on which lead byte was seen */
		const int trail_max = (filter->status == 1) ? 0xfe : 0xdf;

		if (c < 0xa1 || c > trail_max) {
			filter->flag = 1;
		}
	}

	/* Every non-zero state (including unexpected ones) returns to the start */
	filter->status = 0;
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_cp51932;
extern const struct mbfl_identify_vtbl vtbl_identify_cp51932;
extern const struct mbfl_convert_vtbl vtbl_cp51932_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp51932;

View File

@ -40,12 +40,6 @@ const mbfl_encoding mbfl_encoding_cp850 = {
&vtbl_wchar_cp850
};
const struct mbfl_identify_vtbl vtbl_identify_cp850 = {
mbfl_no_encoding_cp850,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_wchar_cp850 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp850,

View File

@ -27,7 +27,6 @@
#define MBFL_MBFILTER_CP850_H
extern const mbfl_encoding mbfl_encoding_cp850;
extern const struct mbfl_identify_vtbl vtbl_identify_cp850;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp850;
extern const struct mbfl_convert_vtbl vtbl_cp850_wchar;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp866 = {
&vtbl_wchar_cp866
};
const struct mbfl_identify_vtbl vtbl_identify_cp866 = {
mbfl_no_encoding_cp866,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_wchar_cp866 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp866,

View File

@ -31,7 +31,6 @@
#define MBFL_MBFILTER_CP866_H
extern const mbfl_encoding mbfl_encoding_cp866;
extern const struct mbfl_identify_vtbl vtbl_identify_cp866;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp866;
extern const struct mbfl_convert_vtbl vtbl_cp866_wchar;

View File

@ -33,8 +33,6 @@
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_ident_cp932(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -67,12 +65,6 @@ const mbfl_encoding mbfl_encoding_cp932 = {
&vtbl_wchar_cp932
};
const struct mbfl_identify_vtbl vtbl_identify_cp932 = {
mbfl_no_encoding_cp932,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp932
};
const struct mbfl_convert_vtbl vtbl_cp932_wchar = {
mbfl_no_encoding_cp932,
mbfl_no_encoding_wchar,
@ -323,23 +315,3 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for CP932.
 *
 * filter->status is non-zero while the second byte of a double-byte character
 * is expected. Unacceptable bytes set filter->flag to 1. The input value is
 * always returned unchanged.
 */
static int mbfl_filt_ident_cp932(int c, mbfl_identify_filter *filter)
{
	if (filter->status) {
		/* kanji second char: 0x40-0xFC except 0x7F */
		if (c < 0x40 || c == 0x7f || c > 0xfc) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if (c >= 0 && c < 0x80) {
		/* latin ok */
		return c;
	}

	if (c > 0xa0 && c < 0xe0) {
		/* single-byte kana ok */
		return c;
	}

	if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
		/* kanji first char */
		filter->status = 1;
	} else {
		filter->flag = 1;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_cp932;
extern const struct mbfl_identify_vtbl vtbl_identify_cp932;
extern const struct mbfl_convert_vtbl vtbl_cp932_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp932;

View File

@ -32,8 +32,6 @@
#define UNICODE_TABLE_CP936_DEF
#include "unicode_table_cp936.h"
static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_cp936[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -66,12 +64,6 @@ const mbfl_encoding mbfl_encoding_cp936 = {
&vtbl_wchar_cp936
};
const struct mbfl_identify_vtbl vtbl_identify_cp936 = {
mbfl_no_encoding_cp936,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_cp936
};
const struct mbfl_convert_vtbl vtbl_cp936_wchar = {
mbfl_no_encoding_cp936,
mbfl_no_encoding_wchar,
@ -283,21 +275,3 @@ mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for CP936.
 *
 * filter->status is non-zero while the second byte of a double-byte character
 * is expected. Unacceptable bytes set filter->flag to 1. The input value is
 * always returned unchanged.
 */
static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter)
{
	if (filter->status) {
		/* Second byte: 0x40-0xFE except 0x7F */
		if (c < 0x40 || c == 0x7f || c > 0xfe) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if (c >= 0 && c < 0x80) {
		/* latin ok */
		return c;
	}

	if (c >= 0x81 && c <= 0xfe) {
		/* DBCS lead byte */
		filter->status = 1;
	} else {
		filter->flag = 1;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_cp936;
extern const struct mbfl_identify_vtbl vtbl_identify_cp936;
extern const struct mbfl_convert_vtbl vtbl_cp936_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp936;

View File

@ -32,8 +32,6 @@
#include "unicode_table_cp936.h"
static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -66,12 +64,6 @@ const mbfl_encoding mbfl_encoding_euc_cn = {
&vtbl_wchar_euccn
};
const struct mbfl_identify_vtbl vtbl_identify_euccn = {
mbfl_no_encoding_euc_cn,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_euccn
};
const struct mbfl_convert_vtbl vtbl_euccn_wchar = {
mbfl_no_encoding_euc_cn,
mbfl_no_encoding_wchar,
@ -209,31 +201,3 @@ mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for EUC-CN.
 *
 * filter->status is 0 at the start of a character and 1 after a DBCS lead
 * byte. Unacceptable bytes set filter->flag to 1. The input value is always
 * returned unchanged.
 */
static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter)
{
	if (filter->status == 0) {
		if (c >= 0xa1 && c <= 0xfe) {
			/* DBCS lead byte */
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			/* neither latin nor a valid lead byte */
			filter->flag = 1;
		}
		return c;
	}

	if (filter->status == 1 && (c < 0xa1 || c > 0xfe)) {
		/* Invalid trail byte */
		filter->flag = 1;
	}

	/* Every non-zero state (including unexpected ones) returns to the start */
	filter->status = 0;
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_euc_cn;
extern const struct mbfl_identify_vtbl vtbl_identify_euccn;
extern const struct mbfl_convert_vtbl vtbl_euccn_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euccn;

View File

@ -33,7 +33,6 @@
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter);
static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter);
const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
@ -68,12 +67,6 @@ const mbfl_encoding mbfl_encoding_euc_jp = {
&vtbl_wchar_eucjp
};
const struct mbfl_identify_vtbl vtbl_identify_eucjp = {
mbfl_no_encoding_euc_jp,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_eucjp
};
const struct mbfl_convert_vtbl vtbl_eucjp_wchar = {
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_wchar,
@ -252,81 +245,3 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
return c;
}
/* Some JIS X 0208 byte pairs that are otherwise well-formed have no character
 * assigned to them. Returns non-zero for such a pair.
 * `c1` and `c2` are kuten codes. */
static inline int in_unused_jisx0208_range(int c1, int c2)
{
	const unsigned int idx = (c1 - 0x21)*94 + c2 - 0x21;

	if (idx >= jisx0208_ucs_table_size) {
		/* Beyond the end of the mapping table */
		return 1;
	}
	return !jisx0208_ucs_table[idx];
}
/* Returns non-zero when the pair (`c1`, `c2`) indexes past the end of
 * jisx0212_ucs_table or lands on a zero entry in it. The index is computed
 * the same way as for JIS X 0208: (c1 - 0x21)*94 + c2 - 0x21. */
static inline int in_unused_jisx0212_range(int c1, int c2)
{
	const unsigned int idx = (c1 - 0x21)*94 + c2 - 0x21;

	if (idx >= jisx0212_ucs_table_size) {
		/* Beyond the end of the mapping table */
		return 1;
	}
	return !jisx0212_ucs_table[idx];
}
/*
 * Identify filter for EUC-JP (also referenced by the EUC-JP-2004 identify
 * vtable, which is why it is not static).
 *
 * Consumes one input value per call and sets filter->flag to 1 when the value
 * is not acceptable in the current state. The input value is always returned
 * unchanged.
 *
 * filter->status packs two pieces of state:
 *   - low nibble: what is expected next (0 = start of a character,
 *     1 = second byte of JIS X 0208, 2 = JIS X 0201 byte after 0x8E,
 *     3 = first JIS X 0212 byte after 0x8F, 4 = final JIS X 0212 byte)
 *   - bits 8 and up: the previous byte converted to a kuten code, kept so the
 *     complete pair can be checked against the mapping tables
 */
int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter)
{
unsigned char ku, ten;
switch (filter->status & 0xF) {
case 0: /* latin */
if (c < 0x80) { /* ok */
;
} else if (c > 0xa0 && c < 0xff) {
/* JIS X 0208, first byte
* In EUC-JP, each such byte ranges from 0xA1-0xFE; however,
* the bytes of JIS X 0208 kuten codes range from 0x21-0x7E */
filter->status = ((c - 0xA1 + 0x21) << 8) | 1;
} else if (c == 0x8e) { /* JIS X 0201 */
filter->status = 2;
} else if (c == 0x8f) { /* JIS X 0212 */
filter->status = 3;
} else { /* bad */
filter->flag = 1;
}
break;
case 1: /* 2nd byte of JIS X 0208 */
/* Recover the first byte's kuten code stored above bit 8 */
ku = filter->status >> 8;
ten = c - 0xA1 + 0x21;
/* The range test comes first, so the table lookup only runs for a byte
 * within 0xA1-0xFE */
if (c < 0xa1 || c > 0xfe || in_unused_jisx0208_range(ku, ten)) { /* bad */
filter->flag = 1;
}
filter->status = 0;
break;
case 2: /* JIS X 0201 */
if (c < 0xa1 || c > 0xdf) { /* bad */
filter->flag = 1;
}
filter->status = 0;
break;
case 3: /* JIS X 0212 */
if (c < 0xa1 || c > 0xfe) { /* bad */
/* Note: status is left unchanged on this path */
filter->flag = 1;
} else {
filter->status = ((c - 0xA1 + 0x21) << 8) | 4;
}
break;
case 4: /* JIS X 0212, final byte */
ku = filter->status >> 8;
ten = c - 0xA1 + 0x21;
if (c < 0xa1 || c > 0xfe || in_unused_jisx0212_range(ku, ten)) { /* bad */
filter->flag = 1;
}
filter->status = 0;
break;
/* Any other low-nibble value: reset the whole state */
default:
filter->status = 0;
break;
}
return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_euc_jp;
extern const struct mbfl_identify_vtbl vtbl_identify_eucjp;
extern const struct mbfl_convert_vtbl vtbl_eucjp_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp;

View File

@ -31,7 +31,6 @@
#include "mbfilter_euc_jp_2004.h"
#include "mbfilter_sjis_2004.h"
extern int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter);
extern const unsigned char mblen_table_eucjp[];
static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL};
@ -47,12 +46,6 @@ const mbfl_encoding mbfl_encoding_eucjp2004 = {
&vtbl_wchar_eucjp2004
};
const struct mbfl_identify_vtbl vtbl_identify_eucjp2004 = {
mbfl_no_encoding_eucjp2004,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_eucjp
};
const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar = {
mbfl_no_encoding_eucjp2004,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_eucjp2004;
extern const struct mbfl_identify_vtbl vtbl_identify_eucjp2004;
extern const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004;

View File

@ -34,8 +34,6 @@
#include "unicode_table_jis.h"
#include "cp932_table.h"
static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -55,15 +53,7 @@ static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open",
"eucJP-ms", NULL};
const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = {
mbfl_no_encoding_eucjp_win,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_eucjp_win
};
static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL};
const mbfl_encoding mbfl_encoding_eucjp_win = {
mbfl_no_encoding_eucjp_win,
@ -373,55 +363,3 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter step for eucJP-win.
 *
 * Consumes one byte `c`, advances the state kept in filter->status and sets
 * filter->flag to 1 when the byte cannot appear at the current position.
 * Always returns `c` unchanged.
 *
 * filter->status values:
 *   0  between characters
 *   1  lead byte 0xA1..0xFE seen, one trail byte expected
 *   2  0x8E seen, one byte in 0xA1..0xDF expected
 *   3  0x8F seen, two more bytes expected
 *   4  0x8F plus one byte seen, final byte expected
 */
static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter)
{
	/* Range used for lead bytes and for every trail byte except after 0x8E */
	const int in_a1_fe = (c >= 0xa1 && c <= 0xfe);

	switch (filter->status) {
	case 0:
		if (c == 0x8e) {
			filter->status = 2;
		} else if (c == 0x8f) {
			filter->status = 3;
		} else if (in_a1_fe) {
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			/* neither 7-bit nor a recognised lead byte */
			filter->flag = 1;
		}
		break;

	case 2:
		if (c < 0xa1 || c > 0xdf) {
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	case 3:
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		/* the state advances even when the byte was rejected */
		filter->status = 4;
		break;

	case 1:
	case 4:
		/* last byte of a two- or three-byte character */
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_eucjp_win;
extern const struct mbfl_identify_vtbl vtbl_identify_eucjpwin;
extern const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin;

View File

@ -31,8 +31,6 @@
#include "mbfilter_euc_kr.h"
#include "unicode_table_uhc.h"
static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -65,12 +63,6 @@ const mbfl_encoding mbfl_encoding_euc_kr = {
&vtbl_wchar_euckr
};
const struct mbfl_identify_vtbl vtbl_identify_euckr = {
mbfl_no_encoding_euc_kr,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_euckr
};
const struct mbfl_convert_vtbl vtbl_euckr_wchar = {
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_wchar,
@ -223,31 +215,3 @@ mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter step for EUC-KR.
 *
 * filter->status is 0 between characters and 1 after a lead byte. Bytes that
 * do not fit set filter->flag to 1. Always returns `c` unchanged.
 */
static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter)
{
	/* Both bytes of a double-byte character lie in 0xA1..0xFE */
	const int is_dbcs_byte = (c >= 0xa1 && c <= 0xfe);

	if (filter->status == 0) {
		if (is_dbcs_byte) {
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			filter->flag = 1;
		}
		return c;
	}

	/* Expecting a trail byte; any other state value is simply reset */
	if (filter->status == 1 && !is_dbcs_byte) {
		filter->flag = 1;
	}
	filter->status = 0;
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_euc_kr;
extern const struct mbfl_identify_vtbl vtbl_identify_euckr;
extern const struct mbfl_convert_vtbl vtbl_euckr_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euckr;

View File

@ -32,8 +32,6 @@
#include "unicode_table_cns11643.h"
static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -67,12 +65,6 @@ const mbfl_encoding mbfl_encoding_euc_tw = {
&vtbl_wchar_euctw
};
const struct mbfl_identify_vtbl vtbl_identify_euctw = {
mbfl_no_encoding_euc_tw,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_euctw
};
const struct mbfl_convert_vtbl vtbl_euctw_wchar = {
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_wchar,
@ -271,55 +263,3 @@ mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter)
}
return c;
}
/*
 * Identify filter step for EUC-TW.
 *
 * Consumes one byte `c`, advances filter->status and sets filter->flag to 1
 * on a byte that does not fit. Always returns `c` unchanged.
 *
 * filter->status values:
 *   0  between characters
 *   1  lead byte 0xA1..0xFE seen, one trail byte expected
 *   2  0x8E seen, a byte in 0xA1..0xAE expected next
 *   3  0x8E and the following byte seen, two more bytes expected
 *   4  one more byte expected
 */
static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter)
{
	const int in_a1_fe = (c >= 0xa1 && c <= 0xfe);

	switch (filter->status) {
	case 0:
		if (c == 0x8e) {
			filter->status = 2;
		} else if (in_a1_fe) {
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			filter->flag = 1;
		}
		break;

	case 2:
		if (c >= 0xa1 && c < 0xaf) {
			filter->status = 3;
		} else {
			/* the state is left at 2 when the byte is rejected */
			filter->flag = 1;
		}
		break;

	case 3:
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		filter->status = 4;
		break;

	case 1:
	case 4:
		/* final byte of the character */
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_euc_tw;
extern const struct mbfl_identify_vtbl vtbl_identify_euctw;
extern const struct mbfl_convert_vtbl vtbl_euctw_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euctw;

View File

@ -33,8 +33,6 @@
#include "unicode_table_cp936.h"
#include "unicode_table_gb18030.h"
static int mbfl_filt_ident_gb18030(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_gb18030_aliases[] = {"gb-18030", "gb-18030-2000", NULL};
const mbfl_encoding mbfl_encoding_gb18030 = {
@ -48,12 +46,6 @@ const mbfl_encoding mbfl_encoding_gb18030 = {
&vtbl_wchar_gb18030
};
const struct mbfl_identify_vtbl vtbl_identify_gb18030 = {
mbfl_no_encoding_gb18030,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_gb18030
};
const struct mbfl_convert_vtbl vtbl_gb18030_wchar = {
mbfl_no_encoding_gb18030,
mbfl_no_encoding_wchar,
@ -414,55 +406,3 @@ mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter step for GB18030.
 *
 * The low 8 bits of filter->status hold the position inside a character
 * (0 = start, 1 = second byte, 2 = third byte, 3 = fourth byte); while in
 * position 1 the bits above hold the first byte. A byte that does not fit
 * sets filter->flag to 1. Always returns `c` unchanged.
 */
static int mbfl_filt_ident_gb18030(int c, mbfl_identify_filter *filter)
{
	const int lead = (filter->status >> 8) & 0xff;
	int four_byte, two_byte;

	/* Drop the remembered first byte; only the position remains */
	filter->status &= 0xff;

	switch (filter->status) {
	case 0:
		if (c <= 0x80 || c == 0xff) {
			filter->status = 0;
		} else {
			/* remember the first byte for the second-byte checks */
			filter->status = 1 | (c << 8);
		}
		break;

	case 1: /* second byte of a two- or four-byte character */
		four_byte = ((lead >= 0x81 && lead <= 0x84) || (lead >= 0x90 && lead <= 0xe3))
			&& c >= 0x30 && c <= 0x39;
		two_byte =
			/* UDA part 1,2 */
			(((lead >= 0xaa && lead <= 0xaf) || (lead >= 0xf8 && lead <= 0xfe)) && c >= 0xa1 && c <= 0xfe)
			/* UDA part 3 */
			|| (lead >= 0xa1 && lead <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f)
			/* DBCS */
			|| (lead >= 0xa1 && lead <= 0xa9 && c >= 0xa1 && c <= 0xfe)
			|| (lead >= 0xb0 && lead <= 0xf7 && c >= 0xa1 && c <= 0xfe)
			|| (lead >= 0x81 && lead <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f)
			|| (lead >= 0xaa && lead <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f)
			|| (lead >= 0xa8 && lead <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f);

		if (four_byte) {
			filter->status = 2;
		} else {
			if (!two_byte) {
				filter->flag = 1; /* bad */
			}
			filter->status = 0;
		}
		break;

	case 2: /* third byte of a four-byte character */
		if (c > 0x80 && c < 0xff) {
			filter->status = 3;
		} else {
			filter->flag = 1; /* bad */
			filter->status = 0;
		}
		break;

	case 3: /* fourth byte of a four-byte character */
		if (c < 0x30 || c >= 0x40) {
			filter->flag = 1; /* bad */
		}
		filter->status = 0;
		break;

	default: /* unexpected position value */
		filter->flag = 1;
		break;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_gb18030;
extern const struct mbfl_identify_vtbl vtbl_identify_gb18030;
extern const struct mbfl_convert_vtbl vtbl_gb18030_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_gb18030;

View File

@ -32,8 +32,6 @@
#include "unicode_table_cp936.h"
static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter);
const mbfl_encoding mbfl_encoding_hz = {
mbfl_no_encoding_hz,
"HZ",
@ -45,12 +43,6 @@ const mbfl_encoding mbfl_encoding_hz = {
&vtbl_wchar_hz
};
const struct mbfl_identify_vtbl vtbl_identify_hz = {
mbfl_no_encoding_hz,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_hz
};
const struct mbfl_convert_vtbl vtbl_hz_wchar = {
mbfl_no_encoding_hz,
mbfl_no_encoding_wchar,
@ -225,49 +217,3 @@ mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter)
filter->status &= 0xff;
return 0;
}
/*
 * Identify filter step for HZ.
 *
 * The low nibble of filter->status is the position inside a character or
 * '~' escape (0 = idle, 1 = second GB2312 byte expected, 2 = byte after '~'
 * expected); the value 0x10 in the upper bits marks GB2312 mode. A byte
 * that does not fit sets filter->flag to 1. Always returns `c` unchanged.
 */
static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter)
{
	const int position = filter->status & 0xf;

	if (position == 0) {
		/* status 0x00: ASCII, status 0x10: GB2312 */
		if (c == 0x7e) {
			filter->status += 2;
		} else if (filter->status == 0x10 && c > 0x20 && c < 0x7f) {
			/* first byte of a double-byte character */
			filter->status += 1;
		} else if (c < 0 || c >= 0x80) {
			filter->flag = 1; /* bad */
		}
	} else if (position == 1) {
		/* second byte of a GB2312 character */
		filter->status &= ~0xf;
		if (c < 0x21 || c > 0x7e) {
			filter->flag = 1; /* bad */
		}
	} else if (position == 2) {
		switch (c) {
		case 0x7d: /* '}' */
		case 0x7e: /* '~' */
			filter->status = 0;
			break;
		case 0x7b: /* '{' */
			filter->status = 0x10;
			break;
		default:
			filter->flag = 1; /* bad */
			filter->status &= ~0xf;
			break;
		}
	} else {
		filter->status = 0;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_hz;
extern const struct mbfl_identify_vtbl vtbl_identify_hz;
extern const struct mbfl_convert_vtbl vtbl_hz_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_hz;

View File

@ -34,8 +34,6 @@
#include "unicode_table_jis.h"
#include "cp932_table.h"
int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL};
const mbfl_encoding mbfl_encoding_2022jpms = {
@ -49,12 +47,6 @@ const mbfl_encoding mbfl_encoding_2022jpms = {
&vtbl_wchar_2022jpms
};
const struct mbfl_identify_vtbl vtbl_identify_2022jpms = {
mbfl_no_encoding_2022jpms,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_2022jpms
};
const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {
mbfl_no_encoding_2022jpms,
mbfl_no_encoding_wchar,
@ -429,97 +421,3 @@ mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter)
return 0;
}
/*
 * Identify filter step for ISO-2022-JP-MS (the ISO-2022-JP-KDDI identify
 * vtable in this code base points at this same function).
 *
 * Consumes one byte `c`. The low nibble of filter->status is the position
 * inside a character or escape sequence (0 = idle, 1 = second byte of a
 * double-byte character, 2 = after ESC, 3 = after ESC $, 4 = after ESC $ (,
 * 5 = after ESC (); the upper bits hold the currently selected character set
 * (values listed below). Bytes that do not fit set filter->flag to 1.
 * Always returns `c` unchanged.
 */
int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter)
{
/* Re-entered after a broken escape sequence so that the same byte is
 * examined again from the idle position of the current character set */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x20: X 0201 kana */
/* case 0x80: X 0208 */
/* case 0xa0: X UDC */
case 0:
if (c == 0x1b) {
/* ESC: low nibble 0 -> 2, character set bits are kept */
filter->status += 2;
} else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
/* case 0xa1: UDC second char */
case 1:
/* Back to the idle position before looking at the byte */
filter->status &= ~0xf;
if (c == 0x1b) {
/* ESC where a second byte was expected: handle it as ESC in the idle position */
goto retry;
} else if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
/* low nibble 2 -> 5 */
filter->status += 3;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x28) { /* '(' */
filter->status++;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ ( */
case 4:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x3f) { /* '?' */
filter->status = 0xa0;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42) { /* 'B' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
/* NOTE(review): 'J' selects status 0 here, although the list above
 * reserves 0x10 for X 0201 latin - confirm this is intended */
filter->status = 0;
} else if (c == 0x49) { /* 'I' */
filter->status = 0x20;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
default:
/* Unknown position value: start over in ASCII */
filter->status = 0;
break;
}
return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_2022jpms;
extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms;
extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms;

View File

@ -31,8 +31,6 @@
#include "mbfilter_iso2022_kr.h"
#include "unicode_table_uhc.h"
static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter);
const mbfl_encoding mbfl_encoding_2022kr = {
mbfl_no_encoding_2022kr,
"ISO-2022-KR",
@ -44,12 +42,6 @@ const mbfl_encoding mbfl_encoding_2022kr = {
&vtbl_wchar_2022kr
};
const struct mbfl_identify_vtbl vtbl_identify_2022kr = {
mbfl_no_encoding_2022kr,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_2022kr
};
const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022kr,
@ -282,73 +274,3 @@ mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
return 0;
}
/*
 * Identify filter step for ISO-2022-KR.
 *
 * Consumes one byte `c`. The low nibble of filter->status is the position
 * inside a character or escape sequence (0 = idle, 1 = second byte of a
 * double-byte character, 2 = after ESC, 3 = after ESC $, 4 = after ESC $ ));
 * the upper bits hold the mode (values listed below). Bytes that do not fit
 * set filter->flag to 1. Always returns `c` unchanged.
 *
 * The handler for "ESC $ )" is labelled `case 4`, which is the value state 3
 * reaches through `status++`. It used to be labelled `case 5`, a value no
 * transition in this function produces, so the `ESC $ ) C` designator was
 * never recognised and state 4 fell into `default`.
 *
 * NOTE(review): nothing in this function sets status to 0x20, so the
 * double-byte branch in state 0 cannot be entered from here, and SO/SI
 * (0x0E/0x0F) get no special treatment - confirm whether that is intended.
 */
static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter)
{
/* Re-entered after a broken escape sequence so that the same byte is
 * examined again from the idle position */
retry:
	switch (filter->status & 0xf) {
	/* case 0x00: ASCII */
	/* case 0x10: KSC5601 mode */
	/* case 0x20: KSC5601 DBCS */
	/* case 0x40: KSC5601 SBCS */
	case 0:
		if (!(filter->status & 0x10)) {
			/* Before the designator only ESC changes state; every other
			 * byte is accepted without a check */
			if (c == 0x1b) {
				filter->status += 2;
			}
		} else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) { /* kanji first char */
			filter->status += 1;
		} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
			;
		} else {
			filter->flag = 1; /* bad */
		}
		break;

	/* case 0x21: KSC5601 second char */
	case 1:
		filter->status &= ~0xf;
		if (c < 0x21 || c > 0x7e) { /* bad */
			filter->flag = 1;
		}
		break;

	/* ESC */
	case 2:
		if (c == 0x24) { /* '$' */
			filter->status++;
		} else {
			filter->flag = 1; /* bad */
			filter->status &= ~0xf;
			goto retry;
		}
		break;

	/* ESC $ */
	case 3:
		if (c == 0x29) { /* ')' */
			filter->status++;
		} else {
			filter->flag = 1; /* bad */
			filter->status &= ~0xf;
			goto retry;
		}
		break;

	/* ESC $ ) */
	case 4:
		if (c == 0x43) { /* 'C' */
			filter->status = 0x10;
		} else {
			filter->flag = 1; /* bad */
			filter->status &= ~0xf;
			goto retry;
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_2022kr;
extern const struct mbfl_identify_vtbl vtbl_identify_2022kr;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022kr;
extern const struct mbfl_convert_vtbl vtbl_2022kr_wchar;

View File

@ -34,7 +34,6 @@
#include "unicode_table_jis.h"
extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter);
const mbfl_encoding mbfl_encoding_2022jp_2004 = {
mbfl_no_encoding_2022jp_2004,
@ -47,12 +46,6 @@ const mbfl_encoding mbfl_encoding_2022jp_2004 = {
&vtbl_wchar_2022jp_2004
};
const struct mbfl_identify_vtbl vtbl_identify_2022jp_2004 = {
mbfl_no_encoding_2022jp_2004,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_2022jp_2004
};
const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar = {
mbfl_no_encoding_2022jp_2004,
mbfl_no_encoding_wchar,
@ -72,93 +65,3 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = {
mbfl_filt_conv_jis2004_flush,
NULL,
};
/*
 * Identify filter step for ISO-2022-JP-2004.
 *
 * Consumes one byte `c`. The low nibble of filter->status is the position
 * inside a character or escape sequence (0 = idle, 1 = second byte of a
 * double-byte character, 2 = after ESC, 3 = after ESC $, 4 = after ESC $ (,
 * 5 = after ESC (); the upper bits hold the currently selected character set
 * (values listed below). Bytes that do not fit set filter->flag to 1.
 * Always returns `c` unchanged.
 *
 * NOTE(review): the list below labels 0x80 as "X 0212" while the comment on
 * state 1 calls 0x81 "X 0208 second char" - confirm which is meant.
 */
static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter)
{
/* Re-entered after a broken escape sequence so that the same byte is
 * examined again from the idle position of the current character set */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x80: X 0212 */
/* case 0x90: X 0213 plane 1 */
/* case 0xa0: X 0213 plane 2 */
case 0:
if (c == 0x1b) {
/* ESC: low nibble 0 -> 2, character set bits are kept */
filter->status += 2;
/* Only status 0x80 enters the double-byte path; with 0x90 or 0xa0 selected
 * bytes are only checked for being 7-bit */
} else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
case 1:
if (c == 0x1b) {
/* ESC where a second byte was expected: low nibble 1 -> 2 (the ESC state) */
filter->status++;
} else {
filter->status &= ~0xf;
if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
/* low nibble 2 -> 5 */
filter->status += 3;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x42) { /* 'B' */
filter->status = 0x80;
} else if (c == 0x28) { /* '(' */
filter->status++;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ ( */
case 4:
if (c == 0x51) { /* JIS X 0213 plane 1 */
filter->status = 0x90;
} else if (c == 0x50) { /* JIS X 0213 plane 2 */
filter->status = 0xa0;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42) { /* 'B' */
filter->status = 0;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
default:
/* Unknown position value: start over in ASCII */
filter->status = 0;
break;
}
return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_2022jp_2004;
extern const struct mbfl_identify_vtbl vtbl_identify_2022jp_2004;
extern const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004;

View File

@ -36,7 +36,6 @@
#include "cp932_table.h"
extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
extern int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL};
@ -51,12 +50,6 @@ const mbfl_encoding mbfl_encoding_2022jp_kddi = {
&vtbl_wchar_2022jp_kddi
};
const struct mbfl_identify_vtbl vtbl_identify_2022jp_kddi = {
mbfl_no_encoding_2022jp_kddi,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_2022jpms
};
const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = {
mbfl_no_encoding_2022jp_kddi,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_2022jp_kddi;
extern const struct mbfl_identify_vtbl vtbl_identify_2022jp_kddi;
extern const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi;

View File

@ -43,12 +43,6 @@ const mbfl_encoding mbfl_encoding_8859_1 = {
&vtbl_wchar_8859_1
};
const struct mbfl_identify_vtbl vtbl_identify_8859_1 = {
mbfl_no_encoding_8859_1,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_1_wchar = {
mbfl_no_encoding_8859_1,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_1;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_1;
extern const struct mbfl_convert_vtbl vtbl_8859_1_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_1;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_10 = {
&vtbl_wchar_8859_10
};
const struct mbfl_identify_vtbl vtbl_identify_8859_10 = {
mbfl_no_encoding_8859_10,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_10_wchar = {
mbfl_no_encoding_8859_10,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_10;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_10;
extern const struct mbfl_convert_vtbl vtbl_8859_10_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_10;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_13 = {
&vtbl_wchar_8859_13
};
const struct mbfl_identify_vtbl vtbl_identify_8859_13 = {
mbfl_no_encoding_8859_13,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_13_wchar = {
mbfl_no_encoding_8859_13,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_13;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_13;
extern const struct mbfl_convert_vtbl vtbl_8859_13_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_13;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_14 = {
&vtbl_wchar_8859_14
};
const struct mbfl_identify_vtbl vtbl_identify_8859_14 = {
mbfl_no_encoding_8859_14,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_14_wchar = {
mbfl_no_encoding_8859_14,
mbfl_no_encoding_wchar,

View File

@ -16,7 +16,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_14;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_14;
extern const struct mbfl_convert_vtbl vtbl_8859_14_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_14;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_15 = {
&vtbl_wchar_8859_15
};
const struct mbfl_identify_vtbl vtbl_identify_8859_15 = {
mbfl_no_encoding_8859_15,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_15_wchar = {
mbfl_no_encoding_8859_15,
mbfl_no_encoding_wchar,

View File

@ -16,7 +16,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_15;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_15;
extern const struct mbfl_convert_vtbl vtbl_8859_15_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_15;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_16 = {
&vtbl_wchar_8859_16
};
const struct mbfl_identify_vtbl vtbl_identify_8859_16 = {
mbfl_no_encoding_8859_16,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_16_wchar = {
mbfl_no_encoding_8859_16,
mbfl_no_encoding_wchar,

View File

@ -13,7 +13,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_16;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_16;
extern const struct mbfl_convert_vtbl vtbl_8859_16_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_16;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_2 = {
&vtbl_wchar_8859_2
};
const struct mbfl_identify_vtbl vtbl_identify_8859_2 = {
mbfl_no_encoding_8859_2,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_2_wchar = {
mbfl_no_encoding_8859_2,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_2;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_2;
extern const struct mbfl_convert_vtbl vtbl_8859_2_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_2;

View File

@ -31,8 +31,6 @@
#include "mbfilter_iso8859_3.h"
#include "unicode_table_iso8859_3.h"
static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_8859_3_aliases[] = {"ISO8859-3", "latin3", NULL};
const mbfl_encoding mbfl_encoding_8859_3 = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_3 = {
&vtbl_wchar_8859_3
};
const struct mbfl_identify_vtbl vtbl_identify_8859_3 = {
mbfl_no_encoding_8859_3,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_iso8859_3
};
const struct mbfl_convert_vtbl vtbl_8859_3_wchar = {
mbfl_no_encoding_8859_3,
mbfl_no_encoding_wchar,
@ -131,11 +123,3 @@ int mbfl_filt_conv_wchar_8859_3(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter step for ISO-8859-3.
 *
 * A byte at or above 0xA0 whose entry in iso8859_3_ucs_table is zero is
 * treated as not belonging to this encoding and rejects the candidate.
 * Always returns `c` unchanged.
 *
 * The rejection is recorded in filter->flag, the field every other identify
 * filter in this code base sets on bad input. It was previously written to
 * filter->status, which this function never reads or resets.
 */
static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter)
{
	if (c >= 0xA0 && !iso8859_3_ucs_table[c - 0xA0]) {
		filter->flag = 1; /* bad */
	}
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_3;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_3;
extern const struct mbfl_convert_vtbl vtbl_8859_3_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_3;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_4 = {
&vtbl_wchar_8859_4
};
const struct mbfl_identify_vtbl vtbl_identify_8859_4 = {
mbfl_no_encoding_8859_4,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_4_wchar = {
mbfl_no_encoding_8859_4,
mbfl_no_encoding_wchar,

View File

@ -31,7 +31,6 @@
#define MBFL_MBFILTER_ISO8859_4_H
extern const mbfl_encoding mbfl_encoding_8859_4;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_4;
extern const struct mbfl_convert_vtbl vtbl_8859_4_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_4;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_5 = {
&vtbl_wchar_8859_5
};
const struct mbfl_identify_vtbl vtbl_identify_8859_5 = {
mbfl_no_encoding_8859_5,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_5_wchar = {
mbfl_no_encoding_8859_5,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_5;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_5;
extern const struct mbfl_convert_vtbl vtbl_8859_5_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_5;

View File

@ -31,8 +31,6 @@
#include "mbfilter_iso8859_6.h"
#include "unicode_table_iso8859_6.h"
static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_8859_6_aliases[] = {"ISO8859-6", "arabic", NULL};
const mbfl_encoding mbfl_encoding_8859_6 = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_6 = {
&vtbl_wchar_8859_6
};
const struct mbfl_identify_vtbl vtbl_identify_8859_6 = {
mbfl_no_encoding_8859_6,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_iso8859_6
};
const struct mbfl_convert_vtbl vtbl_8859_6_wchar = {
mbfl_no_encoding_8859_6,
mbfl_no_encoding_wchar,
@ -131,11 +123,3 @@ int mbfl_filt_conv_wchar_8859_6(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter step for ISO-8859-6.
 *
 * A byte at or above 0xA0 whose entry in iso8859_6_ucs_table is zero is
 * treated as not belonging to this encoding and rejects the candidate.
 * Always returns `c` unchanged.
 *
 * The rejection is recorded in filter->flag, the field every other identify
 * filter in this code base sets on bad input. It was previously written to
 * filter->status, which this function never reads or resets.
 */
static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter)
{
	if (c >= 0xA0 && !iso8859_6_ucs_table[c - 0xA0]) {
		filter->flag = 1; /* bad */
	}
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_6;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_6;
extern const struct mbfl_convert_vtbl vtbl_8859_6_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_6;

View File

@ -31,8 +31,6 @@
#include "mbfilter_iso8859_7.h"
#include "unicode_table_iso8859_7.h"
static int mbfl_filt_ident_iso8859_7(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_8859_7_aliases[] = {"ISO8859-7", "greek", NULL};
const mbfl_encoding mbfl_encoding_8859_7 = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_7 = {
&vtbl_wchar_8859_7
};
const struct mbfl_identify_vtbl vtbl_identify_8859_7 = {
mbfl_no_encoding_8859_7,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_iso8859_7
};
const struct mbfl_convert_vtbl vtbl_8859_7_wchar = {
mbfl_no_encoding_8859_7,
mbfl_no_encoding_wchar,
@ -131,11 +123,3 @@ int mbfl_filt_conv_wchar_8859_7(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter step for ISO-8859-7.
 *
 * Rejects the candidate when `c` is one of the three byte values that have
 * no character assigned. Always returns `c` unchanged.
 *
 * The rejection is recorded in filter->flag, the field every other identify
 * filter in this code base sets on bad input. It was previously written to
 * filter->status, which this function never reads or resets.
 */
static int mbfl_filt_ident_iso8859_7(int c, mbfl_identify_filter *filter)
{
	/* These bytes are not mapped to any character in ISO-8859-7 */
	if (c == 0xAE || c == 0xD2 || c == 0xFF) {
		filter->flag = 1; /* bad */
	}
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_7;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_7;
extern const struct mbfl_convert_vtbl vtbl_8859_7_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_7;

View File

@ -31,8 +31,6 @@
#include "mbfilter_iso8859_8.h"
#include "unicode_table_iso8859_8.h"
static int mbfl_filt_ident_iso8859_8(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_8859_8_aliases[] = {"ISO8859-8", "hebrew", NULL};
const mbfl_encoding mbfl_encoding_8859_8 = {
@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_8 = {
&vtbl_wchar_8859_8
};
const struct mbfl_identify_vtbl vtbl_identify_8859_8 = {
mbfl_no_encoding_8859_8,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_iso8859_8
};
const struct mbfl_convert_vtbl vtbl_8859_8_wchar = {
mbfl_no_encoding_8859_8,
mbfl_no_encoding_wchar,
@ -130,11 +122,3 @@ int mbfl_filt_conv_wchar_8859_8(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter step for ISO-8859-8.
 *
 * A byte at or above 0xA0 whose entry in iso8859_8_ucs_table is zero is
 * treated as not belonging to this encoding and rejects the candidate.
 * Always returns `c` unchanged.
 *
 * The rejection is recorded in filter->flag, the field every other identify
 * filter in this code base sets on bad input. It was previously written to
 * filter->status, which this function never reads or resets.
 */
static int mbfl_filt_ident_iso8859_8(int c, mbfl_identify_filter *filter)
{
	if (c >= 0xA0 && !iso8859_8_ucs_table[c - 0xA0]) {
		filter->flag = 1; /* bad */
	}
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_8;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_8;
extern const struct mbfl_convert_vtbl vtbl_8859_8_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_8;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_9 = {
&vtbl_wchar_8859_9
};
const struct mbfl_identify_vtbl vtbl_identify_8859_9 = {
mbfl_no_encoding_8859_9,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_8859_9_wchar = {
mbfl_no_encoding_8859_9,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_8859_9;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_9;
extern const struct mbfl_convert_vtbl vtbl_8859_9_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_9;

View File

@ -33,9 +33,6 @@
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter);
static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter);
const mbfl_encoding mbfl_encoding_jis = {
mbfl_no_encoding_jis,
"JIS",
@ -58,18 +55,6 @@ const mbfl_encoding mbfl_encoding_2022jp = {
&vtbl_wchar_2022jp
};
const struct mbfl_identify_vtbl vtbl_identify_jis = {
mbfl_no_encoding_jis,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_jis
};
const struct mbfl_identify_vtbl vtbl_identify_2022jp = {
mbfl_no_encoding_2022jp,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_2022jp
};
const struct mbfl_convert_vtbl vtbl_jis_wchar = {
mbfl_no_encoding_jis,
mbfl_no_encoding_wchar,
@ -483,177 +468,3 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter)
return 0;
}
/*
 * Identify filter step for JIS.
 *
 * Consumes one byte `c`. The low nibble of filter->status is the position
 * inside a character or escape sequence (0 = idle, 1 = second byte of a
 * double-byte character, 2 = after ESC, 3 = after ESC $, 4 = after ESC $ (,
 * 5 = after ESC (); the upper bits hold the currently selected character set
 * (values listed below). Bytes that do not fit set filter->flag to 1.
 * Always returns `c` unchanged.
 */
static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter)
{
/* Re-entered after a broken escape sequence so that the same byte is
 * examined again from the idle position of the current character set */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x20: X 0201 kana */
/* case 0x80: X 0208 */
/* case 0x90: X 0212 */
case 0:
if (c == 0x1b) {
/* ESC: low nibble 0 -> 2, character set bits are kept */
filter->status += 2;
} else if (c == 0x0e) { /* "kana in" */
filter->status = 0x20;
} else if (c == 0x0f) { /* "kana out" */
filter->status = 0;
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
/* case 0x91: X 0212 second char */
case 1:
/* Back to the idle position before looking at the byte */
filter->status &= ~0xf;
if (c == 0x1b) {
/* ESC where a second byte was expected: handle it as ESC in the idle position */
goto retry;
} else if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
/* low nibble 2 -> 5 */
filter->status += 3;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x28) { /* '(' */
filter->status++;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ ( */
case 4:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x44) { /* 'D' */
filter->status = 0x90;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else if (c == 0x49) { /* 'I' */
filter->status = 0x20;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
default:
/* Unknown position value: start over in ASCII */
filter->status = 0;
break;
}
return c;
}
/*
 * Identify filter for ISO-2022-JP. Takes one input byte `c` per call and
 * sets filter->flag to 1 when the bytes seen so far do not look like
 * ISO-2022-JP. The flag is only ever set here, never cleared. Always
 * returns `c`.
 *
 * filter->status holds the scanner state:
 *   bits above the low nibble = currently selected character set
 *     0x00 ASCII, 0x10 JIS X 0201 latin, 0x80 JIS X 0208
 *   low nibble = position inside a character or escape sequence
 *     0 idle, 1 second byte of a kanji, 2 after ESC, 3 after ESC $,
 *     5 after ESC (
 *
 * Whenever an escape sequence turns out to be invalid, the low nibble is
 * cleared and `goto retry` re-dispatches the same byte from the idle state
 * of the character set that was active before the ESC.
 */
static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter)
{
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x80: X 0208 */
case 0:
if (c == 0x1b) {
/* ESC: low nibble becomes 2, character-set bits are kept */
filter->status += 2;
} else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
/* low nibble becomes 1: the next byte is the second half of this kanji */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
/* negative values and bytes >= 0x80 are rejected */
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
case 1:
if (c == 0x1b) {
/* an ESC in place of the second byte is not flagged: low nibble 1 -> 2,
 * i.e. straight into the "after ESC" state */
filter->status++;
} else {
/* back to idle in the same character set */
filter->status &= ~0xf;
if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
/* low nibble 2 -> 3 */
filter->status++;
} else if (c == 0x28) { /* '(' */
/* low nibble 2 -> 5 */
filter->status += 3;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42) { /* 'B' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
default:
/* any other low-nibble value (there is no state 4 here): reset to ASCII, idle */
filter->status = 0;
break;
}
return c;
}

View File

@ -34,8 +34,6 @@
extern const mbfl_encoding mbfl_encoding_jis;
extern const mbfl_encoding mbfl_encoding_2022jp;
extern const struct mbfl_identify_vtbl vtbl_identify_2022jp;
extern const struct mbfl_identify_vtbl vtbl_identify_jis;
extern const struct mbfl_convert_vtbl vtbl_jis_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_jis;
extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar;

View File

@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_koi8r = {
&vtbl_wchar_koi8r
};
/*
 * Identify-filter table for KOI8-R: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_true.
 * NOTE(review): presumably the members are (encoding, constructor, per-byte
 * filter function) and mbfl_filt_ident_true never rejects a byte - confirm
 * against struct mbfl_identify_vtbl and mbfl_filt_ident_true.
 */
const struct mbfl_identify_vtbl vtbl_identify_koi8r = {
mbfl_no_encoding_koi8r,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_wchar_koi8r = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_koi8r,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_koi8r;
extern const struct mbfl_identify_vtbl vtbl_identify_koi8r;
extern const struct mbfl_convert_vtbl vtbl_wchar_koi8r;
extern const struct mbfl_convert_vtbl vtbl_koi8r_wchar;

View File

@ -41,12 +41,6 @@ const mbfl_encoding mbfl_encoding_koi8u = {
&vtbl_wchar_koi8u
};
/*
 * Identify-filter table for KOI8-U: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_true.
 * NOTE(review): presumably the members are (encoding, constructor, per-byte
 * filter function) and mbfl_filt_ident_true never rejects a byte - confirm
 * against struct mbfl_identify_vtbl and mbfl_filt_ident_true.
 */
const struct mbfl_identify_vtbl vtbl_identify_koi8u = {
mbfl_no_encoding_koi8u,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_true
};
const struct mbfl_convert_vtbl vtbl_wchar_koi8u = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_koi8u,

View File

@ -30,7 +30,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_koi8u;
extern const struct mbfl_identify_vtbl vtbl_identify_koi8u;
extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u;
extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar;

View File

@ -37,7 +37,6 @@
#include "unicode_table_jis.h"
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter);
int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -71,12 +70,6 @@ const mbfl_encoding mbfl_encoding_sjis = {
&vtbl_wchar_sjis
};
/*
 * Identify-filter table for Shift-JIS: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_sjis as the
 * filter function.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis = {
mbfl_no_encoding_sjis,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_sjis
};
const struct mbfl_convert_vtbl vtbl_sjis_wchar = {
mbfl_no_encoding_sjis,
mbfl_no_encoding_wchar,
@ -267,28 +260,3 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for Shift-JIS. Takes one input byte `c` per call and sets
 * filter->flag to 1 when the byte is not acceptable at the current position.
 * The flag is never cleared here. Always returns `c`.
 *
 * filter->status is 0 between characters; while a double-byte character is
 * open it holds the lead byte so the pair can be looked up in the JIS X 0208
 * conversion table.
 */
int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter)
{
	if (!filter->status) {
		/* Between characters: single-byte Latin and half-width Kana need no state */
		if (c < 0x80 || (c >= 0xA1 && c <= 0xDF)) {
			return c;
		}

		if (c >= 0x81 && c <= 0xEF && c != 0xA0) {
			/* Kanji lead byte: remember it for the table lookup on the next call */
			filter->status = c;
		} else {
			filter->flag = 1;
		}
		return c;
	}

	/* Second byte of a Kanji */
	int rejected = (c < 0x40 || c > 0xFC || c == 0x7F);

	if (!rejected) {
		int s1, s2;
		SJIS_DECODE(filter->status, c, s1, s2);

		/* Index of the decoded pair in the 94-column JIS X 0208 table */
		int w = (s1 - 0x21) * 94 + s2 - 0x21;

		/* Outside the table, or an unassigned slot, is not valid text */
		rejected = (w >= jisx0208_ucs_table_size || !jisx0208_ucs_table[w]);
	}

	if (rejected) {
		filter->flag = 1;
	}

	/* The character is finished either way */
	filter->status = 0;
	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis;
extern const struct mbfl_convert_vtbl vtbl_sjis_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis;

View File

@ -35,7 +35,6 @@
extern const unsigned char mblen_table_sjis[];
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n);
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
@ -52,12 +51,6 @@ const mbfl_encoding mbfl_encoding_sjis2004 = {
&vtbl_wchar_sjis2004
};
/*
 * Identify-filter table for SJIS-2004: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_sjis, which
 * this file only declares as extern rather than defining its own.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis2004 = {
mbfl_no_encoding_sjis2004,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_sjis
};
const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = {
mbfl_no_encoding_sjis2004,
mbfl_no_encoding_wchar,

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis2004;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis2004;
extern const struct mbfl_convert_vtbl vtbl_sjis2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004;

View File

@ -35,7 +35,6 @@
#include "sjis_mac2uni.h"
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
extern const unsigned char mblen_table_sjis[];
static int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter);
@ -53,12 +52,6 @@ const mbfl_encoding mbfl_encoding_sjis_mac = {
&vtbl_wchar_sjis_mac
};
/*
 * Identify-filter table for SJIS-mac: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_sjis, which
 * this file only declares as extern rather than defining its own.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_mac = {
mbfl_no_encoding_sjis_mac,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_sjis
};
const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = {
mbfl_no_encoding_sjis_mac,
mbfl_no_encoding_wchar,

View File

@ -33,8 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis_mac;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_mac;
extern const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac;

View File

@ -36,7 +36,6 @@
#include "emoji2uni.h"
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
extern const unsigned char mblen_table_sjis[];
static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL};
@ -76,24 +75,6 @@ const mbfl_encoding mbfl_encoding_sjis_sb = {
&vtbl_wchar_sjis_sb
};
/*
 * Identify-filter table for SJIS-DOCOMO: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_sjis, which
 * this file only declares as extern rather than defining its own.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo = {
mbfl_no_encoding_sjis_docomo,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_sjis
};
/*
 * Identify-filter table for the KDDI Shift-JIS variant: the encoding id,
 * then the shared constructor mbfl_filt_ident_common_ctor, then
 * mbfl_filt_ident_sjis, which this file only declares as extern.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = {
mbfl_no_encoding_sjis_kddi,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_sjis
};
/*
 * Identify-filter table for the "sb" Shift-JIS variant: the encoding id,
 * then the shared constructor mbfl_filt_ident_common_ctor, then
 * mbfl_filt_ident_sjis, which this file only declares as extern.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = {
mbfl_no_encoding_sjis_sb,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_sjis
};
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
mbfl_no_encoding_sjis_docomo,
mbfl_no_encoding_wchar,

View File

@ -36,10 +36,6 @@ extern const mbfl_encoding mbfl_encoding_sjis_docomo;
extern const mbfl_encoding mbfl_encoding_sjis_kddi;
extern const mbfl_encoding mbfl_encoding_sjis_sb;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_sb;
extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo;
extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar;

View File

@ -33,8 +33,6 @@
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -67,12 +65,6 @@ const mbfl_encoding mbfl_encoding_sjis_open = {
&vtbl_wchar_sjis_open
};
/*
 * Identify-filter table for SJIS-open: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then this file's own static
 * filter function mbfl_filt_ident_sjis_open.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis_open = {
mbfl_no_encoding_sjis_open,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_sjis_open
};
const struct mbfl_convert_vtbl vtbl_sjis_open_wchar = {
mbfl_no_encoding_sjis_open,
mbfl_no_encoding_wchar,
@ -323,23 +315,3 @@ mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for SJIS-open. Takes one input byte `c` per call and sets
 * filter->flag to 1 when the byte is not acceptable at the current position.
 * The flag is never cleared here. Always returns `c`.
 *
 * filter->status is 1 while the second byte of a double-byte character is
 * pending and 0 otherwise. Only byte ranges are checked; no table lookup is
 * done.
 */
static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter)
{
	if (filter->status != 0) {
		/* Second byte of a kanji: 0x40-0xfc except 0x7f */
		int trail_ok = (c >= 0x40 && c <= 0xfc && c != 0x7f);

		if (!trail_ok) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	/* Single-byte characters: 0x00-0x7f (latin) and 0xa1-0xdf (kana) */
	if ((c >= 0 && c <= 0x7f) || (c >= 0xa1 && c <= 0xdf)) {
		return c;
	}

	if (c >= 0x81 && c <= 0xfc && c != 0xa0) {
		/* First byte of a kanji; the next call checks the second byte */
		filter->status = 1;
	} else {
		/* Negative input, 0x80, 0xa0, or anything above 0xfc */
		filter->flag = 1;
	}
	return c;
}

View File

@ -34,7 +34,6 @@
extern const mbfl_encoding mbfl_encoding_sjis_open;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_open;
extern const struct mbfl_convert_vtbl vtbl_sjis_open_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_open;

View File

@ -30,8 +30,6 @@
#include "mbfilter.h"
#include "mbfilter_ucs2.h"
static int mbfl_filt_ident_ucs2(int c, mbfl_identify_filter *filter);
static const char *mbfl_encoding_ucs2_aliases[] = {"ISO-10646-UCS-2", "UCS2" , "UNICODE", NULL};
/* This library historically had encodings called 'byte2be' and 'byte2le'
@ -74,24 +72,6 @@ const mbfl_encoding mbfl_encoding_ucs2le = {
&vtbl_wchar_ucs2le
};
/*
 * Identify-filter table for UCS-2: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then this file's static filter
 * function mbfl_filt_ident_ucs2.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_ucs2 = {
mbfl_no_encoding_ucs2,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_ucs2
};
/*
 * Identify-filter table for UCS-2BE: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_ucs2, the
 * same filter function used for the other UCS-2 tables in this file.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_ucs2be = {
mbfl_no_encoding_ucs2be,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_ucs2
};
/*
 * Identify-filter table for UCS-2LE: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then mbfl_filt_ident_ucs2, the
 * same filter function used for the other UCS-2 tables in this file.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_ucs2le = {
mbfl_no_encoding_ucs2le,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_ucs2
};
const struct mbfl_convert_vtbl vtbl_ucs2_wchar = {
mbfl_no_encoding_ucs2,
mbfl_no_encoding_wchar,
@ -264,10 +244,3 @@ int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter shared by the UCS-2 variants. It never inspects the value
 * of `c` and never touches filter->flag; it only tracks, in filter->status,
 * the count of bytes consumed modulo 2. Always returns `c`.
 *
 * NOTE(review): a UCS-2 string must be a multiple of 2 bytes long, so a
 * non-zero status at end of input presumably lets the caller reject it -
 * confirm where status is checked.
 */
static int mbfl_filt_ident_ucs2(int c, mbfl_identify_filter *filter)
{
	int bytes_seen = filter->status + 1;

	filter->status = bytes_seen % 2;
	return c;
}

View File

@ -35,9 +35,6 @@
extern const mbfl_encoding mbfl_encoding_ucs2;
extern const mbfl_encoding mbfl_encoding_ucs2be;
extern const mbfl_encoding mbfl_encoding_ucs2le;
extern const struct mbfl_identify_vtbl vtbl_identify_ucs2;
extern const struct mbfl_identify_vtbl vtbl_identify_ucs2be;
extern const struct mbfl_identify_vtbl vtbl_identify_ucs2le;
extern const struct mbfl_convert_vtbl vtbl_ucs2_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ucs2;
extern const struct mbfl_convert_vtbl vtbl_ucs2be_wchar;

View File

@ -32,8 +32,6 @@
#define UNICODE_TABLE_UHC_DEF
#include "unicode_table_uhc.h"
static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -66,12 +64,6 @@ const mbfl_encoding mbfl_encoding_uhc = {
&vtbl_wchar_uhc
};
/*
 * Identify-filter table for UHC: the encoding id, then the shared
 * constructor mbfl_filt_ident_common_ctor, then this file's static filter
 * function mbfl_filt_ident_uhc.
 * NOTE(review): member order is presumably (encoding, constructor, per-byte
 * filter function) - confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_uhc = {
mbfl_no_encoding_uhc,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_uhc
};
const struct mbfl_convert_vtbl vtbl_uhc_wchar = {
mbfl_no_encoding_uhc,
mbfl_no_encoding_wchar,
@ -220,43 +212,3 @@ mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter)
return c;
}
/*
 * Identify filter for UHC. Takes one input byte `c` per call and sets
 * filter->flag to 1 when the byte cannot occur at the current position.
 * The flag is never cleared here. Always returns `c`.
 *
 * filter->status:
 *   0  between characters (next byte is single-byte or a DBCS lead byte)
 *   1  lead byte 0x81-0xa0 seen, trail byte expected
 *   2  lead byte 0xa1-0xc6 seen, trail byte expected
 *   3  lead byte 0xc7-0xfe seen, trail byte expected
 */
static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter)
{
	switch (filter->status) {
	case 0: /* latin */
		if (c >= 0 && c < 0x80) { /* ok */
			;
		} else if (c >= 0x81 && c <= 0xa0) { /* dbcs first char */
			filter->status = 1;
		} else if (c >= 0xa1 && c <= 0xc6) { /* dbcs first char */
			filter->status = 2;
		} else if (c >= 0xc7 && c <= 0xfe) { /* dbcs first char */
			filter->status = 3;
		} else { /* bad */
			filter->flag = 1;
		}
		/*
		 * Stop here. Without this break the byte fell through into the
		 * trail-byte check below, which flagged every ASCII byte under 0x41
		 * (spaces, digits, punctuation) and reset status to 0, so the real
		 * trail byte of a DBCS character was never validated.
		 */
		break;

	case 1:
	case 2:
		/* Trail byte must be 0x41-0x5a, 0x61-0x7a or 0x81-0xfe */
		if (c < 0x41 || (c > 0x5a && c < 0x61)
			|| (c > 0x7a && c < 0x81) || c > 0xfe) { /* bad */
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	case 3:
		/* Trail byte must be 0xa1-0xfe */
		if (c < 0xa1 || c > 0xfe) { /* bad */
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		/* Unknown state: resynchronise at a character boundary */
		filter->status = 0;
		break;
	}

	return c;
}

View File

@ -33,7 +33,6 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_uhc;
extern const struct mbfl_identify_vtbl vtbl_identify_uhc;
extern const struct mbfl_convert_vtbl vtbl_uhc_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_uhc;

Some files were not shown because too many files have changed in this diff Show More