mirror of
https://github.com/php/php-src.git
synced 2024-11-24 18:34:21 +08:00
renamed jstring to mbstring.
This commit is contained in:
parent
9907826a66
commit
09197a4531
2
ext/mbstring/CREDITS
Normal file
2
ext/mbstring/CREDITS
Normal file
@ -0,0 +1,2 @@
|
||||
Multibyte (Japanese) String Functions
|
||||
Tsukada Takuya
|
5
ext/mbstring/EXPERIMENTAL
Normal file
5
ext/mbstring/EXPERIMENTAL
Normal file
@ -0,0 +1,5 @@
|
||||
this extension is experimental,
|
||||
its functions may change their names
|
||||
or move to extension all together
|
||||
so do not rely too much on them
|
||||
you have been warned!
|
7
ext/mbstring/Makefile.in
Normal file
7
ext/mbstring/Makefile.in
Normal file
@ -0,0 +1,7 @@
|
||||
# $Id$
|
||||
|
||||
LTLIBRARY_NAME = libmbstring.la
|
||||
LTLIBRARY_SOURCES = mbfilter_ja.c mbfilter.c mbstring.c
|
||||
LTLIBRARY_SHARED_NAME = mbstring.la
|
||||
|
||||
include $(top_srcdir)/build/dynlib.mk
|
774
ext/mbstring/README_PHP3-i18n-ja
Normal file
774
ext/mbstring/README_PHP3-i18n-ja
Normal file
@ -0,0 +1,774 @@
|
||||
==========================================
|
||||
README for I18N Package
|
||||
==========================================
|
||||
|
||||
o Name and location of package
|
||||
|
||||
Name: php-3.0.18-i18n-ja-2
|
||||
Location: http://www.happysize.co.jp/techie/php-ja-jp/
|
||||
ftp://ftp.happysize.co.jp/php-ja-jp/
|
||||
http://php.vdomains.org/
|
||||
ftp://ftp.vdomains.org/pub/php-ja-jp/
|
||||
http://php.jpnnet.com/
|
||||
|
||||
Currently, this I18N version of PHP only adds Japanese support to base
|
||||
PHP. It allows you to use Japanese in scripts, as well as conversion
|
||||
between various Japanese encodings. It will work perfectly fine with
|
||||
ASCII with i18n option enabled. (note: executable is a bit larger due
|
||||
to UNICODE table). The basic design approach is to allow for other
|
||||
languages to be added in the future. Developers are encouraged to join
|
||||
us!
|
||||
|
||||
For more information on Japanese encodings, please refer to the
|
||||
section "Additional Notes."
|
||||
|
||||
|
||||
o What is this package?
|
||||
|
||||
This package allows you to handle multiple Japanese encodings (SJIS, EUC,
|
||||
UTF-8, JIS) in PHP. If you find any bugs in this package, please report
|
||||
them to the appropriate mailing list. For now, the PHP-jp mailing list
|
||||
is the best place for this.
|
||||
|
||||
PHP-jp ML mailto:PHP-jp@sidecar.ics.es.osaka-u.ac.jp
|
||||
http://sidecar.ics.es.osaka-u.ac.jp/php-jp/
|
||||
(discussions are in Japanese)
|
||||
|
||||
|
||||
o Who should use this
|
||||
|
||||
Due to lack of documentation, it's not intended for beginners. If
|
||||
something goes wrong, be prepared to fix it on your own.
|
||||
|
||||
|
||||
o Warranty and Copyright
|
||||
|
||||
There is no warranty with this package. Use it at your own risk.
|
||||
|
||||
Please refer to the source code for the copyrights. In general, each
|
||||
program's copyright is owned by the programmer. Unless you obey the
|
||||
copyright holder's restrictions, you are not allowed to use it in any
|
||||
form.
|
||||
|
||||
|
||||
o Redistribution
|
||||
|
||||
As described in the source code, this package and the components are
|
||||
allowed to be redistributed with certain restrictions.
|
||||
|
||||
Due to this package being still in beta, please try to redistribute
|
||||
it as an entire package. Please try not to distribute it as a form
|
||||
of patch. Because we would prefer to have this package distributed
|
||||
as one single package (not patch of patch of patch), avoid releasing
|
||||
any patch to this package.
|
||||
|
||||
|
||||
o Who made this
|
||||
|
||||
A team of volunteers, PHP3 Internationalization, has been contributing
|
||||
their free time producing it. Although we are not related to the core
|
||||
PHP programmers, we are hoping to have our modifications merged into the
|
||||
core distribution in the near future. Thus, we did not call this a
|
||||
"Japanese Patch" (or distribution). Our final goal is to have true
|
||||
i18nized PHP!
|
||||
|
||||
For anyone interested in this project, please drop us a line.
|
||||
|
||||
Contact Address:
|
||||
phpj-dev@kage.net
|
||||
(Discussions are in Japanese, but feel free to write us in English)
|
||||
|
||||
Webpage (English and Japanese):
|
||||
http://php.jpnnet.com/
|
||||
|
||||
Project Outline (Japanese):
|
||||
http://www.happysize.co.jp/techie/php-ja-jp/spec.htm
|
||||
|
||||
Developers:
|
||||
Hironori Sato <satoh@jpnnet.com>
|
||||
Shigeru Kanemoto <sgk@happysize.co.jp>
|
||||
Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
|
||||
U. Kenkichi <kenkichi@axes.co.jp>
|
||||
Tateyama <tateyan@amy.hi-ho.ne.jp>
|
||||
Other gracious contributors
|
||||
|
||||
|
||||
o Future plans
|
||||
|
||||
- fulfilling what's written in outline
|
||||
- support for languages other than Japanese
|
||||
- make the character conversion as a library (?)
|
||||
- more testing
|
||||
|
||||
|
||||
o Special Thanks to
|
||||
|
||||
PHP Japanese webpage maintainer, Hirokawa-san
|
||||
http://www.cityfujisawa.ne.jp/%7Elouis/apps/phpfi/
|
||||
PHP-JP ML's Yamamoto-san
|
||||
http://sidecar.ics.es.osaka-u.ac.jp/php-jp/
|
||||
Previous jp-patch developers
|
||||
|
||||
|
||||
|
||||
==========================================
|
||||
Advantages of using I18N package
|
||||
==========================================
|
||||
|
||||
- allows you to use various character encodings for script files and
|
||||
http output
|
||||
- distinguish character encoding in POST/GET/COOKIE
|
||||
- proper mail output using JIS as body and MIME/Base64/JIS subject
|
||||
- if http output's Content-Type is text/html, it will set proper charset
|
||||
- stable character encoding conversion
|
||||
- multibyte regex
|
||||
|
||||
|
||||
|
||||
==========================================
|
||||
Installation
|
||||
==========================================
|
||||
|
||||
o Summary
|
||||
|
||||
Add --enable-i18n option when running configure. For your own setup,
|
||||
add any other appropriate options as well.
|
||||
|
||||
Don't forget to copy php3.ini-dist to desired location.
|
||||
(ex. /usr/local/lib/php3.ini)
|
||||
|
||||
If you have already installed PHP3, copy all the entries in php3.ini-dist
|
||||
which start with "i18n.xxxx" to php3.ini.
|
||||
|
||||
|
||||
o configure option
|
||||
--enable-i18n
|
||||
include i18n features
|
||||
|
||||
--enable-mbregex
|
||||
include multibyte regex library
|
||||
(without i18n enabled, mbregex functions will not function)
|
||||
|
||||
|
||||
o creating cgi version
|
||||
|
||||
% tar xvzf php-3.0.18-i18n-ja-2.tar.gz
|
||||
% cd php-3.0.18-i18n-ja-2
|
||||
% ./configure --enable-i18n --enable-mbregex
|
||||
% make
|
||||
|
||||
|
||||
o creating Apache version (regular module)
|
||||
|
||||
% tar xvzf php-3.0.18-i18n-ja-2.tar.gz
|
||||
% tar xvzf apache_1.3.x.tar.gz
|
||||
% cd apache_1.3.x
|
||||
% ./configure
|
||||
% cd ../php-3.0.18-i18n-ja-2
|
||||
% ./configure --with-apache=../apache_1.3.x --enable-i18n --enable-mbregex
|
||||
% make
|
||||
% make install
|
||||
% cd ../apache_1.3.x
|
||||
% ./configure --activate-module=src/modules/php3/libphp3.a
|
||||
% make
|
||||
% make install
|
||||
|
||||
|
||||
o creating Apache DSO version
|
||||
|
||||
create DSO capable Apache first
|
||||
% tar xvzf apache_1.3.x.tar.gz
|
||||
% cd apache_1.3.x
|
||||
% ./configure --enable-shared=max
|
||||
% make
|
||||
% make install
|
||||
|
||||
now create php3
|
||||
% cd php-3.0.18-i18n-ja-2
|
||||
% ./configure --with-apxs=/usr/local/apache/bin/apxs --enable-i18n \
|
||||
--enable-mbregex
|
||||
% make
|
||||
% make install
|
||||
|
||||
|
||||
==========================================
|
||||
Additional Notes
|
||||
==========================================
|
||||
|
||||
o Multibyte regex library
|
||||
|
||||
From beta4, we have included the multibyte (mb) regex library which comes with
|
||||
Ruby. With this addition, you can now use regex in EUC, SJIS and UTF-8
|
||||
encoding. To avoid any conflicts with HSREGEX included with Apache,
|
||||
each function name has been changed. Therefore, mb regex functions are
|
||||
named differently from the original ereg functions in PHP. The character
|
||||
encoding used in mb regex is configured in i18n.internal_encoding.
|
||||
|
||||
|
||||
o Binary Output
|
||||
|
||||
If http output encoding is set to other than 'pass', conversion of encoding
|
||||
from internal encoding to http output is done automatically. Thus,
|
||||
if you prefer to spit out anything in raw binary format, your data
|
||||
may be corrupted. In such event, set http_output to 'pass'.
|
||||
|
||||
ex.
|
||||
<?
|
||||
i18n_http_output("pass");
|
||||
...
|
||||
echo $the_binary_data_string;
|
||||
?>
|
||||
|
||||
|
||||
o Content-Type
|
||||
|
||||
Depending on the setting of http_output, PHP will output the proper charset.
|
||||
ex. Content-Type: text/html; charset="..."
|
||||
|
||||
Be aware of following:
|
||||
|
||||
- If you set Content-Type header using header() function, that will
|
||||
override the automatic addition of charset.
|
||||
- Be cautious when you set i18n_http_output, since if any output is
|
||||
made prior to this, proper header may have been sent out to the
|
||||
client already.
|
||||
|
||||
|
||||
o In the event of trouble
|
||||
|
||||
If you find any bugs or trouble, please contact us at the above address.
|
||||
It may help us to track the problem if you send us the script as well.
|
||||
|
||||
If you encounter any memory related error such as segmentation violation,
|
||||
add --enable-debug when you run configure. This will give you more
|
||||
detailed information on where the error has occurred. The error is stored
|
||||
in the server log or regular http output in CGI mode.
|
||||
|
||||
|
||||
o About Japanese encodings
|
||||
|
||||
Due to historical reasons, there are multiple character encodings used
|
||||
for Japanese. The most common encodings are: SJIS, EUC, JIS, and UTF-8.
|
||||
Here are (very) brief descriptions of them:
|
||||
|
||||
EUC
|
||||
commonly used in UNIX environment
|
||||
8bit-8bit combo
|
||||
always >=0x80
|
||||
|
||||
SJIS
|
||||
commonly used in Mac or PCs
|
||||
similar to EUC
|
||||
mostly 8bit-8bit (some 8bit-7bit)
|
||||
mostly >=0x80
|
||||
there are some halfwidth (size of ASCII) multibytes
|
||||
|
||||
JIS
|
||||
commonly used in 7bit environment (nntp and smtp)
|
||||
starts with escaping char, \033 and a few more characters
|
||||
|
||||
UTF-8
|
||||
variable-length encoding of Unicode (one or more 8-bit bytes per character)
|
||||
defines many languages existing in this world
|
||||
see http://www.unicode.org/ for more detail
|
||||
|
||||
Because of having all these character encodings, PHP needs to translate
|
||||
between these encodings on the fly. Also, the addition of the mb regex
|
||||
library allows you to handle mb strings without fear of getting mb char
|
||||
chopped in half.
|
||||
|
||||
Since Japanese is not the only language with multiple encodings, we
|
||||
encourage other developers to modify our code to suit your needs. We
|
||||
definitely need people to work with Korean, Chinese (both traditional
|
||||
and simplified), and Russian. Let us know if you are interested in
|
||||
this project!
|
||||
|
||||
|
||||
|
||||
==========================================
|
||||
php3.ini setting
|
||||
==========================================
|
||||
|
||||
The following init options will allow you to change the default settings.
|
||||
Define these settings in the global section of php3.ini.
|
||||
|
||||
All keywords are case-insensitive.
|
||||
|
||||
o Encoding naming
|
||||
|
||||
For each encoding, there are three names: standardized, alias, MIME
|
||||
|
||||
- UTF-8
|
||||
standard: UTF-8
|
||||
alias: N/A
|
||||
mime: UTF-8
|
||||
|
||||
- ASCII
|
||||
standard: ASCII
|
||||
alias: N/A
|
||||
mime: US-ASCII
|
||||
|
||||
- Japanese EUC
|
||||
standard: EUC-JP
|
||||
alias: EUC, EUC_JP, eucJP, x-euc-jp
|
||||
mime: EUC-JP
|
||||
|
||||
- Shift JIS
|
||||
standard: SJIS
|
||||
alias: x-sjis, MS_Kanji
|
||||
mime: Shift_JIS
|
||||
|
||||
- JIS
|
||||
standard: JIS
|
||||
alias: N/A
|
||||
mime: ISO-2022-JP
|
||||
|
||||
- Quoted-Printable
|
||||
standard: Quoted-Printable
|
||||
alias: qprint
|
||||
mime: N/A
|
||||
|
||||
- BASE64
|
||||
standard: BASE64
|
||||
alias: N/A
|
||||
mime: N/A
|
||||
|
||||
- no conversion
|
||||
standard: pass
|
||||
alias: none
|
||||
mime: N/A
|
||||
|
||||
- auto encoding detection
|
||||
standard: auto
|
||||
alias: unknown
|
||||
mime: N/A
|
||||
|
||||
* N/A - Not Applicable
|
||||
|
||||
o i18n.http_output - default http output encoding
|
||||
|
||||
i18n.http_output = EUC-JP|SJIS|JIS|UTF-8|pass
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
pass: no conversion
|
||||
|
||||
The default is pass (internal encoding is used)
|
||||
It can be re-configured on the fly using i18n_http_output().
|
||||
|
||||
|
||||
o i18n.internal_encoding - internal encoding
|
||||
|
||||
i18n.internal_encoding = EUC-JP|SJIS|UTF-8
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
The default is EUC-JP.
|
||||
|
||||
PHP parser is designed based on using ISO-8859-1. For other
|
||||
encodings, following conditions have to be satisfied in order
|
||||
to use them:
|
||||
- per byte encoding
|
||||
- single byte character in range of 00h-7fh which is compatible
|
||||
with ASCII
|
||||
- multibyte without 00h-7fh
|
||||
In case of Japanese, EUC-JP and UTF-8 are the only encodings that
|
||||
meet these criteria.
|
||||
|
||||
If i18n.internal_encoding and i18n.http_output differ, conversion
|
||||
takes place at the time of output. If you convert any data within
|
||||
PHP scripts to URL encoding, BASE64 or Quoted-Printable, encoding
|
||||
stays as defined in i18n.internal_encoding. Thus, if you would
|
||||
prefer to encode in compliance with i18n.http_output, you need
|
||||
to manually convert encoding.
|
||||
|
||||
ex. $str = urlencode( i18n_convert($str, i18n_http_output()) );
|
||||
|
||||
Encoding such as ISO-2022-** and HZ encoding which uses escape
|
||||
sequences can not be used as internal encoding. If used, they
|
||||
result in following errors:
|
||||
- parser pukes funky error
|
||||
- magic_quotes_*** breaks encoding (SJIS may have similar problem)
|
||||
- string manipulation and regex will malfunction
|
||||
|
||||
|
||||
o i18n.script_encoding - script encoding
|
||||
|
||||
i18n.script_encoding = auto|EUC-JP|SJIS|JIS|UTF-8
|
||||
auto: automatic
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
The default is auto.
|
||||
The script's encoding is converted to i18n.internal_encoding before
|
||||
entering the script parser.
|
||||
|
||||
Be aware that auto detection may fail under some conditions.
|
||||
For best auto detection, add a multibyte character at the beginning of
|
||||
script.
|
||||
|
||||
|
||||
o i18n.http_input - handling of http input (GET/POST/COOKIE)
|
||||
|
||||
i18n.http_input = pass|auto
|
||||
auto: auto conversion
|
||||
pass: no conversion
|
||||
|
||||
The default is auto.
|
||||
If set to pass, no conversion will take place.
|
||||
If set to auto, it will automatically detect the encoding. If
|
||||
detection is successful, it will convert to the proper internal
|
||||
encoding. If not, it will assume the input as defined in
|
||||
i18n.http_input_default.
|
||||
|
||||
o i18n.http_input_default - default http input encoding
|
||||
|
||||
i18n.http_input_default = pass|EUC-JP|SJIS|JIS|UTF-8
|
||||
pass: no conversion
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
The default is pass.
|
||||
This option is only effective as long as i18n.http_input is set to
|
||||
auto. If the auto detection fails, this encoding is used as an
|
||||
assumption to convert the http input to the internal encoding.
|
||||
If set to pass, no conversion will take place.
|
||||
|
||||
o sample settings
|
||||
|
||||
1) For most flexibility, we recommend using following example.
|
||||
i18n.http_output = SJIS
|
||||
i18n.internal_encoding = EUC-JP
|
||||
i18n.script_encoding = auto
|
||||
i18n.http_input = auto
|
||||
i18n.http_input_default = SJIS
|
||||
|
||||
2) To avoid unexpected encoding problems, try these:
|
||||
|
||||
i18n.http_output = pass
|
||||
i18n.internal_encoding = EUC-JP
|
||||
i18n.script_encoding = pass
|
||||
i18n.http_input = pass
|
||||
i18n.http_input_default = pass
|
||||
|
||||
|
||||
|
||||
==========================================
|
||||
PHP functions
|
||||
==========================================
|
||||
|
||||
The following describes the additional PHP functions.
|
||||
|
||||
All keywords are case-insensitive.
|
||||
|
||||
o i18n_http_output(encoding)
|
||||
o encoding = i18n_http_output()
|
||||
|
||||
This will set the http output encoding. Any output following this
|
||||
function will be controlled by this function. If no argument is given,
|
||||
the current http output encode setting is returned.
|
||||
|
||||
encodings
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
pass: no conversion
|
||||
|
||||
NONE is not allowed
|
||||
|
||||
|
||||
o encoding = i18n_internal_encoding()
|
||||
|
||||
Returns the current internal encoding as a string.
|
||||
|
||||
internal encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
|
||||
o encoding = i18n_http_input()
|
||||
|
||||
Returns http input encoding.
|
||||
|
||||
encodings
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
pass: no conversion (only if i18n.http_input is set to pass)
|
||||
|
||||
|
||||
o string = i18n_convert(string, encoding)
|
||||
string = i18n_convert(string, encoding, pre-conversion-encoding)
|
||||
|
||||
Returns converted string in desired encoding. If
|
||||
pre-conversion-encoding is not defined, the given
|
||||
string is assumed to be in internal encoding.
|
||||
|
||||
encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
pass: no conversion
|
||||
|
||||
pre-conversion-encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
pass: no conversion
|
||||
auto: auto detection
|
||||
|
||||
|
||||
o encoding = i18n_discover_encoding(string)
|
||||
|
||||
Encoding of the given string is returned (as a string).
|
||||
|
||||
encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
ASCII: ASCII (only 09h, 0Ah, 0Dh, 20h-7Eh)
|
||||
pass: unable to determine (text is too short to determine)
|
||||
unknown: unknown or possible error
|
||||
|
||||
|
||||
o int = mbstrlen(string)
|
||||
o int = mbstrlen(string, encoding)
|
||||
|
||||
Returns character length of a given string. If no encoding is defined,
|
||||
the encoding of string is assumed to be the internal encoding.
|
||||
|
||||
encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
auto: automatic
|
||||
|
||||
|
||||
o int = mbstrpos(string1, string2)
|
||||
o int = mbstrpos(string1, string2, start)
|
||||
o int = mbstrpos(string1, string2, start, encoding)
|
||||
|
||||
Same as strpos. If no encoding is defined, the encoding of string
|
||||
is assumed to be the internal encoding.
|
||||
|
||||
encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
|
||||
o int = mbstrrpos(string1, string2)
|
||||
o int = mbstrrpos(string1, string2, encoding)
|
||||
|
||||
Same as strrpos. If no encoding is defined, the encoding of string
|
||||
is assumed to be the internal encoding.
|
||||
|
||||
encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
|
||||
o string = mbsubstr(string, position)
|
||||
o string = mbsubstr(string, position, length)
|
||||
o string = mbsubstr(string, position, length, encoding)
|
||||
|
||||
Same as substr. If no encoding is defined, the encoding of string
|
||||
is assumed to be the internal encoding.
|
||||
|
||||
encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
|
||||
o string = mbstrcut(string, position)
|
||||
o string = mbstrcut(string, position, length)
|
||||
o string = mbstrcut(string, position, length, encoding)
|
||||
|
||||
Same as subcut. If position is the 2nd byte of a mb character, it will cut
|
||||
from the first byte of that character. It will cut the string without
|
||||
chopping a single byte from a mb character. In other words, if you
|
||||
set length to 5, you will only get two mb characters. If no encoding
|
||||
is defined, the encoding of string is assumed to be the internal encoding.
|
||||
|
||||
encoding
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
|
||||
o string = i18n_mime_header_encode(string)
|
||||
MIME encode the string in the format of =?ISO-2022-JP?B?[string]?=.
|
||||
|
||||
|
||||
o string = i18n_mime_header_decode(string)
|
||||
MIME decodes the string.
|
||||
|
||||
|
||||
o string = i18n_ja_jp_hantozen(string)
|
||||
o string = i18n_ja_jp_hantozen(string, option)
|
||||
o string = i18n_ja_jp_hantozen(string, option, encoding)
|
||||
|
||||
Conversion between full width character and halfwidth character.
|
||||
|
||||
option
|
||||
The following options are allowed. The default is "KV".
|
||||
Acronym: FW = fullwidth, HW = halfwidth
|
||||
|
||||
"r" : FW alphabet -> HW alphabet
|
||||
|
||||
"R" : HW alphabet -> FW alphabet
|
||||
|
||||
"n" : FW number -> HW number
|
||||
|
||||
"N" : HW number -> FW number
|
||||
|
||||
"a" : FW alpha numeric (21h-7Eh) -> HW alpha numeric
|
||||
|
||||
"A" : HW alpha numeric (21h-7Eh) -> FW alpha numeric
|
||||
|
||||
"k" : FW katakana -> HW katakana
|
||||
|
||||
"K" : HW katakana -> FW katakana
|
||||
|
||||
"h" : FW hiragana -> HW katakana
|
||||
|
||||
"H" : HW katakana -> FW hiragana
|
||||
|
||||
"c" : FW katakana -> FW hiragana
|
||||
|
||||
"C" : FW hiragana -> FW katakana
|
||||
|
||||
"V" : merge dakuon character. only works with "K" and "H" option
|
||||
|
||||
encoding
|
||||
If no encoding is defined, the encoding of string is assumed to be
|
||||
the internal encoding.
|
||||
EUC-JP : EUC
|
||||
SJIS: SJIS
|
||||
JIS : JIS
|
||||
UTF-8: UTF-8
|
||||
|
||||
|
||||
int = mbereg(regex_pattern, string, string)
|
||||
int = mberegi(regex_pattern, string, string)
|
||||
mb version of ereg() and eregi()
|
||||
|
||||
|
||||
string = mbereg_replace(regex_pattern, string, string)
|
||||
string = mberegi_replace(regex_pattern, string, string)
|
||||
mb version of ereg_replace() and eregi_replace()
|
||||
|
||||
|
||||
string_array = mbsplit(regex, string, limit)
|
||||
mb version of split()
|
||||
|
||||
|
||||
|
||||
==========================================
|
||||
FAQ
|
||||
==========================================
|
||||
|
||||
Here, we have gathered some commonly asked questions on PHP-jp mailing
|
||||
list.
|
||||
|
||||
o To use Japanese in GET method
|
||||
|
||||
If you need to assign Japanese text in GET method with argument, such as;
|
||||
xxxx.php?data=<Japanese text>, use urlencode function in PHP. If not,
|
||||
text may not be passed onto action php properly.
|
||||
|
||||
ex: <a href="hoge.php?data=<? echo urlencode($data) ?>">Link</a>
|
||||
|
||||
|
||||
o When passing data via GET/POST/COOKIE, \ character sneaks in
|
||||
|
||||
When using SJIS as internal encoding, or passed-on data includes '"\,
|
||||
PHP automatically inserts escaping character, \. Set magic_quotes_gpc
|
||||
in php3.ini from On to Off. An alternative work around to this problem
|
||||
is to use StripSlashes().
|
||||
|
||||
If $quote_str is in SJIS and you would like to extract Japanese text,
|
||||
use ereg_replace as follows:
|
||||
|
||||
ereg_replace(sprintf("([%c-%c%c-%c]\\\\)\\\\",0x81,0x9f,0xe0,0xfc),
|
||||
"\\1",$quote_str);
|
||||
|
||||
This will effectively extract Japanese text out of $quote_str.
|
||||
|
||||
|
||||
o Sometimes, encoding detection fails
|
||||
|
||||
If i18n_http_input() returns 'pass', it's likely that PHP failed to
|
||||
detect whether it's SJIS or EUC. In such case, use <input type=hidden
|
||||
value="some Japanese text"> to properly detect the incoming text's
|
||||
encoding.
|
||||
|
||||
|
||||
|
||||
==========================================
|
||||
Japanese Manual
|
||||
==========================================
|
||||
Translated manual done by "PHP Japanese Manual Project" :
|
||||
|
||||
http://www.php.net/manual/ja/manual.php
|
||||
|
||||
Starting 3.0.18-i18n-ja, we have removed doc-jp from tarball package.
|
||||
|
||||
|
||||
==========================================
|
||||
Change Logs
|
||||
==========================================
|
||||
|
||||
o 2000-10-28, Rui Hirokawa <hirokawa@php.net>
|
||||
|
||||
This patch is derived from php-3.0.15-i18n-ja as well as php-3.0.16 by
|
||||
Kuwamura applied to original php-3.0.18. It also includes following fixes:
|
||||
|
||||
1) allows you to set charset in mail().
|
||||
2) fixed mbregex definitions to avoid conflicts with system regex
|
||||
3) php3.ini-dist now uses PASS for http_output instead of SJIS
|
||||
|
||||
o 2000-11-24, Hironori Sato <satoh@yyplanet.com>
|
||||
|
||||
Applied the above patch and added detection for gdImageStringTTF in configure.
|
||||
Following setups are known to work:
|
||||
|
||||
gd-1.3-6, gd-devel-1.3-6, freetype-1.3.1-5, freetype-devel-1.3.1-5
|
||||
ImageTTFText($im,$size,$angle,$x1,$y1,$color,"/path/to/font.ttf",
|
||||
i18n_convert("日本語", "UTF-8"));
|
||||
ImageGif($im);
|
||||
|
||||
gd-1.7.3-1k1, gd-devel-1.7.3-1k1, freetype-1.3.1-5, freetype-devel-1.3.1-5
|
||||
ImageTTFText($im,$size,$angle,$x1,$y1,$color,"/path/to/font.ttf","日本語");
|
||||
ImagePng($im);
|
||||
* i18n_internal_encoding = EUC or SJIS
|
||||
|
||||
For any gd libraries before 1.6.2, you need to use i18n_convert. For
|
||||
gd-1.5.2/3, upgrade to anything above 1.7 to use ImageTTFText without
|
||||
using i18n_convert. As long as you have internal_encoding set to EUC or
|
||||
SJIS, ImageTTFText should work without mojibake. Again, make sure you
|
||||
have i18n_http_output("pass") before calling ImageGif, ImagePng, ImageJpeg!
|
||||
|
||||
o 2000-12-09, Rui Hirokawa <hirokawa@php.net>
|
||||
|
||||
Fixed mail() which was causing segmentation fault when header was null.
|
||||
|
23
ext/mbstring/config.m4
Normal file
23
ext/mbstring/config.m4
Normal file
@ -0,0 +1,23 @@
|
||||
dnl $Id$
|
||||
dnl config.m4 for extension mbstring
|
||||
|
||||
PHP_ARG_ENABLE(mbstring, whether to enable multibyte string support,
|
||||
[ --enable-mbstring Enable multibyte string support])
|
||||
|
||||
if test "$PHP_MBSTRING" != "no"; then
|
||||
AC_DEFINE(HAVE_MBSTRING,1,[ ])
|
||||
PHP_EXTENSION(mbstring, $ext_shared)
|
||||
fi
|
||||
|
||||
AC_MSG_CHECKING(whether to enable japanese encoding translation)
|
||||
AC_ARG_ENABLE(mbstr_enc_trans,
|
||||
[ --enable-mbstr-enc-trans Enable japanese encoding translation],[
|
||||
if test "$enableval" = "yes" ; then
|
||||
AC_DEFINE(MBSTR_ENC_TRANS, 1, [ ])
|
||||
AC_MSG_RESULT(yes)
|
||||
else
|
||||
AC_MSG_RESULT(no)
|
||||
fi
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
8244
ext/mbstring/mbfilter.c
Normal file
8244
ext/mbstring/mbfilter.c
Normal file
File diff suppressed because it is too large
Load Diff
539
ext/mbstring/mbfilter.h
Normal file
539
ext/mbstring/mbfilter.h
Normal file
@ -0,0 +1,539 @@
|
||||
/* charset=UTF-8 */
|
||||
|
||||
/*
|
||||
* "streamable kanji code filter and converter"
|
||||
*
|
||||
* Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
|
||||
*
|
||||
* This software is released under the GNU Lesser General Public License.
|
||||
* Please read the following detail of the licence (in japanese).
|
||||
*
|
||||
* ◆使用許諾条件◆
|
||||
*
|
||||
* このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
|
||||
* ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
|
||||
* するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
|
||||
* ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
|
||||
* をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
|
||||
* することはできません。
|
||||
*
|
||||
* このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
|
||||
* 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
|
||||
* General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
|
||||
* による許諾を得る必要があります。
|
||||
*
|
||||
* 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
|
||||
* ます。「GNU Lesser General Public License」とは、これまでLibrary General
|
||||
* Public Licenseと呼ばれていたものです。
|
||||
* http://www.gnu.org/ --- GNUウェブサイト
|
||||
* http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
|
||||
* このライセンスの内容がわからない方、守れない方には使用を許諾しません。
|
||||
*
|
||||
* しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
|
||||
* はありません。
|
||||
*
|
||||
* ◆保証内容◆
|
||||
*
|
||||
* このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
|
||||
* 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
|
||||
* のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
|
||||
* 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
|
||||
* る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
|
||||
* 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
|
||||
* 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
|
||||
* 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
|
||||
* 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
|
||||
* 契約・規定に優先します。
|
||||
*
|
||||
* ◆著作権者の連絡先および使用条件についての問い合わせ先◆
|
||||
*
|
||||
* 〒112-0004東京都文京区後楽1-1-13小野ビル7F
|
||||
* 株式会社ハッピーサイズ
|
||||
* Phone: 03-5803-2964, Fax: 03-5803-2965
|
||||
* http://www.happysize.co.jp/ mailto:info@happysize.co.jp
|
||||
*
|
||||
* ◆著者◆
|
||||
*
|
||||
* 金本 茂 <sgk@happysize.co.jp>
|
||||
*
|
||||
* ◆履歴◆
|
||||
*
|
||||
* 1998/11/10 sgk implementation in C++
|
||||
* 1999/4/25 sgk Cで書きなおし。
|
||||
* 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
|
||||
* 1999/6/?? Unicodeサポート。
|
||||
* 1999/6/22 sgk ライセンスをLGPLに変更。
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Unicode support
|
||||
*
|
||||
* Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
|
||||
* All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* streamable kanji code filter and converter
|
||||
* mbfl : Multi Byte FiLter Library
|
||||
*
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
|
||||
#ifndef MBFL_MBFILTER_H
|
||||
#define MBFL_MBFILTER_H
|
||||
|
||||
/*
 * Language identifiers.
 *
 * mbfl_no_language_invalid is explicitly -1; every following enumerator
 * takes the next consecutive value, so mbfl_no_language_uni is 0.
 * mbfl_no_language_min and mbfl_no_language_max bracket the per-language
 * entries (presumably range sentinels -- TODO confirm against mbfilter.c).
 * The trailing comment on each language entry is its two-letter abbreviation.
 * Do not reorder entries: the numeric values depend on declaration order.
 */
enum mbfl_no_language {
|
||||
mbfl_no_language_invalid = -1,
|
||||
mbfl_no_language_uni,
|
||||
mbfl_no_language_min,
|
||||
mbfl_no_language_catalan, /* ca */
|
||||
mbfl_no_language_danish, /* da */
|
||||
mbfl_no_language_german, /* de */
|
||||
mbfl_no_language_english, /* en */
|
||||
mbfl_no_language_estonian, /* et */
|
||||
mbfl_no_language_greek, /* el */
|
||||
mbfl_no_language_spanish, /* es */
|
||||
mbfl_no_language_french, /* fr */
|
||||
mbfl_no_language_italian, /* it */
|
||||
mbfl_no_language_japanese, /* ja */
|
||||
mbfl_no_language_korean, /* ko */
|
||||
mbfl_no_language_dutch, /* nl */
|
||||
mbfl_no_language_polish, /* pl */
|
||||
mbfl_no_language_portuguese, /* pt */
|
||||
mbfl_no_language_swedish, /* sv */
|
||||
mbfl_no_language_chinese, /* zh */
|
||||
mbfl_no_language_max
|
||||
};
|
||||
|
||||
enum mbfl_no_encoding { /* numeric identifiers for the encodings known to mbfl */
|
||||
mbfl_no_encoding_invalid = -1, /* sentinel; the only explicitly numbered entry, all others count up from 0 */
|
||||
mbfl_no_encoding_pass, /* NOTE(review): entries up to charset_min look like pseudo/transfer encodings rather than character sets -- confirm */
|
||||
mbfl_no_encoding_auto,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_byte2be,
|
||||
mbfl_no_encoding_byte2le,
|
||||
mbfl_no_encoding_byte4be,
|
||||
mbfl_no_encoding_byte4le,
|
||||
mbfl_no_encoding_base64,
|
||||
mbfl_no_encoding_qprint,
|
||||
mbfl_no_encoding_7bit,
|
||||
mbfl_no_encoding_8bit,
|
||||
mbfl_no_encoding_charset_min, /* range marker: the character-set entries follow this one */
|
||||
mbfl_no_encoding_ucs4,
|
||||
mbfl_no_encoding_ucs4be,
|
||||
mbfl_no_encoding_ucs4le,
|
||||
mbfl_no_encoding_ucs2,
|
||||
mbfl_no_encoding_ucs2be,
|
||||
mbfl_no_encoding_ucs2le,
|
||||
mbfl_no_encoding_utf32,
|
||||
mbfl_no_encoding_utf32be,
|
||||
mbfl_no_encoding_utf32le,
|
||||
mbfl_no_encoding_utf16,
|
||||
mbfl_no_encoding_utf16be,
|
||||
mbfl_no_encoding_utf16le,
|
||||
mbfl_no_encoding_utf8,
|
||||
mbfl_no_encoding_utf7,
|
||||
mbfl_no_encoding_utf7imap,
|
||||
mbfl_no_encoding_ascii,
|
||||
mbfl_no_encoding_euc_jp,
|
||||
mbfl_no_encoding_sjis,
|
||||
mbfl_no_encoding_eucjp_win,
|
||||
mbfl_no_encoding_sjis_win,
|
||||
mbfl_no_encoding_sjis_mac,
|
||||
mbfl_no_encoding_jis,
|
||||
mbfl_no_encoding_2022jp,
|
||||
mbfl_no_encoding_8859_1,
|
||||
mbfl_no_encoding_8859_2,
|
||||
mbfl_no_encoding_8859_3,
|
||||
mbfl_no_encoding_8859_4,
|
||||
mbfl_no_encoding_8859_5,
|
||||
mbfl_no_encoding_8859_6,
|
||||
mbfl_no_encoding_8859_7,
|
||||
mbfl_no_encoding_8859_8,
|
||||
mbfl_no_encoding_8859_9,
|
||||
mbfl_no_encoding_8859_10,
|
||||
mbfl_no_encoding_8859_13,
|
||||
mbfl_no_encoding_8859_14,
|
||||
mbfl_no_encoding_8859_15,
|
||||
mbfl_no_encoding_charset_max /* range marker: one past the last character-set entry */
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* language
|
||||
*/
|
||||
typedef struct _mbfl_language { /* descriptor record for one language */
|
||||
enum mbfl_no_language no_language; /* numeric identifier of this language */
|
||||
const char *name; /* NOTE(review): presumably the full language name -- confirm against the table in mbfilter.c */
|
||||
const char *short_name; /* NOTE(review): presumably the short code shown beside the enum entries (e.g. "ja") -- confirm */
|
||||
const char *(*aliases)[]; /* pointer to an array of string pointers; NOTE(review): presumably alternative names, termination convention not visible here */
|
||||
enum mbfl_no_encoding mail_charset; /* encoding number; NOTE(review): presumably the charset used when sending mail in this language -- confirm */
|
||||
enum mbfl_no_encoding mail_header_encoding; /* encoding number; NOTE(review): presumably the transfer encoding for mail headers -- confirm */
|
||||
enum mbfl_no_encoding mail_body_encoding; /* encoding number; NOTE(review): presumably the transfer encoding for the mail body -- confirm */
|
||||
} mbfl_language;
|
||||
|
||||
|
||||
/*
|
||||
* encoding
|
||||
*/
|
||||
typedef struct _mbfl_encoding { /* descriptor record for one encoding */
|
||||
enum mbfl_no_encoding no_encoding; /* numeric identifier of this encoding */
|
||||
const char *name; /* NOTE(review): presumably the canonical encoding name -- confirm against the table in mbfilter.c */
|
||||
const char *mime_name; /* NOTE(review): presumably the MIME charset name; may be what mbfl_no2preferred_mime_name() returns -- confirm */
|
||||
const char *(*aliases)[]; /* pointer to an array of string pointers; NOTE(review): presumably alternative names, termination convention not visible here */
|
||||
const unsigned char *mblen_table; /* read-only byte table; NOTE(review): presumably maps a lead byte to a character length -- confirm */
|
||||
unsigned int flag; /* bit field; NOTE(review): presumably a combination of the MBFL_ENCTYPE_* bits -- confirm */
|
||||
} mbfl_encoding;
|
||||
|
||||
|
||||
#define MBFL_ENCTYPE_SBCS 0x00000001 /* MBFL_ENCTYPE_*: each value is a distinct single bit, so they can be OR-ed together; NOTE(review): presumably stored in mbfl_encoding.flag */
|
||||
#define MBFL_ENCTYPE_MBCS 0x00000002 /* NOTE(review): SBCS/MBCS presumably mean single-/multi-byte character set -- confirm */
|
||||
#define MBFL_ENCTYPE_WCS2BE 0x00000010 /* NOTE(review): the WCS2/WCS4/MWC2/MWC4 names with BE/LE suffixes presumably denote 2- or 4-byte units in big/little-endian order -- confirm */
|
||||
#define MBFL_ENCTYPE_WCS2LE 0x00000020
|
||||
#define MBFL_ENCTYPE_MWC2BE 0x00000040
|
||||
#define MBFL_ENCTYPE_MWC2LE 0x00000080
|
||||
#define MBFL_ENCTYPE_WCS4BE 0x00000100
|
||||
#define MBFL_ENCTYPE_WCS4LE 0x00000200
|
||||
#define MBFL_ENCTYPE_MWC4BE 0x00000400
|
||||
#define MBFL_ENCTYPE_MWC4LE 0x00000800
|
||||
#define MBFL_ENCTYPE_SHFTCODE 0x00001000 /* NOTE(review): presumably marks encodings that use shift/escape sequences -- confirm */
|
||||
|
||||
/* wchar plane, special character */
|
||||
#define MBFL_WCSPLANE_MASK 0xffff
|
||||
#define MBFL_WCSPLANE_UCS2MAX 0x00010000
|
||||
#define MBFL_WCSPLANE_SUPMIN 0x00010000
|
||||
#define MBFL_WCSPLANE_SUPMAX 0x00200000
|
||||
#define MBFL_WCSPLANE_JIS0208 0x70e10000 /* JIS HEX : 2121h - 7E7Eh */
|
||||
#define MBFL_WCSPLANE_JIS0212 0x70e20000 /* JIS HEX : 2121h - 7E7Eh */
|
||||
#define MBFL_WCSPLANE_WINCP932 0x70e30000 /* JIS HEX : 2121h - 9898h */
|
||||
#define MBFL_WCSPLANE_8859_1 0x70e40000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_2 0x70e50000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_3 0x70e60000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_4 0x70e70000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_5 0x70e80000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_6 0x70e90000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_7 0x70ea0000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_8 0x70eb0000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_9 0x70ec0000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_10 0x70ed0000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_13 0x70ee0000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_14 0x70ef0000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_8859_15 0x70f00000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_KSC5601 0x70f10000 /* 2121h - 7E7Eh */
|
||||
#define MBFL_WCSPLANE_GB2312 0x70f20000 /* 2121h - 7E7Eh */
|
||||
|
||||
#define MBFL_WCSGROUP_MASK 0xffffff
|
||||
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
|
||||
#define MBFL_WCSGROUP_WCHARMAX 0x78000000
|
||||
#define MBFL_WCSGROUP_THROUGH 0x78000000 /* 000000h - FFFFFFh */
|
||||
|
||||
|
||||
/*
|
||||
* string object
|
||||
*/
|
||||
typedef struct _mbfl_string { /* string object taken and returned by the mbfl_* string routines declared in this header */
|
||||
enum mbfl_no_language no_language; /* language number attached to the string */
|
||||
enum mbfl_no_encoding no_encoding; /* encoding number attached to the string */
|
||||
unsigned char *val; /* pointer to the string data */
|
||||
unsigned int len; /* length of val; NOTE(review): presumably counted in bytes, not characters -- confirm */
|
||||
} mbfl_string;
|
||||
|
||||
void mbfl_string_init(mbfl_string *string);
|
||||
|
||||
|
||||
/*
|
||||
* language resolver
|
||||
*/
|
||||
mbfl_language * mbfl_name2language(const char *name);
|
||||
mbfl_language * mbfl_no2language(enum mbfl_no_language no_language);
|
||||
enum mbfl_no_language mbfl_name2no_language(const char *name);
|
||||
|
||||
|
||||
/*
|
||||
* encoding resolver
|
||||
*/
|
||||
mbfl_encoding * mbfl_name2encoding(const char *name);
|
||||
mbfl_encoding * mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
|
||||
enum mbfl_no_encoding mbfl_name2no_encoding(const char *name);
|
||||
const char * mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);
|
||||
const char * mbfl_no2preferred_mime_name(enum mbfl_no_encoding no_encoding);
|
||||
int mbfl_is_support_encoding(const char *name);
|
||||
|
||||
|
||||
/*
|
||||
* memory output function
|
||||
*/
|
||||
#define MBFL_MEMORY_DEVICE_ALLOC_SIZE 64
|
||||
|
||||
typedef struct _mbfl_memory_device { /* byte-buffer state used by the mbfl_memory_device_* functions */
|
||||
unsigned char *buffer; /* pointer to the byte storage */
|
||||
int length; /* NOTE(review): presumably the allocated capacity of buffer -- confirm in mbfilter.c */
|
||||
int pos; /* NOTE(review): presumably the current write position / bytes used -- confirm */
|
||||
int allocsz; /* NOTE(review): presumably the growth increment (cf. MBFL_MEMORY_DEVICE_ALLOC_SIZE) -- confirm */
|
||||
} mbfl_memory_device;
|
||||
|
||||
typedef struct _mbfl_wchar_device { /* same field layout as mbfl_memory_device, but buffer holds unsigned int elements */
|
||||
unsigned int *buffer; /* pointer to the unsigned int storage */
|
||||
int length; /* NOTE(review): presumably the allocated capacity of buffer, in elements -- confirm in mbfilter.c */
|
||||
int pos; /* NOTE(review): presumably the current write position / elements used -- confirm */
|
||||
int allocsz; /* NOTE(review): presumably the growth increment -- confirm */
|
||||
} mbfl_wchar_device;
|
||||
|
||||
void mbfl_memory_device_init(mbfl_memory_device *device, int initsz, int allocsz);
|
||||
void mbfl_memory_device_realloc(mbfl_memory_device *device, int initsz, int allocsz);
|
||||
void mbfl_memory_device_clear(mbfl_memory_device *device);
|
||||
void mbfl_memory_device_reset(mbfl_memory_device *device);
|
||||
mbfl_string * mbfl_memory_device_result(mbfl_memory_device *device, mbfl_string *result);
|
||||
int mbfl_memory_device_output(int c, void *data);
|
||||
int mbfl_memory_device_output2(int c, void *data);
|
||||
int mbfl_memory_device_output4(int c, void *data);
|
||||
int mbfl_memory_device_strcat(mbfl_memory_device *device, const char *psrc);
|
||||
int mbfl_memory_device_strncat(mbfl_memory_device *device, const char *psrc, int len);
|
||||
int mbfl_memory_device_devcat(mbfl_memory_device *dest, mbfl_memory_device *src);
|
||||
|
||||
void mbfl_wchar_device_init(mbfl_wchar_device *device);
|
||||
int mbfl_wchar_device_output(int c, void *data);
|
||||
|
||||
|
||||
/*
|
||||
* convert filter
|
||||
*/
|
||||
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0
|
||||
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1
|
||||
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2
|
||||
|
||||
typedef struct _mbfl_convert_filter mbfl_convert_filter;
|
||||
|
||||
struct _mbfl_convert_filter { /* state and callbacks of one conversion filter; the first four members have the same types as the last four of struct mbfl_convert_vtbl */
|
||||
void (*filter_ctor)(mbfl_convert_filter *filter); /* constructor hook, receives the filter itself */
|
||||
void (*filter_dtor)(mbfl_convert_filter *filter); /* destructor hook, receives the filter itself */
|
||||
int (*filter_function)(int c, mbfl_convert_filter *filter); /* per-unit callback: takes one int value c plus the filter */
|
||||
int (*filter_flush)(mbfl_convert_filter *filter); /* flush callback for the filter itself */
|
||||
int (*output_function)(int c, void *data); /* downstream sink: takes one int value c plus an opaque pointer (same shape as mbfl_memory_device_output) */
|
||||
int (*flush_function)(void *data); /* downstream flush callback, takes an opaque pointer */
|
||||
void *data; /* opaque pointer; NOTE(review): presumably the value handed to output_function/flush_function -- confirm */
|
||||
int status; /* NOTE(review): presumably per-filter state-machine state -- confirm in the filter implementations */
|
||||
int cache; /* NOTE(review): presumably partial input held between calls -- confirm */
|
||||
mbfl_encoding *from; /* descriptor of the source encoding */
|
||||
mbfl_encoding *to; /* descriptor of the destination encoding */
|
||||
int illegal_mode; /* NOTE(review): presumably one of MBFL_OUTPUTFILTER_ILLEGAL_MODE_* -- confirm */
|
||||
int illegal_substchar; /* NOTE(review): presumably the substitute emitted for unconvertible input -- confirm */
|
||||
};
|
||||
|
||||
struct mbfl_convert_vtbl { /* table entry pairing a (from, to) encoding pair with the callbacks of a conversion filter */
|
||||
enum mbfl_no_encoding from; /* source encoding number */
|
||||
enum mbfl_no_encoding to; /* destination encoding number */
|
||||
void (*filter_ctor)(mbfl_convert_filter *filter); /* same signature as mbfl_convert_filter.filter_ctor */
|
||||
void (*filter_dtor)(mbfl_convert_filter *filter); /* same signature as mbfl_convert_filter.filter_dtor */
|
||||
int (*filter_function)(int c, mbfl_convert_filter *filter); /* same signature as mbfl_convert_filter.filter_function */
|
||||
int (*filter_flush)(mbfl_convert_filter *filter); /* same signature as mbfl_convert_filter.filter_flush */
|
||||
};
|
||||
|
||||
mbfl_convert_filter *
|
||||
mbfl_convert_filter_new(
|
||||
enum mbfl_no_encoding from,
|
||||
enum mbfl_no_encoding to,
|
||||
int (*output_function)(int, void *),
|
||||
int (*flush_function)(void *),
|
||||
void *data);
|
||||
void mbfl_convert_filter_delete(mbfl_convert_filter *filter);
|
||||
int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_convert_filter_flush(mbfl_convert_filter *filter);
|
||||
void mbfl_convert_filter_reset(mbfl_convert_filter *filter, enum mbfl_no_encoding from, enum mbfl_no_encoding to);
|
||||
void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dist);
|
||||
int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter);
|
||||
|
||||
|
||||
/*
|
||||
* identify filter
|
||||
*/
|
||||
typedef struct _mbfl_identify_filter mbfl_identify_filter;
|
||||
|
||||
struct _mbfl_identify_filter { /* state and callbacks of one identify filter, tied to a single encoding descriptor */
|
||||
void (*filter_ctor)(mbfl_identify_filter *filter); /* constructor hook, receives the filter itself */
|
||||
void (*filter_dtor)(mbfl_identify_filter *filter); /* destructor hook, receives the filter itself */
|
||||
int (*filter_function)(int c, mbfl_identify_filter *filter); /* per-unit callback: takes one int value c plus the filter */
|
||||
int status; /* NOTE(review): presumably per-filter state-machine state -- confirm */
|
||||
int flag; /* NOTE(review): presumably set when the input cannot be this encoding -- confirm in mbfilter.c */
|
||||
int score; /* NOTE(review): presumably a ranking value used when judging candidates -- confirm */
|
||||
mbfl_encoding *encoding; /* descriptor of the encoding this filter is associated with */
|
||||
};
|
||||
|
||||
struct mbfl_identify_vtbl { /* table entry pairing an encoding number with the callbacks of an identify filter */
|
||||
enum mbfl_no_encoding encoding; /* encoding number this entry applies to */
|
||||
void (*filter_ctor)(mbfl_identify_filter *filter); /* same signature as mbfl_identify_filter.filter_ctor */
|
||||
void (*filter_dtor)(mbfl_identify_filter *filter); /* same signature as mbfl_identify_filter.filter_dtor */
|
||||
int (*filter_function)(int c, mbfl_identify_filter *filter); /* same signature as mbfl_identify_filter.filter_function */
|
||||
};
|
||||
|
||||
mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding);
|
||||
void mbfl_identify_filter_delete(mbfl_identify_filter *filter);
|
||||
|
||||
|
||||
/*
|
||||
* buffering converter
|
||||
*/
|
||||
typedef struct _mbfl_buffer_converter mbfl_buffer_converter;
|
||||
|
||||
struct _mbfl_buffer_converter { /* converter object used by the mbfl_buffer_converter_* functions: up to two filters plus an embedded memory device */
|
||||
mbfl_convert_filter *filter1; /* first conversion filter; NOTE(review): how filter1 and filter2 are chained is not visible here -- confirm in mbfilter.c */
|
||||
mbfl_convert_filter *filter2; /* second conversion filter; NOTE(review): presumably optional -- confirm */
|
||||
mbfl_memory_device device; /* memory device embedded by value; NOTE(review): presumably collects the converted output -- confirm */
|
||||
mbfl_encoding *from; /* descriptor of the source encoding */
|
||||
mbfl_encoding *to; /* descriptor of the destination encoding */
|
||||
};
|
||||
|
||||
mbfl_buffer_converter * mbfl_buffer_converter_new(enum mbfl_no_encoding from, enum mbfl_no_encoding to, int buf_initsz);
|
||||
void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd);
|
||||
void mbfl_buffer_converter_reset(mbfl_buffer_converter *convd);
|
||||
int mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode);
|
||||
int mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar);
|
||||
int mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n);
|
||||
int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string);
|
||||
mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result);
|
||||
mbfl_string * mbfl_buffer_converter_feed_getbuffer(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
|
||||
mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
|
||||
|
||||
|
||||
/*
|
||||
* encoding detector
|
||||
*/
|
||||
typedef struct _mbfl_encoding_detector mbfl_encoding_detector;
|
||||
|
||||
struct _mbfl_encoding_detector { /* detector object used by the mbfl_encoding_detector_* functions */
|
||||
mbfl_identify_filter **filter_list; /* array of pointers to identify filters */
|
||||
int filter_list_size; /* NOTE(review): presumably the number of entries in filter_list -- confirm */
|
||||
};
|
||||
|
||||
mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int eliztsz);
|
||||
void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd);
|
||||
int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string);
|
||||
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd);
|
||||
|
||||
|
||||
/*
|
||||
* encoding converter
|
||||
*/
|
||||
mbfl_string *
|
||||
mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_encoding toenc);
|
||||
|
||||
|
||||
/*
|
||||
* identify encoding
|
||||
*/
|
||||
mbfl_encoding *
|
||||
mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz);
|
||||
|
||||
const char *
|
||||
mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz);
|
||||
|
||||
enum mbfl_no_encoding
|
||||
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz);
|
||||
|
||||
/*
|
||||
* strlen
|
||||
*/
|
||||
int
|
||||
mbfl_strlen(mbfl_string *string);
|
||||
|
||||
/*
|
||||
* strpos
|
||||
*/
|
||||
int
|
||||
mbfl_strpos(mbfl_string *haystack, mbfl_string *needle, int offset, int reverse);
|
||||
|
||||
/*
|
||||
* substr
|
||||
*/
|
||||
mbfl_string *
|
||||
mbfl_substr(mbfl_string *string, mbfl_string *result, int from, int length);
|
||||
|
||||
/*
|
||||
* strcut
|
||||
*/
|
||||
mbfl_string *
|
||||
mbfl_strcut(mbfl_string *string, mbfl_string *result, int from, int length);
|
||||
|
||||
/*
|
||||
* strwidth
|
||||
*/
|
||||
int
|
||||
mbfl_strwidth(mbfl_string *string);
|
||||
|
||||
/*
|
||||
* strimwidth
|
||||
*/
|
||||
mbfl_string *
|
||||
mbfl_strimwidth(mbfl_string *string, mbfl_string *marker, mbfl_string *result, int from, int width);
|
||||
|
||||
/*
|
||||
* MIME header encode
|
||||
*/
|
||||
struct mime_header_encoder_data; /* forward declaration */
|
||||
|
||||
struct mime_header_encoder_data *
|
||||
mime_header_encoder_new(
|
||||
enum mbfl_no_encoding incode,
|
||||
enum mbfl_no_encoding outcode,
|
||||
enum mbfl_no_encoding encoding);
|
||||
|
||||
void
|
||||
mime_header_encoder_delete(struct mime_header_encoder_data *pe);
|
||||
|
||||
int
|
||||
mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe);
|
||||
|
||||
mbfl_string *
|
||||
mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result);
|
||||
|
||||
mbfl_string *
|
||||
mbfl_mime_header_encode(
|
||||
mbfl_string *string, mbfl_string *result,
|
||||
enum mbfl_no_encoding outcode,
|
||||
enum mbfl_no_encoding encoding,
|
||||
const char *linefeed,
|
||||
int indent);
|
||||
|
||||
/*
|
||||
* MIME header decode
|
||||
*/
|
||||
struct mime_header_decoder_data; /* forward declaration */
|
||||
|
||||
struct mime_header_decoder_data *
|
||||
mime_header_decoder_new(enum mbfl_no_encoding outcode);
|
||||
|
||||
void
|
||||
mime_header_decoder_delete(struct mime_header_decoder_data *pd);
|
||||
|
||||
int
|
||||
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd);
|
||||
|
||||
mbfl_string *
|
||||
mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result);
|
||||
|
||||
mbfl_string *
|
||||
mbfl_mime_header_decode(
|
||||
mbfl_string *string,
|
||||
mbfl_string *result,
|
||||
enum mbfl_no_encoding outcode);
|
||||
|
||||
|
||||
/*
|
||||
* convert HTML numeric entity
|
||||
*/
|
||||
mbfl_string *
|
||||
mbfl_html_numeric_entity(mbfl_string *string, mbfl_string *result, int *convmap, int mapsize, int type);
|
||||
|
||||
|
||||
/*
|
||||
* conversion between halfwidth and fullwidth for Japanese
|
||||
*/
|
||||
mbfl_string *
|
||||
mbfl_ja_jp_hantozen(mbfl_string *string, mbfl_string *result, int mode);
|
||||
|
||||
#endif /* MBFL_MBFILTER_H */
|
7331
ext/mbstring/mbfilter_ja.c
Normal file
7331
ext/mbstring/mbfilter_ja.c
Normal file
File diff suppressed because it is too large
Load Diff
95
ext/mbstring/mbfilter_ja.h
Normal file
95
ext/mbstring/mbfilter_ja.h
Normal file
@ -0,0 +1,95 @@
|
||||
/* charset=UTF-8 */
|
||||
|
||||
/*
|
||||
* "streamable kanji code filter and converter"
|
||||
*
|
||||
* Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
|
||||
*
|
||||
* This software is released under the GNU Lesser General Public License.
|
||||
* Please read the following detail of the licence (in japanese).
|
||||
*
|
||||
* ◆使用許諾条件◆
|
||||
*
|
||||
* このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
|
||||
* ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
|
||||
* するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
|
||||
* ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
|
||||
* をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
|
||||
* することはできません。
|
||||
*
|
||||
* このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
|
||||
* 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
|
||||
* General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
|
||||
* による許諾を得る必要があります。
|
||||
*
|
||||
* 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
|
||||
* ます。「GNU Lesser General Public License」とは、これまでLibrary General
|
||||
* Public Licenseと呼ばれていたものです。
|
||||
* http://www.gnu.org/ --- GNUウェブサイト
|
||||
* http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
|
||||
* このライセンスの内容がわからない方、守れない方には使用を許諾しません。
|
||||
*
|
||||
* しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
|
||||
* はありません。
|
||||
*
|
||||
* ◆保証内容◆
|
||||
*
|
||||
* このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
|
||||
* 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
|
||||
* のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
|
||||
* 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
|
||||
* る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
|
||||
* 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
|
||||
* 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
|
||||
* 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
|
||||
* 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
|
||||
* 契約・規定に優先します。
|
||||
*
|
||||
* ◆著作権者の連絡先および使用条件についての問い合わせ先◆
|
||||
*
|
||||
* 〒112-0004東京都文京区後楽1-1-13小野ビル7F
|
||||
* 株式会社ハッピーサイズ
|
||||
* Phone: 03-5803-2964, Fax: 03-5803-2965
|
||||
* http://www.happysize.co.jp/ mailto:info@happysize.co.jp
|
||||
*
|
||||
* ◆著者◆
|
||||
*
|
||||
* 金本 茂 <sgk@happysize.co.jp>
|
||||
*
|
||||
* ◆履歴◆
|
||||
*
|
||||
* 1998/11/10 sgk implementation in C++
|
||||
* 1999/4/25 sgk Cで書きなおし。
|
||||
* 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
|
||||
* 1999/6/?? Unicodeサポート。
|
||||
* 1999/6/22 sgk ライセンスをLGPLに変更。
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Unicode support
|
||||
*
|
||||
* Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
|
||||
* All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifndef MBFL_MBFILTER_JA_H
|
||||
#define MBFL_MBFILTER_JA_H
|
||||
|
||||
int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
|
||||
|
||||
#endif /* MBFL_MBFILTER_JA_H */
|
2457
ext/mbstring/mbstring.c
Normal file
2457
ext/mbstring/mbstring.c
Normal file
File diff suppressed because it is too large
Load Diff
150
ext/mbstring/mbstring.h
Normal file
150
ext/mbstring/mbstring.h
Normal file
@ -0,0 +1,150 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP version 4.0 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2001 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 2.02 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available at through the world-wide-web at |
|
||||
| http://www.php.net/license/2_02.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Authors: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
/*
|
||||
* PHP4 Multibyte String module "mbstring" (currently only for Japanese)
|
||||
*
|
||||
* History:
|
||||
* 2000.5.19 Release php-4.0RC2_jstring-1.0
|
||||
* 2001.4.1 Release php4_jstring-1.0.91
|
||||
* 2001.4.30 Release php4-jstring-1.1 (contribute to The PHP Group)
|
||||
* 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
|
||||
*/
|
||||
|
||||
/*
|
||||
* PHP3 Internationalization support program.
|
||||
*
|
||||
* Copyright (c) 1999,2000 by the PHP3 internationalization team.
|
||||
* All rights reserved.
|
||||
*
|
||||
* See README_PHP3-i18n-ja for more detail.
|
||||
*
|
||||
* Authors:
|
||||
* Hironori Sato <satoh@jpnnet.com>
|
||||
* Shigeru Kanemoto <sgk@happysize.co.jp>
|
||||
* Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _MBSTRING_H
|
||||
#define _MBSTRING_H
|
||||
|
||||
#ifdef COMPILE_DL_MBSTRING
|
||||
#undef HAVE_MBSTRING
|
||||
#define HAVE_MBSTRING 1
|
||||
#endif
|
||||
|
||||
#if HAVE_MBSTRING
|
||||
|
||||
#include "mbfilter.h"
|
||||
|
||||
extern zend_module_entry mbstring_module_entry;
|
||||
#define mbstring_module_ptr (&mbstring_module_entry) /* address of the module entry; parenthesized so that postfix operators applied to the macro (->, [], ()) bind to the pointer rather than to mbstring_module_entry */
|
||||
|
||||
extern PHP_MINIT_FUNCTION(mbstring);
|
||||
extern PHP_MSHUTDOWN_FUNCTION(mbstring);
|
||||
extern PHP_RINIT_FUNCTION(mbstring);
|
||||
extern PHP_RSHUTDOWN_FUNCTION(mbstring);
|
||||
PHP_MINFO_FUNCTION(mbstring);
|
||||
|
||||
/* php function registration */
|
||||
PHP_FUNCTION(mbstr_language);
|
||||
PHP_FUNCTION(mbstr_internal_encoding);
|
||||
PHP_FUNCTION(mbstr_http_input);
|
||||
PHP_FUNCTION(mbstr_http_output);
|
||||
PHP_FUNCTION(mbstr_detect_order);
|
||||
PHP_FUNCTION(mbstr_substitute_character);
|
||||
PHP_FUNCTION(mbstr_preferred_mime_name);
|
||||
PHP_FUNCTION(mbstr_gpc_handler);
|
||||
PHP_FUNCTION(mbstr_output_handler);
|
||||
PHP_FUNCTION(mbstr_strlen);
|
||||
PHP_FUNCTION(mbstr_strpos);
|
||||
PHP_FUNCTION(mbstr_strrpos);
|
||||
PHP_FUNCTION(mbstr_substr);
|
||||
PHP_FUNCTION(mbstr_strcut);
|
||||
PHP_FUNCTION(mbstr_strwidth);
|
||||
PHP_FUNCTION(mbstr_strimwidth);
|
||||
PHP_FUNCTION(mbstr_convert_encoding);
|
||||
PHP_FUNCTION(mbstr_detect_encoding);
|
||||
PHP_FUNCTION(mbstr_convert_kana);
|
||||
PHP_FUNCTION(mbstr_encode_mimeheader);
|
||||
PHP_FUNCTION(mbstr_decode_mimeheader);
|
||||
PHP_FUNCTION(mbstr_convert_variables);
|
||||
PHP_FUNCTION(mbstr_encode_numericentity);
|
||||
PHP_FUNCTION(mbstr_decode_numericentity);
|
||||
PHP_FUNCTION(mbstr_send_mail);
|
||||
|
||||
ZEND_BEGIN_MODULE_GLOBALS(mbstring) /* module-wide state for mbstring; fields are reached through the MBSTRG() macro defined below */
|
||||
int language; /* NOTE(review): the plain fields look like configured defaults and the current_* fields like working copies -- confirm in mbstring.c */
|
||||
int current_language;
|
||||
int internal_encoding;
|
||||
int current_internal_encoding;
|
||||
int http_output_encoding;
|
||||
int current_http_output_encoding;
|
||||
int http_input_identify; /* NOTE(review): presumably the encoding detected for HTTP input, with per-source variants below -- confirm */
|
||||
int http_input_identify_get;
|
||||
int http_input_identify_post;
|
||||
int http_input_identify_cookie;
|
||||
int *http_input_list; /* int array, paired with http_input_list_size */
|
||||
int http_input_list_size;
|
||||
int *detect_order_list; /* int array, paired with detect_order_list_size */
|
||||
int detect_order_list_size;
|
||||
int *current_detect_order_list; /* int array, paired with current_detect_order_list_size */
|
||||
int current_detect_order_list_size;
|
||||
int filter_illegal_mode; /* NOTE(review): presumably one of MBFL_OUTPUTFILTER_ILLEGAL_MODE_* from mbfilter.h -- confirm */
|
||||
int filter_illegal_substchar;
|
||||
int current_filter_illegal_mode;
|
||||
int current_filter_illegal_substchar;
|
||||
mbfl_buffer_converter *outconv; /* pointer to a buffer converter; NOTE(review): presumably used for output conversion -- confirm */
|
||||
ZEND_END_MODULE_GLOBALS(mbstring);
|
||||
|
||||
|
||||
#ifdef ZTS /* ZTS build: globals are reached through a local pointer named mbstring_globals */
|
||||
#define MBSTRLS_D zend_mbstring_globals *mbstring_globals /* parameter declaration for the globals pointer */
|
||||
#define MBSTRLS_DC , MBSTRLS_D /* same declaration with a leading comma, for use after other parameters */
|
||||
#define MBSTRLS_C mbstring_globals /* argument form: passes the pointer along */
|
||||
#define MBSTRLS_CC , MBSTRLS_C /* same argument with a leading comma */
|
||||
#define MBSTRG(v) (mbstring_globals->v) /* access field v through the pointer */
|
||||
#define MBSTRLS_FETCH() zend_mbstring_globals *mbstring_globals = ts_resource(mbstring_globals_id) /* declares the local pointer and initializes it from ts_resource() */
|
||||
#else /* non-ZTS build: the pass-along macros expand to nothing and fields are read from an object named mbstring_globals */
|
||||
#define MBSTRLS_D
|
||||
#define MBSTRLS_DC
|
||||
#define MBSTRLS_C
|
||||
#define MBSTRLS_CC
|
||||
#define MBSTRG(v) (mbstring_globals.v) /* access field v directly on the object */
|
||||
#define MBSTRLS_FETCH()
|
||||
#endif
|
||||
|
||||
#else /* HAVE_MBSTRING */
|
||||
|
||||
#define mbstring_module_ptr NULL
|
||||
|
||||
#endif /* HAVE_MBSTRING */
|
||||
|
||||
#define phpext_mbstring_ptr mbstring_module_ptr
|
||||
|
||||
#endif /* _MBSTRING_H */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
*/
|
@ -53,8 +53,8 @@ extern void php_call_shutdown_functions(void);
|
||||
extern int php_init_environ(void);
|
||||
extern int php_shutdown_environ(void);
|
||||
|
||||
#if defined(JSTR_ENC_TRANS)
|
||||
#define php_treat_data jstr_treat_data
|
||||
#if defined(MBSTR_ENC_TRANS)
|
||||
#define php_treat_data mbstr_treat_data
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user