mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-12-14 14:33:40 +08:00
ISO_8859_1.java, [...]: New files.
2002-11-11 Jesse Rosenstock <jmr@ugcs.caltech.edu> * gnu/java/nio/charset/ISO_8859_1.java, gnu/java/nio/charset/Provider.java, gnu/java/nio/charset/US_ASCII.java, gnu/java/nio/charset/UTF_16.java, gnu/java/nio/charset/UTF_16BE.java, gnu/java/nio/charset/UTF_16Decoder.java, gnu/java/nio/charset/UTF_16Encoder.java, gnu/java/nio/charset/UTF_16LE.java, gnu/java/nio/charset/UTF_8.java: New files. * Makefile.am (): Added new files. * Makefile.in: Regenerated. From-SVN: r59013
This commit is contained in:
parent
ac7bc6bb2f
commit
8a423d779f
@ -1,3 +1,18 @@
|
||||
2002-11-11 Jesse Rosenstock <jmr@ugcs.caltech.edu>
|
||||
|
||||
* gnu/java/nio/charset/ISO_8859_1.java,
|
||||
gnu/java/nio/charset/Provider.java,
|
||||
gnu/java/nio/charset/US_ASCII.java,
|
||||
gnu/java/nio/charset/UTF_16.java,
|
||||
gnu/java/nio/charset/UTF_16BE.java,
|
||||
gnu/java/nio/charset/UTF_16Decoder.java,
|
||||
gnu/java/nio/charset/UTF_16Encoder.java,
|
||||
gnu/java/nio/charset/UTF_16LE.java,
|
||||
gnu/java/nio/charset/UTF_8.java: New files.
|
||||
* Makefile.am ():
|
||||
Added new files.
|
||||
* Makefile.in: Regenerated.
|
||||
|
||||
2002-11-11 Michael Koch <konqueror@gmx.de>
|
||||
|
||||
* java/nio/charset/CharacterCodingException.java:
|
||||
|
132
libjava/gnu/java/nio/charset/ISO_8859_1.java
Normal file
132
libjava/gnu/java/nio/charset/ISO_8859_1.java
Normal file
@ -0,0 +1,132 @@
|
||||
/* ISO_8859_1.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* ISO-8859-1 charset.
|
||||
*
|
||||
* @author Jesse Rosenstock
|
||||
*/
|
||||
final class ISO_8859_1 extends Charset
|
||||
{
|
||||
ISO_8859_1 ()
|
||||
{
|
||||
super ("ISO-8859-1", new String[]{"ISO-LATIN-1"});
|
||||
}
|
||||
|
||||
public boolean contains (Charset cs)
|
||||
{
|
||||
return cs instanceof US_ASCII || cs instanceof ISO_8859_1;
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder ()
|
||||
{
|
||||
return new Decoder (this);
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder ()
|
||||
{
|
||||
return new Encoder (this);
|
||||
}
|
||||
|
||||
private static final class Decoder extends CharsetDecoder
|
||||
{
|
||||
private Decoder (Charset cs)
|
||||
{
|
||||
super (cs, 1.0f, 1.0f);
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
|
||||
{
|
||||
// TODO: Optimize this in the case in.hasArray() / out.hasArray()
|
||||
while (in.hasRemaining ())
|
||||
{
|
||||
byte b = in.get ();
|
||||
|
||||
if (!out.hasRemaining ())
|
||||
{
|
||||
in.position (in.position () - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
|
||||
out.put ((char) (b & 0xFF));
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class Encoder extends CharsetEncoder
|
||||
{
|
||||
private Encoder (Charset cs)
|
||||
{
|
||||
super (cs, 1.0f, 1.0f);
|
||||
}
|
||||
|
||||
protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
|
||||
{
|
||||
// TODO: Optimize this in the case in.hasArray() / out.hasArray()
|
||||
while (in.hasRemaining ())
|
||||
{
|
||||
char c = in.get ();
|
||||
|
||||
if (c > 0xFF)
|
||||
{
|
||||
in.position (in.position () - 1);
|
||||
return CoderResult.unmappableForLength (1);
|
||||
}
|
||||
if (!out.hasRemaining ())
|
||||
{
|
||||
in.position (in.position () - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
|
||||
out.put ((byte) c);
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
}
|
135
libjava/gnu/java/nio/charset/Provider.java
Normal file
135
libjava/gnu/java/nio/charset/Provider.java
Normal file
@ -0,0 +1,135 @@
|
||||
/* Provider.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.spi.CharsetProvider;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Charset provider for the required charsets. Used by
|
||||
* {@link Charset#charsetForName} and * {@link Charset#availableCharsets}.
|
||||
*
|
||||
* @author Jesse Rosenstock
|
||||
* @see Charset
|
||||
*/
|
||||
public final class Provider extends CharsetProvider
|
||||
{
|
||||
private static Provider singleton;
|
||||
|
||||
static
|
||||
{
|
||||
synchronized (Provider.class)
|
||||
{
|
||||
singleton = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Map from charset name to charset canonical name.
|
||||
*/
|
||||
private final HashMap canonicalNames;
|
||||
|
||||
/**
|
||||
* Map from canonical name to Charset.
|
||||
* TODO: We may want to use soft references. We would then need to keep
|
||||
* track of the class name to regenerate the object.
|
||||
*/
|
||||
private final HashMap charsets;
|
||||
|
||||
private Provider ()
|
||||
{
|
||||
// FIXME: We might need to make the name comparison case insensitive.
|
||||
// Verify this with the Sun JDK.
|
||||
canonicalNames = new HashMap ();
|
||||
charsets = new HashMap ();
|
||||
|
||||
// US-ASCII aka ISO646-US
|
||||
addCharset (new US_ASCII ());
|
||||
|
||||
// ISO-8859-1 aka ISO-LATIN-1
|
||||
addCharset (new ISO_8859_1 ());
|
||||
|
||||
// UTF-8
|
||||
addCharset (new UTF_8 ());
|
||||
|
||||
// UTF-16BE
|
||||
addCharset (new UTF_16BE ());
|
||||
|
||||
// UTF-16LE
|
||||
addCharset (new UTF_16LE ());
|
||||
|
||||
// UTF-16
|
||||
addCharset (new UTF_16 ());
|
||||
}
|
||||
|
||||
public Iterator charsets ()
|
||||
{
|
||||
return Collections.unmodifiableCollection (charsets.values ())
|
||||
.iterator ();
|
||||
}
|
||||
|
||||
public Charset charsetForName (String charsetName)
|
||||
{
|
||||
return (Charset) charsets.get (canonicalize (charsetName));
|
||||
}
|
||||
|
||||
private Object canonicalize (String charsetName)
|
||||
{
|
||||
Object o = canonicalNames.get (charsetName);
|
||||
return o == null ? charsetName : o;
|
||||
}
|
||||
|
||||
private void addCharset (Charset cs)
|
||||
{
|
||||
String canonicalName = cs.name ();
|
||||
charsets.put (canonicalName, cs);
|
||||
|
||||
for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
|
||||
canonicalNames.put (i.next (), canonicalName);
|
||||
}
|
||||
|
||||
public static synchronized Provider provider ()
|
||||
{
|
||||
if (singleton == null)
|
||||
singleton = new Provider ();
|
||||
return singleton;
|
||||
}
|
||||
}
|
137
libjava/gnu/java/nio/charset/US_ASCII.java
Normal file
137
libjava/gnu/java/nio/charset/US_ASCII.java
Normal file
@ -0,0 +1,137 @@
|
||||
/* US_ASCII.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
 * The US-ASCII charset.  Only the seven-bit values 0x00-0x7F are valid,
 * both as bytes when decoding and as chars when encoding.
 *
 * @author Jesse Rosenstock
 */
final class US_ASCII extends Charset
{
  /**
   * Creates the charset with canonical name "US-ASCII" and the single
   * alias "ISO646-US".
   */
  US_ASCII ()
  {
    super ("US-ASCII", new String[]{"ISO646-US"});
  }

  /**
   * Reports whether <code>cs</code> is contained in US-ASCII; this
   * implementation only recognises US-ASCII itself.
   */
  public boolean contains (Charset cs)
  {
    return cs instanceof US_ASCII;
  }

  /** Returns a fresh decoder bound to this charset. */
  public CharsetDecoder newDecoder ()
  {
    return new Decoder (this);
  }

  /** Returns a fresh encoder bound to this charset. */
  public CharsetEncoder newEncoder ()
  {
    return new Encoder (this);
  }

  /** Byte-to-char decoder: one char per input byte. */
  private static final class Decoder extends CharsetDecoder
  {
    private Decoder (Charset cs)
    {
      super (cs, 1.0f, 1.0f);
    }

    /**
     * Copies bytes to chars one at a time.  A byte with the high bit set
     * is reported as malformed (length 1); a full output buffer is
     * reported as OVERFLOW.  In both cases the offending byte is left
     * unread.
     */
    protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
    {
      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
      while (in.hasRemaining ())
        {
          // Peek without consuming; the byte is consumed only once we
          // know it is valid and can be written.
          int at = in.position ();
          byte b = in.get (at);

          // Negative means the high bit is set, i.e. not seven-bit ASCII.
          if (b < 0)
            return CoderResult.malformedForLength (1);
          if (!out.hasRemaining ())
            return CoderResult.OVERFLOW;

          in.position (at + 1);
          out.put ((char) b);
        }

      return CoderResult.UNDERFLOW;
    }
  }

  /** Char-to-byte encoder: one byte per input char. */
  private static final class Encoder extends CharsetEncoder
  {
    private Encoder (Charset cs)
    {
      super (cs, 1.0f, 1.0f);
    }

    /**
     * Copies chars to bytes one at a time.  A char above 0x7F is reported
     * as unmappable (length 1); a full output buffer is reported as
     * OVERFLOW.  In both cases the offending char is left unread.
     */
    protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
    {
      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
      while (in.hasRemaining ())
        {
          int at = in.position ();
          char c = in.get (at);

          if (c > Byte.MAX_VALUE)
            return CoderResult.unmappableForLength (1);
          if (!out.hasRemaining ())
            return CoderResult.OVERFLOW;

          in.position (at + 1);
          out.put ((byte) c);
        }

      return CoderResult.UNDERFLOW;
    }
  }
}
|
75
libjava/gnu/java/nio/charset/UTF_16.java
Normal file
75
libjava/gnu/java/nio/charset/UTF_16.java
Normal file
@ -0,0 +1,75 @@
|
||||
/* UTF_16.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* UTF-16 charset.
|
||||
*
|
||||
* @author Jesse Rosenstock
|
||||
*/
|
||||
final class UTF_16 extends Charset
|
||||
{
|
||||
UTF_16 ()
|
||||
{
|
||||
super ("UTF-16", null);
|
||||
}
|
||||
|
||||
public boolean contains (Charset cs)
|
||||
{
|
||||
return cs instanceof US_ASCII || cs instanceof ISO_8859_1
|
||||
|| cs instanceof UTF_8 || cs instanceof UTF_16BE
|
||||
|| cs instanceof UTF_16LE || cs instanceof UTF_16;
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder ()
|
||||
{
|
||||
return new UTF_16Decoder (this, UTF_16Decoder.UNKNOWN_ENDIAN);
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder ()
|
||||
{
|
||||
return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, false);
|
||||
}
|
||||
}
|
75
libjava/gnu/java/nio/charset/UTF_16BE.java
Normal file
75
libjava/gnu/java/nio/charset/UTF_16BE.java
Normal file
@ -0,0 +1,75 @@
|
||||
/* UTF_16BE.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* UTF-16BE charset.
|
||||
*
|
||||
* @author Jesse Rosenstock
|
||||
*/
|
||||
final class UTF_16BE extends Charset
|
||||
{
|
||||
UTF_16BE ()
|
||||
{
|
||||
super ("UTF-16BE", null);
|
||||
}
|
||||
|
||||
public boolean contains (Charset cs)
|
||||
{
|
||||
return cs instanceof US_ASCII || cs instanceof ISO_8859_1
|
||||
|| cs instanceof UTF_8 || cs instanceof UTF_16BE
|
||||
|| cs instanceof UTF_16LE || cs instanceof UTF_16;
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder ()
|
||||
{
|
||||
return new UTF_16Decoder (this, UTF_16Decoder.BIG_ENDIAN);
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder ()
|
||||
{
|
||||
return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, true);
|
||||
}
|
||||
}
|
169
libjava/gnu/java/nio/charset/UTF_16Decoder.java
Normal file
169
libjava/gnu/java/nio/charset/UTF_16Decoder.java
Normal file
@ -0,0 +1,169 @@
|
||||
/* UTF_16Decoder.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
 * Decoder for UTF-16, UTF-16LE, and UTF-16BE.
 *
 * <p>The decoder is created with a byte order.  With
 * {@link #UNKNOWN_ENDIAN} the first 16-bit unit is inspected: a byte-order
 * mark selects the order and is skipped, anything else makes the decoder
 * assume big endian and is decoded normally.</p>
 *
 * @author Jesse Rosenstock
 */
final class UTF_16Decoder extends CharsetDecoder
{
  // byte orders
  static final int BIG_ENDIAN = 0;
  static final int LITTLE_ENDIAN = 1;
  static final int UNKNOWN_ENDIAN = 2;

  /** U+FEFF read in the correct byte order. */
  private static final char BYTE_ORDER_MARK = '\uFEFF';
  /** What U+FEFF looks like when its two bytes are read swapped. */
  private static final char REVERSED_BYTE_ORDER_MARK = '\uFFFE';

  /** Byte order given at construction; restored by {@link #implReset}. */
  private final int originalByteOrder;
  /** Byte order currently in effect; may be resolved from the input. */
  private int byteOrder;

  /**
   * @param cs the charset this decoder belongs to
   * @param byteOrder one of {@link #BIG_ENDIAN}, {@link #LITTLE_ENDIAN}
   *        or {@link #UNKNOWN_ENDIAN}
   */
  UTF_16Decoder (Charset cs, int byteOrder)
  {
    super (cs, 0.5f, 1.0f);
    this.originalByteOrder = byteOrder;
    this.byteOrder = byteOrder;
  }

  /**
   * Decodes as many complete 16-bit units as possible.
   *
   * <p>On return the input position is just past the last unit that was
   * fully processed: an unpaired or truncated surrogate, or a unit that
   * did not fit into <code>out</code>, is left unread.</p>
   *
   * @return UNDERFLOW when fewer than two bytes (or an incomplete
   *         surrogate pair) remain, OVERFLOW when <code>out</code> has no
   *         room for the next char or surrogate pair, or a malformed
   *         result of length 2 for a lone low surrogate or a high
   *         surrogate not followed by a low surrogate
   */
  protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
  {
    // TODO: Optimize this in the case in.hasArray() / out.hasArray()

    // inPos tracks the committed position; the finally block rewinds
    // the buffer to it no matter how the loop is left.
    int inPos = in.position ();
    try
      {
        while (in.remaining () >= 2)
          {
            byte b1 = in.get ();
            byte b2 = in.get ();

            // handle byte order mark
            if (byteOrder == UNKNOWN_ENDIAN)
              {
                char c = toChar (b1, b2);
                if (c == BYTE_ORDER_MARK)
                  {
                    byteOrder = BIG_ENDIAN;
                    inPos += 2;
                    continue;
                  }
                else if (c == REVERSED_BYTE_ORDER_MARK)
                  {
                    byteOrder = LITTLE_ENDIAN;
                    inPos += 2;
                    continue;
                  }
                else
                  {
                    // assume big endian, do not consume bytes,
                    // continue with normal processing
                    byteOrder = BIG_ENDIAN;
                  }
              }

            char c = byteOrder == BIG_ENDIAN ? toChar (b1, b2)
                                             : toChar (b2, b1);

            if (0xD800 <= c && c <= 0xDFFF)
              {
                // c is a surrogate

                // make sure c is a high surrogate
                if (c > 0xDBFF)
                  return CoderResult.malformedForLength (2);
                if (in.remaining () < 2)
                  return CoderResult.UNDERFLOW;
                byte b3 = in.get ();
                byte b4 = in.get ();
                char d = byteOrder == BIG_ENDIAN ? toChar (b3, b4)
                                                 : toChar (b4, b3);
                // make sure d is a low surrogate
                if (d < 0xDC00 || d > 0xDFFF)
                  return CoderResult.malformedForLength (2);
                // The pair is written atomically, so both chars must fit.
                if (out.remaining () < 2)
                  return CoderResult.OVERFLOW;
                out.put (c);
                out.put (d);
                inPos += 4;
              }
            else
              {
                if (!out.hasRemaining ())
                  return CoderResult.OVERFLOW;
                out.put (c);
                inPos += 2;
              }
          }

        return CoderResult.UNDERFLOW;
      }
    finally
      {
        in.position (inPos);
      }
  }

  /**
   * Combines two bytes into a char.  Both bytes are masked to their
   * unsigned value first; without the mask a byte >= 0x80 would be
   * sign-extended and overwrite the high byte with ones.
   */
  private static char toChar (byte high, byte low)
  {
    return (char) (((high & 0xFF) << 8) | (low & 0xFF));
  }

  /** Forgets any byte order that was detected from the input. */
  protected void implReset ()
  {
    byteOrder = originalByteOrder;
  }
}
|
153
libjava/gnu/java/nio/charset/UTF_16Encoder.java
Normal file
153
libjava/gnu/java/nio/charset/UTF_16Encoder.java
Normal file
@ -0,0 +1,153 @@
|
||||
/* UTF_16Encoder.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
 * Encoder for UTF-16, UTF-16LE, and UTF-16BE.
 *
 * <p>The byte order is fixed at construction.  If requested, a byte-order
 * mark (U+FEFF in that byte order) is written once before the first
 * encoded char and again after each reset.</p>
 *
 * @author Jesse Rosenstock
 */
final class UTF_16Encoder extends CharsetEncoder
{
  // byte orders
  static final int BIG_ENDIAN = 0;
  static final int LITTLE_ENDIAN = 1;

  private static final char BYTE_ORDER_MARK = '\uFEFF';

  /** Byte order used for every 16-bit unit written. */
  private final int byteOrder;
  /** Whether this encoder writes a byte-order mark at all. */
  private final boolean useByteOrderMark;
  /** Whether the byte-order mark is still to be written. */
  private boolean needsByteOrderMark;

  /**
   * @param cs the charset this encoder belongs to
   * @param byteOrder {@link #BIG_ENDIAN} or {@link #LITTLE_ENDIAN}
   * @param useByteOrderMark whether to start the output with a
   *        byte-order mark
   */
  UTF_16Encoder (Charset cs, int byteOrder, boolean useByteOrderMark)
  {
    // Worst case per char is 2 bytes, plus 2 more for the byte-order
    // mark.  The replacement is U+FFFD in the chosen byte order.
    super (cs, 2.0f,
           useByteOrderMark ? 4.0f : 2.0f,
           byteOrder == BIG_ENDIAN
             ? new byte[] { (byte) 0xFF, (byte) 0xFD }
             : new byte[] { (byte) 0xFD, (byte) 0xFF });
    this.byteOrder = byteOrder;
    this.useByteOrderMark = useByteOrderMark;
    this.needsByteOrderMark = useByteOrderMark;
  }

  /**
   * Encodes as many chars as possible.
   *
   * <p>On return the input position is just past the last char that was
   * fully written: an unpaired or truncated surrogate, or a char or
   * surrogate pair that did not fit into <code>out</code>, is left
   * unread.</p>
   *
   * @return UNDERFLOW when the input is exhausted (or ends in a high
   *         surrogate), OVERFLOW when <code>out</code> has no room for
   *         the byte-order mark, the next char (2 bytes) or the next
   *         surrogate pair (4 bytes), or a malformed result of length 1
   *         for a lone low surrogate or a high surrogate not followed by
   *         a low surrogate
   */
  protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
  {
    // TODO: Optimize this in the case in.hasArray() / out.hasArray()

    if (needsByteOrderMark)
      {
        if (out.remaining () < 2)
          return CoderResult.OVERFLOW;
        put (out, BYTE_ORDER_MARK);
        needsByteOrderMark = false;
      }

    // inPos tracks the committed position; the finally block rewinds
    // the buffer to it no matter how the loop is left.
    int inPos = in.position ();
    try
      {
        while (in.hasRemaining ())
          {
            char c = in.get ();

            if (0xD800 <= c && c <= 0xDFFF)
              {
                // c is a surrogate

                // make sure c is a high surrogate
                if (c > 0xDBFF)
                  return CoderResult.malformedForLength (1);
                if (in.remaining () < 1)
                  return CoderResult.UNDERFLOW;
                char d = in.get ();
                // make sure d is a low surrogate
                if (d < 0xDC00 || d > 0xDFFF)
                  return CoderResult.malformedForLength (1);
                // The pair is written atomically: without this check a
                // nearly full buffer would take the first half and then
                // throw BufferOverflowException on the second.
                if (out.remaining () < 4)
                  return CoderResult.OVERFLOW;
                put (out, c);
                put (out, d);
                inPos += 2;
              }
            else
              {
                if (out.remaining () < 2)
                  return CoderResult.OVERFLOW;
                put (out, c);
                inPos++;
              }
          }

        return CoderResult.UNDERFLOW;
      }
    finally
      {
        in.position (inPos);
      }
  }

  /**
   * Writes <code>c</code> to <code>out</code> in the byte order
   * specified by <code>byteOrder</code>.  The caller must have checked
   * that at least two bytes of space remain.
   **/
  private void put (ByteBuffer out, char c)
  {
    if (byteOrder == BIG_ENDIAN)
      {
        out.put ((byte) (c >> 8));
        out.put ((byte) (c & 0xFF));
      }
    else
      {
        out.put ((byte) (c & 0xFF));
        out.put ((byte) (c >> 8));
      }
  }

  /** Arranges for the byte-order mark (if any) to be written again. */
  protected void implReset ()
  {
    needsByteOrderMark = useByteOrderMark;
  }

  // TODO: override canEncode(char) and canEncode(CharSequence)
  // for performance
}
|
75
libjava/gnu/java/nio/charset/UTF_16LE.java
Normal file
75
libjava/gnu/java/nio/charset/UTF_16LE.java
Normal file
@ -0,0 +1,75 @@
|
||||
/* UTF_16LE.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* UTF-16LE charset.
|
||||
*
|
||||
* @author Jesse Rosenstock
|
||||
*/
|
||||
final class UTF_16LE extends Charset
|
||||
{
|
||||
UTF_16LE ()
|
||||
{
|
||||
super ("UTF-16LE", null);
|
||||
}
|
||||
|
||||
public boolean contains (Charset cs)
|
||||
{
|
||||
return cs instanceof US_ASCII || cs instanceof ISO_8859_1
|
||||
|| cs instanceof UTF_8 || cs instanceof UTF_16BE
|
||||
|| cs instanceof UTF_16LE || cs instanceof UTF_16;
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder ()
|
||||
{
|
||||
return new UTF_16Decoder (this, UTF_16Decoder.LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder ()
|
||||
{
|
||||
return new UTF_16Encoder (this, UTF_16Encoder.LITTLE_ENDIAN, true);
|
||||
}
|
||||
}
|
279
libjava/gnu/java/nio/charset/UTF_8.java
Normal file
279
libjava/gnu/java/nio/charset/UTF_8.java
Normal file
@ -0,0 +1,279 @@
|
||||
/* UTF_8.java --
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.java.nio.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* UTF-8 charset.
|
||||
*
|
||||
* <p> UTF-8 references:
|
||||
* <ul>
|
||||
* <li> <a href="http://ietf.org/rfc/rfc2279.txt">RFC 2279</a>
|
||||
* <li> The <a href="http://www.unicode.org/unicode/standard/standard.html">
|
||||
* Unicode standard</a> and
|
||||
* <a href="http://www.unicode.org/versions/corrigendum1.html">
|
||||
* Corrigendum</a>
|
||||
* </ul>
|
||||
*
|
||||
* @author Jesse Rosenstock
|
||||
*/
|
||||
final class UTF_8 extends Charset
|
||||
{
|
||||
UTF_8 ()
|
||||
{
|
||||
super ("UTF-8", null);
|
||||
}
|
||||
|
||||
public boolean contains (Charset cs)
|
||||
{
|
||||
return cs instanceof US_ASCII || cs instanceof ISO_8859_1
|
||||
|| cs instanceof UTF_8 || cs instanceof UTF_16BE
|
||||
|| cs instanceof UTF_16LE || cs instanceof UTF_16;
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder ()
|
||||
{
|
||||
return new Decoder (this);
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder ()
|
||||
{
|
||||
return new Encoder (this);
|
||||
}
|
||||
|
||||
private static final class Decoder extends CharsetDecoder
|
||||
{
|
||||
private Decoder (Charset cs)
|
||||
{
|
||||
super (cs, 1.0f, 1.0f);
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
|
||||
{
|
||||
// TODO: Optimize this in the case in.hasArray() / out.hasArray()
|
||||
int inPos = 0;
|
||||
try
|
||||
{
|
||||
while (in.hasRemaining ())
|
||||
{
|
||||
char c;
|
||||
byte b1 = in.get ();
|
||||
int highNibble = (b1 >> 4) & 0xF;
|
||||
|
||||
switch (highNibble)
|
||||
{
|
||||
case 0: case 1: case 2: case 3:
|
||||
case 4: case 5: case 6: case 7:
|
||||
if (out.remaining () < 1)
|
||||
return CoderResult.OVERFLOW;
|
||||
out.put ((char) b1);
|
||||
inPos++;
|
||||
break;
|
||||
|
||||
case 0xC: case 0xD:
|
||||
byte b2;
|
||||
if (in.remaining () < 1)
|
||||
return CoderResult.UNDERFLOW;
|
||||
if (out.remaining () < 1)
|
||||
return CoderResult.OVERFLOW;
|
||||
if (!isContinuation (b2 = in.get ()))
|
||||
return CoderResult.malformedForLength (1);
|
||||
c = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
|
||||
// check that we had the shortest encoding
|
||||
if (c <= 0x7F)
|
||||
return CoderResult.malformedForLength (2);
|
||||
out.put (c);
|
||||
inPos += 2;
|
||||
break;
|
||||
|
||||
case 0xE:
|
||||
byte b3;
|
||||
if (in.remaining () < 2)
|
||||
return CoderResult.UNDERFLOW;
|
||||
if (out.remaining () < 1)
|
||||
return CoderResult.OVERFLOW;
|
||||
if (!isContinuation (b2 = in.get ()))
|
||||
return CoderResult.malformedForLength (1);
|
||||
if (!isContinuation (b3 = in.get ()))
|
||||
return CoderResult.malformedForLength (1);
|
||||
c = (char) (((b1 & 0x0F) << 12)
|
||||
| ((b2 & 0x3F) << 6)
|
||||
| (b3 & 0x3F));
|
||||
// check that we had the shortest encoding
|
||||
if (c <= 0x7FF)
|
||||
return CoderResult.malformedForLength (3);
|
||||
out.put (c);
|
||||
inPos += 3;
|
||||
break;
|
||||
|
||||
default:
|
||||
return CoderResult.malformedForLength (1);
|
||||
}
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
finally
|
||||
{
|
||||
// In case we did a get(), then encountered an error, reset the
|
||||
// position to before the error. If there was no error, this
|
||||
// will benignly reset the position to the value it already has.
|
||||
in.position (inPos);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isContinuation (byte b)
|
||||
{
|
||||
return (b & 0xC0) == 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class Encoder extends CharsetEncoder
|
||||
{
|
||||
private Encoder (Charset cs)
|
||||
{
|
||||
// According to
|
||||
// http://www-106.ibm.com/developerworks/unicode/library/utfencodingforms/index.html
|
||||
// On average, English takes slightly over one unit per code point.
|
||||
// Most Latin-script languages take about 1.1 bytes. Greek, Russian,
|
||||
// Arabic and Hebrew take about 1.7 bytes, and most others (including
|
||||
// Japanese, Chinese, Korean and Hindi) take about 3 bytes.
|
||||
// We assume we will be dealing with latin scripts, and use 1.1
|
||||
// for averageBytesPerChar.
|
||||
super (cs, 1.1f, 4.0f);
|
||||
}
|
||||
|
||||
protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
|
||||
{
|
||||
int inPos = 0;
|
||||
try
|
||||
{
|
||||
// TODO: Optimize this in the case in.hasArray() / out.hasArray()
|
||||
while (in.hasRemaining ())
|
||||
{
|
||||
int remaining = out.remaining ();
|
||||
char c = in.get ();
|
||||
|
||||
// UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
||||
// 0000 0000-0000 007F 0xxxxxxx
|
||||
// 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
||||
// 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
||||
|
||||
// Scalar Value UTF-16 byte 1 byte 2 byte 3 byte 4
|
||||
// 0000 0000 0xxx xxxx 0000 0000 0xxx xxxx 0xxx xxxx
|
||||
// 0000 0yyy yyxx xxxx 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
|
||||
// zzzz yyyy yyxx xxxx zzzz yyyy yyxx xxxx 1110 zzzz 10yy yyyy 10xx xxxx
|
||||
// u uuuu zzzz yyyy yyxx xxxx 1101 10ww wwzz zzyy 1111 0uuu 10uu zzzz 10yy yyyy 10xx xxxx
|
||||
// + 1101 11yy yyxx xxxx
|
||||
// Note: uuuuu = wwww + 1
|
||||
|
||||
if (c <= 0x7F)
|
||||
{
|
||||
if (remaining < 1)
|
||||
return CoderResult.OVERFLOW;
|
||||
out.put ((byte) c);
|
||||
inPos++;
|
||||
}
|
||||
else if (c <= 0x7FF)
|
||||
{
|
||||
if (remaining < 2)
|
||||
return CoderResult.OVERFLOW;
|
||||
out.put ((byte) (0xC0 | (c >> 6)));
|
||||
out.put ((byte) (0x80 | (c & 0x3F)));
|
||||
inPos++;
|
||||
}
|
||||
else if (0xD800 <= c && c <= 0xDFFF)
|
||||
{
|
||||
if (remaining < 4)
|
||||
return CoderResult.OVERFLOW;
|
||||
|
||||
// we got a low surrogate without a preciding high one
|
||||
if (c > 0xDBFF)
|
||||
return CoderResult.malformedForLength (1);
|
||||
|
||||
// high surrogates
|
||||
if (!in.hasRemaining ())
|
||||
return CoderResult.UNDERFLOW;
|
||||
|
||||
char d = in.get ();
|
||||
|
||||
// make sure d is a low surrogate
|
||||
if (d < 0xDC00 || d > 0xDFFF)
|
||||
return CoderResult.malformedForLength (1);
|
||||
|
||||
// make the 32 bit value
|
||||
// int value2 = (c - 0xD800) * 0x400 + (d - 0xDC00) + 0x10000;
|
||||
int value = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000;
|
||||
// assert value == value2;
|
||||
out.put ((byte) (0xF0 | (value >> 18)));
|
||||
out.put ((byte) (0x80 | ((value >> 12) & 0x3F)));
|
||||
out.put ((byte) (0x80 | ((value >> 6) & 0x3F)));
|
||||
out.put ((byte) (0x80 | ((value ) & 0x3F)));
|
||||
|
||||
inPos += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (remaining < 3)
|
||||
return CoderResult.OVERFLOW;
|
||||
|
||||
out.put ((byte) (0xE0 | (c >> 12)));
|
||||
out.put ((byte) (0x80 | ((c >> 6) & 0x3F)));
|
||||
out.put ((byte) (0x80 | (c & 0x3F)));
|
||||
inPos++;
|
||||
}
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
finally
|
||||
{
|
||||
// In case we did a get(), then encountered an error, reset the
|
||||
// position to before the error. If there was no error, this
|
||||
// will benignly reset the position to the value it already has.
|
||||
in.position (inPos);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user