Character.java (SIZE, [...]): New fields from Classpath.

* java/lang/Character.java (SIZE, MAX_CACHE, charCache, MIN_SURROGATE, MAX_SURROGATE): New fields from Classpath. (MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE, MAX_LOW_SURROGATE): Javadoc fixes. (valueOf, reverseBytes, isHighSurrogate, isLowSurrogate, isSurrogatePair, toCodePoint, codePointAt, codePointBefore): New methods from Classpath. * java/lang/String.java (codePointAt, codePointBefore, codePointCount, contains, replace): New methods from Classpath. (contentEquals): Declare. * java/lang/natString.cc (contentEquals): New method. From-SVN: r109445
2025-01-18 10:24:30 +08:00 · 2006-01-07 00:46:28 +00:00 · 2006-01-07 00:46:28 +00:00 · 37d41553c0
commit 37d41553c0
parent 2b15cf3b50
4 changed files with 426 additions and 8 deletions
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@ -1,3 +1,17 @@
+2006-01-06  Tom Tromey  <tromey@redhat.com>
+
+	* java/lang/Character.java (SIZE, MAX_CACHE, charCache,
+	MIN_SURROGATE, MAX_SURROGATE): New fields from Classpath.
+	(MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE,
+	MAX_LOW_SURROGATE): Javadoc fixes.
+	(valueOf, reverseBytes, isHighSurrogate, isLowSurrogate,
+	isSurrogatePair, toCodePoint, codePointAt, codePointBefore): New
+	methods from Classpath.
+	* java/lang/String.java (codePointAt, codePointBefore,
+	codePointCount, contains, replace): New methods from Classpath.
+	(contentEquals): Declare.
+	* java/lang/natString.cc (contentEquals): New method.
+
 2005-12-26  Anthony Green  <green@redhat.com>

 	* gnu/java/nio/SocketChannelImpl.java (read): Compute the right amount
--- a/libjava/java/lang/Character.java
+++ b/libjava/java/lang/Character.java
@ -1,5 +1,5 @@
 /* java.lang.Character -- Wrapper class for char, and Unicode subsets
-   Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.

 This file is part of GNU Classpath.

@ -1039,6 +1039,18 @@ public final class Character implements Serializable, Comparable
   */
  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');

+  /**
+   * The number of bits needed to represent a <code>char</code>.
+   * @since 1.5
+   */
+  public static final int SIZE = 16;
+
+  // This caches some Character values, and is used by boxing
+  // conversions via valueOf().  We must cache at least 0..127;
+  // this constant controls how much we actually cache.
+  private static final int MAX_CACHE = 127;
+  private static Character[] charCache = new Character[MAX_CACHE + 1];
+
  /**
   * Lu = Letter, Uppercase (Informative).
   *
@ -1434,33 +1446,47 @@ public final class Character implements Serializable, Comparable


  /**
-   * Minimum high surrrogate code in UTF-16 encoding.
+   * Minimum high surrogate code in UTF-16 encoding.
   *
   * @since 1.5
   */
  public static final char MIN_HIGH_SURROGATE = '\ud800';

  /**
-   * Maximum high surrrogate code in UTF-16 encoding.
+   * Maximum high surrogate code in UTF-16 encoding.
   *
   * @since 1.5
   */
  public static final char MAX_HIGH_SURROGATE = '\udbff';
 
  /**
-   * Minimum low surrrogate code in UTF-16 encoding.
+   * Minimum low surrogate code in UTF-16 encoding.
   *
   * @since 1.5
   */
  public static final char MIN_LOW_SURROGATE = '\udc00';

  /**
-   * Maximum low surrrogate code in UTF-16 encoding.
+   * Maximum low surrogate code in UTF-16 encoding.
   *
   * @since 1.5
   */
  public static final char MAX_LOW_SURROGATE = '\udfff';

+  /**
+   * Minimum surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
+
+  /**
+   * Maximum surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
+
  /**
   * Grabs an attribute offset from the Unicode attribute database. The lower
   * 5 bits are the character type, the next 2 bits are flags, and the top
@ -2212,6 +2238,37 @@ public final class Character implements Serializable, Comparable
    return compareTo((Character) o);
  }

+  /**
+   * Returns an <code>Character</code> object wrapping the value.
+   * In contrast to the <code>Character</code> constructor, this method
+   * will cache some values.  It is used by boxing conversion.
+   *
+   * @param val the value to wrap
+   * @return the <code>Character</code>
+   * 
+   * @since 1.5
+   */
+  public static Character valueOf(char val)
+  {
+    if (val > MAX_CACHE)
+      return new Character(val);
+    synchronized (charCache)
+      {
+    if (charCache[val - MIN_VALUE] == null)
+      charCache[val - MIN_VALUE] = new Character(val);
+    return charCache[val - MIN_VALUE];
+      }
+  }
+
+  /**
+   * Reverse the bytes in val.
+   * @since 1.5
+   */
+  public static char reverseBytes(char val)
+  {
+    return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
+  }
+
  /**
   * Converts a unicode code point to a UTF-16 representation of that
   * code point.
@ -2280,7 +2337,7 @@ public final class Character implements Serializable, Comparable
   * Return number of 16-bit characters required to represent the given
   * code point.
   *
-   * @param codePoint a uncode code point
+   * @param codePoint a unicode code point
   *
   * @return 2 if codePoint >= 0x10000, 1 otherwise.
   *
@ -2325,4 +2382,210 @@ public final class Character implements Serializable, Comparable
  {
    return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
  }
+
+  /**
+   * Tests whether the given character is a high (leading) surrogate,
+   * that is, whether it lies between <code>MIN_HIGH_SURROGATE</code>
+   * and <code>MAX_HIGH_SURROGATE</code> inclusive.
+   *
+   * @param ch the character to test
+   * @return true if the character is a high surrogate character
+   *
+   * @since 1.5
+   */
+  public static boolean isHighSurrogate(char ch)
+  {
+    return ! (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE);
+  }
+
+  /**
+   * Tests whether the given character is a low (trailing) surrogate,
+   * that is, whether it lies between <code>MIN_LOW_SURROGATE</code>
+   * and <code>MAX_LOW_SURROGATE</code> inclusive.
+   *
+   * @param ch the character to test
+   * @return true if the character is a low surrogate character
+   *
+   * @since 1.5
+   */
+  public static boolean isLowSurrogate(char ch)
+  {
+    return ! (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE);
+  }
+
+  /**
+   * Tests whether the two given characters form a surrogate pair,
+   * which is the case when the first character is a high surrogate
+   * and the second character is a low surrogate.
+   *
+   * @param ch1 the first character
+   * @param ch2 the second character
+   * @return true if the characters compose a surrogate pair
+   *
+   * @since 1.5
+   */
+  public static boolean isSurrogatePair(char ch1, char ch2)
+  {
+    if (! isHighSurrogate(ch1))
+      return false;
+    return isLowSurrogate(ch2);
+  }
+
+  /**
+   * Given a valid surrogate pair, this returns the corresponding
+   * supplementary code point.  The arguments are not validated; if
+   * they do not form a surrogate pair the result is meaningless.
+   *
+   * @param high the high character of the pair
+   * @param low the low character of the pair
+   * @return the corresponding code point, in the range
+   *         0x10000 to 0x10FFFF for a valid pair
+   *
+   * @since 1.5
+   */
+  public static int toCodePoint(char high, char low)
+  {
+    // A surrogate pair only carries the 20-bit offset into the
+    // supplementary range, which itself begins at U+10000.
+    return ((high - MIN_HIGH_SURROGATE) << 10) + (low - MIN_LOW_SURROGATE)
+      + 0x10000;
+  }
+
+  /**
+   * Get the code point at the specified index in the CharSequence.
+   * This behaves like CharSequence#charAt(int), except that when the
+   * character at the index is a high surrogate, a following character
+   * exists, and that character is a low surrogate, the supplementary
+   * code point encoded by the pair is returned.  In every other case
+   * the character at the index is returned.
+   *
+   * @param sequence the CharSequence
+   * @param index the index of the codepoint to get, starting at 0
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+   * @since 1.5
+   */
+  public static int codePointAt(CharSequence sequence, int index)
+  {
+    int length = sequence.length();
+    if (index < 0 || index >= length)
+      throw new IndexOutOfBoundsException();
+    char first = sequence.charAt(index);
+    int next = index + 1;
+    if (isHighSurrogate(first) && next < length)
+      {
+        char second = sequence.charAt(next);
+        if (isLowSurrogate(second))
+          return toCodePoint(first, second);
+      }
+    return first;
+  }
+
+  /**
+   * Get the code point at the specified index in the character array.
+   * The whole array may be examined: this simply delegates to
+   * #codePointAt(char[], int, int) with the array length as the limit.
+   * If the character is the start of a surrogate pair, and there is a
+   * following character, and this character completes the pair, then
+   * the corresponding supplementary code point is returned.
+   * Otherwise, the character at the index is returned.
+   *
+   * @param chars the character array in which to look
+   * @param index the index of the codepoint to get, starting at 0
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= the
+   * length of the array
+   * @since 1.5
+   */
+  public static int codePointAt(char[] chars, int index)
+  {
+    final int limit = chars.length;
+    return codePointAt(chars, index, limit);
+  }
+
+  /**
+   * Get the code point at the specified index in the character array.
+   * If the character is the start of a surrogate pair, and there is a
+   * following character within the specified range, and this
+   * character completes the pair, then the corresponding
+   * supplementary code point is returned.  Otherwise, the character
+   * at the index is returned.
+   *
+   * @param chars the character array in which to look
+   * @param index the index of the codepoint to get, starting at 0
+   * @param limit the limit past which characters should not be examined;
+   * it is exclusive and may equal the length of the array
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;=
+   * limit, or if limit is negative or &gt; the length of the array
+   * @since 1.5
+   */
+  public static int codePointAt(char[] chars, int index, int limit)
+  {
+    // limit is an exclusive bound, so chars.length itself is legal.
+    if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
+      throw new IndexOutOfBoundsException();
+    char high = chars[index];
+    if (! isHighSurrogate(high) || ++index >= limit)
+      return high;
+    char low = chars[index];
+    if (! isLowSurrogate(low))
+      return high;
+    return toCodePoint(high, low);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(char[], int), but checks the characters at
+   * <code>index-1</code> and <code>index-2</code> to see if they form
+   * a supplementary code point.  If they do not, the character at
+   * <code>index-1</code> is returned.  The whole array, starting at
+   * element 0, may be examined.
+   *
+   * @param chars the character array
+   * @param index the index just past the codepoint to get, starting at 0
+   * @return the codepoint just before the specified index
+   * @throws IndexOutOfBoundsException if index is less than 1 or &gt;
+   * the length of the array
+   * @since 1.5
+   */
+  public static int codePointBefore(char[] chars, int index)
+  {
+    // The examinable range begins at the first element of the array.
+    return codePointBefore(chars, index, 0);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(char[], int), but checks the characters at
+   * <code>index-1</code> and <code>index-2</code> to see if they form
+   * a supplementary code point.  If they do not, the character at
+   * <code>index-1</code> is returned.  The start parameter is used to
+   * limit the range of the array which may be examined.
+   *
+   * @param chars the character array
+   * @param index the index just past the codepoint to get, starting at 0
+   * @param start the index before which characters should not be examined
+   * @return the codepoint just before the specified index
+   * @throws IndexOutOfBoundsException if index is &lt;= start or &gt;
+   * the length of the array, or if start is negative or &gt;= the
+   * length of the array
+   * @since 1.5
+   */
+  public static int codePointBefore(char[] chars, int index, int start)
+  {
+    // index must lie strictly after start: the character examined first
+    // is chars[index - 1], which has to be inside [start, length).
+    if (index <= start || index > chars.length
+        || start < 0 || start >= chars.length)
+      throw new IndexOutOfBoundsException();
+    --index;
+    char low = chars[index];
+    if (! isLowSurrogate(low) || --index < start)
+      return low;
+    char high = chars[index];
+    if (! isHighSurrogate(high))
+      return low;
+    return toCodePoint(high, low);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(CharSequence, int), but looks at the characters at
+   * <code>index-1</code> and <code>index-2</code>.  If they form a
+   * surrogate pair the supplementary code point is returned;
+   * otherwise the character at <code>index-1</code> is returned.
+   *
+   * @param sequence the CharSequence
+   * @param index the index just past the codepoint to get, starting at 0
+   * @return the codepoint just before the specified index
+   * @throws IndexOutOfBoundsException if index is less than 1 or
+   * &gt; length()
+   * @since 1.5
+   */
+  public static int codePointBefore(CharSequence sequence, int index)
+  {
+    int length = sequence.length();
+    if (index < 1 || index > length)
+      throw new IndexOutOfBoundsException();
+    char last = sequence.charAt(index - 1);
+    if (isLowSurrogate(last) && index >= 2)
+      {
+        char previous = sequence.charAt(index - 2);
+        if (isHighSurrogate(previous))
+          return toCodePoint(previous, last);
+      }
+    return last;
+  }
 } // class Character
--- a/libjava/java/lang/String.java
+++ b/libjava/java/lang/String.java
@ -1,5 +1,5 @@
 /* String.java -- immutable character sequences; the object of string literals
-   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
   Free Software Foundation, Inc.

 This file is part of GNU Classpath.
@ -454,6 +454,40 @@ public final class String implements Serializable, Comparable, CharSequence
   */
  public native char charAt(int index);

+  /**
+   * Get the code point at the specified index.  This is like #charAt(int),
+   * but if the character is the start of a surrogate pair, and the
+   * following character completes the pair, then the corresponding
+   * supplementary code point is returned.
+   * @param index the index of the codepoint to get, starting at 0
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+   * @since 1.5
+   */
+  public int codePointAt(int index)
+  {
+    // A String is immutable, so no locking is needed here.
+    // Use the CharSequence overload as we get better range checking
+    // this way.
+    return Character.codePointAt(this, index);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(int), but checks the characters at <code>index-1</code> and
+   * <code>index-2</code> to see if they form a supplementary code point.
+   * @param index the index just past the codepoint to get, starting at 0
+   * @return the codepoint just before the specified index
+   * @throws IndexOutOfBoundsException if index is less than 1 or
+   *         &gt; length()
+   * @since 1.5
+   */
+  public int codePointBefore(int index)
+  {
+    // A String is immutable, so no locking is needed here.
+    // Use the CharSequence overload as we get better range checking
+    // this way.
+    return Character.codePointBefore(this, index);
+  }
+
  /**
   * Copies characters from this String starting at a specified start index,
   * ending at a specified stop index, to a character array starting at
@ -565,6 +599,18 @@ public final class String implements Serializable, Comparable, CharSequence
   */
  public native boolean contentEquals(StringBuffer buffer);

+  /**
+   * Compares the given CharSequence to this String. This is true if
+   * the CharSequence has the same length as this String and the same
+   * characters in the same order at this moment.
+   *
+   * @param seq the CharSequence to compare to
+   * @return true if CharSequence has the same character sequence
+   * @throws NullPointerException if the given CharSequence is null
+   * @since 1.5
+   */
+  public native boolean contentEquals(CharSequence seq);
+
  /**
   * Compares a String to this String, ignoring case. This does not handle
   * multi-character capitalization exceptions; instead the comparison is
@ -1259,6 +1305,88 @@ public final class String implements Serializable, Comparable, CharSequence
   */
  public native String intern();

+  /**
+   * Return the number of code points between two indices in the
+   * <code>String</code>.  An unpaired surrogate counts as a
+   * code point for this purpose.  Characters outside the indicated
+   * range are not examined, even if the range ends in the middle of a
+   * surrogate pair.
+   *
+   * @param start the starting index
+   * @param end one past the ending index
+   * @return the number of code points
+   * @since 1.5
+   */
+  public synchronized int codePointCount(int start, int end)
+  {
+    if (start < 0 || end >= count || start > end)
+      throw new StringIndexOutOfBoundsException();
+
+    int count = 0;
+    while (start < end)
+      {
+	char base = charAt(start);
+	if (base < Character.MIN_HIGH_SURROGATE
+	    || base > Character.MAX_HIGH_SURROGATE
+	    || start == end
+	    || start == count
+	    || charAt(start + 1) < Character.MIN_LOW_SURROGATE
+	    || charAt(start + 1) > Character.MAX_LOW_SURROGATE)
+	  {
+	    // Nothing.
+	  }
+	else
+	  {
+	    // Surrogate pair.
+	    ++start;
+	  }
+	++start;
+	++count;
+      }
+    return count;
+  }
+
+  /**
+   * Returns true iff this String contains the sequence of Characters
+   * described in s.
+   * @param s the CharSequence
+   * @return true iff this String contains s
+   *
+   * @since 1.5
+   */
+  public boolean contains (CharSequence s)
+  {
+    return this.indexOf(s.toString()) != -1;
+  }
+
+  /**
+   * Returns a string that is this string with all instances of the sequence
+   * represented by <code>target</code> replaced by the sequence in
+   * <code>replacement</code>.  Matching proceeds from the start of the
+   * string to the end, and text that has been inserted is never
+   * rescanned.  An empty <code>target</code> matches before every
+   * character and at the end of the string.
+   *
+   * @param target the sequence to be replaced
+   * @param replacement the sequence used as the replacement
+   * @return the string constructed as above
+   * @throws NullPointerException if either argument is null
+   * @since 1.5
+   */
+  public String replace (CharSequence target, CharSequence replacement)
+  {
+    // Snapshot both sequences once, so the lengths used below always
+    // agree with the text that is searched for and inserted.
+    String targetString = target.toString();
+    String replaceString = replacement.toString();
+    int targetLength = targetString.length();
+    int replaceLength = replaceString.length();
+
+    int startPos = this.indexOf(targetString);
+    StringBuilder result = new StringBuilder(this);
+    while (startPos != -1)
+      {
+        // Replace the target with the replacement
+        result.replace(startPos, startPos + targetLength, replaceString);
+
+        // Resume the search just past the text that was inserted.
+        int resume = startPos + replaceLength;
+        if (targetLength == 0)
+          {
+            // An empty target matches at every position, so step over
+            // one original character each time and stop at the end;
+            // otherwise the search would never terminate.
+            if (resume >= result.length())
+              break;
+            ++resume;
+          }
+        startPos = result.indexOf(targetString, resume);
+      }
+    return result.toString();
+  }
+

  private native void init(char[] chars, int offset, int count,
 			   boolean dont_copy);
--- a/libjava/java/lang/natString.cc
+++ b/libjava/java/lang/natString.cc
@ -1,6 +1,6 @@
 // natString.cc - Implementation of java.lang.String native methods.

-/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005  Free Software Foundation
+/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006  Free Software Foundation

   This file is part of libgcj.

@ -15,6 +15,7 @@ details.  */

 #include <gcj/cni.h>
 #include <java/lang/Character.h>
+#include <java/lang/CharSequence.h>
 #include <java/lang/String.h>
 #include <java/lang/IndexOutOfBoundsException.h>
 #include <java/lang/ArrayIndexOutOfBoundsException.h>
@ -564,6 +565,18 @@ java::lang::String::contentEquals(java::lang::StringBuffer* buffer)
  return true;
 }

+// Returns true when SEQ has the same length as this string and holds
+// the same characters in the same order.
+jboolean
+java::lang::String::contentEquals(java::lang::CharSequence *seq)
+{
+  if (count != seq->length())
+    return false;
+  const jchar *chars = JvGetStringChars(this);
+  for (jint pos = 0; pos < count; pos++)
+    {
+      if (chars[pos] != seq->charAt(pos))
+        return false;
+    }
+  return true;
+}
+
 jchar
 java::lang::String::charAt(jint i)
 {