CollationElementIterator.java, [...]: New versions from GNU classpath.

2004-06-01 Michael Koch <konqueror@gmx.de> * java/text/CollationElementIterator.java, java/text/CollationKey.java, java/text/RuleBasedCollator.java: New versions from GNU classpath. * testsuite/libjava.mauve/xfails: Removed all java.text.CollationElementIterator tests. From-SVN: r82510
author: Michael Koch <konqueror@gmx.de> 2004-05-31 22:16:31 +0000
committer: Michael Koch <mkoch@gcc.gnu.org> 2004-05-31 22:16:31 +0000
commit: 57807c317869610e07a85bf8bf95747638230ed7 (patch)
tree: 84eadd76d1df1c8b46bd05dd00699bc802f78611 /libjava/java/text/CollationElementIterator.java
parent: f7dbd56c9a1e2605f54a05c2f613cf086f32634b (diff)
1 files changed, 259 insertions, 10 deletions
diff --git a/libjava/java/text/CollationElementIterator.java b/libjava/java/text/CollationElementIterator.java
index 1b5a6172e06..a4c21005211 100644
--- a/libjava/java/text/CollationElementIterator.java
+++ b/libjava/java/text/CollationElementIterator.java
@@ -38,6 +38,8 @@ exception statement from your version. */
 
 package java.text;
 
+import java.util.Vector;
+
 /* Written using "Java Class Libraries", 2nd edition, plus online
  * API docs for JDK 1.2 from http://www.javasoft.com.
  * Status: Believed complete and correct to JDK 1.1.
@@ -74,13 +76,25 @@ public final class CollationElementIterator
   String text;
 
   /**
+   * This is the index into the collation decomposition where we are currently scanning.
+   */
+  int index;
+
+  /**
    * This is the index into the String where we are currently scanning.
    */
   int textIndex;
 
-  // A piece of lookahead.
-  boolean lookahead_set;
-  int lookahead;
+  /**
+   * Array containing the collation decomposition of the
+   * text given to the constructor.
+   */
+  private Object[] text_decomposition;
+
+  /**
+   * Array holding the source-text index of each decomposition block, plus the text length as a final entry.
+   */
+  private int[] text_indexes;
 
   /**
    * This method initializes a new instance of <code>CollationElementIterator</code>
@@ -97,6 +111,35 @@ public final class CollationElementIterator
     setText (text);    
   }
 
+  RuleBasedCollator.CollationElement nextBlock()
+  {
+    if (index >= text_decomposition.length)
+      return null;
+    
+    RuleBasedCollator.CollationElement e =
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
+    
+    textIndex = text_indexes[index+1];
+
+    index++;
+
+    return e;
+  }
+
+  RuleBasedCollator.CollationElement previousBlock()
+  {
+    if (index == 0)
+      return null;
+    
+    index--;
+    RuleBasedCollator.CollationElement e =
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
+
+    textIndex = text_indexes[index+1];
+    
+    return e;
+  }
+
   /**
    * This method returns the collation ordering value of the next character sequence
    * in the string (it may be an extended character following collation rules).
@@ -107,10 +150,29 @@ public final class CollationElementIterator
    */
   public int next()
   {
-    if (textIndex == text.length())
+    RuleBasedCollator.CollationElement e = nextBlock();
+
+    if (e == null)
       return NULLORDER;
+    
+    return e.getValue();
+  }
+
+  /**
+   * This method returns the collation ordering value of the previous character
+   * in the string.  This method will return <code>NULLORDER</code> if the
+   * beginning of the string was reached.
+   *
+   * @return The collation ordering value.
+   */
+  public int previous()
+  {
+    RuleBasedCollator.CollationElement e = previousBlock();
 
-    return collator.ceiNext (this);
+    if (e == null)
+      return NULLORDER;
+    
+    return e.getValue();
   }
 
   /**
@@ -133,9 +195,8 @@ public final class CollationElementIterator
    */
   public void reset()
   {
+    index = 0;
     textIndex = 0;
-    lookahead_set = false;
-    lookahead = 0;
   }
 
   /**
@@ -176,10 +237,152 @@ public final class CollationElementIterator
    */
   public void setText(String text)
   {
+    int idx = 0;
+    int idx_idx = 0;
+    int alreadyExpanded = 0;
+    int idxToMove = 0;
+
     this.text = text;
-    this.textIndex = 0;
-    this.lookahead_set = false;
-    this.lookahead = 0;
+    this.index = 0;
+
+    String work_text = text.intern();
+
+    Vector v = new Vector();
+    Vector vi = new Vector();
+
+    // Build element collection ordered as they come in "text".
+    while (idx < work_text.length())
+      {
+	String key, key_old;
+
+	Object object = null;
+	int p = 1;
+	
+	// IMPROVE: use a TreeMap with a prefix-ordering rule.
+	key_old = key = null;
+	do
+	  {
+	    if (object != null)
+	      key_old = key;
+	    key = work_text.substring (idx, idx+p);
+	    object = collator.prefix_tree.get (key);
+	    if (object != null && idx < alreadyExpanded)
+	      {
+		RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
+		if (prefix.expansion != null && 
+		    prefix.expansion.startsWith(work_text.substring(0, idx)))
+		{
+		  object = null;
+		  key = key_old;
+		}
+	      }
+	    p++;
+	  }
+	while (idx+p <= work_text.length());
+	
+	if (object == null)
+	  key = key_old;
+	
+	RuleBasedCollator.CollationElement prefix =
+	  (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
+
+	/*
+	 * First case: There is no such sequence in the database.
+	 * We will have to build one from the context.
+	 */
+	if (prefix == null)
+	  {
+	    /*
+	     * We are dealing with sequences in an expansion. They
+	     * are treated as accented characters (tertiary order).
+	     */
+	    if (alreadyExpanded > 0)
+	      {
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultAccentedElement (work_text.charAt (idx));
+		
+		v.add (e);
+		vi.add (new Integer(idx_idx));
+		idx++;
+		alreadyExpanded--;
+		if (alreadyExpanded == 0)
+		  {
+		    /* There are no characters left in the expansion set.
+		     * We can now advance the pointer in the source string.
+		     */
+		    idx_idx += idxToMove;
+		    idxToMove = 0; 
+		  }
+		else
+		  idx_idx++;
+	      }
+	    else
+	      {
+		/* This is a normal character. */
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultElement (work_text.charAt (idx));
+		Integer i_ref = new Integer(idx_idx);
+
+		/* Don't forget to mark it as a special sequence so the
+		 * string can be ordered.
+		 */
+		v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+		vi.add (i_ref);
+		v.add (e);
+		vi.add (i_ref);
+		idx_idx++;
+		idx++;
+	      }
+	    continue;
+	  }
+ 
+	/*
+	 * Second case: we have found a matching sequence. If it carries an
+	 * expansion string, prepend it to the "work text", add the corresponding
+	 * sorting element, and record how many expanded characters remain.
+	 */
+	if (prefix.expansion != null)
+	  {
+	    work_text = prefix.expansion
+	      + work_text.substring (idx+prefix.key.length());
+	    idx = 0;
+	    v.add (prefix);
+	    vi.add (new Integer(idx_idx));
+	    if (alreadyExpanded == 0)
+	      idxToMove = prefix.key.length();
+	    alreadyExpanded += prefix.expansion.length()-prefix.key.length();
+	  }
+	else
+	  {
+	    /* Third case: the simplest. We have found the prefix and it
+	     * does not need to be expanded.
+	     */
+	    v.add (prefix);
+	    vi.add (new Integer(idx_idx));
+	    idx += prefix.key.length();
+	    /* If the sequence is in an expansion, we must decrease the
+	     * counter.
+	     */
+	    if (alreadyExpanded > 0)
+	      {
+		alreadyExpanded -= prefix.key.length();
+		if (alreadyExpanded == 0)
+		  {
+		    idx_idx += idxToMove;
+		    idxToMove = 0;
+		  }
+	      } else
+		idx_idx += prefix.key.length();
+	  }
+      }
+    
+    text_decomposition = v.toArray();
+    text_indexes = new int[vi.size()+1];
+    for (int i = 0; i < vi.size(); i++) 
+      {
+	text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
+      }
+    text_indexes[vi.size()] = text.length();
   }
 
   /**
@@ -215,4 +418,50 @@ public final class CollationElementIterator
   {
     return textIndex;
   }
+
+  /**
+   * This method sets the iteration index position into the current
+   * <code>String</code> to the specified value.  This value must not
+   * be negative and must not be greater than the last index position
+   * in the <code>String</code>.
+   *
+   * @param offset The new iteration index position.
+   *
+   * @exception IllegalArgumentException If the new offset is not valid.
+   */
+  public void setOffset(int offset)
+  {
+    if (offset < 0)
+      throw new IllegalArgumentException("Negative offset: " + offset);
+
+    if (offset > (text.length() - 1))
+      throw new IllegalArgumentException("Offset too large: " + offset);
+    
+    for (index = 0; index < text_decomposition.length; index++)
+      {	
+	if (offset <= text_indexes[index])
+	  break;
+      }
+    /*
+     * As text_indexes[0] == 0 and offset >= 0, the else branch below is only
+     * taken when index is greater than 0, so text_indexes[index-1] is valid.
+     */
+    if (text_indexes[index] == offset)
+      textIndex = offset;
+    else
+      textIndex = text_indexes[index-1];
+  }
+
+  /**
+   * This method returns the maximum length of any expansion sequence that
+   * ends with the specified collation order value.  (Whatever that means).
+   *
+   * @param value The collation order value
+   *
+   * @return The maximum length of an expansion sequence.
+   */
+  public int getMaxExpansion(int value)
+  {
+    return 1;
+  }
 }
author	Michael Koch <konqueror@gmx.de>	2004-05-31 22:16:31 +0000
committer	Michael Koch <mkoch@gcc.gnu.org>	2004-05-31 22:16:31 +0000
commit	57807c317869610e07a85bf8bf95747638230ed7 (patch)
tree	84eadd76d1df1c8b46bd05dd00699bc802f78611 /libjava/java/text/CollationElementIterator.java
parent	f7dbd56c9a1e2605f54a05c2f613cf086f32634b (diff)