summaryrefslogtreecommitdiff
path: root/libjava/java/text/CollationElementIterator.java
diff options
context:
space:
mode:
authorMichael Koch <konqueror@gmx.de>2004-05-31 22:16:31 +0000
committerMichael Koch <mkoch@gcc.gnu.org>2004-05-31 22:16:31 +0000
commit57807c317869610e07a85bf8bf95747638230ed7 (patch)
tree84eadd76d1df1c8b46bd05dd00699bc802f78611 /libjava/java/text/CollationElementIterator.java
parentf7dbd56c9a1e2605f54a05c2f613cf086f32634b (diff)
CollationElementIterator.java, [...]: New versions from GNU classpath.
2004-06-01 Michael Koch <konqueror@gmx.de> * java/text/CollationElementIterator.java, java/text/CollationKey.java, java/text/RuleBasedCollator.java: New versions from GNU classpath. * testsuite/libjava.mauve/xfails: Removed all java.text.CollationElementIterator tests. From-SVN: r82510
Diffstat (limited to 'libjava/java/text/CollationElementIterator.java')
-rw-r--r--libjava/java/text/CollationElementIterator.java269
1 files changed, 259 insertions, 10 deletions
diff --git a/libjava/java/text/CollationElementIterator.java b/libjava/java/text/CollationElementIterator.java
index 1b5a6172e06..a4c21005211 100644
--- a/libjava/java/text/CollationElementIterator.java
+++ b/libjava/java/text/CollationElementIterator.java
@@ -38,6 +38,8 @@ exception statement from your version. */
package java.text;
+import java.util.Vector;
+
/* Written using "Java Class Libraries", 2nd edition, plus online
* API docs for JDK 1.2 from http://www.javasoft.com.
* Status: Believed complete and correct to JDK 1.1.
@@ -74,13 +76,25 @@ public final class CollationElementIterator
String text;
/**
+ * This is the index into the collation decomposition where we are currently scanning.
+ */
+ int index;
+
+ /**
* This is the index into the String where we are currently scanning.
*/
int textIndex;
- // A piece of lookahead.
- boolean lookahead_set;
- int lookahead;
+ /**
+ * Array containing the collation decomposition of the
+ * text given to the constructor.
+ */
+ private Object[] text_decomposition;
+
+ /**
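+ * Array containing, for each entry of the collation decomposition,
+ * the index in the source text of the block it was built from. It has
+ * one extra trailing entry that holds the length of the text.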
+ */
+ private int[] text_indexes;
/**
* This method initializes a new instance of <code>CollationElementIterator</code>
@@ -97,6 +111,35 @@ public final class CollationElementIterator
setText (text);
}
+ RuleBasedCollator.CollationElement nextBlock()
+ {
+ if (index >= text_decomposition.length)
+ return null;
+
+ RuleBasedCollator.CollationElement e =
+ (RuleBasedCollator.CollationElement) text_decomposition[index];
+
+ textIndex = text_indexes[index+1];
+
+ index++;
+
+ return e;
+ }
+
+ RuleBasedCollator.CollationElement previousBlock()
+ {
+ if (index == 0)
+ return null;
+
+ index--;
+ RuleBasedCollator.CollationElement e =
+ (RuleBasedCollator.CollationElement) text_decomposition[index];
+
+ textIndex = text_indexes[index+1];
+
+ return e;
+ }
+
/**
* This method returns the collation ordering value of the next character sequence
* in the string (it may be an extended character following collation rules).
@@ -107,10 +150,29 @@ public final class CollationElementIterator
*/
public int next()
{
- if (textIndex == text.length())
+ RuleBasedCollator.CollationElement e = nextBlock();
+
+ if (e == null)
return NULLORDER;
+
+ return e.getValue();
+ }
+
+ /**
+ * This method returns the collation ordering value of the previous character
+ * in the string. This method will return <code>NULLORDER</code> if the
+ * beginning of the string was reached.
+ *
+ * @return The collation ordering value.
+ */
+ public int previous()
+ {
+ RuleBasedCollator.CollationElement e = previousBlock();
- return collator.ceiNext (this);
+ if (e == null)
+ return NULLORDER;
+
+ return e.getValue();
}
/**
@@ -133,9 +195,8 @@ public final class CollationElementIterator
*/
public void reset()
{
+ index = 0;
textIndex = 0;
- lookahead_set = false;
- lookahead = 0;
}
/**
@@ -176,10 +237,152 @@ public final class CollationElementIterator
*/
public void setText(String text)
{
+ int idx = 0;
+ int idx_idx = 0;
+ int alreadyExpanded = 0;
+ int idxToMove = 0;
+
this.text = text;
- this.textIndex = 0;
- this.lookahead_set = false;
- this.lookahead = 0;
+ this.index = 0;
+
+ String work_text = text.intern();
+
+ Vector v = new Vector();
+ Vector vi = new Vector();
+
+ // Build element collection ordered as they come in "text".
+ while (idx < work_text.length())
+ {
+ String key, key_old;
+
+ Object object = null;
+ int p = 1;
+
+ // IMPROVE: use a TreeMap with a prefix-ordering rule.
+ key_old = key = null;
+ do
+ {
+ if (object != null)
+ key_old = key;
+ key = work_text.substring (idx, idx+p);
+ object = collator.prefix_tree.get (key);
+ if (object != null && idx < alreadyExpanded)
+ {
+ RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
+ if (prefix.expansion != null &&
+ prefix.expansion.startsWith(work_text.substring(0, idx)))
+ {
+ object = null;
+ key = key_old;
+ }
+ }
+ p++;
+ }
+ while (idx+p <= work_text.length());
+
+ if (object == null)
+ key = key_old;
+
+ RuleBasedCollator.CollationElement prefix =
+ (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
+
+ /*
+ * First case: There is no such sequence in the database.
+ * We will have to build one from the context.
+ */
+ if (prefix == null)
+ {
+ /*
+ * We are dealing with sequences in an expansion. They
+ * are treated as accented characters (tertiary order).
+ */
+ if (alreadyExpanded > 0)
+ {
+ RuleBasedCollator.CollationElement e =
+ collator.getDefaultAccentedElement (work_text.charAt (idx));
+
+ v.add (e);
+ vi.add (new Integer(idx_idx));
+ idx++;
+ alreadyExpanded--;
+ if (alreadyExpanded == 0)
+ {
+ /* There are no characters left in the expansion set.
+ * We can increase the pointer in the source string.
+ */
+ idx_idx += idxToMove;
+ idxToMove = 0;
+ }
+ else
+ idx_idx++;
+ }
+ else
+ {
+ /* This is a normal character. */
+ RuleBasedCollator.CollationElement e =
+ collator.getDefaultElement (work_text.charAt (idx));
+ Integer i_ref = new Integer(idx_idx);
+
+ /* Don't forget to mark it as a special sequence so the
+ * string can be ordered.
+ */
+ v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+ vi.add (i_ref);
+ v.add (e);
+ vi.add (i_ref);
+ idx_idx++;
+ idx++;
+ }
+ continue;
+ }
+
+ /*
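+ * Second case: we have found a matching sequence and it has an
+ * expansion string. Prepend the expansion to the "work text" and
+ * add the corresponding sorting element. We must also record how many
+ * expanded characters remain to be consumed (alreadyExpanded) and how
+ * far to advance in the source string once they are (idxToMove).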
+ */
+ if (prefix.expansion != null)
+ {
+ work_text = prefix.expansion
+ + work_text.substring (idx+prefix.key.length());
+ idx = 0;
+ v.add (prefix);
+ vi.add (new Integer(idx_idx));
+ if (alreadyExpanded == 0)
+ idxToMove = prefix.key.length();
+ alreadyExpanded += prefix.expansion.length()-prefix.key.length();
+ }
+ else
+ {
+ /* Third case: the simplest. We have found the prefix and it
+ * does not need to be expanded.
+ */
+ v.add (prefix);
+ vi.add (new Integer(idx_idx));
+ idx += prefix.key.length();
+ /* If the sequence is in an expansion, we must decrease the
+ * counter.
+ */
+ if (alreadyExpanded > 0)
+ {
+ alreadyExpanded -= prefix.key.length();
+ if (alreadyExpanded == 0)
+ {
+ idx_idx += idxToMove;
+ idxToMove = 0;
+ }
+ } else
+ idx_idx += prefix.key.length();
+ }
+ }
+
+ text_decomposition = v.toArray();
+ text_indexes = new int[vi.size()+1];
+ for (int i = 0; i < vi.size(); i++)
+ {
+ text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
+ }
+ text_indexes[vi.size()] = text.length();
}
/**
@@ -215,4 +418,50 @@ public final class CollationElementIterator
{
return textIndex;
}
+
+ /**
+ * This method sets the iteration index position into the current
+ * <code>String</code> to the specified value. This value must not
+ * be negative and must not be greater than the last index position
+ * in the <code>String</code>.
+ *
+ * @param offset The new iteration index position.
+ *
+ * @exception IllegalArgumentException If the new offset is not valid.
+ */
+ public void setOffset(int offset)
+ {
+ if (offset < 0)
+ throw new IllegalArgumentException("Negative offset: " + offset);
+
+ if (offset > (text.length() - 1))
+ throw new IllegalArgumentException("Offset too large: " + offset);
+
+ for (index = 0; index < text_decomposition.length; index++)
+ {
+ if (offset <= text_indexes[index])
+ break;
+ }
+ /*
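+ * As text_indexes[0] == 0 and offset >= 0, the loop can only stop at
+ * index 0 when offset == 0, which takes the first branch below. The
+ * second branch is therefore only reached with index > 0, so reading
+ * text_indexes[index-1] needs no extra check.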
+ */
+ if (text_indexes[index] == offset)
+ textIndex = offset;
+ else
+ textIndex = text_indexes[index-1];
+ }
+
+ /**
+ * This method returns the maximum length of any expansion sequence that
+ * ends with the specified collation order value. (Whatever that means).
+ *
+ * @param value The collation order value
+ *
+ * @return The maximum length of an expansion sequence.
+ */
+ public int getMaxExpansion(int value)
+ {
+ return 1;
+ }
}