From 57807c317869610e07a85bf8bf95747638230ed7 Mon Sep 17 00:00:00 2001 From: Michael Koch Date: Mon, 31 May 2004 22:16:31 +0000 Subject: CollationElementIterator.java, [...]: New versions from GNU classpath. 2004-06-01 Michael Koch * java/text/CollationElementIterator.java, java/text/CollationKey.java, java/text/RuleBasedCollator.java: New versions from GNU classpath. * testsuite/libjava.mauve/xfails: Removed all java.text.CollationElementIterator tests. From-SVN: r82510 --- libjava/java/text/CollationElementIterator.java | 269 +++++++++++++++++++++++- 1 file changed, 259 insertions(+), 10 deletions(-) (limited to 'libjava/java/text/CollationElementIterator.java') diff --git a/libjava/java/text/CollationElementIterator.java b/libjava/java/text/CollationElementIterator.java index 1b5a6172e06..a4c21005211 100644 --- a/libjava/java/text/CollationElementIterator.java +++ b/libjava/java/text/CollationElementIterator.java @@ -38,6 +38,8 @@ exception statement from your version. */ package java.text; +import java.util.Vector; + /* Written using "Java Class Libraries", 2nd edition, plus online * API docs for JDK 1.2 from http://www.javasoft.com. * Status: Believed complete and correct to JDK 1.1. @@ -73,14 +75,26 @@ public final class CollationElementIterator */ String text; + /** + * This is the index into the collation decomposition where we are currently scanning. + */ + int index; + /** * This is the index into the String where we are currently scanning. */ int textIndex; - // A piece of lookahead. - boolean lookahead_set; - int lookahead; + /** + * Array containing the collation decomposition of the + * text given to the constructor. + */ + private Object[] text_decomposition; + + /** + * Array containing the index of the specified block. + */ + private int[] text_indexes; /** * This method initializes a new instance of CollationElementIterator @@ -97,6 +111,35 @@ public final class CollationElementIterator setText (text); } + RuleBasedCollator.CollationElement nextBlock() + { + if (index >= text_decomposition.length) + return null; + + RuleBasedCollator.CollationElement e = + (RuleBasedCollator.CollationElement) text_decomposition[index]; + + textIndex = text_indexes[index+1]; + + index++; + + return e; + } + + RuleBasedCollator.CollationElement previousBlock() + { + if (index == 0) + return null; + + index--; + RuleBasedCollator.CollationElement e = + (RuleBasedCollator.CollationElement) text_decomposition[index]; + + textIndex = text_indexes[index+1]; + + return e; + } + /** * This method returns the collation ordering value of the next character sequence * in the string (it may be an extended character following collation rules). @@ -107,10 +150,29 @@ public final class CollationElementIterator */ public int next() { - if (textIndex == text.length()) + RuleBasedCollator.CollationElement e = nextBlock(); + + if (e == null) return NULLORDER; + + return e.getValue(); + } + + /** + * This method returns the collation ordering value of the previous character + * in the string. This method will return NULLORDER if the + * beginning of the string was reached. + * + * @return The collation ordering value. + */ + public int previous() + { + RuleBasedCollator.CollationElement e = previousBlock(); - return collator.ceiNext (this); + if (e == null) + return NULLORDER; + + return e.getValue(); } /** @@ -133,9 +195,8 @@ public final class CollationElementIterator */ public void reset() { + index = 0; textIndex = 0; - lookahead_set = false; - lookahead = 0; } /** @@ -176,10 +237,152 @@ public final class CollationElementIterator */ public void setText(String text) { + int idx = 0; + int idx_idx = 0; + int alreadyExpanded = 0; + int idxToMove = 0; + this.text = text; - this.textIndex = 0; - this.lookahead_set = false; - this.lookahead = 0; + this.index = 0; + + String work_text = text.intern(); + + Vector v = new Vector(); + Vector vi = new Vector(); + + // Build element collection ordered as they come in "text". + while (idx < work_text.length()) + { + String key, key_old; + + Object object = null; + int p = 1; + + // IMPROVE: use a TreeMap with a prefix-ordering rule. + key_old = key = null; + do + { + if (object != null) + key_old = key; + key = work_text.substring (idx, idx+p); + object = collator.prefix_tree.get (key); + if (object != null && idx < alreadyExpanded) + { + RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object; + if (prefix.expansion != null && + prefix.expansion.startsWith(work_text.substring(0, idx))) + { + object = null; + key = key_old; + } + } + p++; + } + while (idx+p <= work_text.length()); + + if (object == null) + key = key_old; + + RuleBasedCollator.CollationElement prefix = + (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key); + + /* + * First case: There is no such sequence in the database. + * We will have to build one from the context. + */ + if (prefix == null) + { + /* + * We are dealing with sequences in an expansion. They + * are treated as accented characters (tertiary order). + */ + if (alreadyExpanded > 0) + { + RuleBasedCollator.CollationElement e = + collator.getDefaultAccentedElement (work_text.charAt (idx)); + + v.add (e); + vi.add (new Integer(idx_idx)); + idx++; + alreadyExpanded--; + if (alreadyExpanded == 0) + { + /* There is not any characters left in the expansion set. + * We can increase the pointer in the source string. + */ + idx_idx += idxToMove; + idxToMove = 0; + } + else + idx_idx++; + } + else + { + /* This is a normal character. */ + RuleBasedCollator.CollationElement e = + collator.getDefaultElement (work_text.charAt (idx)); + Integer i_ref = new Integer(idx_idx); + + /* Don't forget to mark it as a special sequence so the + * string can be ordered. + */ + v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ); + vi.add (i_ref); + v.add (e); + vi.add (i_ref); + idx_idx++; + idx++; + } + continue; + } + + /* + * Second case: Here we have found a matching sequence. + * Here we have an expansion string prepend it to the "work text" and + * add the corresponding sorting element. We must also mark + */ + if (prefix.expansion != null) + { + work_text = prefix.expansion + + work_text.substring (idx+prefix.key.length()); + idx = 0; + v.add (prefix); + vi.add (new Integer(idx_idx)); + if (alreadyExpanded == 0) + idxToMove = prefix.key.length(); + alreadyExpanded += prefix.expansion.length()-prefix.key.length(); + } + else + { + /* Third case: the simplest. We have got the prefix and it + * has not to be expanded. + */ + v.add (prefix); + vi.add (new Integer(idx_idx)); + idx += prefix.key.length(); + /* If the sequence is in an expansion, we must decrease the + * counter. + */ + if (alreadyExpanded > 0) + { + alreadyExpanded -= prefix.key.length(); + if (alreadyExpanded == 0) + { + idx_idx += idxToMove; + idxToMove = 0; + } + } else + idx_idx += prefix.key.length(); + } + } + + text_decomposition = v.toArray(); + text_indexes = new int[vi.size()+1]; + for (int i = 0; i < vi.size(); i++) + { + text_indexes[i] = ((Integer)vi.elementAt(i)).intValue(); + } + text_indexes[vi.size()] = text.length(); } /** @@ -215,4 +418,50 @@ public final class CollationElementIterator { return textIndex; } + + /** + * This method sets the iteration index position into the current + * String to the specified value. This value must not + * be negative and must not be greater than the last index position + * in the String. + * + * @param offset The new iteration index position. + * + * @exception IllegalArgumentException If the new offset is not valid. + */ + public void setOffset(int offset) + { + if (offset < 0) + throw new IllegalArgumentException("Negative offset: " + offset); + + if (offset > (text.length() - 1)) + throw new IllegalArgumentException("Offset too large: " + offset); + + for (index = 0; index < text_decomposition.length; index++) + { + if (offset <= text_indexes[index]) + break; + } + /* + * As text_indexes[0] == 0, we should not have to take care whether index is + * greater than 0. It is always. + */ + if (text_indexes[index] == offset) + textIndex = offset; + else + textIndex = text_indexes[index-1]; + } + + /** + * This method returns the maximum length of any expansion sequence that + * ends with the specified collation order value. (Whatever that means). + * + * @param value The collation order value + * + * @param The maximum length of an expansion sequence. + */ + public int getMaxExpansion(int value) + { + return 1; + } } -- cgit v1.2.3