blob: 22286f177db2508ab3e3c11aaf4b04f6feef6a5b [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/search/Attic/SearchTest.java,v $
* $Date: 2000/03/10 03:47:47 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
package com.ibm.test.search;
import java.text.*;
import java.util.*;
//import com.ibm.text.*;
import com.ibm.text.SearchIterator;
import com.ibm.text.StringSearch;
/**
 * Unit and regression tests for the StringSearch and SearchIterator classes.
 * This class extends <code>TestFmwk</code>, which is used as the framework
 * for running the tests and displaying the output. Basically, any method
 * here that starts with <code>Test</code> is run as a test.
 */
public class SearchTest extends com.ibm.test.TestFmwk {

    /**
     * Command-line entry point: hands the arguments to the test framework.
     */
    public static void main(String[] args) throws Exception {
        new SearchTest().run(args);
    }

    //-----------------------------------------------------------
    // Static data: collators and break iterators to use for testing
    //
    // NOTE: static initializers run in textual order. The collators,
    // enWord and testString must stay above the testCases table, which
    // reads them while it is being built.
    //
    static RuleBasedCollator enColl;    // Generic English collator
    static RuleBasedCollator frColl;    // French accent rules
    static RuleBasedCollator esColl;    // Has Spanish contracting "ch"
    static RuleBasedCollator deColl;    // Has expansions, e.g. a-umlaut -> ae

    static {
        try {
            enColl = (RuleBasedCollator) Collator.getInstance(Locale.US);
            frColl = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
            esColl = new RuleBasedCollator(enColl.getRules() + " & C < ch ; cH ; Ch ; CH");
            // The umlauts are written as Unicode escapes (as in the rest of
            // this file) so the rules do not depend on the encoding used to
            // compile this source file.
            deColl = new RuleBasedCollator(enColl.getRules()
                    + " & ae ; \u00E4 & AE ; \u00C4"
                    + " & oe ; \u00F6 & OE ; \u00D6"
                    + " & ue ; \u00FC & UE ; \u00DC");
        } catch (ParseException e) {
            // Do not swallow this silently: the collators that were not
            // built stay null, and without this report the only symptom
            // would be an unexplained NullPointerException in a later test.
            System.err.println("SearchTest: unable to build test collators: " + e);
            e.printStackTrace();
        }
    }

    // Word-boundary detector shared by the table-driven test cases
    static BreakIterator enWord = BreakIterator.getWordInstance(Locale.US);

    // Shared target text for the table-driven test cases below
    static String testString =
            "blackbirds Pat p\u00E9ch\u00E9 " +
            "p\u00EAche p\u00E9cher p\u00EAcher " +
            "Tod T\u00F6ne black Tofu blackbirds " +
            "Ton PAT toehold " +
            "blackbird " +
            "black-bird pat " +
            "toe big Toe";

    //-------------------------------------------------------------------------
    // The primary test consists of running through all of the strings in this
    // table and making sure we find the proper matches
    //
    /**
     * One row of the test table: a search configuration plus the offsets
     * in the target at which matches are expected.
     */
    static class TestCase {
        RuleBasedCollator collator;     // collator handed to StringSearch
        int strength;                   // strength set on the search
        BreakIterator breaker;          // break iterator for the search, or null
        String pattern;                 // text to search for
        String target;                  // text to search in
        int[] matches;                  // expected match offsets, ascending

        TestCase(RuleBasedCollator c, int strength, BreakIterator breaker,
                 String pattern, String target, int[] matches) {
            this.collator = c;
            this.strength = strength;
            this.breaker = breaker;
            this.pattern = pattern;
            this.target = target;
            this.matches = matches;
        }
    }

    static TestCase[] testCases = {
        new TestCase(enColl, Collator.PRIMARY, null, "fox",
              // 012345678901234567890123456789012345678901234567890123456789
                "The quick brown fox jumps over the lazy foxes",
                new int[] { 16, 40 }
        ),
        new TestCase(enColl, Collator.PRIMARY, enWord, "fox",
              // 012345678901234567890123456789012345678901234567890123456789
                "The quick brown fox jumps over the lazy foxes",
                new int[] { 16 }
        ),
        new TestCase(frColl, Collator.PRIMARY, null, "peche",
                testString,
                new int[] { 15, 21, 27, 34 }
        ),
        new TestCase(frColl, Collator.PRIMARY, enWord, "blackbird",
                testString,
                new int[] { 88, 98 }
        ),
        // NOTE: this case depends on a bug fix in JDK 1.2.2 ("Cricket")
        new TestCase(deColl, Collator.PRIMARY, null, "toe",
              // 012345678901234567890123456789012345678901234567890123456789
                "This is a toe T\u00F6ne",
                new int[] { 10, 14 }
        ),
        /* Due to a bug in the JDK 1.2 FCS version of CollationElementIterator,
         * searching through text containing contracting character sequences
         * isn't working properly right now. This will probably be fixed in
         * JDK 1.3 ("Kestrel"). When it is, uncomment these test cases.
         *
        new TestCase(esColl, Collator.PRIMARY, enWord, "channel",
              // 0123456789012345678901234567890123456789012345678901234567890123456789
                "A channel, another CHANNEL, more Channels, and one last channel...",
                new int[] { }
        ),
        new TestCase(esColl, Collator.TERTIARY, enWord, "Channel",
              // 0123456789012345678901234567890123456789012345678901234567890123456789
                "Channel, another channel, more channels, and one last Channel",
                new int[] { }
        ),
        */
    };

    /**
     * Test using the test cases defined above
     */
    public void TestCases() {
        for (int t = 0; t < testCases.length; t++) {
            logln("case " + t);
            TestCase c = testCases[t];
            StringSearch iter = new StringSearch(c.pattern,
                    new StringCharacterIterator(c.target),
                    c.collator, c.breaker);
            iter.setStrength(c.strength);
            doTestCase(iter, c.matches);
        }
    }

    /**
     * Test for SearchIterator.setOverlapping()
     */
    public void TestOverlapping() {
        // Create a search iterator.
        StringSearch iter = new StringSearch("abab",
                new StringCharacterIterator("abababab"),
                enColl, null);

        int[] overlap = new int[] { 0, 2, 4 };  // expected results
        int[] novrlap = new int[] { 0, 4 };

        doTestCase(iter, overlap);              // Overlapping is allowed by default
        if (!iter.isOverlapping()) {
            errln("ERROR: isOverlapping returned " + iter.isOverlapping());
        }

        iter.setOverlapping(false);             // Turn 'em back off
        doTestCase(iter, novrlap);
        if (iter.isOverlapping()) {
            errln("ERROR: isOverlapping returned " + iter.isOverlapping());
        }

        iter.setOverlapping(true);
        doTestCase(iter, overlap);
        if (!iter.isOverlapping()) {
            errln("ERROR: isOverlapping returned " + iter.isOverlapping());
        }
    }

    /**
     * Test for SearchIterator.setBreakIterator
     */
    public void TestBreakIterator() {
        StringSearch iter = new StringSearch("fox",
                new StringCharacterIterator("foxy fox"),
                enColl, null);

        BreakIterator charBreaker = BreakIterator.getCharacterInstance(Locale.US);
        BreakIterator wordBreaker = BreakIterator.getWordInstance(Locale.US);

        int[] chars = new int[] { 0, 5 };       // expected results
        int[] words = new int[] { 5 };

        logln("default break iterator...");
        doTestCase(iter, chars);                // character breaker by default

        logln("word break iterator...");
        iter.setBreakIterator(wordBreaker);     // word break detection
        doTestCase(iter, words);
        if (iter.getBreakIterator() != wordBreaker) {
            errln("ERROR: getBreakIterator returned wrong object");
        }

        logln("char break iterator...");
        iter.setBreakIterator(charBreaker);     // char break detection
        doTestCase(iter, chars);
        if (iter.getBreakIterator() != charBreaker) {
            errln("ERROR: getBreakIterator returned wrong object");
        }

        logln("null break iterator...");
        iter.setBreakIterator(null);
        doTestCase(iter, chars);
        if (iter.getBreakIterator() != null) {
            errln("ERROR: getBreakIterator returned wrong object");
        }
    }

    /**
     * Test for SearchIterator.setTarget
     */
    public void TestSetTarget() {
        String pat = "fox";
        String targ1 = "the foxy brown fox";
        String targ2 = "the quick brown fox";

        int[] match1 = new int[] { 4, 15 };     // expected results
        int[] match2 = new int[] { 16 };

        StringSearch iter = new StringSearch(pat, new StringCharacterIterator(targ1),
                enColl, null);

        logln("initial text...");
        doTestCase(iter, match1);
        assertEqual(iter.getTarget(), targ1);

        logln("target #2...");
        iter.setTarget(new StringCharacterIterator(targ2));
        doTestCase(iter, match2);
        assertEqual(iter.getTarget(), targ2);

        logln("back to target #1...");
        iter.setTarget(new StringCharacterIterator(targ1));
        doTestCase(iter, match1);
        assertEqual(iter.getTarget(), targ1);
    }

    /**
     * Test for StringSearch.setStrength
     */
    public void TestSetStrength() {
        String pat = "fox";
        String targ = "the foxy brown Fox";

        int[] match3 = new int[] { 4 };         // expected results

        StringSearch iter = new StringSearch(pat, new StringCharacterIterator(targ),
                enColl, null);

        /* The primary-strength half of this test is disabled in the original
         * source; it is kept here, together with its expected results, so it
         * can be restored.
         *
        int[] match1 = new int[] { 4, 15 };
        logln("Trying primary strength...");
        iter.setStrength(Collator.PRIMARY);
        doTestCase(iter, match1);
        if (iter.getStrength() != Collator.PRIMARY) {
            errln("ERROR: getStrength: expected PRIMARY, got " + iter.getStrength());
        } */

        logln("Trying tertiary strength...");
        iter.setStrength(Collator.TERTIARY);
        doTestCase(iter, match3);
        if (iter.getStrength() != Collator.TERTIARY) {
            // The message names the strength that is actually checked here.
            errln("ERROR: getStrength: expected TERTIARY, got " + iter.getStrength());
        }
    }

    /**
     * Test for StringSearch.setCollator
     */
    public void TestSetCollator() throws ParseException {
        // Create a test collator that thinks "o" and "p" are the same thing
        RuleBasedCollator testColl = new RuleBasedCollator(enColl.getRules()
                + "& o,O ; p,P" );

        String pat = "fox";
        String targ = "fox fpx ";

        int[] match1 = new int[] { 0 };         // English results
        int[] match2 = new int[] { 0, 4 };      // Test collator results

        StringSearch iter = new StringSearch(pat, new StringCharacterIterator(targ),
                enColl, null);

        logln("Trying English collator...");
        iter.setStrength(Collator.PRIMARY);
        doTestCase(iter, match1);
        if (iter.getCollator() != enColl) {
            errln("ERROR: getCollator returned wrong collator");
        }

        logln("Trying test collator...");
        iter.setCollator(testColl);
        iter.setStrength(Collator.PRIMARY);
        doTestCase(iter, match2);
        if (iter.getCollator() != testColl) {
            errln("ERROR: getCollator returned wrong collator");
        }

        logln("Trying English collator again...");
        iter.setCollator(enColl);
        iter.setStrength(Collator.PRIMARY);
        doTestCase(iter, match1);
        if (iter.getCollator() != enColl) {
            errln("ERROR: getCollator returned wrong collator");
        }
    }

    /**
     * Test for StringSearch.setPattern
     */
    public void TestSetPattern() {
        //               01234567890123456789012345678901234567890123456789
        String target = "The quick brown fox jumps over the lazy foxes";
        String pat1 = "the";
        String pat2 = "fox";

        int[] match1 = new int[] { 0, 31 };
        int[] match2 = new int[] { 16, 40 };

        StringSearch iter = new StringSearch(pat1, new StringCharacterIterator(target),
                enColl, null);
        iter.setStrength(Collator.PRIMARY);

        doTestCase(iter, match1);
        if (!iter.getPattern().equals(pat1)) {
            errln("getPattern returned '" + iter.getPattern() + "', expected '"
                    + pat1 + "'");
        }

        iter.setPattern(pat2);
        doTestCase(iter, match2);
        if (!iter.getPattern().equals(pat2)) {
            // Report the pattern that was actually expected at this point.
            errln("getPattern returned '" + iter.getPattern() + "', expected '"
                    + pat2 + "'");
        }

        iter.setPattern(pat1);
        doTestCase(iter, match1);
        if (!iter.getPattern().equals(pat1)) {
            errln("getPattern returned '" + iter.getPattern() + "', expected '"
                    + pat1 + "'");
        }
    }

    /**
     * Test for an infinite loop that happened when the target text started
     * with an ignorable character.
     * Reported by Muly Oved, <mulyoved@netvision.net.il>
     */
    public void TestIgnorableLoop() {
        String pattern = "go";
        String target = " on";

        try {
            StringSearch search =
                    new StringSearch(pattern, new StringCharacterIterator(target), enColl);
            logln("searching... " + pattern);
            search.first();
            logln("Will never go here if searching for 'go'");
        } catch (Exception e) {
            errln("Caught exception: " + e.toString());
        }
        // Goes through the framework log like every other message in this
        // class, rather than straight to System.out.
        logln("end");
    }

    //-------------------------------------------------------------------------
    // Various internal utility methods....
    //-------------------------------------------------------------------------

    /**
     * Reports an error unless the text of <code>i1</code> is the same,
     * character for character, as <code>s2</code>. Both iterators are
     * rewound with <code>first()</code> before comparing, so the position
     * of <code>i1</code> is changed by this call.
     *
     * @param i1 iterator over the actual text
     * @param s2 the expected text
     */
    void assertEqual(CharacterIterator i1, String s2) {
        CharacterIterator i2 = new StringCharacterIterator(s2);

        char c1 = i1.first();
        char c2 = i2.first();
        int i = 0;      // number of characters that matched so far

        while (c1 == c2 && c1 != CharacterIterator.DONE) {
            c1 = i1.next();
            c2 = i2.next();
            i++;        // keep the reported mismatch position accurate
        }

        // Anything other than both iterators being exhausted is a mismatch
        // (different characters, or one text is a prefix of the other).
        if (c1 != CharacterIterator.DONE || c2 != CharacterIterator.DONE) {
            errln("CharacterIterator mismatch at index " + i);
        }
    }

    /**
     * Runs the given search forwards and then backwards over its target
     * and checks both sets of matches against the expected offsets.
     *
     * @param iter     the configured search to exercise
     * @param expected the expected match offsets, in ascending order
     */
    void doTestCase(StringSearch iter, int[] expected) {
        //
        // The basic logic here is as follows... We construct a search
        // iterator and use it to find all of the matches in the target
        // text. Then we compare it to the expected matches
        //
        Vector matches = new Vector();

        for (int i = iter.first(); i != SearchIterator.DONE; i = iter.next()) {
            matches.addElement(new Integer(i));
        }
        compareMatches(expected, matches);

        // Now do the same exact thing as above, but in reverse
        logln("Now searching in reverse...");
        matches.removeAllElements();

        for (int i = iter.last(); i != SearchIterator.DONE; i = iter.previous()) {
            // Insert at the front so the list ends up in ascending order
            matches.insertElementAt(new Integer(i), 0);
        }
        compareMatches(expected, matches);
    }

    /**
     * Utility function used by TestCases to compare the matches that
     * were found against the ones that were expected
     *
     * @param expected the expected match offsets, in ascending order
     * @param found    the offsets actually found, as Integer objects in
     *                 ascending order
     */
    void compareMatches(int[] expected, Vector found) {
        // Step through the two arrays in parallel and make sure that they're
        // the same
        int e = 0, f = 0;

        while (e < expected.length && f < found.size()) {
            int eVal = expected[e];
            int fVal = ((Integer) found.elementAt(f)).intValue();

            if (eVal < fVal) {
                errln("Missed expected match at " + eVal);
                e++;
            } else if (eVal > fVal) {
                errln("Found unexpected match at " + fVal);
                f++;
            } else {
                e++;
                f++;
            }
        }

        // Whatever is left over on either side has no counterpart
        while (e < expected.length) {
            errln("Missed expected match at " + expected[e]);
            e++;
        }
        while (f < found.size()) {
            int fVal = ((Integer) found.elementAt(f)).intValue();
            errln("Found unexpected match at " + fVal);
            f++;
        }
    }
}